@mui/internal-code-infra 0.0.4-canary.4 → 0.0.4-canary.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -8
- package/build/babel-config.d.mts +11 -3
- package/build/brokenLinksChecker/crawlWorker.d.mts +1 -0
- package/build/brokenLinksChecker/index.d.mts +45 -2
- package/build/changelog/types.d.ts +1 -1
- package/build/cli/cmdArgosPush.d.mts +2 -2
- package/build/cli/cmdBuild.d.mts +2 -2
- package/build/cli/cmdCopyFiles.d.mts +2 -2
- package/build/cli/cmdExtractErrorCodes.d.mts +2 -2
- package/build/cli/cmdGenerateChangelog.d.mts +2 -2
- package/build/cli/cmdGithubAuth.d.mts +2 -2
- package/build/cli/cmdListWorkspaces.d.mts +4 -2
- package/build/cli/cmdNetlifyIgnore.d.mts +2 -2
- package/build/cli/cmdPublish.d.mts +4 -2
- package/build/cli/cmdPublishCanary.d.mts +3 -2
- package/build/cli/cmdPublishNewPackage.d.mts +4 -2
- package/build/cli/cmdSetVersionOverrides.d.mts +2 -2
- package/build/cli/cmdVale.d.mts +46 -0
- package/build/cli/cmdValidateBuiltTypes.d.mts +2 -2
- package/build/eslint/baseConfig.d.mts +3 -1
- package/build/eslint/mui/rules/disallow-react-api-in-server-components.d.mts +2 -2
- package/build/eslint/mui/rules/docgen-ignore-before-comment.d.mts +2 -2
- package/build/eslint/mui/rules/no-guarded-throw.d.mts +31 -0
- package/build/eslint/mui/rules/no-restricted-resolved-imports.d.mts +2 -2
- package/build/eslint/mui/rules/nodeEnvUtils.d.mts +18 -0
- package/build/markdownlint/duplicate-h1.d.mts +1 -1
- package/build/markdownlint/git-diff.d.mts +1 -1
- package/build/markdownlint/index.d.mts +1 -1
- package/build/markdownlint/straight-quotes.d.mts +1 -1
- package/build/markdownlint/table-alignment.d.mts +1 -1
- package/build/markdownlint/terminal-language.d.mts +1 -1
- package/build/remark/config.d.mts +43 -0
- package/build/remark/createLintTester.d.mts +10 -0
- package/build/remark/firstBlockHeading.d.mts +4 -0
- package/build/remark/gitDiff.d.mts +2 -0
- package/build/remark/noSpaceInLinks.d.mts +2 -0
- package/build/remark/straightQuotes.d.mts +2 -0
- package/build/remark/tableAlignment.d.mts +2 -0
- package/build/remark/terminalLanguage.d.mts +2 -0
- package/build/utils/build.d.mts +3 -3
- package/build/utils/github.d.mts +1 -1
- package/build/utils/pnpm.d.mts +68 -2
- package/build/utils/testUtils.d.mts +7 -0
- package/package.json +59 -32
- package/src/babel-config.mjs +9 -3
- package/src/brokenLinksChecker/__fixtures__/static-site/index.html +1 -0
- package/src/brokenLinksChecker/__fixtures__/static-site/invalid-html.html +15 -0
- package/src/brokenLinksChecker/crawlWorker.mjs +212 -0
- package/src/brokenLinksChecker/index.mjs +215 -164
- package/src/brokenLinksChecker/index.test.ts +43 -13
- package/src/changelog/categorizeCommits.test.ts +5 -5
- package/src/changelog/fetchChangelogs.mjs +6 -2
- package/src/changelog/parseCommitLabels.test.ts +5 -5
- package/src/changelog/renderChangelog.mjs +1 -1
- package/src/changelog/types.ts +1 -1
- package/src/cli/cmdListWorkspaces.mjs +9 -2
- package/src/cli/cmdNetlifyIgnore.mjs +4 -88
- package/src/cli/cmdPublish.mjs +51 -14
- package/src/cli/cmdPublishCanary.mjs +139 -107
- package/src/cli/cmdPublishNewPackage.mjs +27 -6
- package/src/cli/cmdVale.mjs +513 -0
- package/src/cli/cmdVale.test.mjs +644 -0
- package/src/cli/index.mjs +2 -0
- package/src/eslint/baseConfig.mjs +45 -20
- package/src/eslint/docsConfig.mjs +2 -1
- package/src/eslint/jsonConfig.mjs +2 -1
- package/src/eslint/mui/config.mjs +20 -1
- package/src/eslint/mui/index.mjs +2 -0
- package/src/eslint/mui/rules/no-guarded-throw.mjs +115 -0
- package/src/eslint/mui/rules/no-guarded-throw.test.mjs +206 -0
- package/src/eslint/mui/rules/nodeEnvUtils.mjs +52 -0
- package/src/eslint/mui/rules/require-dev-wrapper.mjs +25 -40
- package/src/eslint/testConfig.mjs +2 -1
- package/src/estree-typescript.d.ts +1 -1
- package/src/remark/config.mjs +157 -0
- package/src/remark/createLintTester.mjs +19 -0
- package/src/remark/firstBlockHeading.mjs +87 -0
- package/src/remark/firstBlockHeading.test.mjs +107 -0
- package/src/remark/gitDiff.mjs +43 -0
- package/src/remark/gitDiff.test.mjs +45 -0
- package/src/remark/noSpaceInLinks.mjs +42 -0
- package/src/remark/noSpaceInLinks.test.mjs +22 -0
- package/src/remark/straightQuotes.mjs +31 -0
- package/src/remark/straightQuotes.test.mjs +25 -0
- package/src/remark/tableAlignment.mjs +23 -0
- package/src/remark/tableAlignment.test.mjs +28 -0
- package/src/remark/terminalLanguage.mjs +19 -0
- package/src/remark/terminalLanguage.test.mjs +17 -0
- package/src/untyped-plugins.d.ts +11 -11
- package/src/utils/build.test.mjs +546 -575
- package/src/utils/pnpm.mjs +192 -3
- package/src/utils/pnpm.test.mjs +580 -0
- package/src/utils/testUtils.mjs +18 -0
- package/src/utils/typescript.test.mjs +249 -272
- package/vale/.vale.ini +1 -0
- package/vale/styles/MUI/CorrectReferenceAllCases.yml +43 -0
- package/vale/styles/MUI/CorrectRererenceCased.yml +14 -0
- package/vale/styles/MUI/GoogleLatin.yml +11 -0
- package/vale/styles/MUI/MuiBrandName.yml +22 -0
- package/vale/styles/MUI/NoBritish.yml +112 -0
- package/vale/styles/MUI/NoCompanyName.yml +17 -0
|
@@ -1,21 +1,17 @@
|
|
|
1
1
|
/* eslint-disable no-console */
|
|
2
2
|
import { execaCommand } from 'execa';
|
|
3
3
|
import timers from 'node:timers/promises';
|
|
4
|
-
import { parse } from 'node-html-parser';
|
|
5
4
|
import * as fs from 'node:fs/promises';
|
|
6
5
|
import * as path from 'node:path';
|
|
6
|
+
import { pathToFileURL } from 'node:url';
|
|
7
7
|
import chalk from 'chalk';
|
|
8
8
|
import { Transform } from 'node:stream';
|
|
9
|
-
import
|
|
10
|
-
import { unified } from 'unified';
|
|
11
|
-
import remarkParse from 'remark-parse';
|
|
12
|
-
import remarkGfm from 'remark-gfm';
|
|
13
|
-
import remarkRehype from 'remark-rehype';
|
|
14
|
-
import rehypeSlug from 'rehype-slug';
|
|
15
|
-
import rehypeStringify from 'rehype-stringify';
|
|
9
|
+
import { Worker } from 'node:worker_threads';
|
|
16
10
|
|
|
17
11
|
const DEFAULT_CONCURRENCY = 4;
|
|
18
12
|
|
|
13
|
+
const crawlWorkerUrl = new URL('./crawlWorker.mjs', import.meta.url);
|
|
14
|
+
|
|
19
15
|
/**
|
|
20
16
|
* Creates a Transform stream that prefixes each line with a given string.
|
|
21
17
|
* Useful for distinguishing server logs from other output.
|
|
@@ -105,6 +101,30 @@ function deserializeLinkStructure(data) {
|
|
|
105
101
|
return linkStructure;
|
|
106
102
|
}
|
|
107
103
|
|
|
104
|
+
/**
|
|
105
|
+
* Input data passed to the crawl worker via workerData.
|
|
106
|
+
* @typedef {Object} CrawlWorkerInput
|
|
107
|
+
* @property {string} pageUrl - The page URL to crawl
|
|
108
|
+
* @property {ResolvedCrawlOptions} options - Fully resolved crawl options
|
|
109
|
+
*/
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Serialized page data returned by the crawl worker (uses arrays instead of Sets for structured clone).
|
|
113
|
+
* @typedef {Object} CrawlWorkerPageData
|
|
114
|
+
* @property {string} url - The normalized page URL
|
|
115
|
+
* @property {number} status - HTTP status code
|
|
116
|
+
* @property {string[]} targets - Array of anchor targets (e.g., '#intro')
|
|
117
|
+
* @property {string} contentType - Content-type of the page
|
|
118
|
+
*/
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Output message posted by the crawl worker.
|
|
122
|
+
* @typedef {Object} CrawlWorkerOutput
|
|
123
|
+
* @property {CrawlWorkerPageData} pageData - Serialized page data
|
|
124
|
+
* @property {Link[]} links - Links discovered on the page
|
|
125
|
+
* @property {{ pageUrl: string, results: import('html-validate').Result[] } | null} htmlValidateResults - HTML validation results, or null if validation was skipped/passed
|
|
126
|
+
*/
|
|
127
|
+
|
|
108
128
|
/**
|
|
109
129
|
* Data about a crawled page including its URL, HTTP status, and available link targets.
|
|
110
130
|
* @typedef {Object} PageData
|
|
@@ -131,77 +151,6 @@ async function writePagesToFile(pages, outPath) {
|
|
|
131
151
|
await fs.writeFile(outPath, JSON.stringify(fileContent, null, 2), 'utf-8');
|
|
132
152
|
}
|
|
133
153
|
|
|
134
|
-
/**
|
|
135
|
-
* Computes the accessible name of an element according to ARIA rules.
|
|
136
|
-
* Polyfill for `node.computedName` available only in Chrome v112+.
|
|
137
|
-
* Checks in order: aria-label, aria-labelledby, label[for], img alt, innerText.
|
|
138
|
-
* @param {import('node-html-parser').HTMLElement | null} elm - Element to compute name for
|
|
139
|
-
* @param {import('node-html-parser').HTMLElement} ownerDocument - Document containing the element
|
|
140
|
-
* @returns {string} The computed accessible name, or empty string if none found
|
|
141
|
-
*/
|
|
142
|
-
function getAccessibleName(elm, ownerDocument) {
|
|
143
|
-
if (!elm) {
|
|
144
|
-
return '';
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
// 1. aria-label
|
|
148
|
-
const ariaLabel = elm.getAttribute('aria-label')?.trim();
|
|
149
|
-
if (ariaLabel) {
|
|
150
|
-
return ariaLabel;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// 2. aria-labelledby
|
|
154
|
-
const labelledby = elm.getAttribute('aria-labelledby');
|
|
155
|
-
if (labelledby) {
|
|
156
|
-
const labels = [];
|
|
157
|
-
for (const id of labelledby.split(/\s+/)) {
|
|
158
|
-
const label = getAccessibleName(ownerDocument.getElementById(id), ownerDocument);
|
|
159
|
-
if (label) {
|
|
160
|
-
labels.push(label);
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
const label = labels.join(' ').trim();
|
|
164
|
-
if (label) {
|
|
165
|
-
return label;
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// 3. <label for="id">
|
|
170
|
-
if (elm.id) {
|
|
171
|
-
const label = ownerDocument.querySelector(`label[for="${elm.id}"]`);
|
|
172
|
-
if (label) {
|
|
173
|
-
return getAccessibleName(label, ownerDocument);
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// 4. <img alt="">
|
|
178
|
-
if (elm.tagName === 'IMG') {
|
|
179
|
-
const alt = elm.getAttribute('alt')?.trim();
|
|
180
|
-
if (alt) {
|
|
181
|
-
return alt;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
// 5. Fallback: visible text
|
|
186
|
-
return elm.innerText.trim();
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
/**
|
|
190
|
-
* Converts markdown content to HTML using unified pipeline.
|
|
191
|
-
* @param {string} markdown - Raw markdown content
|
|
192
|
-
* @returns {Promise<string>} Converted HTML string
|
|
193
|
-
*/
|
|
194
|
-
async function markdownToHtml(markdown) {
|
|
195
|
-
const result = await unified()
|
|
196
|
-
.use(remarkParse)
|
|
197
|
-
.use(remarkGfm)
|
|
198
|
-
.use(remarkRehype)
|
|
199
|
-
.use(rehypeSlug)
|
|
200
|
-
.use(rehypeStringify)
|
|
201
|
-
.process(markdown);
|
|
202
|
-
return String(result);
|
|
203
|
-
}
|
|
204
|
-
|
|
205
154
|
/**
|
|
206
155
|
* Generic concurrent task queue with configurable concurrency limit.
|
|
207
156
|
* Processes tasks in FIFO order with a maximum number of concurrent workers.
|
|
@@ -402,11 +351,30 @@ function shouldIgnoreLink(link, ignores) {
|
|
|
402
351
|
* @property {number} [concurrency] - Number of concurrent page fetches (defaults to 4)
|
|
403
352
|
* @property {string[]} [seedUrls] - Starting URLs for the crawl (defaults to ['/'])
|
|
404
353
|
* @property {IgnoreRule[]} [ignores] - Rules to ignore broken links. Each rule can have path, href, contentType, and/or has properties. All specified properties must match (AND logic). Within a property, multiple values use OR logic.
|
|
354
|
+
* @property {HtmlValidateOption} [htmlValidate] - Enable HTML validation on crawled pages. `false` (default): disabled. `true`: validate with recommended rules. Object: use as html-validate config — `extends` defaults to `['mui:recommended']` when omitted, so most callers only need to set `rules`. Array: per-path config overrides — every entry whose `path` matches the page URL contributes to the merged config (later entries win on conflicting rule keys); an entry without `path` matches every page (use as a baseline and layer more specific overrides on top). If no entry matches, the page is not validated.
|
|
355
|
+
* @property {boolean} [verbose] - Log extra diagnostics during crawling (e.g. resolved html-validate config per page). Defaults to `false`.
|
|
356
|
+
*/
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* Per-page HTML validation override entry.
|
|
360
|
+
* @typedef {Object} HtmlValidateOverride
|
|
361
|
+
* @property {(string | RegExp) | (string | RegExp)[]} [path] - Pattern(s) to match the page URL. Strings use exact match. Omit to match every page.
|
|
362
|
+
* @property {true | import('html-validate').ConfigData} config - html-validate config (or `true` for `mui:recommended`).
|
|
363
|
+
*/
|
|
364
|
+
|
|
365
|
+
/**
|
|
366
|
+
* Public shape of the htmlValidate option.
|
|
367
|
+
* @typedef {boolean | import('html-validate').ConfigData | HtmlValidateOverride[]} HtmlValidateOption
|
|
368
|
+
*/
|
|
369
|
+
|
|
370
|
+
/**
|
|
371
|
+
* Resolved per-page HTML validation entry. Empty array means validation is disabled.
|
|
372
|
+
* @typedef {{ path: (string | RegExp)[] | undefined, config: import('html-validate').ConfigData }} ResolvedHtmlValidateEntry
|
|
405
373
|
*/
|
|
406
374
|
|
|
407
375
|
/**
|
|
408
376
|
* Fully resolved configuration with all optional fields filled with defaults.
|
|
409
|
-
* @typedef {Omit<Required<CrawlOptions>, 'ignores'> & { ignores: NormalizedIgnoreRule[] }} ResolvedCrawlOptions
|
|
377
|
+
* @typedef {Omit<Required<CrawlOptions>, 'ignores' | 'htmlValidate'> & { ignores: NormalizedIgnoreRule[], htmlValidate: ResolvedHtmlValidateEntry[] }} ResolvedCrawlOptions
|
|
410
378
|
*/
|
|
411
379
|
|
|
412
380
|
/**
|
|
@@ -422,6 +390,40 @@ function validateIgnoreRule(rule) {
|
|
|
422
390
|
}
|
|
423
391
|
}
|
|
424
392
|
|
|
393
|
+
/**
|
|
394
|
+
* Normalizes a single config value to a non-null html-validate config object.
|
|
395
|
+
* Defaults `extends` to `['mui:recommended']` when the caller did not provide
|
|
396
|
+
* one, so overrides typically only need to specify the `rules` they want to
|
|
397
|
+
* change. To opt out of the default, pass `extends: []` explicitly.
|
|
398
|
+
* @param {true | import('html-validate').ConfigData} config
|
|
399
|
+
* @returns {import('html-validate').ConfigData}
|
|
400
|
+
*/
|
|
401
|
+
function normalizeHtmlValidateConfig(config) {
|
|
402
|
+
if (config === true) {
|
|
403
|
+
return { extends: ['mui:recommended'] };
|
|
404
|
+
}
|
|
405
|
+
return { extends: ['mui:recommended'], ...config };
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Resolves the htmlValidate option into an array of per-page entries.
|
|
410
|
+
* An empty array means validation is disabled.
|
|
411
|
+
* @param {HtmlValidateOption | undefined} option
|
|
412
|
+
* @returns {ResolvedHtmlValidateEntry[]}
|
|
413
|
+
*/
|
|
414
|
+
function resolveHtmlValidateConfig(option) {
|
|
415
|
+
if (!option) {
|
|
416
|
+
return [];
|
|
417
|
+
}
|
|
418
|
+
if (option === true || !Array.isArray(option)) {
|
|
419
|
+
return [{ path: undefined, config: normalizeHtmlValidateConfig(option) }];
|
|
420
|
+
}
|
|
421
|
+
return option.map((entry) => ({
|
|
422
|
+
path: normalizeToArray(entry.path),
|
|
423
|
+
config: normalizeHtmlValidateConfig(entry.config),
|
|
424
|
+
}));
|
|
425
|
+
}
|
|
426
|
+
|
|
425
427
|
/**
|
|
426
428
|
* Resolves partial crawl options by filling in defaults for all optional fields.
|
|
427
429
|
* @param {CrawlOptions} rawOptions - Partial options from user
|
|
@@ -447,6 +449,8 @@ function resolveOptions(rawOptions) {
|
|
|
447
449
|
concurrency: rawOptions.concurrency ?? DEFAULT_CONCURRENCY,
|
|
448
450
|
seedUrls: rawOptions.seedUrls ?? ['/'],
|
|
449
451
|
ignores: normalizedIgnores,
|
|
452
|
+
htmlValidate: resolveHtmlValidateConfig(rawOptions.htmlValidate),
|
|
453
|
+
verbose: rawOptions.verbose ?? false,
|
|
450
454
|
};
|
|
451
455
|
}
|
|
452
456
|
|
|
@@ -506,25 +510,42 @@ async function resolveKnownTargets(options) {
|
|
|
506
510
|
|
|
507
511
|
/**
|
|
508
512
|
* Represents a broken link or broken link target discovered during crawling.
|
|
509
|
-
* @typedef {Object}
|
|
513
|
+
* @typedef {Object} BrokenLinkIssue
|
|
510
514
|
* @property {'broken-link' | 'broken-target'} type - Type of issue: 'broken-link' for 404 pages, 'broken-target' for missing anchors
|
|
511
515
|
* @property {string} message - Human-readable description of the issue (e.g., 'Target not found', 'Page returned error 404')
|
|
512
516
|
* @property {Link} link - The link object that has the issue
|
|
513
517
|
*/
|
|
514
518
|
|
|
519
|
+
/**
|
|
520
|
+
* Represents an HTML validation issue found on a crawled page.
|
|
521
|
+
* @typedef {Object} HtmlValidateIssue
|
|
522
|
+
* @property {'html-validate'} type - Issue type discriminator
|
|
523
|
+
* @property {string} message - Human-readable description of the issue
|
|
524
|
+
* @property {string} pageUrl - The page URL where the issue was found
|
|
525
|
+
* @property {string} ruleId - The html-validate rule that triggered this issue (e.g., 'no-dup-id')
|
|
526
|
+
* @property {number} severity - Severity level (1 = warning, 2 = error)
|
|
527
|
+
* @property {{ line: number, column: number }} location - Source location of the issue
|
|
528
|
+
* @property {string | null} selector - DOM selector for the element, or null
|
|
529
|
+
*/
|
|
530
|
+
|
|
531
|
+
/**
|
|
532
|
+
* Any issue discovered during crawling.
|
|
533
|
+
* @typedef {BrokenLinkIssue | HtmlValidateIssue} Issue
|
|
534
|
+
*/
|
|
535
|
+
|
|
515
536
|
/**
|
|
516
537
|
* Results from a complete crawl operation.
|
|
517
538
|
* @typedef {Object} CrawlResult
|
|
518
539
|
* @property {Set<Link>} links - All links discovered during the crawl
|
|
519
540
|
* @property {Map<string, PageData>} pages - All pages crawled, keyed by normalized URL
|
|
520
|
-
* @property {Issue[]} issues - All broken links
|
|
541
|
+
* @property {Issue[]} issues - All issues found (broken links, broken targets, and HTML validation issues)
|
|
521
542
|
*/
|
|
522
543
|
|
|
523
544
|
/**
|
|
524
545
|
* Reports broken links to stderr, grouped by source page for better readability.
|
|
525
|
-
* @param {
|
|
546
|
+
* @param {BrokenLinkIssue[]} issuesList - Array of broken link issues to report
|
|
526
547
|
*/
|
|
527
|
-
function
|
|
548
|
+
function reportBrokenLinks(issuesList) {
|
|
528
549
|
if (issuesList.length === 0) {
|
|
529
550
|
return;
|
|
530
551
|
}
|
|
@@ -532,7 +553,7 @@ function reportIssues(issuesList) {
|
|
|
532
553
|
console.error('\nBroken links found:\n');
|
|
533
554
|
|
|
534
555
|
// Group issues by source URL
|
|
535
|
-
/** @type {Map<string,
|
|
556
|
+
/** @type {Map<string, BrokenLinkIssue[]>} */
|
|
536
557
|
const issuesBySource = new Map();
|
|
537
558
|
for (const issue of issuesList) {
|
|
538
559
|
const sourceUrl = issue.link.src ?? '(unknown)';
|
|
@@ -553,6 +574,39 @@ function reportIssues(issuesList) {
|
|
|
553
574
|
}
|
|
554
575
|
}
|
|
555
576
|
|
|
577
|
+
/**
|
|
578
|
+
* Reports HTML validation issues to stderr, grouped by page URL.
|
|
579
|
+
* @param {HtmlValidateIssue[]} htmlIssues - Array of HTML validation issues to report
|
|
580
|
+
*/
|
|
581
|
+
function reportHtmlValidation(htmlIssues) {
|
|
582
|
+
if (htmlIssues.length === 0) {
|
|
583
|
+
return;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
console.error('\nHTML validation issues:\n');
|
|
587
|
+
|
|
588
|
+
// Group by page URL
|
|
589
|
+
/** @type {Map<string, HtmlValidateIssue[]>} */
|
|
590
|
+
const issuesByPage = new Map();
|
|
591
|
+
for (const issue of htmlIssues) {
|
|
592
|
+
const pageIssues = issuesByPage.get(issue.pageUrl) ?? [];
|
|
593
|
+
if (pageIssues.length === 0) {
|
|
594
|
+
issuesByPage.set(issue.pageUrl, pageIssues);
|
|
595
|
+
}
|
|
596
|
+
pageIssues.push(issue);
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
for (const [pageUrl, pageIssues] of issuesByPage.entries()) {
|
|
600
|
+
console.error(`Page ${chalk.cyan(pageUrl)}:`);
|
|
601
|
+
for (const issue of pageIssues) {
|
|
602
|
+
const severityLabel = issue.severity === 2 ? chalk.red('error') : chalk.yellow('warning');
|
|
603
|
+
console.error(
|
|
604
|
+
` ${issue.location.line}:${issue.location.column} ${severityLabel} ${issue.message} ${chalk.gray(issue.ruleId)}`,
|
|
605
|
+
);
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
556
610
|
/**
|
|
557
611
|
* Crawls a website starting from seed URLs, discovering all internal links and checking for broken links/targets.
|
|
558
612
|
* @param {CrawlOptions} rawOptions - Configuration options for the crawl
|
|
@@ -594,6 +648,35 @@ export async function crawl(rawOptions) {
|
|
|
594
648
|
const crawledPages = new Map();
|
|
595
649
|
/** @type {Set<Link>} */
|
|
596
650
|
const crawledLinks = new Set();
|
|
651
|
+
/** @type {Issue[]} */
|
|
652
|
+
const issues = [];
|
|
653
|
+
/**
|
|
654
|
+
* Spawns a crawl worker for a page URL.
|
|
655
|
+
* @param {string} pageUrl - The page URL to crawl
|
|
656
|
+
* @returns {Promise<{ pageData: PageData, links: Link[], htmlValidateResults: CrawlWorkerOutput['htmlValidateResults'] }>}
|
|
657
|
+
*/
|
|
658
|
+
function crawlInWorker(pageUrl) {
|
|
659
|
+
return new Promise((resolve, reject) => {
|
|
660
|
+
/** @type {CrawlWorkerInput} */
|
|
661
|
+
const input = { pageUrl, options };
|
|
662
|
+
const worker = new Worker(crawlWorkerUrl, {
|
|
663
|
+
workerData: input,
|
|
664
|
+
});
|
|
665
|
+
worker.on('message', (/** @type {CrawlWorkerOutput} */ msg) => {
|
|
666
|
+
resolve({
|
|
667
|
+
pageData: {
|
|
668
|
+
url: msg.pageData.url,
|
|
669
|
+
status: msg.pageData.status,
|
|
670
|
+
targets: new Set(msg.pageData.targets),
|
|
671
|
+
contentType: msg.pageData.contentType,
|
|
672
|
+
},
|
|
673
|
+
links: msg.links,
|
|
674
|
+
htmlValidateResults: msg.htmlValidateResults,
|
|
675
|
+
});
|
|
676
|
+
});
|
|
677
|
+
worker.on('error', (err) => reject(err));
|
|
678
|
+
});
|
|
679
|
+
}
|
|
597
680
|
|
|
598
681
|
const queue = new Queue(async (/** @type {Link} */ link) => {
|
|
599
682
|
crawledLinks.add(link);
|
|
@@ -611,78 +694,30 @@ export async function crawl(rawOptions) {
|
|
|
611
694
|
return;
|
|
612
695
|
}
|
|
613
696
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
/** @type {PageData} */
|
|
633
|
-
const pageData = {
|
|
634
|
-
url: pageUrl,
|
|
635
|
-
status: res.status,
|
|
636
|
-
targets: new Set(),
|
|
637
|
-
contentType: type,
|
|
638
|
-
};
|
|
639
|
-
|
|
640
|
-
if (pageData.status < 200 || pageData.status >= 400) {
|
|
641
|
-
console.warn(chalk.yellow(`Warning: ${pageUrl} returned status ${pageData.status}`));
|
|
642
|
-
return pageData;
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
if (type.startsWith('image/')) {
|
|
646
|
-
// Skip images
|
|
647
|
-
return pageData;
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
if (type !== 'text/html' && type !== 'text/markdown') {
|
|
651
|
-
console.warn(chalk.yellow(`Warning: ${pageUrl} returned non-HTML content-type: ${type}`));
|
|
652
|
-
return pageData;
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
const rawContent = await res.text();
|
|
656
|
-
const content = type === 'text/markdown' ? await markdownToHtml(rawContent) : rawContent;
|
|
657
|
-
|
|
658
|
-
const dom = parse(content, { parseNoneClosedTags: true });
|
|
659
|
-
|
|
660
|
-
let ignoredSelector = ':not(*)'; // matches nothing
|
|
661
|
-
if (options.ignoredContent.length > 0) {
|
|
662
|
-
ignoredSelector = Array.from(options.ignoredContent)
|
|
663
|
-
.flatMap((selector) => [selector, `${selector} *`])
|
|
664
|
-
.join(',');
|
|
665
|
-
}
|
|
666
|
-
const linksSelector = `a[href]:not(${ignoredSelector})`;
|
|
667
|
-
|
|
668
|
-
const pageLinks = dom.querySelectorAll(linksSelector).map((a) => ({
|
|
669
|
-
src: pageUrl,
|
|
670
|
-
text: getAccessibleName(a, dom),
|
|
671
|
-
href: a.getAttribute('href') ?? '',
|
|
672
|
-
contentType: type,
|
|
673
|
-
}));
|
|
674
|
-
|
|
675
|
-
for (const target of dom.querySelectorAll('*[id]')) {
|
|
676
|
-
if (!options.ignoredTargets.has(target.id)) {
|
|
677
|
-
pageData.targets.add(`#${target.id}`);
|
|
697
|
+
console.log(`Crawling ${chalk.cyan(pageUrl)}...`);
|
|
698
|
+
const workerPromise = crawlInWorker(pageUrl);
|
|
699
|
+
const pagePromise = workerPromise.then((result) => {
|
|
700
|
+
if (result.htmlValidateResults) {
|
|
701
|
+
for (const validationResult of result.htmlValidateResults.results) {
|
|
702
|
+
for (const msg of validationResult.messages) {
|
|
703
|
+
issues.push({
|
|
704
|
+
type: 'html-validate',
|
|
705
|
+
message: msg.message,
|
|
706
|
+
pageUrl: result.htmlValidateResults.pageUrl,
|
|
707
|
+
ruleId: msg.ruleId,
|
|
708
|
+
severity: msg.severity,
|
|
709
|
+
location: { line: msg.line, column: msg.column },
|
|
710
|
+
selector: msg.selector,
|
|
711
|
+
});
|
|
712
|
+
}
|
|
678
713
|
}
|
|
679
714
|
}
|
|
680
715
|
|
|
681
|
-
for (const
|
|
682
|
-
queue.add(
|
|
716
|
+
for (const discoveredLink of result.links) {
|
|
717
|
+
queue.add(discoveredLink);
|
|
683
718
|
}
|
|
684
719
|
|
|
685
|
-
return pageData;
|
|
720
|
+
return result.pageData;
|
|
686
721
|
});
|
|
687
722
|
|
|
688
723
|
crawledPages.set(pageUrl, pagePromise);
|
|
@@ -711,10 +746,6 @@ export async function crawl(rawOptions) {
|
|
|
711
746
|
await writePagesToFile(results, options.outPath);
|
|
712
747
|
}
|
|
713
748
|
|
|
714
|
-
/** Array to collect all issues found during validation */
|
|
715
|
-
/** @type {Issue[]} */
|
|
716
|
-
const issues = [];
|
|
717
|
-
|
|
718
749
|
/** Count of links ignored due to ignores configuration */
|
|
719
750
|
let ignoredCount = 0;
|
|
720
751
|
|
|
@@ -771,11 +802,24 @@ export async function crawl(rawOptions) {
|
|
|
771
802
|
}
|
|
772
803
|
}
|
|
773
804
|
|
|
774
|
-
|
|
805
|
+
// Split issues by type for reporting
|
|
806
|
+
/** @type {BrokenLinkIssue[]} */
|
|
807
|
+
const brokenLinkIssues = /** @type {BrokenLinkIssue[]} */ (
|
|
808
|
+
issues.filter((issue) => issue.type === 'broken-link' || issue.type === 'broken-target')
|
|
809
|
+
);
|
|
810
|
+
/** @type {HtmlValidateIssue[]} */
|
|
811
|
+
const htmlValidateIssues = /** @type {HtmlValidateIssue[]} */ (
|
|
812
|
+
issues.filter((issue) => issue.type === 'html-validate')
|
|
813
|
+
);
|
|
814
|
+
|
|
815
|
+
reportBrokenLinks(brokenLinkIssues);
|
|
816
|
+
reportHtmlValidation(htmlValidateIssues);
|
|
775
817
|
|
|
776
818
|
// Derive counts from issues
|
|
777
|
-
const brokenLinks =
|
|
778
|
-
const brokenLinkTargets =
|
|
819
|
+
const brokenLinks = brokenLinkIssues.filter((issue) => issue.type === 'broken-link').length;
|
|
820
|
+
const brokenLinkTargets = brokenLinkIssues.filter(
|
|
821
|
+
(issue) => issue.type === 'broken-target',
|
|
822
|
+
).length;
|
|
779
823
|
|
|
780
824
|
const endTime = Date.now();
|
|
781
825
|
const durationSeconds = (endTime - startTime) / 1000;
|
|
@@ -784,14 +828,21 @@ export async function crawl(rawOptions) {
|
|
|
784
828
|
unit: 'second',
|
|
785
829
|
maximumFractionDigits: 2,
|
|
786
830
|
}).format(durationSeconds);
|
|
831
|
+
const fmt = new Intl.NumberFormat('en-US').format;
|
|
787
832
|
console.log(chalk.blue(`\nCrawl completed in ${duration}`));
|
|
788
|
-
console.log(` Total links found: ${chalk.cyan(crawledLinks.size)}`);
|
|
789
|
-
console.log(` Total broken links: ${chalk.cyan(brokenLinks)}`);
|
|
790
|
-
console.log(` Total broken link targets: ${chalk.cyan(brokenLinkTargets)}`);
|
|
791
|
-
console.log(` Total ignored: ${chalk.cyan(ignoredCount)}`);
|
|
833
|
+
console.log(` Total links found: ${chalk.cyan(fmt(crawledLinks.size))}`);
|
|
834
|
+
console.log(` Total broken links: ${chalk.cyan(fmt(brokenLinks))}`);
|
|
835
|
+
console.log(` Total broken link targets: ${chalk.cyan(fmt(brokenLinkTargets))}`);
|
|
836
|
+
console.log(` Total ignored: ${chalk.cyan(fmt(ignoredCount))}`);
|
|
837
|
+
if (options.htmlValidate.length > 0) {
|
|
838
|
+
const pagesWithHtmlIssues = new Set(htmlValidateIssues.map((issue) => issue.pageUrl)).size;
|
|
839
|
+
console.log(
|
|
840
|
+
` HTML validation issues: ${chalk.cyan(fmt(htmlValidateIssues.length))} across ${chalk.cyan(fmt(pagesWithHtmlIssues))} ${pagesWithHtmlIssues === 1 ? 'page' : 'pages'}`,
|
|
841
|
+
);
|
|
842
|
+
}
|
|
792
843
|
|
|
793
844
|
if (options.outPath) {
|
|
794
|
-
console.log(chalk.blue(`Output written to: ${options.outPath}`));
|
|
845
|
+
console.log(chalk.blue(`Output written to: ${pathToFileURL(options.outPath)}`));
|
|
795
846
|
}
|
|
796
847
|
|
|
797
848
|
return { links: crawledLinks, pages: results, issues };
|
|
@@ -2,12 +2,18 @@ import path from 'node:path';
|
|
|
2
2
|
import getPort from 'get-port';
|
|
3
3
|
import { describe, expect, it } from 'vitest';
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
type
|
|
9
|
-
|
|
10
|
-
|
|
5
|
+
import {
|
|
6
|
+
crawl,
|
|
7
|
+
type BrokenLinkIssue,
|
|
8
|
+
type HtmlValidateIssue,
|
|
9
|
+
type Issue,
|
|
10
|
+
type Link,
|
|
11
|
+
// eslint-disable-next-line import/extensions
|
|
12
|
+
} from './index.mjs';
|
|
13
|
+
|
|
14
|
+
type ExpectedBrokenLinkIssue = Omit<Partial<BrokenLinkIssue>, 'link'> & { link?: Partial<Link> };
|
|
15
|
+
|
|
16
|
+
function objectMatchingIssue(expectedIssue: ExpectedBrokenLinkIssue) {
|
|
11
17
|
return expect.objectContaining({
|
|
12
18
|
...expectedIssue,
|
|
13
19
|
...(expectedIssue.link ? { link: expect.objectContaining(expectedIssue.link) } : {}),
|
|
@@ -15,16 +21,16 @@ function objectMatchingIssue(expectedIssue: ExpectedIssue) {
|
|
|
15
21
|
}
|
|
16
22
|
|
|
17
23
|
/**
|
|
18
|
-
* Helper to assert that
|
|
24
|
+
* Helper to assert that a broken link issue with matching properties exists in the issues array
|
|
19
25
|
*/
|
|
20
|
-
function expectIssue(issues: Issue[], expectedIssue:
|
|
26
|
+
function expectIssue(issues: Issue[], expectedIssue: ExpectedBrokenLinkIssue) {
|
|
21
27
|
expect(issues).toEqual(expect.arrayContaining([objectMatchingIssue(expectedIssue)]));
|
|
22
28
|
}
|
|
23
29
|
|
|
24
30
|
/**
|
|
25
|
-
* Helper to assert that no issue with matching properties exists in the issues array
|
|
31
|
+
* Helper to assert that no broken link issue with matching properties exists in the issues array
|
|
26
32
|
*/
|
|
27
|
-
function expectNotIssue(issues: Issue[], notExpectedIssue:
|
|
33
|
+
function expectNotIssue(issues: Issue[], notExpectedIssue: ExpectedBrokenLinkIssue) {
|
|
28
34
|
expect(issues).not.toEqual(expect.arrayContaining([objectMatchingIssue(notExpectedIssue)]));
|
|
29
35
|
}
|
|
30
36
|
|
|
@@ -56,12 +62,24 @@ describe('Broken Links Checker', () => {
|
|
|
56
62
|
// Test href-only rule (matches from any page) - note: matches the actual href value
|
|
57
63
|
{ href: 'broken-relative.html' },
|
|
58
64
|
],
|
|
65
|
+
// Exercise the array form with union semantics: every matching entry
|
|
66
|
+
// contributes to the page's config. The baseline entry (no `path`)
|
|
67
|
+
// turns off `no-raw-characters` everywhere; the path-specific entry
|
|
68
|
+
// turns off `no-dup-id` only on /invalid-html.html. Both rules are
|
|
69
|
+
// silenced on that page because the configs are merged, not replaced.
|
|
70
|
+
htmlValidate: [
|
|
71
|
+
{ config: { rules: { 'no-raw-characters': 'off' } } },
|
|
72
|
+
{ path: '/invalid-html.html', config: { rules: { 'no-dup-id': 'off' } } },
|
|
73
|
+
],
|
|
59
74
|
});
|
|
60
75
|
|
|
61
|
-
expect(result.links).toHaveLength(
|
|
62
|
-
//
|
|
76
|
+
expect(result.links).toHaveLength(67);
|
|
77
|
+
// Broken link issue count: original 11, minus ignored ones (broken-from-markdown via contentType,
|
|
63
78
|
// broken-relative via href-only rule)
|
|
64
|
-
|
|
79
|
+
const brokenLinkIssues = result.issues.filter(
|
|
80
|
+
(issue) => issue.type === 'broken-link' || issue.type === 'broken-target',
|
|
81
|
+
);
|
|
82
|
+
expect(brokenLinkIssues).toHaveLength(9);
|
|
65
83
|
|
|
66
84
|
// Test ignores: these broken links should be ignored (not in issues)
|
|
67
85
|
expectNotIssue(result.issues, {
|
|
@@ -257,5 +275,17 @@ describe('Broken Links Checker', () => {
|
|
|
257
275
|
// Test contentType is stored on pageData
|
|
258
276
|
expect(result.pages.get('/example.md')?.contentType).toBe('text/markdown');
|
|
259
277
|
expect(result.pages.get('/')?.contentType).toBe('text/html');
|
|
278
|
+
|
|
279
|
+
// Test htmlValidate union semantics: invalid-html.html has both a duplicate
|
|
280
|
+
// ID (no-dup-id) and a raw `&` (no-raw-characters). The path-specific
|
|
281
|
+
// entry silences no-dup-id; the baseline entry silences no-raw-characters.
|
|
282
|
+
// Under union semantics both apply, so the page reports zero issues.
|
|
283
|
+
const htmlValidateIssues = result.issues.filter(
|
|
284
|
+
(issue): issue is HtmlValidateIssue => issue.type === 'html-validate',
|
|
285
|
+
);
|
|
286
|
+
const invalidHtmlIssues = htmlValidateIssues.filter(
|
|
287
|
+
(issue) => issue.pageUrl === '/invalid-html.html',
|
|
288
|
+
);
|
|
289
|
+
expect(invalidHtmlIssues).toEqual([]);
|
|
260
290
|
}, 30000);
|
|
261
291
|
});
|
|
@@ -84,15 +84,15 @@ describe('categorizeCommits', () => {
|
|
|
84
84
|
labels: {
|
|
85
85
|
...baseLabelConfig,
|
|
86
86
|
categoryOverrides: {
|
|
87
|
-
'all components': 'General changes',
|
|
87
|
+
'scope: all components': 'General changes',
|
|
88
88
|
},
|
|
89
89
|
},
|
|
90
90
|
};
|
|
91
91
|
|
|
92
92
|
const commits = [
|
|
93
|
-
createCommit(1, ['component: Button', 'all components']),
|
|
93
|
+
createCommit(1, ['component: Button', 'scope: all components']),
|
|
94
94
|
createCommit(2, ['component: Checkbox']),
|
|
95
|
-
createCommit(3, ['component: Button', 'all components']),
|
|
95
|
+
createCommit(3, ['component: Button', 'scope: all components']),
|
|
96
96
|
];
|
|
97
97
|
|
|
98
98
|
const result = categorizeCommits(commits, configWithOverrides);
|
|
@@ -229,13 +229,13 @@ describe('categorizeCommits', () => {
|
|
|
229
229
|
labels: {
|
|
230
230
|
...baseLabelConfig,
|
|
231
231
|
categoryOverrides: {
|
|
232
|
-
'all
|
|
232
|
+
'scope: all components': 'General changes',
|
|
233
233
|
},
|
|
234
234
|
},
|
|
235
235
|
};
|
|
236
236
|
|
|
237
237
|
const commits = [
|
|
238
|
-
createCommit(1, ['scope: data grid', 'all
|
|
238
|
+
createCommit(1, ['scope: data grid', 'scope: all components']),
|
|
239
239
|
createCommit(2, ['scope: charts']),
|
|
240
240
|
];
|
|
241
241
|
|
|
@@ -93,8 +93,12 @@ async function fetchCommitsRest({ octokit, repo, lastRelease, release, org = 'mu
|
|
|
93
93
|
}
|
|
94
94
|
|
|
95
95
|
const promises = results.map(async (commit) => {
|
|
96
|
-
const
|
|
97
|
-
|
|
96
|
+
const matches = [...commit.commit.message.matchAll(/#(\d+)/g)];
|
|
97
|
+
// The PR number is always the last match.
|
|
98
|
+
// Sometimes the PR titles include an issue number like this:
|
|
99
|
+
// [tag] PR title (#00001) (#00002)
|
|
100
|
+
const prMatch = matches.at(-1);
|
|
101
|
+
if (!prMatch) {
|
|
98
102
|
return null;
|
|
99
103
|
}
|
|
100
104
|
|