@mui/internal-code-infra 0.0.4-canary.5 → 0.0.4-canary.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/README.md +19 -8
  2. package/build/babel-config.d.mts +11 -3
  3. package/build/brokenLinksChecker/crawlWorker.d.mts +1 -0
  4. package/build/brokenLinksChecker/index.d.mts +45 -2
  5. package/build/changelog/types.d.ts +1 -1
  6. package/build/cli/cmdArgosPush.d.mts +2 -2
  7. package/build/cli/cmdBuild.d.mts +2 -2
  8. package/build/cli/cmdCopyFiles.d.mts +2 -2
  9. package/build/cli/cmdExtractErrorCodes.d.mts +2 -2
  10. package/build/cli/cmdGenerateChangelog.d.mts +2 -2
  11. package/build/cli/cmdGithubAuth.d.mts +2 -2
  12. package/build/cli/cmdListWorkspaces.d.mts +4 -2
  13. package/build/cli/cmdNetlifyIgnore.d.mts +2 -2
  14. package/build/cli/cmdPublish.d.mts +4 -2
  15. package/build/cli/cmdPublishCanary.d.mts +3 -3
  16. package/build/cli/cmdPublishNewPackage.d.mts +4 -2
  17. package/build/cli/cmdSetVersionOverrides.d.mts +2 -2
  18. package/build/cli/cmdVale.d.mts +46 -0
  19. package/build/cli/cmdValidateBuiltTypes.d.mts +2 -2
  20. package/build/eslint/baseConfig.d.mts +3 -1
  21. package/build/eslint/mui/rules/disallow-react-api-in-server-components.d.mts +2 -2
  22. package/build/eslint/mui/rules/docgen-ignore-before-comment.d.mts +2 -2
  23. package/build/eslint/mui/rules/no-guarded-throw.d.mts +31 -0
  24. package/build/eslint/mui/rules/no-presentation-role.d.mts +5 -0
  25. package/build/eslint/mui/rules/no-restricted-resolved-imports.d.mts +2 -2
  26. package/build/eslint/mui/rules/nodeEnvUtils.d.mts +18 -0
  27. package/build/markdownlint/duplicate-h1.d.mts +1 -1
  28. package/build/markdownlint/git-diff.d.mts +1 -1
  29. package/build/markdownlint/index.d.mts +1 -1
  30. package/build/markdownlint/straight-quotes.d.mts +1 -1
  31. package/build/markdownlint/table-alignment.d.mts +1 -1
  32. package/build/markdownlint/terminal-language.d.mts +1 -1
  33. package/build/remark/config.d.mts +43 -0
  34. package/build/remark/createLintTester.d.mts +10 -0
  35. package/build/remark/firstBlockHeading.d.mts +4 -0
  36. package/build/remark/gitDiff.d.mts +2 -0
  37. package/build/remark/noSpaceInLinks.d.mts +2 -0
  38. package/build/remark/straightQuotes.d.mts +2 -0
  39. package/build/remark/tableAlignment.d.mts +2 -0
  40. package/build/remark/terminalLanguage.d.mts +2 -0
  41. package/build/utils/babel.d.mts +1 -1
  42. package/build/utils/build.d.mts +4 -4
  43. package/build/utils/github.d.mts +1 -1
  44. package/build/utils/pnpm.d.mts +68 -2
  45. package/build/utils/testUtils.d.mts +7 -0
  46. package/build/utils/typescript.d.mts +2 -2
  47. package/package.json +62 -35
  48. package/src/babel-config.mjs +9 -3
  49. package/src/brokenLinksChecker/__fixtures__/static-site/index.html +1 -0
  50. package/src/brokenLinksChecker/__fixtures__/static-site/invalid-html.html +15 -0
  51. package/src/brokenLinksChecker/crawlWorker.mjs +217 -0
  52. package/src/brokenLinksChecker/index.mjs +217 -164
  53. package/src/brokenLinksChecker/index.test.ts +50 -13
  54. package/src/changelog/categorizeCommits.test.ts +5 -5
  55. package/src/changelog/fetchChangelogs.mjs +6 -2
  56. package/src/changelog/parseCommitLabels.test.ts +5 -5
  57. package/src/changelog/renderChangelog.mjs +1 -1
  58. package/src/changelog/types.ts +1 -1
  59. package/src/cli/cmdListWorkspaces.mjs +9 -2
  60. package/src/cli/cmdNetlifyIgnore.mjs +4 -88
  61. package/src/cli/cmdPublish.mjs +51 -14
  62. package/src/cli/cmdPublishCanary.mjs +128 -132
  63. package/src/cli/cmdPublishNewPackage.mjs +27 -6
  64. package/src/cli/cmdVale.mjs +513 -0
  65. package/src/cli/cmdVale.test.mjs +644 -0
  66. package/src/cli/index.mjs +2 -0
  67. package/src/cli/packageJson.d.ts +1 -1
  68. package/src/eslint/baseConfig.mjs +45 -20
  69. package/src/eslint/docsConfig.mjs +2 -1
  70. package/src/eslint/jsonConfig.mjs +2 -1
  71. package/src/eslint/mui/config.mjs +21 -1
  72. package/src/eslint/mui/index.mjs +4 -0
  73. package/src/eslint/mui/rules/no-guarded-throw.mjs +115 -0
  74. package/src/eslint/mui/rules/no-guarded-throw.test.mjs +206 -0
  75. package/src/eslint/mui/rules/no-presentation-role.mjs +60 -0
  76. package/src/eslint/mui/rules/no-presentation-role.test.mjs +33 -0
  77. package/src/eslint/mui/rules/nodeEnvUtils.mjs +52 -0
  78. package/src/eslint/mui/rules/require-dev-wrapper.mjs +25 -40
  79. package/src/eslint/testConfig.mjs +2 -1
  80. package/src/estree-typescript.d.ts +1 -1
  81. package/src/remark/config.mjs +157 -0
  82. package/src/remark/createLintTester.mjs +19 -0
  83. package/src/remark/firstBlockHeading.mjs +87 -0
  84. package/src/remark/firstBlockHeading.test.mjs +107 -0
  85. package/src/remark/gitDiff.mjs +43 -0
  86. package/src/remark/gitDiff.test.mjs +45 -0
  87. package/src/remark/noSpaceInLinks.mjs +42 -0
  88. package/src/remark/noSpaceInLinks.test.mjs +22 -0
  89. package/src/remark/straightQuotes.mjs +31 -0
  90. package/src/remark/straightQuotes.test.mjs +25 -0
  91. package/src/remark/tableAlignment.mjs +23 -0
  92. package/src/remark/tableAlignment.test.mjs +28 -0
  93. package/src/remark/terminalLanguage.mjs +19 -0
  94. package/src/remark/terminalLanguage.test.mjs +17 -0
  95. package/src/untyped-plugins.d.ts +11 -11
  96. package/src/utils/build.mjs +18 -1
  97. package/src/utils/build.test.mjs +585 -575
  98. package/src/utils/pnpm.mjs +192 -3
  99. package/src/utils/pnpm.test.mjs +580 -0
  100. package/src/utils/testUtils.mjs +18 -0
  101. package/src/utils/typescript.test.mjs +249 -272
  102. package/vale/.vale.ini +1 -0
  103. package/vale/styles/MUI/CorrectReferenceAllCases.yml +43 -0
  104. package/vale/styles/MUI/CorrectRererenceCased.yml +14 -0
  105. package/vale/styles/MUI/GoogleLatin.yml +11 -0
  106. package/vale/styles/MUI/MuiBrandName.yml +22 -0
  107. package/vale/styles/MUI/NoBritish.yml +112 -0
  108. package/vale/styles/MUI/NoCompanyName.yml +17 -0
@@ -1,21 +1,17 @@
1
1
  /* eslint-disable no-console */
2
2
  import { execaCommand } from 'execa';
3
3
  import timers from 'node:timers/promises';
4
- import { parse } from 'node-html-parser';
5
4
  import * as fs from 'node:fs/promises';
6
5
  import * as path from 'node:path';
6
+ import { pathToFileURL } from 'node:url';
7
7
  import chalk from 'chalk';
8
8
  import { Transform } from 'node:stream';
9
- import contentType from 'content-type';
10
- import { unified } from 'unified';
11
- import remarkParse from 'remark-parse';
12
- import remarkGfm from 'remark-gfm';
13
- import remarkRehype from 'remark-rehype';
14
- import rehypeSlug from 'rehype-slug';
15
- import rehypeStringify from 'rehype-stringify';
9
+ import { Worker } from 'node:worker_threads';
16
10
 
17
11
  const DEFAULT_CONCURRENCY = 4;
18
12
 
13
+ const crawlWorkerUrl = new URL('./crawlWorker.mjs', import.meta.url);
14
+
19
15
  /**
20
16
  * Creates a Transform stream that prefixes each line with a given string.
21
17
  * Useful for distinguishing server logs from other output.
@@ -105,6 +101,30 @@ function deserializeLinkStructure(data) {
105
101
  return linkStructure;
106
102
  }
107
103
 
104
+ /**
105
+ * Input data passed to the crawl worker via workerData.
106
+ * @typedef {Object} CrawlWorkerInput
107
+ * @property {string} pageUrl - The page URL to crawl
108
+ * @property {ResolvedCrawlOptions} options - Fully resolved crawl options
109
+ */
110
+
111
+ /**
112
+ * Serialized page data returned by the crawl worker (uses arrays instead of Sets for structured clone).
113
+ * @typedef {Object} CrawlWorkerPageData
114
+ * @property {string} url - The normalized page URL
115
+ * @property {number} status - HTTP status code
116
+ * @property {string[]} targets - Array of anchor targets (e.g., '#intro')
117
+ * @property {string} contentType - Content-type of the page
118
+ */
119
+
120
+ /**
121
+ * Output message posted by the crawl worker.
122
+ * @typedef {Object} CrawlWorkerOutput
123
+ * @property {CrawlWorkerPageData} pageData - Serialized page data
124
+ * @property {Link[]} links - Links discovered on the page
125
+ * @property {{ pageUrl: string, results: import('html-validate').Result[] } | null} htmlValidateResults - HTML validation results, or null if validation was skipped/passed
126
+ */
127
+
108
128
  /**
109
129
  * Data about a crawled page including its URL, HTTP status, and available link targets.
110
130
  * @typedef {Object} PageData
@@ -131,77 +151,6 @@ async function writePagesToFile(pages, outPath) {
131
151
  await fs.writeFile(outPath, JSON.stringify(fileContent, null, 2), 'utf-8');
132
152
  }
133
153
 
134
- /**
135
- * Computes the accessible name of an element according to ARIA rules.
136
- * Polyfill for `node.computedName` available only in Chrome v112+.
137
- * Checks in order: aria-label, aria-labelledby, label[for], img alt, innerText.
138
- * @param {import('node-html-parser').HTMLElement | null} elm - Element to compute name for
139
- * @param {import('node-html-parser').HTMLElement} ownerDocument - Document containing the element
140
- * @returns {string} The computed accessible name, or empty string if none found
141
- */
142
- function getAccessibleName(elm, ownerDocument) {
143
- if (!elm) {
144
- return '';
145
- }
146
-
147
- // 1. aria-label
148
- const ariaLabel = elm.getAttribute('aria-label')?.trim();
149
- if (ariaLabel) {
150
- return ariaLabel;
151
- }
152
-
153
- // 2. aria-labelledby
154
- const labelledby = elm.getAttribute('aria-labelledby');
155
- if (labelledby) {
156
- const labels = [];
157
- for (const id of labelledby.split(/\s+/)) {
158
- const label = getAccessibleName(ownerDocument.getElementById(id), ownerDocument);
159
- if (label) {
160
- labels.push(label);
161
- }
162
- }
163
- const label = labels.join(' ').trim();
164
- if (label) {
165
- return label;
166
- }
167
- }
168
-
169
- // 3. <label for="id">
170
- if (elm.id) {
171
- const label = ownerDocument.querySelector(`label[for="${elm.id}"]`);
172
- if (label) {
173
- return getAccessibleName(label, ownerDocument);
174
- }
175
- }
176
-
177
- // 4. <img alt="">
178
- if (elm.tagName === 'IMG') {
179
- const alt = elm.getAttribute('alt')?.trim();
180
- if (alt) {
181
- return alt;
182
- }
183
- }
184
-
185
- // 5. Fallback: visible text
186
- return elm.innerText.trim();
187
- }
188
-
189
- /**
190
- * Converts markdown content to HTML using unified pipeline.
191
- * @param {string} markdown - Raw markdown content
192
- * @returns {Promise<string>} Converted HTML string
193
- */
194
- async function markdownToHtml(markdown) {
195
- const result = await unified()
196
- .use(remarkParse)
197
- .use(remarkGfm)
198
- .use(remarkRehype)
199
- .use(rehypeSlug)
200
- .use(rehypeStringify)
201
- .process(markdown);
202
- return String(result);
203
- }
204
-
205
154
  /**
206
155
  * Generic concurrent task queue with configurable concurrency limit.
207
156
  * Processes tasks in FIFO order with a maximum number of concurrent workers.
@@ -402,11 +351,30 @@ function shouldIgnoreLink(link, ignores) {
402
351
  * @property {number} [concurrency] - Number of concurrent page fetches (defaults to 4)
403
352
  * @property {string[]} [seedUrls] - Starting URLs for the crawl (defaults to ['/'])
404
353
  * @property {IgnoreRule[]} [ignores] - Rules to ignore broken links. Each rule can have path, href, contentType, and/or has properties. All specified properties must match (AND logic). Within a property, multiple values use OR logic.
354
+ * @property {HtmlValidateOption} [htmlValidate] - Enable HTML validation on crawled pages. `false` (default): disabled. `true`: validate with recommended rules. Object: use as html-validate config — `mui:recommended` is always applied as the baseline, so most callers only need to set `rules`. Array: per-path config overrides — `mui:recommended` is applied once as the baseline and every entry whose `path` matches the page URL is layered on top; later matching entries win on conflicting rule keys. If an entry omits `extends`, it behaves like a rule patch and typically only changes the rules it names. If an entry includes `extends` (for example, re-extending `mui:recommended`), it can re-introduce or reset baseline presets rather than acting as a pure patch. An entry without `path` matches every page. If no entry matches, the page is not validated.
355
+ * @property {boolean} [verbose] - Log extra diagnostics during crawling (e.g. resolved html-validate config per page). Defaults to `false`.
356
+ */
357
+
358
+ /**
359
+ * Per-page HTML validation override entry.
360
+ * @typedef {Object} HtmlValidateOverride
361
+ * @property {(string | RegExp) | (string | RegExp)[]} [path] - Pattern(s) to match the page URL. Strings use exact match. Omit to match every page.
362
+ * @property {true | import('html-validate').ConfigData} config - html-validate config (or `true` for `mui:recommended`).
363
+ */
364
+
365
+ /**
366
+ * Public shape of the htmlValidate option.
367
+ * @typedef {boolean | import('html-validate').ConfigData | HtmlValidateOverride[]} HtmlValidateOption
368
+ */
369
+
370
+ /**
371
+ * Resolved per-page HTML validation entry. Empty array means validation is disabled.
372
+ * @typedef {{ path: (string | RegExp)[] | undefined, config: import('html-validate').ConfigData }} ResolvedHtmlValidateEntry
405
373
  */
406
374
 
407
375
  /**
408
376
  * Fully resolved configuration with all optional fields filled with defaults.
409
- * @typedef {Omit<Required<CrawlOptions>, 'ignores'> & { ignores: NormalizedIgnoreRule[] }} ResolvedCrawlOptions
377
+ * @typedef {Omit<Required<CrawlOptions>, 'ignores' | 'htmlValidate'> & { ignores: NormalizedIgnoreRule[], htmlValidate: ResolvedHtmlValidateEntry[] }} ResolvedCrawlOptions
410
378
  */
411
379
 
412
380
  /**
@@ -422,6 +390,42 @@ function validateIgnoreRule(rule) {
422
390
  }
423
391
  }
424
392
 
393
+ /**
394
+ * Normalizes a single config value to a non-null html-validate config object.
395
+ * Each config is registered as a pure rule patch; `mui:recommended` is pulled
396
+ * in once by the page's root config (ahead of every patch), so callers only
397
+ * need to specify the `rules` they want to change and never restate the
398
+ * recommended ruleset. `true` means "recommended only" (an empty patch). An
399
+ * explicit `extends` is still honored if a caller wants extra presets.
400
+ * @param {true | import('html-validate').ConfigData} config
401
+ * @returns {import('html-validate').ConfigData}
402
+ */
403
+ function normalizeHtmlValidateConfig(config) {
404
+ if (config === true) {
405
+ return {};
406
+ }
407
+ return config;
408
+ }
409
+
410
+ /**
411
+ * Resolves the htmlValidate option into an array of per-page entries.
412
+ * An empty array means validation is disabled.
413
+ * @param {HtmlValidateOption | undefined} option
414
+ * @returns {ResolvedHtmlValidateEntry[]}
415
+ */
416
+ function resolveHtmlValidateConfig(option) {
417
+ if (!option) {
418
+ return [];
419
+ }
420
+ if (option === true || !Array.isArray(option)) {
421
+ return [{ path: undefined, config: normalizeHtmlValidateConfig(option) }];
422
+ }
423
+ return option.map((entry) => ({
424
+ path: normalizeToArray(entry.path),
425
+ config: normalizeHtmlValidateConfig(entry.config),
426
+ }));
427
+ }
428
+
425
429
  /**
426
430
  * Resolves partial crawl options by filling in defaults for all optional fields.
427
431
  * @param {CrawlOptions} rawOptions - Partial options from user
@@ -447,6 +451,8 @@ function resolveOptions(rawOptions) {
447
451
  concurrency: rawOptions.concurrency ?? DEFAULT_CONCURRENCY,
448
452
  seedUrls: rawOptions.seedUrls ?? ['/'],
449
453
  ignores: normalizedIgnores,
454
+ htmlValidate: resolveHtmlValidateConfig(rawOptions.htmlValidate),
455
+ verbose: rawOptions.verbose ?? false,
450
456
  };
451
457
  }
452
458
 
@@ -506,25 +512,42 @@ async function resolveKnownTargets(options) {
506
512
 
507
513
  /**
508
514
  * Represents a broken link or broken link target discovered during crawling.
509
- * @typedef {Object} Issue
515
+ * @typedef {Object} BrokenLinkIssue
510
516
  * @property {'broken-link' | 'broken-target'} type - Type of issue: 'broken-link' for 404 pages, 'broken-target' for missing anchors
511
517
  * @property {string} message - Human-readable description of the issue (e.g., 'Target not found', 'Page returned error 404')
512
518
  * @property {Link} link - The link object that has the issue
513
519
  */
514
520
 
521
+ /**
522
+ * Represents an HTML validation issue found on a crawled page.
523
+ * @typedef {Object} HtmlValidateIssue
524
+ * @property {'html-validate'} type - Issue type discriminator
525
+ * @property {string} message - Human-readable description of the issue
526
+ * @property {string} pageUrl - The page URL where the issue was found
527
+ * @property {string} ruleId - The html-validate rule that triggered this issue (e.g., 'no-dup-id')
528
+ * @property {number} severity - Severity level (1 = warning, 2 = error)
529
+ * @property {{ line: number, column: number }} location - Source location of the issue
530
+ * @property {string | null} selector - DOM selector for the element, or null
531
+ */
532
+
533
+ /**
534
+ * Any issue discovered during crawling.
535
+ * @typedef {BrokenLinkIssue | HtmlValidateIssue} Issue
536
+ */
537
+
515
538
  /**
516
539
  * Results from a complete crawl operation.
517
540
  * @typedef {Object} CrawlResult
518
541
  * @property {Set<Link>} links - All links discovered during the crawl
519
542
  * @property {Map<string, PageData>} pages - All pages crawled, keyed by normalized URL
520
- * @property {Issue[]} issues - All broken links and broken targets found
543
+ * @property {Issue[]} issues - All issues found (broken links, broken targets, and HTML validation issues)
521
544
  */
522
545
 
523
546
  /**
524
547
  * Reports broken links to stderr, grouped by source page for better readability.
525
- * @param {Issue[]} issuesList - Array of issues to report
548
+ * @param {BrokenLinkIssue[]} issuesList - Array of broken link issues to report
526
549
  */
527
- function reportIssues(issuesList) {
550
+ function reportBrokenLinks(issuesList) {
528
551
  if (issuesList.length === 0) {
529
552
  return;
530
553
  }
@@ -532,7 +555,7 @@ function reportIssues(issuesList) {
532
555
  console.error('\nBroken links found:\n');
533
556
 
534
557
  // Group issues by source URL
535
- /** @type {Map<string, Issue[]>} */
558
+ /** @type {Map<string, BrokenLinkIssue[]>} */
536
559
  const issuesBySource = new Map();
537
560
  for (const issue of issuesList) {
538
561
  const sourceUrl = issue.link.src ?? '(unknown)';
@@ -553,6 +576,39 @@ function reportIssues(issuesList) {
553
576
  }
554
577
  }
555
578
 
579
+ /**
580
+ * Reports HTML validation issues to stderr, grouped by page URL.
581
+ * @param {HtmlValidateIssue[]} htmlIssues - Array of HTML validation issues to report
582
+ */
583
+ function reportHtmlValidation(htmlIssues) {
584
+ if (htmlIssues.length === 0) {
585
+ return;
586
+ }
587
+
588
+ console.error('\nHTML validation issues:\n');
589
+
590
+ // Group by page URL
591
+ /** @type {Map<string, HtmlValidateIssue[]>} */
592
+ const issuesByPage = new Map();
593
+ for (const issue of htmlIssues) {
594
+ const pageIssues = issuesByPage.get(issue.pageUrl) ?? [];
595
+ if (pageIssues.length === 0) {
596
+ issuesByPage.set(issue.pageUrl, pageIssues);
597
+ }
598
+ pageIssues.push(issue);
599
+ }
600
+
601
+ for (const [pageUrl, pageIssues] of issuesByPage.entries()) {
602
+ console.error(`Page ${chalk.cyan(pageUrl)}:`);
603
+ for (const issue of pageIssues) {
604
+ const severityLabel = issue.severity === 2 ? chalk.red('error') : chalk.yellow('warning');
605
+ console.error(
606
+ ` ${issue.location.line}:${issue.location.column} ${severityLabel} ${issue.message} ${chalk.gray(issue.ruleId)}`,
607
+ );
608
+ }
609
+ }
610
+ }
611
+
556
612
  /**
557
613
  * Crawls a website starting from seed URLs, discovering all internal links and checking for broken links/targets.
558
614
  * @param {CrawlOptions} rawOptions - Configuration options for the crawl
@@ -594,6 +650,35 @@ export async function crawl(rawOptions) {
594
650
  const crawledPages = new Map();
595
651
  /** @type {Set<Link>} */
596
652
  const crawledLinks = new Set();
653
+ /** @type {Issue[]} */
654
+ const issues = [];
655
+ /**
656
+ * Spawns a crawl worker for a page URL.
657
+ * @param {string} pageUrl - The page URL to crawl
658
+ * @returns {Promise<{ pageData: PageData, links: Link[], htmlValidateResults: CrawlWorkerOutput['htmlValidateResults'] }>}
659
+ */
660
+ function crawlInWorker(pageUrl) {
661
+ return new Promise((resolve, reject) => {
662
+ /** @type {CrawlWorkerInput} */
663
+ const input = { pageUrl, options };
664
+ const worker = new Worker(crawlWorkerUrl, {
665
+ workerData: input,
666
+ });
667
+ worker.on('message', (/** @type {CrawlWorkerOutput} */ msg) => {
668
+ resolve({
669
+ pageData: {
670
+ url: msg.pageData.url,
671
+ status: msg.pageData.status,
672
+ targets: new Set(msg.pageData.targets),
673
+ contentType: msg.pageData.contentType,
674
+ },
675
+ links: msg.links,
676
+ htmlValidateResults: msg.htmlValidateResults,
677
+ });
678
+ });
679
+ worker.on('error', (err) => reject(err));
680
+ });
681
+ }
597
682
 
598
683
  const queue = new Queue(async (/** @type {Link} */ link) => {
599
684
  crawledLinks.add(link);
@@ -611,78 +696,30 @@ export async function crawl(rawOptions) {
611
696
  return;
612
697
  }
613
698
 
614
- const pagePromise = Promise.resolve().then(async () => {
615
- console.log(`Crawling ${chalk.cyan(pageUrl)}...`);
616
- const res = await fetch(new URL(pageUrl, options.host));
617
-
618
- const contentTypeHeader = res.headers.get('content-type');
619
- let type = 'text/html';
620
-
621
- if (contentTypeHeader) {
622
- try {
623
- const parsed = contentType.parse(contentTypeHeader);
624
- type = parsed.type;
625
- } catch {
626
- console.warn(
627
- chalk.yellow(`Warning: ${pageUrl} returned invalid content-type: ${contentTypeHeader}`),
628
- );
629
- }
630
- }
631
-
632
- /** @type {PageData} */
633
- const pageData = {
634
- url: pageUrl,
635
- status: res.status,
636
- targets: new Set(),
637
- contentType: type,
638
- };
639
-
640
- if (pageData.status < 200 || pageData.status >= 400) {
641
- console.warn(chalk.yellow(`Warning: ${pageUrl} returned status ${pageData.status}`));
642
- return pageData;
643
- }
644
-
645
- if (type.startsWith('image/')) {
646
- // Skip images
647
- return pageData;
648
- }
649
-
650
- if (type !== 'text/html' && type !== 'text/markdown') {
651
- console.warn(chalk.yellow(`Warning: ${pageUrl} returned non-HTML content-type: ${type}`));
652
- return pageData;
653
- }
654
-
655
- const rawContent = await res.text();
656
- const content = type === 'text/markdown' ? await markdownToHtml(rawContent) : rawContent;
657
-
658
- const dom = parse(content, { parseNoneClosedTags: true });
659
-
660
- let ignoredSelector = ':not(*)'; // matches nothing
661
- if (options.ignoredContent.length > 0) {
662
- ignoredSelector = Array.from(options.ignoredContent)
663
- .flatMap((selector) => [selector, `${selector} *`])
664
- .join(',');
665
- }
666
- const linksSelector = `a[href]:not(${ignoredSelector})`;
667
-
668
- const pageLinks = dom.querySelectorAll(linksSelector).map((a) => ({
669
- src: pageUrl,
670
- text: getAccessibleName(a, dom),
671
- href: a.getAttribute('href') ?? '',
672
- contentType: type,
673
- }));
674
-
675
- for (const target of dom.querySelectorAll('*[id]')) {
676
- if (!options.ignoredTargets.has(target.id)) {
677
- pageData.targets.add(`#${target.id}`);
699
+ console.log(`Crawling ${chalk.cyan(pageUrl)}...`);
700
+ const workerPromise = crawlInWorker(pageUrl);
701
+ const pagePromise = workerPromise.then((result) => {
702
+ if (result.htmlValidateResults) {
703
+ for (const validationResult of result.htmlValidateResults.results) {
704
+ for (const msg of validationResult.messages) {
705
+ issues.push({
706
+ type: 'html-validate',
707
+ message: msg.message,
708
+ pageUrl: result.htmlValidateResults.pageUrl,
709
+ ruleId: msg.ruleId,
710
+ severity: msg.severity,
711
+ location: { line: msg.line, column: msg.column },
712
+ selector: msg.selector,
713
+ });
714
+ }
678
715
  }
679
716
  }
680
717
 
681
- for (const pageLink of pageLinks) {
682
- queue.add(pageLink);
718
+ for (const discoveredLink of result.links) {
719
+ queue.add(discoveredLink);
683
720
  }
684
721
 
685
- return pageData;
722
+ return result.pageData;
686
723
  });
687
724
 
688
725
  crawledPages.set(pageUrl, pagePromise);
@@ -711,10 +748,6 @@ export async function crawl(rawOptions) {
711
748
  await writePagesToFile(results, options.outPath);
712
749
  }
713
750
 
714
- /** Array to collect all issues found during validation */
715
- /** @type {Issue[]} */
716
- const issues = [];
717
-
718
751
  /** Count of links ignored due to ignores configuration */
719
752
  let ignoredCount = 0;
720
753
 
@@ -771,11 +804,24 @@ export async function crawl(rawOptions) {
771
804
  }
772
805
  }
773
806
 
774
- reportIssues(issues);
807
+ // Split issues by type for reporting
808
+ /** @type {BrokenLinkIssue[]} */
809
+ const brokenLinkIssues = /** @type {BrokenLinkIssue[]} */ (
810
+ issues.filter((issue) => issue.type === 'broken-link' || issue.type === 'broken-target')
811
+ );
812
+ /** @type {HtmlValidateIssue[]} */
813
+ const htmlValidateIssues = /** @type {HtmlValidateIssue[]} */ (
814
+ issues.filter((issue) => issue.type === 'html-validate')
815
+ );
816
+
817
+ reportBrokenLinks(brokenLinkIssues);
818
+ reportHtmlValidation(htmlValidateIssues);
775
819
 
776
820
  // Derive counts from issues
777
- const brokenLinks = issues.filter((issue) => issue.type === 'broken-link').length;
778
- const brokenLinkTargets = issues.filter((issue) => issue.type === 'broken-target').length;
821
+ const brokenLinks = brokenLinkIssues.filter((issue) => issue.type === 'broken-link').length;
822
+ const brokenLinkTargets = brokenLinkIssues.filter(
823
+ (issue) => issue.type === 'broken-target',
824
+ ).length;
779
825
 
780
826
  const endTime = Date.now();
781
827
  const durationSeconds = (endTime - startTime) / 1000;
@@ -784,14 +830,21 @@ export async function crawl(rawOptions) {
784
830
  unit: 'second',
785
831
  maximumFractionDigits: 2,
786
832
  }).format(durationSeconds);
833
+ const fmt = new Intl.NumberFormat('en-US').format;
787
834
  console.log(chalk.blue(`\nCrawl completed in ${duration}`));
788
- console.log(` Total links found: ${chalk.cyan(crawledLinks.size)}`);
789
- console.log(` Total broken links: ${chalk.cyan(brokenLinks)}`);
790
- console.log(` Total broken link targets: ${chalk.cyan(brokenLinkTargets)}`);
791
- console.log(` Total ignored: ${chalk.cyan(ignoredCount)}`);
835
+ console.log(` Total links found: ${chalk.cyan(fmt(crawledLinks.size))}`);
836
+ console.log(` Total broken links: ${chalk.cyan(fmt(brokenLinks))}`);
837
+ console.log(` Total broken link targets: ${chalk.cyan(fmt(brokenLinkTargets))}`);
838
+ console.log(` Total ignored: ${chalk.cyan(fmt(ignoredCount))}`);
839
+ if (options.htmlValidate.length > 0) {
840
+ const pagesWithHtmlIssues = new Set(htmlValidateIssues.map((issue) => issue.pageUrl)).size;
841
+ console.log(
842
+ ` HTML validation issues: ${chalk.cyan(fmt(htmlValidateIssues.length))} across ${chalk.cyan(fmt(pagesWithHtmlIssues))} ${pagesWithHtmlIssues === 1 ? 'page' : 'pages'}`,
843
+ );
844
+ }
792
845
 
793
846
  if (options.outPath) {
794
- console.log(chalk.blue(`Output written to: ${options.outPath}`));
847
+ console.log(chalk.blue(`Output written to: ${pathToFileURL(options.outPath)}`));
795
848
  }
796
849
 
797
850
  return { links: crawledLinks, pages: results, issues };
@@ -2,12 +2,18 @@ import path from 'node:path';
2
2
  import getPort from 'get-port';
3
3
  import { describe, expect, it } from 'vitest';
4
4
 
5
- // eslint-disable-next-line import/extensions
6
- import { crawl, Issue, Link } from './index.mjs';
7
-
8
- type ExpectedIssue = Omit<Partial<Issue>, 'link'> & { link?: Partial<Link> };
9
-
10
- function objectMatchingIssue(expectedIssue: ExpectedIssue) {
5
+ import {
6
+ crawl,
7
+ type BrokenLinkIssue,
8
+ type HtmlValidateIssue,
9
+ type Issue,
10
+ type Link,
11
+ // eslint-disable-next-line import/extensions
12
+ } from './index.mjs';
13
+
14
+ type ExpectedBrokenLinkIssue = Omit<Partial<BrokenLinkIssue>, 'link'> & { link?: Partial<Link> };
15
+
16
+ function objectMatchingIssue(expectedIssue: ExpectedBrokenLinkIssue) {
11
17
  return expect.objectContaining({
12
18
  ...expectedIssue,
13
19
  ...(expectedIssue.link ? { link: expect.objectContaining(expectedIssue.link) } : {}),
@@ -15,16 +21,16 @@ function objectMatchingIssue(expectedIssue: ExpectedIssue) {
15
21
  }
16
22
 
17
23
  /**
18
- * Helper to assert that an issue with matching properties exists in the issues array
24
+ * Helper to assert that a broken link issue with matching properties exists in the issues array
19
25
  */
20
- function expectIssue(issues: Issue[], expectedIssue: ExpectedIssue) {
26
+ function expectIssue(issues: Issue[], expectedIssue: ExpectedBrokenLinkIssue) {
21
27
  expect(issues).toEqual(expect.arrayContaining([objectMatchingIssue(expectedIssue)]));
22
28
  }
23
29
 
24
30
  /**
25
- * Helper to assert that no issue with matching properties exists in the issues array
31
+ * Helper to assert that no broken link issue with matching properties exists in the issues array
26
32
  */
27
- function expectNotIssue(issues: Issue[], notExpectedIssue: ExpectedIssue) {
33
+ function expectNotIssue(issues: Issue[], notExpectedIssue: ExpectedBrokenLinkIssue) {
28
34
  expect(issues).not.toEqual(expect.arrayContaining([objectMatchingIssue(notExpectedIssue)]));
29
35
  }
30
36
 
@@ -56,12 +62,30 @@ describe('Broken Links Checker', () => {
56
62
  // Test href-only rule (matches from any page) - note: matches the actual href value
57
63
  { href: 'broken-relative.html' },
58
64
  ],
65
+ // Exercise the array form with union semantics: every matching entry
66
+ // contributes to the page's config. The baseline entry (no `path`)
67
+ // turns off `no-dup-id` everywhere; the path-specific entry turns off
68
+ // `no-raw-characters` only on /invalid-html.html. Both rules are
69
+ // silenced on that page because the configs are merged, not replaced.
70
+ //
71
+ // This also guards against the path-specific entry clobbering the
72
+ // baseline: the path entry only names `no-raw-characters`, so it must
73
+ // not re-introduce the recommended ruleset and re-enable the
74
+ // `no-dup-id` that the baseline silenced (which /invalid-html.html
75
+ // violates). If it did, that page would report `no-dup-id` below.
76
+ htmlValidate: [
77
+ { config: { rules: { 'no-dup-id': 'off' } } },
78
+ { path: '/invalid-html.html', config: { rules: { 'no-raw-characters': 'off' } } },
79
+ ],
59
80
  });
60
81
 
61
- expect(result.links).toHaveLength(66);
62
- // Issue count: original 11, minus ignored ones (broken-from-markdown via contentType,
82
+ expect(result.links).toHaveLength(67);
83
+ // Broken link issue count: original 11, minus ignored ones (broken-from-markdown via contentType,
63
84
  // broken-relative via href-only rule)
64
- expect(result.issues).toHaveLength(9);
85
+ const brokenLinkIssues = result.issues.filter(
86
+ (issue) => issue.type === 'broken-link' || issue.type === 'broken-target',
87
+ );
88
+ expect(brokenLinkIssues).toHaveLength(9);
65
89
 
66
90
  // Test ignores: these broken links should be ignored (not in issues)
67
91
  expectNotIssue(result.issues, {
@@ -257,5 +281,18 @@ describe('Broken Links Checker', () => {
257
281
  // Test contentType is stored on pageData
258
282
  expect(result.pages.get('/example.md')?.contentType).toBe('text/markdown');
259
283
  expect(result.pages.get('/')?.contentType).toBe('text/html');
284
+
285
+ // Test htmlValidate union semantics: invalid-html.html has both a duplicate
286
+ // ID (no-dup-id) and a raw `&` (no-raw-characters). The baseline entry
287
+ // silences no-dup-id; the path-specific entry silences no-raw-characters.
288
+ // Under union semantics both apply, so the page reports zero issues — and
289
+ // the path-specific entry must not clobber the baseline's no-dup-id.
290
+ const htmlValidateIssues = result.issues.filter(
291
+ (issue): issue is HtmlValidateIssue => issue.type === 'html-validate',
292
+ );
293
+ const invalidHtmlIssues = htmlValidateIssues.filter(
294
+ (issue) => issue.pageUrl === '/invalid-html.html',
295
+ );
296
+ expect(invalidHtmlIssues).toEqual([]);
260
297
  }, 30000);
261
298
  });
@@ -84,15 +84,15 @@ describe('categorizeCommits', () => {
84
84
  labels: {
85
85
  ...baseLabelConfig,
86
86
  categoryOverrides: {
87
- 'all components': 'General changes',
87
+ 'scope: all components': 'General changes',
88
88
  },
89
89
  },
90
90
  };
91
91
 
92
92
  const commits = [
93
- createCommit(1, ['component: Button', 'all components']),
93
+ createCommit(1, ['component: Button', 'scope: all components']),
94
94
  createCommit(2, ['component: Checkbox']),
95
- createCommit(3, ['component: Button', 'all components']),
95
+ createCommit(3, ['component: Button', 'scope: all components']),
96
96
  ];
97
97
 
98
98
  const result = categorizeCommits(commits, configWithOverrides);
@@ -229,13 +229,13 @@ describe('categorizeCommits', () => {
229
229
  labels: {
230
230
  ...baseLabelConfig,
231
231
  categoryOverrides: {
232
- 'all packages': 'General changes',
232
+ 'scope: all components': 'General changes',
233
233
  },
234
234
  },
235
235
  };
236
236
 
237
237
  const commits = [
238
- createCommit(1, ['scope: data grid', 'all packages']),
238
+ createCommit(1, ['scope: data grid', 'scope: all components']),
239
239
  createCommit(2, ['scope: charts']),
240
240
  ];
241
241
 
@@ -93,8 +93,12 @@ async function fetchCommitsRest({ octokit, repo, lastRelease, release, org = 'mu
93
93
  }
94
94
 
95
95
  const promises = results.map(async (commit) => {
96
- const prMatch = commit.commit.message.match(/#(\d+)/);
97
- if (prMatch === null) {
96
+ const matches = [...commit.commit.message.matchAll(/#(\d+)/g)];
97
+ // The PR number is always the last match.
98
+ // Sometimes the PR titles include an issue number like this:
99
+ // [tag] PR title (#00001) (#00002)
100
+ const prMatch = matches.at(-1);
101
+ if (!prMatch) {
98
102
  return null;
99
103
  }
100
104