@mui/internal-code-infra 0.0.4-canary.37 → 0.0.4-canary.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -54,11 +54,20 @@ export type CrawlOptions = {
|
|
|
54
54
|
concurrency?: number;
|
|
55
55
|
seedUrls?: string[];
|
|
56
56
|
ignores?: IgnoreRule[];
|
|
57
|
-
htmlValidate?:
|
|
57
|
+
htmlValidate?: HtmlValidateOption;
|
|
58
|
+
};
|
|
59
|
+
export type HtmlValidateOverride = {
|
|
60
|
+
path?: (string | RegExp) | (string | RegExp)[];
|
|
61
|
+
config: true | import('html-validate').ConfigData;
|
|
62
|
+
};
|
|
63
|
+
export type HtmlValidateOption = boolean | import('html-validate').ConfigData | HtmlValidateOverride[];
|
|
64
|
+
export type ResolvedHtmlValidateEntry = {
|
|
65
|
+
path: (string | RegExp)[] | undefined;
|
|
66
|
+
config: import('html-validate').ConfigData;
|
|
58
67
|
};
|
|
59
68
|
export type ResolvedCrawlOptions = Omit<Required<CrawlOptions>, 'ignores' | 'htmlValidate'> & {
|
|
60
69
|
ignores: NormalizedIgnoreRule[];
|
|
61
|
-
htmlValidate:
|
|
70
|
+
htmlValidate: ResolvedHtmlValidateEntry[];
|
|
62
71
|
};
|
|
63
72
|
export type BrokenLinkIssue = {
|
|
64
73
|
type: 'broken-link' | 'broken-target';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mui/internal-code-infra",
|
|
3
|
-
"version": "0.0.4-canary.37",
|
|
3
|
+
"version": "0.0.4-canary.39",
|
|
4
4
|
"author": "MUI Team",
|
|
5
5
|
"description": "Infra scripts and configs to be used across MUI repos.",
|
|
6
6
|
"license": "MIT",
|
|
@@ -168,7 +168,7 @@
|
|
|
168
168
|
"publishConfig": {
|
|
169
169
|
"access": "public"
|
|
170
170
|
},
|
|
171
|
-
"gitSha": "
|
|
171
|
+
"gitSha": "f49ed5478f1a68caa26733b9f33e8d9f8a1a5e6a",
|
|
172
172
|
"scripts": {
|
|
173
173
|
"build": "tsgo -p tsconfig.build.json",
|
|
174
174
|
"typescript": "tsgo -noEmit",
|
|
@@ -12,6 +12,23 @@ import rehypeStringify from 'rehype-stringify';
|
|
|
12
12
|
/** @type {import('./index.mjs').CrawlWorkerInput} */
|
|
13
13
|
const { pageUrl, options } = workerData;
|
|
14
14
|
|
|
15
|
+
/**
|
|
16
|
+
* Tests if a value matches any of the patterns in the array.
|
|
17
|
+
* Returns true if patterns is undefined/empty (wildcard behavior).
|
|
18
|
+
* Strings use exact match, RegExp uses .test().
|
|
19
|
+
* @param {string} value
|
|
20
|
+
* @param {(string | RegExp)[] | undefined} patterns
|
|
21
|
+
* @returns {boolean}
|
|
22
|
+
*/
|
|
23
|
+
function matchesAnyPattern(value, patterns) {
|
|
24
|
+
if (!patterns || patterns.length === 0) {
|
|
25
|
+
return true;
|
|
26
|
+
}
|
|
27
|
+
return patterns.some((pattern) =>
|
|
28
|
+
typeof pattern === 'string' ? value === pattern : pattern.test(value),
|
|
29
|
+
);
|
|
30
|
+
}
|
|
31
|
+
|
|
15
32
|
/**
|
|
16
33
|
* Posts the crawl result back to the parent thread.
|
|
17
34
|
* @param {import('./index.mjs').CrawlWorkerOutput} output
|
|
@@ -143,28 +160,38 @@ if (pageData.status < 200 || pageData.status >= 400) {
|
|
|
143
160
|
contentType: type,
|
|
144
161
|
}));
|
|
145
162
|
|
|
146
|
-
// HTML validation
|
|
163
|
+
// HTML validation. Walk every entry and remember the last one whose path
|
|
164
|
+
// matches the current page — last match wins, so callers can layer
|
|
165
|
+
// specific overrides after a default entry.
|
|
147
166
|
/** @type {{ pageUrl: string, results: import('html-validate').Result[] } | null} */
|
|
148
167
|
let htmlValidateResults = null;
|
|
149
|
-
if (
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
168
|
+
if (type === 'text/html' && options.htmlValidate.length > 0) {
|
|
169
|
+
/** @type {import('./index.mjs').ResolvedHtmlValidateEntry | null} */
|
|
170
|
+
let matchedEntry = null;
|
|
171
|
+
for (const entry of options.htmlValidate) {
|
|
172
|
+
if (matchesAnyPattern(pageUrl, entry.path)) {
|
|
173
|
+
matchedEntry = entry;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if (matchedEntry) {
|
|
178
|
+
const muiHtmlValidateResolver = staticResolver({
|
|
179
|
+
configs: {
|
|
180
|
+
'mui:recommended': {
|
|
181
|
+
extends: ['html-validate:standard', 'html-validate:document', 'html-validate:browser'],
|
|
182
|
+
rules: {
|
|
183
|
+
// TODO: Enable when subresource integrity is adopted across projects
|
|
184
|
+
'require-sri': 'off',
|
|
185
|
+
},
|
|
157
186
|
},
|
|
158
187
|
},
|
|
159
|
-
}
|
|
160
|
-
});
|
|
188
|
+
});
|
|
161
189
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
190
|
+
const htmlValidator = new HtmlValidate(
|
|
191
|
+
new StaticConfigLoader([muiHtmlValidateResolver], matchedEntry.config),
|
|
192
|
+
);
|
|
165
193
|
|
|
166
|
-
|
|
167
|
-
if (!report.valid) {
|
|
194
|
+
const report = await htmlValidator.validateString(rawContent, pageUrl);
|
|
168
195
|
htmlValidateResults = { pageUrl, results: report.results };
|
|
169
196
|
}
|
|
170
197
|
}
|
|
@@ -351,12 +351,29 @@ function shouldIgnoreLink(link, ignores) {
|
|
|
351
351
|
* @property {number} [concurrency] - Number of concurrent page fetches (defaults to 4)
|
|
352
352
|
* @property {string[]} [seedUrls] - Starting URLs for the crawl (defaults to ['/'])
|
|
353
353
|
* @property {IgnoreRule[]} [ignores] - Rules to ignore broken links. Each rule can have path, href, contentType, and/or has properties. All specified properties must match (AND logic). Within a property, multiple values use OR logic.
|
|
354
|
-
* @property {
|
|
354
|
+
* @property {HtmlValidateOption} [htmlValidate] - Enable HTML validation on crawled pages. `false` (default): disabled. `true`: validate with recommended rules. Object: use as html-validate config — `extends` defaults to `['mui:recommended']` when omitted, so most callers only need to set `rules`. Array: per-path config overrides — entries are walked in order and the **last** entry whose `path` matches the page URL wins; an entry without `path` matches every page (use as a default and put more specific overrides after it). If no entry matches, the page is not validated.
|
|
355
|
+
*/
|
|
356
|
+
|
|
357
|
+
/**
|
|
358
|
+
* Per-page HTML validation override entry.
|
|
359
|
+
* @typedef {Object} HtmlValidateOverride
|
|
360
|
+
* @property {(string | RegExp) | (string | RegExp)[]} [path] - Pattern(s) to match the page URL. Strings use exact match. Omit to match every page.
|
|
361
|
+
* @property {true | import('html-validate').ConfigData} config - html-validate config (or `true` for `mui:recommended`).
|
|
362
|
+
*/
|
|
363
|
+
|
|
364
|
+
/**
|
|
365
|
+
* Public shape of the htmlValidate option.
|
|
366
|
+
* @typedef {boolean | import('html-validate').ConfigData | HtmlValidateOverride[]} HtmlValidateOption
|
|
367
|
+
*/
|
|
368
|
+
|
|
369
|
+
/**
|
|
370
|
+
* Resolved per-page HTML validation entry. Empty array means validation is disabled.
|
|
371
|
+
* @typedef {{ path: (string | RegExp)[] | undefined, config: import('html-validate').ConfigData }} ResolvedHtmlValidateEntry
|
|
355
372
|
*/
|
|
356
373
|
|
|
357
374
|
/**
|
|
358
375
|
* Fully resolved configuration with all optional fields filled with defaults.
|
|
359
|
-
* @typedef {Omit<Required<CrawlOptions>, 'ignores' | 'htmlValidate'> & { ignores: NormalizedIgnoreRule[], htmlValidate:
|
|
376
|
+
* @typedef {Omit<Required<CrawlOptions>, 'ignores' | 'htmlValidate'> & { ignores: NormalizedIgnoreRule[], htmlValidate: ResolvedHtmlValidateEntry[] }} ResolvedCrawlOptions
|
|
360
377
|
*/
|
|
361
378
|
|
|
362
379
|
/**
|
|
@@ -373,18 +390,37 @@ function validateIgnoreRule(rule) {
|
|
|
373
390
|
}
|
|
374
391
|
|
|
375
392
|
/**
|
|
376
|
-
*
|
|
377
|
-
*
|
|
378
|
-
*
|
|
393
|
+
* Normalizes a single config value to a non-null html-validate config object.
|
|
394
|
+
* Defaults `extends` to `['mui:recommended']` when the caller did not provide
|
|
395
|
+
* one, so overrides typically only need to specify the `rules` they want to
|
|
396
|
+
* change. To opt out of the default, pass `extends: []` explicitly.
|
|
397
|
+
* @param {true | import('html-validate').ConfigData} config
|
|
398
|
+
* @returns {import('html-validate').ConfigData}
|
|
399
|
+
*/
|
|
400
|
+
function normalizeHtmlValidateConfig(config) {
|
|
401
|
+
if (config === true) {
|
|
402
|
+
return { extends: ['mui:recommended'] };
|
|
403
|
+
}
|
|
404
|
+
return { extends: ['mui:recommended'], ...config };
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
/**
|
|
408
|
+
* Resolves the htmlValidate option into an array of per-page entries.
|
|
409
|
+
* An empty array means validation is disabled.
|
|
410
|
+
* @param {HtmlValidateOption | undefined} option
|
|
411
|
+
* @returns {ResolvedHtmlValidateEntry[]}
|
|
379
412
|
*/
|
|
380
413
|
function resolveHtmlValidateConfig(option) {
|
|
381
414
|
if (!option) {
|
|
382
|
-
return
|
|
415
|
+
return [];
|
|
383
416
|
}
|
|
384
|
-
if (option === true) {
|
|
385
|
-
return {
|
|
417
|
+
if (option === true || !Array.isArray(option)) {
|
|
418
|
+
return [{ path: undefined, config: normalizeHtmlValidateConfig(option) }];
|
|
386
419
|
}
|
|
387
|
-
return option
|
|
420
|
+
return option.map((entry) => ({
|
|
421
|
+
path: normalizeToArray(entry.path),
|
|
422
|
+
config: normalizeHtmlValidateConfig(entry.config),
|
|
423
|
+
}));
|
|
388
424
|
}
|
|
389
425
|
|
|
390
426
|
/**
|
|
@@ -796,7 +832,7 @@ export async function crawl(rawOptions) {
|
|
|
796
832
|
console.log(` Total broken links: ${chalk.cyan(fmt(brokenLinks))}`);
|
|
797
833
|
console.log(` Total broken link targets: ${chalk.cyan(fmt(brokenLinkTargets))}`);
|
|
798
834
|
console.log(` Total ignored: ${chalk.cyan(fmt(ignoredCount))}`);
|
|
799
|
-
if (options.htmlValidate) {
|
|
835
|
+
if (options.htmlValidate.length > 0) {
|
|
800
836
|
const pagesWithHtmlIssues = new Set(htmlValidateIssues.map((issue) => issue.pageUrl)).size;
|
|
801
837
|
console.log(
|
|
802
838
|
` HTML validation issues: ${chalk.cyan(fmt(htmlValidateIssues.length))} across ${chalk.cyan(fmt(pagesWithHtmlIssues))} ${pagesWithHtmlIssues === 1 ? 'page' : 'pages'}`,
|
|
@@ -62,12 +62,20 @@ describe('Broken Links Checker', () => {
|
|
|
62
62
|
// Test href-only rule (matches from any page) - note: matches the actual href value
|
|
63
63
|
{ href: 'broken-relative.html' },
|
|
64
64
|
],
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
65
|
+
// Exercise the array form. Three entries all match /invalid-html.html;
|
|
66
|
+
// the last one wins, so its rules apply (no-dup-id ON, no-raw-characters
|
|
67
|
+
// OFF). The middle entry's `no-dup-id: off` is shadowed by the regex
|
|
68
|
+
// entry below it, demonstrating last-match-wins. The default entry
|
|
69
|
+
// applies to every other page (markdown, etc.) since the regex only
|
|
70
|
+
// matches `.html`.
|
|
71
|
+
htmlValidate: [
|
|
72
|
+
{ config: { rules: { 'no-raw-characters': 'off' } } },
|
|
73
|
+
{
|
|
74
|
+
path: '/invalid-html.html',
|
|
75
|
+
config: { rules: { 'no-dup-id': 'off', 'no-raw-characters': 'off' } },
|
|
69
76
|
},
|
|
70
|
-
|
|
77
|
+
{ path: /\.html$/, config: { rules: { 'no-raw-characters': 'off' } } },
|
|
78
|
+
],
|
|
71
79
|
});
|
|
72
80
|
|
|
73
81
|
expect(result.links).toHaveLength(67);
|