@pseolint/core 0.3.2 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -1
- package/dist/ai/triage.d.ts.map +1 -1
- package/dist/ai/triage.js +8 -1
- package/dist/ai/triage.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +566 -136
- package/dist/auditor.js.map +1 -1
- package/dist/backpressure.d.ts +68 -0
- package/dist/backpressure.d.ts.map +1 -0
- package/dist/backpressure.js +81 -0
- package/dist/backpressure.js.map +1 -0
- package/dist/cache.d.ts +73 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +258 -19
- package/dist/cache.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +1 -14
- package/dist/enrich-findings.js.map +1 -1
- package/dist/fetch-observer.d.ts +97 -0
- package/dist/fetch-observer.d.ts.map +1 -0
- package/dist/fetch-observer.js +124 -0
- package/dist/fetch-observer.js.map +1 -0
- package/dist/formatters/console.d.ts +7 -9
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +218 -254
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts +5 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +352 -570
- package/dist/formatters/html.js.map +1 -1
- package/dist/formatters/index.d.ts +4 -1
- package/dist/formatters/index.d.ts.map +1 -1
- package/dist/formatters/index.js +1 -1
- package/dist/formatters/index.js.map +1 -1
- package/dist/formatters/json.d.ts +11 -1
- package/dist/formatters/json.d.ts.map +1 -1
- package/dist/formatters/json.js +5 -1
- package/dist/formatters/json.js.map +1 -1
- package/dist/formatters/markdown.d.ts +7 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +77 -70
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/index.d.ts +13 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -7
- package/dist/index.js.map +1 -1
- package/dist/page-filter.d.ts +50 -0
- package/dist/page-filter.d.ts.map +1 -0
- package/dist/page-filter.js +86 -0
- package/dist/page-filter.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +0 -6
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +1 -0
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +6 -14
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.d.ts +9 -1
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.js +14 -5
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -1
- package/dist/safe-mode-preset.d.ts +27 -0
- package/dist/safe-mode-preset.d.ts.map +1 -0
- package/dist/safe-mode-preset.js +54 -0
- package/dist/safe-mode-preset.js.map +1 -0
- package/dist/site-classifier.d.ts +83 -0
- package/dist/site-classifier.d.ts.map +1 -0
- package/dist/site-classifier.js +205 -0
- package/dist/site-classifier.js.map +1 -0
- package/dist/ssrf-guard.d.ts +96 -0
- package/dist/ssrf-guard.d.ts.map +1 -0
- package/dist/ssrf-guard.js +268 -0
- package/dist/ssrf-guard.js.map +1 -0
- package/dist/types.d.ts +202 -19
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +2 -1
- package/dist/types.js.map +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Page-skip detection helpers used by the auditor pipeline (v0.4.1).
|
|
3
|
+
*
|
|
4
|
+
* Two policy filters live here, each gated by an AuditOption:
|
|
5
|
+
* - `detectNoindex(page)` honours `<meta name="robots" content="noindex">`
|
|
6
|
+
* and `X-Robots-Tag: noindex` HTTP headers. The site owner already told
|
|
7
|
+
* Google not to index these pages — auditing them produces noise the
|
|
8
|
+
* reader can't act on.
|
|
9
|
+
* - `detectAuthPage(page)` heuristically classifies pages as
|
|
10
|
+
* login / signup / password-reset based on three signals (password input
|
|
11
|
+
* in a thin body, title matches the auth regex, H1 matches the auth
|
|
12
|
+
* regex). Two signals are required for a positive verdict, which keeps
|
|
13
|
+
* the false-positive rate low: a marketing landing page with a single
|
|
14
|
+
* password input or a single auth-shaped heading won't trip it.
|
|
15
|
+
*
|
|
16
|
+
* Both functions are pure and synchronous so they can be called inside the
|
|
17
|
+
* `parsedPages` map step without disrupting the existing pipeline shape.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Directive delimiter for robots values: whitespace, commas, and the colon
 * that separates a bot name from its directive (`googlebot: none`).
 */
const ROBOTS_DIRECTIVE_DELIMITER = /[\s,:]+/;
/**
 * Returns true when the page is explicitly excluded from indexing, via either
 * the parsed `<meta name="robots">` content (`page.robotsMeta`) or the
 * `X-Robots-Tag` HTTP response header (`page.httpMeta.xRobotsTag`).
 *
 * Two directives count as "do not index":
 *  - `noindex` — matched case-insensitively as a substring, so combined or
 *    bot-scoped values such as `"noindex, nofollow"`, `"index, noindex"` or
 *    `"googlebot: noindex"` are all recognised.
 *  - `none` — the shorthand for `noindex, nofollow`. It is matched as a whole
 *    directive token, never as a substring, so unrelated text that merely
 *    contains the letters "none" does not trigger a skip.
 *
 * Missing values (`null` / `undefined`, including a missing `httpMeta`) are
 * treated as empty strings, so a page carrying neither signal returns false.
 *
 * @param page Parsed page; only `robotsMeta` and `httpMeta.xRobotsTag` are read.
 * @returns `true` if either source carries a noindex-equivalent directive.
 */
export function detectNoindex(page) {
    const sources = [page.robotsMeta ?? "", page.httpMeta?.xRobotsTag ?? ""];
    return sources.some((raw) => {
        const value = raw.toLowerCase();
        // Substring match keeps the original tolerance for combined directives.
        if (value.includes("noindex")) {
            return true;
        }
        // `none` must be a standalone token to avoid accidental substring hits.
        return value.split(ROBOTS_DIRECTIVE_DELIMITER).includes("none");
    });
}
|
|
31
|
+
/**
 * Auth-shaped heading pattern. It is tested against the page title (after the
 * brand suffix has been stripped) and against the first H1.
 *
 * Case-insensitive and anchored at both ends, so the WHOLE string must be one
 * of the following phrases:
 *  - "sign in", "sign up", "sign out", "log in", "log out" — the two words may
 *    be separated by one hyphen or whitespace character, or written together
 *    ("Sign-In", "signin", "Log-Out");
 *  - "register";
 *  - "forgot password", "reset password", "account recovery" — exactly one
 *    hyphen or whitespace character between the words is required;
 *  - "create account" / "create an account";
 *  - "verify email" / "verify your email";
 *  - "two factor" / "two-factor" / "twofactor".
 *
 * Longer phrases such as "Sign in to your account" do not match because of
 * the anchors.
 */
const AUTH_TITLE_REGEX = /^(sign[-\s]?in|sign[-\s]?up|sign[-\s]?out|log[-\s]?in|log[-\s]?out|register|forgot[-\s]password|reset[-\s]password|create[-\s](?:an[-\s])?account|verify[-\s](?:your[-\s])?email|two[-\s]?factor|account[-\s]recovery)$/i;
|
|
38
|
+
/**
 * Separators that commonly sit between a page's own title and the site or
 * brand name, e.g. `Sign in | MyApp`, `Sign in - MyApp`, `Sign in · MyApp`.
 *
 * Every entry is space-padded so punctuation inside a word or number
 * (`Sign-in`, `10:30`) is never mistaken for a separator. The list covers all
 * three dash variants — hyphen, en dash and em dash — because titles use them
 * interchangeably.
 */
const BRAND_SEPARATORS = [" | ", " - ", " – ", " — ", " · ", " : ", " :: "];
/**
 * Removes the brand suffix from a page title.
 *
 * The title is cut at the EARLIEST occurrence of any separator in
 * `BRAND_SEPARATORS` and everything before it is returned, trimmed. Without
 * this step an anchored title pattern would never match, because the title
 * would be `Sign in | MyApp` rather than just `Sign in`.
 *
 * @param title Raw page title; an empty string yields an empty string.
 * @returns The trimmed text before the first separator, or the whole trimmed
 *          title when no separator is present.
 */
function stripBrandSuffix(title) {
    const hits = BRAND_SEPARATORS
        .map((sep) => title.indexOf(sep))
        .filter((idx) => idx >= 0);
    // No separator found: keep the full title.
    const cut = hits.length > 0 ? Math.min(...hits) : title.length;
    return title.slice(0, cut).trim();
}
|
|
55
|
+
/**
 * Heuristically decides whether a page is a login / signup / password-reset
 * screen.
 *
 * Three independent signals are evaluated, always in this order:
 *  - `"password-input"` — the raw HTML contains an `<input type="password">`
 *    AND the page is thin (fewer than 200 whitespace-separated words of
 *    content text and fewer than 3 H2 headings);
 *  - `"auth-title"` — the title, with its brand suffix stripped, matches
 *    `AUTH_TITLE_REGEX`;
 *  - `"auth-h1"` — the first H1, trimmed, matches `AUTH_TITLE_REGEX`.
 *
 * @param page Parsed page; reads `contentText`, `headings.h1`, `headings.h2`,
 *             `html` and `title`. Missing fields count as empty.
 * @returns `{ isAuth, signals }` where `signals` lists every signal that
 *          fired and `isAuth` is true only when at least two did. Pages with
 *          a single signal still report it, for diagnostics.
 */
export function detectAuthPage(page) {
    const headings = page.headings;
    // Thin-body gate for the password signal.
    const bodyWords = (page.contentText ?? "").split(/\s+/).filter(Boolean);
    const isThinBody = bodyWords.length < 200 && (headings?.h2 ?? []).length < 3;
    const passwordFieldPresent = /<input\b[^>]*\btype\s*=\s*["']?password["']?/i.test(page.html ?? "");
    const titleCore = stripBrandSuffix(page.title ?? "");
    const leadH1 = (headings?.h1 ?? [])[0]?.trim() ?? "";
    // Ordered [label, fired] pairs; the order defines the order of `signals`.
    const checks = [
        ["password-input", passwordFieldPresent && isThinBody],
        ["auth-title", Boolean(titleCore) && AUTH_TITLE_REGEX.test(titleCore)],
        ["auth-h1", Boolean(leadH1) && AUTH_TITLE_REGEX.test(leadH1)],
    ];
    const signals = checks.filter(([, fired]) => fired).map(([label]) => label);
    return { isAuth: signals.length >= 2, signals };
}
|
|
79
|
+
/**
 * Applies the enabled skip policies to a page and reports the first that hits.
 *
 * The noindex policy is checked before the auth-page policy, and each
 * detector runs only when its option is truthy.
 *
 * @param page Parsed page, passed through to the detectors.
 * @param options Reads `respectNoindex` and `skipDetectedAuth`.
 * @returns `"noindex"`, `"auth-detected"`, or `null` when the page should be
 *          audited.
 */
export function pageSkipReason(page, options) {
    const skipAsNoindex = Boolean(options.respectNoindex) && detectNoindex(page);
    if (skipAsNoindex) {
        return "noindex";
    }
    const skipAsAuth = Boolean(options.skipDetectedAuth) && detectAuthPage(page).isAuth;
    return skipAsAuth ? "auth-detected" : null;
}
|
|
86
|
+
//# sourceMappingURL=page-filter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"page-filter.js","sourceRoot":"","sources":["../src/page-filter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAIH;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,IAAgB;IAC5C,MAAM,UAAU,GAAG,CAAC,IAAI,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACzD,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAChE,OAAO,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;AACvE,CAAC;AAED;;;;;GAKG;AACH,MAAM,gBAAgB,GACpB,0NAA0N,CAAC;AAE7N;;;;;;GAMG;AACH,MAAM,gBAAgB,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC;AAErE,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,GAAG,GAAG;YAAE,GAAG,GAAG,GAAG,CAAC;IACvC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AACpC,CAAC;AAOD;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,IAAgB;IAC7C,MAAM,OAAO,GAAmC,EAAE,CAAC;IAEnD,MAAM,SAAS,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC/E,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACjD,MAAM,gBAAgB,GAAG,+CAA+C,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IAC/F,IAAI,gBAAgB,IAAI,SAAS,GAAG,GAAG,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QACvD,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IACjC,CAAC;IAED,MAAM,YAAY,GAAG,gBAAgB,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;IACxD,IAAI,YAAY,IAAI,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;QACxD,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC7B,CAAC;IAED,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAC3D,IAAI,OAAO,IAAI,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC1B,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC;AAClD,CAAC;AAaD,MAAM,UAAU,cAAc,CAC5B,IAAgB,EAChB,OAAwB;IAExB,IAAI,OAAO,CAAC,cAAc,IAAI,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO
,SAAS,CAAC;IACpE,IAAI,OAAO,CAAC,gBAAgB,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC,MAAM;QAAE,OAAO,eAAe,CAAC;IACpF,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rule-references.d.ts","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"rule-references.d.ts","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAuClD,CAAC"}
|
package/dist/rule-references.js
CHANGED
|
@@ -9,13 +9,11 @@ export const RULE_REFERENCES = {
|
|
|
9
9
|
"spam/template-coverage": "https://developers.google.com/search/docs/essentials/spam-policies#doorway-pages",
|
|
10
10
|
"content/unique-value": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
|
|
11
11
|
"content/meta-uniqueness": "https://developers.google.com/search/docs/appearance/snippet#meta-descriptions",
|
|
12
|
-
"content/heading-uniqueness": "https://developers.google.com/search/docs/appearance/snippet#headings",
|
|
13
12
|
"content/missing-author": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content#eeat",
|
|
14
13
|
"content/eeat-signals": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content#eeat",
|
|
15
14
|
"links/orphan-pages": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
|
|
16
15
|
"links/dead-ends": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
|
|
17
16
|
"links/cluster-connectivity": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
|
|
18
|
-
"links/hub-pages": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
|
|
19
17
|
"links/link-depth": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
|
|
20
18
|
"tech/robots-compliance": "https://developers.google.com/search/docs/crawling-indexing/robots/intro",
|
|
21
19
|
"tech/canonical-consistency": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
|
|
@@ -24,13 +22,10 @@ export const RULE_REFERENCES = {
|
|
|
24
22
|
"tech/sitemap-completeness": "https://developers.google.com/search/docs/crawling-indexing/sitemaps/overview",
|
|
25
23
|
"tech/redirect-chain": "https://developers.google.com/search/docs/crawling-indexing/301-redirects",
|
|
26
24
|
"tech/soft-404": "https://developers.google.com/search/docs/crawling-indexing/soft-404-errors",
|
|
27
|
-
"tech/og-completeness": "https://developers.google.com/search/docs/appearance/snippet",
|
|
28
25
|
"tech/hreflang-consistency": "https://developers.google.com/search/docs/specialty/international/managing-multi-regional-sites",
|
|
29
26
|
"schema/json-ld-valid": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
|
|
30
27
|
"schema/required-fields": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
|
|
31
28
|
"schema/consistency": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
|
|
32
|
-
"cannibal/title-overlap": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
|
|
33
|
-
"cannibal/keyword-collision": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
|
|
34
29
|
"cannibal/url-pattern": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
|
|
35
30
|
"data/missing-binding": "https://developers.google.com/search/docs/essentials/spam-policies#thin-content",
|
|
36
31
|
"data/identical-across-pages": "https://developers.google.com/search/docs/essentials/spam-policies#scraped-content",
|
|
@@ -40,7 +35,6 @@ export const RULE_REFERENCES = {
|
|
|
40
35
|
"aeo/faq-coverage": "https://developers.google.com/search/docs/appearance/structured-data/faqpage",
|
|
41
36
|
"aeo/answer-first": "https://developers.google.com/search/docs/appearance/featured-snippets",
|
|
42
37
|
"aeo/citable-facts": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
|
|
43
|
-
"aeo/non-replicable-value": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
|
|
44
38
|
"aeo/content-modularity": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
|
|
45
39
|
"aeo/summary-bait": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
|
|
46
40
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rule-references.js","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,qBAAqB,EAAE,oFAAoF;IAC3G,kBAAkB,EAAE,kFAAkF;IACtG,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,iFAAiF;IAC3G,yBAAyB,EAAE,kFAAkF;IAC7G,2BAA2B,EAAE,kGAAkG;IAC/H,sBAAsB,EAAE,kFAAkF;IAC1G,wBAAwB,EAAE,kFAAkF;IAC5G,sBAAsB,EAAE,iFAAiF;IACzG,yBAAyB,EAAE,gFAAgF;IAC3G,
|
|
1
|
+
{"version":3,"file":"rule-references.js","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,qBAAqB,EAAE,oFAAoF;IAC3G,kBAAkB,EAAE,kFAAkF;IACtG,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,iFAAiF;IAC3G,yBAAyB,EAAE,kFAAkF;IAC7G,2BAA2B,EAAE,kGAAkG;IAC/H,sBAAsB,EAAE,kFAAkF;IAC1G,wBAAwB,EAAE,kFAAkF;IAC5G,sBAAsB,EAAE,iFAAiF;IACzG,yBAAyB,EAAE,gFAAgF;IAC3G,wBAAwB,EAAE,sFAAsF;IAChH,sBAAsB,EAAE,sFAAsF;IAC9G,oBAAoB,EAAE,6EAA6E;IACnG,iBAAiB,EAAE,6EAA6E;IAChG,4BAA4B,EAAE,6EAA6E;IAC3G,kBAAkB,EAAE,6EAA6E;IACjG,wBAAwB,EAAE,0EAA0E;IACpG,4BAA4B,EAAE,wFAAwF;IACtH,iCAAiC,EAAE,wFAAwF;IAC3H,8BAA8B,EAAE,4EAA4E;IAC5G,2BAA2B,EAAE,+EAA+E;IAC5G,qBAAqB,EAAE,2EAA2E;IAClG,eAAe,EAAE,6EAA6E;IAC9F,2BAA2B,EAAE,iGAAiG;IAC9H,sBAAsB,EAAE,4FAA4F;IACpH,wBAAwB,EAAE,4FAA4F;IACtH,oBAAoB,EAAE,4FAA4F;IAClH,sBAAsB,EAAE,wFAAwF;IAChH,sBAAsB,EAAE,iFAAiF;IACzG,6BAA6B,EAAE,oFAAoF;IACnH,cAAc,EAAE,qBAAqB;IACrC,oBAAoB,EAAE,0CAA0C;IAChE,uBAAuB,EAAE,wEAAwE;IACjG,kBAAkB,EAAE,8EAA8E;IAClG,kBAAkB,EAAE,wEAAwE;IAC5F,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,4FAA4F;IACtH,kBAAkB,EAAE,iFAAiF;CACtG,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAM7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,cAAc,EAAE,MAAM,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAM7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,cAAc,EAAE,MAAM,GAAG,UAAU,EAAE,CA2BzF"}
|
|
@@ -17,6 +17,7 @@ export function uniqueValueRule(pages, minUniqueWords) {
|
|
|
17
17
|
ruleId: "content/unique-value",
|
|
18
18
|
severity: "error",
|
|
19
19
|
message: `${page.url} has only ${uniqueCount} unique words (min ${minUniqueWords}).`,
|
|
20
|
+
pageUrl: page.url,
|
|
20
21
|
fix: `Add ${minUniqueWords - uniqueCount} more words of content not found on any other page.`
|
|
21
22
|
});
|
|
22
23
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAEA,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB,EAAE,cAAsB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnE,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;QAC1B,MAAM,WAAW,GAAG,IAAI,GAAG,CACzB,UAAU,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CACvE,CAAC,IAAI,CAAC;QACP,IAAI,WAAW,GAAG,cAAc,EAAE,CAAC;YACjC,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,aAAa,WAAW,sBAAsB,cAAc,IAAI;gBACpF,GAAG,EAAE,OAAO,cAAc,GAAG,WAAW,qDAAqD;aAC9F,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
1
|
+
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAEA,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB,EAAE,cAAsB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnE,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;QAC1B,MAAM,WAAW,GAAG,IAAI,GAAG,CACzB,UAAU,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CACvE,CAAC,IAAI,CAAC;QACP,IAAI,WAAW,GAAG,cAAc,EAAE,CAAC;YACjC,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,aAAa,WAAW,sBAAsB,cAAc,IAAI;gBACpF,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,OAAO,cAAc,GAAG,WAAW,qDAAqD;aAC9F,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CA6DhD,CAAC;AAEF,4GAA4G;AAC5G,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAE3D"}
|
package/dist/rules/scope.js
CHANGED
|
@@ -10,26 +10,22 @@ export const RULE_SCOPE = {
|
|
|
10
10
|
"spam/template-coverage": "corpus",
|
|
11
11
|
// content
|
|
12
12
|
"content/unique-value": "corpus",
|
|
13
|
-
"content/heading-uniqueness": "corpus",
|
|
14
13
|
"content/meta-uniqueness": "corpus",
|
|
15
14
|
"content/missing-author": "page",
|
|
16
15
|
"content/eeat-signals": "page",
|
|
17
|
-
// links
|
|
16
|
+
// links
|
|
18
17
|
"links/orphan-pages": "corpus",
|
|
19
18
|
"links/dead-ends": "corpus",
|
|
20
19
|
"links/cluster-connectivity": "corpus",
|
|
21
|
-
"links/hub-pages": "corpus",
|
|
22
20
|
"links/link-depth": "corpus",
|
|
23
21
|
"links/unreachable-from-root": "corpus",
|
|
24
|
-
|
|
25
|
-
// tech (per-page except sitemap / robots / canonical-consistency which need knownUrls)
|
|
22
|
+
// tech
|
|
26
23
|
"tech/canonical-consistency": "corpus",
|
|
27
24
|
"tech/canonical-noindex-conflict": "page",
|
|
28
25
|
"tech/robots-noindex-conflict": "corpus",
|
|
29
26
|
"tech/sitemap-completeness": "corpus",
|
|
30
27
|
"tech/redirect-chain": "page",
|
|
31
28
|
"tech/soft-404": "page",
|
|
32
|
-
"tech/og-completeness": "page",
|
|
33
29
|
"tech/hreflang-consistency": "corpus",
|
|
34
30
|
"tech/robots-compliance": "corpus",
|
|
35
31
|
"tech/robots-sitemap-presence": "corpus",
|
|
@@ -37,26 +33,22 @@ export const RULE_SCOPE = {
|
|
|
37
33
|
"schema/json-ld-valid": "page",
|
|
38
34
|
"schema/required-fields": "page",
|
|
39
35
|
"schema/consistency": "corpus",
|
|
40
|
-
// cannibal (
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
// cannibal — only url-pattern survives in v0.4 (title-overlap and
|
|
37
|
+
// keyword-collision dropped due to high false-positive rates; see
|
|
38
|
+
// 2026-04-29 v0.4 redesign spec §4.3).
|
|
43
39
|
"cannibal/url-pattern": "corpus",
|
|
44
40
|
// data binding
|
|
45
41
|
"data/missing-binding": "page",
|
|
46
42
|
"data/identical-across-pages": "corpus",
|
|
47
43
|
// audit-internal
|
|
48
44
|
"audit/duplicate-url": "corpus",
|
|
49
|
-
// AEO (Answer Engine Optimization)
|
|
50
|
-
// shipping from a parallel core-engine change. `llms-txt` and `crawler-access`
|
|
51
|
-
// are site-wide (need host config / robots surface); the rest evaluate a
|
|
52
|
-
// single page's content structure and must run in daily diff-audits.
|
|
45
|
+
// AEO (Answer Engine Optimization).
|
|
53
46
|
"aeo/llms-txt": "corpus",
|
|
54
47
|
"aeo/crawler-access": "corpus",
|
|
55
48
|
"aeo/freshness-signals": "page",
|
|
56
49
|
"aeo/faq-coverage": "page",
|
|
57
50
|
"aeo/answer-first": "page",
|
|
58
51
|
"aeo/citable-facts": "page",
|
|
59
|
-
"aeo/non-replicable-value": "page",
|
|
60
52
|
"aeo/content-modularity": "page",
|
|
61
53
|
"aeo/summary-bait": "page",
|
|
62
54
|
};
|
package/dist/rules/scope.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,
|
|
1
|
+
{"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAE9B,QAAQ;IACR,oBAAoB,EAAE,QAAQ;IAC9B,iBAAiB,EAAE,QAAQ;IAC3B,4BAA4B,EAAE,QAAQ;IACtC,kBAAkB,EAAE,QAAQ;IAC5B,6BAA6B,EAAE,QAAQ;IAEvC,OAAO;IACP,4BAA4B,EAAE,QAAQ;IACtC,iCAAiC,EAAE,MAAM;IACzC,8BAA8B,EAAE,QAAQ;IACxC,2BAA2B,EAAE,QAAQ;IACrC,qBAAqB,EAAE,MAAM;IAC7B,eAAe,EAAE,MAAM;IACvB,2BAA2B,EAAE,QAAQ;IACrC,wBAAwB,EAAE,QAAQ;IAClC,8BAA8B,EAAE,QAAQ;IAExC,SAAS;IACT,sBAAsB,EAAE,MAAM;IAC9B,wBAAwB,EAAE,MAAM;IAChC,oBAAoB,EAAE,QAAQ;IAE9B,kEAAkE;IAClE,kEAAkE;IAClE,uCAAuC;IACvC,sBAAsB,EAAE,QAAQ;IAEhC,eAAe;IACf,sBAAsB,EAAE,MAAM;IAC9B,6BAA6B,EAAE,QAAQ;IAEvC,iBAAiB;IACjB,qBAAqB,EAAE,QAAQ;IAE/B,oCAAoC;IACpC,cAAc,EAAE,QAAQ;IACxB,oBAAoB,EAAE,QAAQ;IAC9B,uBAAuB,EAAE,MAAM;IAC/B,kBAAkB,EAAE,MAAM;IAC1B,kBAAkB,EAAE,MAAM;IAC1B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,MAAM;IAChC,kBAAkB,EAAE,MAAM;CAC3B,CAAC;AAEF,4GAA4G;AAC5G,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,KAAK,MAAM,CAAC;AACrD,CAAC"}
|
|
@@ -6,7 +6,15 @@ export declare function robotsSitemapPresenceRule(source: string): Promise<RuleR
|
|
|
6
6
|
*/
|
|
7
7
|
/** Parse `Crawl-delay: N` from the `User-agent: *` block. Returns seconds, or 0 if unset. */
|
|
8
8
|
export declare function parseCrawlDelaySeconds(robotsTxt: string): number;
|
|
9
|
-
|
|
9
|
+
/**
|
|
10
|
+
* Parse Disallow patterns for the given user-agents. Merges the `User-agent: *`
|
|
11
|
+
* block with any named UA blocks; a hostile site that specifically disallows
|
|
12
|
+
* `pseolint` without a wildcard still gets honored.
|
|
13
|
+
*
|
|
14
|
+
* The default UA list (`["*"]`) preserves the pre-v0.3.2 behavior for callers
|
|
15
|
+
* that don't opt into UA-specific matching.
|
|
16
|
+
*/
|
|
17
|
+
export declare function parseDisallowPatterns(robotsTxt: string, userAgents?: readonly string[]): string[];
|
|
10
18
|
/**
|
|
11
19
|
* Returns true if `urlPath` (e.g. "/blog/post") is blocked by a single Disallow pattern.
|
|
12
20
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"robots-sitemap-presence.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/robots-sitemap-presence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAc7D,wBAAsB,yBAAyB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAqDrF;AAED;;;GAGG;AACH,6FAA6F;AAC7F,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAmBhE;AAED,wBAAgB,qBAAqB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"robots-sitemap-presence.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/robots-sitemap-presence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAc7D,wBAAsB,yBAAyB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAqDrF;AAED;;;GAGG;AACH,6FAA6F;AAC7F,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAmBhE;AAED;;;;;;;GAOG;AACH,wBAAgB,qBAAqB,CAAC,SAAS,EAAE,MAAM,EAAE,UAAU,GAAE,SAAS,MAAM,EAAU,GAAG,MAAM,EAAE,CA+BxG;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CA8B5E;AAED,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EACxB,gBAAgB,EAAE,MAAM,GACvB,UAAU,EAAE,CA+Bd"}
|
|
@@ -86,10 +86,19 @@ export function parseCrawlDelaySeconds(robotsTxt) {
|
|
|
86
86
|
}
|
|
87
87
|
return 0;
|
|
88
88
|
}
|
|
89
|
-
|
|
89
|
+
/**
|
|
90
|
+
* Parse Disallow patterns for the given user-agents. Merges the `User-agent: *`
|
|
91
|
+
* block with any named UA blocks; a hostile site that specifically disallows
|
|
92
|
+
* `pseolint` without a wildcard still gets honored.
|
|
93
|
+
*
|
|
94
|
+
* The default UA list (`["*"]`) preserves the pre-v0.3.2 behavior for callers
|
|
95
|
+
* that don't opt into UA-specific matching.
|
|
96
|
+
*/
|
|
97
|
+
export function parseDisallowPatterns(robotsTxt, userAgents = ["*"]) {
|
|
90
98
|
const lines = robotsTxt.split(/\r?\n/);
|
|
91
99
|
const patterns = [];
|
|
92
|
-
|
|
100
|
+
const uaSet = new Set(userAgents.map((u) => u.toLowerCase()));
|
|
101
|
+
let inMatchingBlock = false;
|
|
93
102
|
for (const rawLine of lines) {
|
|
94
103
|
const line = rawLine.trim();
|
|
95
104
|
// Skip comments and blank lines
|
|
@@ -97,11 +106,11 @@ export function parseDisallowPatterns(robotsTxt) {
|
|
|
97
106
|
continue;
|
|
98
107
|
// Detect User-agent directive
|
|
99
108
|
if (/^user-agent\s*:/i.test(line)) {
|
|
100
|
-
const value = line.replace(/^user-agent\s*:\s*/i, "").trim();
|
|
101
|
-
|
|
109
|
+
const value = line.replace(/^user-agent\s*:\s*/i, "").trim().toLowerCase();
|
|
110
|
+
inMatchingBlock = uaSet.has(value);
|
|
102
111
|
continue;
|
|
103
112
|
}
|
|
104
|
-
if (!
|
|
113
|
+
if (!inMatchingBlock)
|
|
105
114
|
continue;
|
|
106
115
|
if (/^disallow\s*:/i.test(line)) {
|
|
107
116
|
const value = line.replace(/^disallow\s*:\s*/i, "").trim();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"robots-sitemap-presence.js","sourceRoot":"","sources":["../../../src/rules/tech/robots-sitemap-presence.ts"],"names":[],"mappings":"AAEA,KAAK,UAAU,aAAa,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,MAAc;IAC5D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;IAChC,MAAM,SAAS,GAAG,GAAG,MAAM,aAAa,CAAC;IACzC,MAAM,UAAU,GAAG,GAAG,MAAM,cAAc,CAAC;IAC3C,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,mBAAmB,SAAS,GAAG;YACxC,GAAG,EAAE,+BAA+B,SAAS,4DAA4D;SAC1G,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3C,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,GAAG,SAAS,wCAAwC;YAC7D,GAAG,EAAE,8BAA8B,SAAS,oBAAoB,UAAU,EAAE;SAC7E,CAAC,CAAC;IACL,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,UAAU,CAAC,CAAC;IACpD,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,mBAAmB,UAAU,GAAG;YACzC,GAAG,EAAE,4BAA4B,UAAU,wBAAwB,SAAS,GAAG;SAChF,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAC1C,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QACvE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,UAAU,kDAAkD;YACxE,GAAG,EAAE,UAAU,UAAU,6EAA6E;SACvG,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,6FAA6F;AAC7F,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,IAAI,eAAe,GAAG,
KAAK,CAAC;IAC5B,KAAK,MAAM,OAAO,IAAI,KAAK,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;YAChC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,eAAe;YAAE,SAAS;QAC/B,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YACxB,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,SAAiB;
|
|
1
|
+
{"version":3,"file":"robots-sitemap-presence.js","sourceRoot":"","sources":["../../../src/rules/tech/robots-sitemap-presence.ts"],"names":[],"mappings":"AAEA,KAAK,UAAU,aAAa,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,MAAc;IAC5D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;IAChC,MAAM,SAAS,GAAG,GAAG,MAAM,aAAa,CAAC;IACzC,MAAM,UAAU,GAAG,GAAG,MAAM,cAAc,CAAC;IAC3C,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,mBAAmB,SAAS,GAAG;YACxC,GAAG,EAAE,+BAA+B,SAAS,4DAA4D;SAC1G,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3C,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,GAAG,SAAS,wCAAwC;YAC7D,GAAG,EAAE,8BAA8B,SAAS,oBAAoB,UAAU,EAAE;SAC7E,CAAC,CAAC;IACL,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,UAAU,CAAC,CAAC;IACpD,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,mBAAmB,UAAU,GAAG;YACzC,GAAG,EAAE,4BAA4B,UAAU,wBAAwB,SAAS,GAAG;SAChF,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAC1C,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QACvE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,UAAU,kDAAkD;YACxE,GAAG,EAAE,UAAU,UAAU,6EAA6E;SACvG,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,6FAA6F;AAC7F,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,IAAI,eAAe,GAAG,
KAAK,CAAC;IAC5B,KAAK,MAAM,OAAO,IAAI,KAAK,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC7D,eAAe,GAAG,KAAK,KAAK,GAAG,CAAC;YAChC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,eAAe;YAAE,SAAS;QAC/B,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9D,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YACxB,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,qBAAqB,CAAC,SAAiB,EAAE,aAAgC,CAAC,GAAG,CAAC;IAC5F,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAC9D,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,KAAK,MAAM,OAAO,IAAI,KAAK,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAE5B,gCAAgC;QAChC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,8BAA8B;QAC9B,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC3E,eAAe,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YACnC,SAAS;QACX,CAAC;QAED,IAAI,CAAC,eAAe;YAAE,SAAS;QAE/B,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,iDAAiD;YACjD,IAAI,KAAK,EAAE,CAAC;gBACV,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAe,EAAE,OAAe;IACjE,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,yEAAyE;QACzE,2CAA2C;QAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,GA
AG,GAAG,CAAC,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACZ,8EAA8E;gBAC9E,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;oBAAE,OAAO,KAAK,CAAC;gBAC5C,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;YACpB,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBACvC,IAAI,GAAG,KAAK,CAAC,CAAC;oBAAE,OAAO,KAAK,CAAC;gBAC7B,GAAG,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,+BAA+B;IAC/B,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IACrC,CAAC;IAED,cAAc;IACd,OAAO,OAAO,KAAK,OAAO,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,KAAmB,EACnB,WAAwB,EACxB,gBAAwB;IAExB,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,gBAAgB,CAAC,CAAC;IACjE,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7C,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,SAAS;QAEzC,IAAI,UAAkB,CAAC;QACvB,IAAI,CAAC;YACH,UAAU,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;YACvC,IAAI,kBAAkB,CAAC,UAAU,EAAE,OAAO,CAAC,EAAE,CAAC;gBAC5C,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,2DAA2D,OAAO,IAAI;oBAC1F,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qEAAqE;iBAC3E,CAAC,CAAC;gBACH,MAAM,CAAC,iCAAiC;YAC1C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { SafeMode, AuditOptions } from "./types.js";
|
|
2
|
+
/**
 * Set of option values a safe-mode preset may supply.
 *
 * Every field is optional, so a preset can set any subset of them
 * (including none at all).
 */
export type SafeModePreset = Partial<{
    concurrency: number;
    sampleSize: number;
    guardSsrf: boolean;
    respectRobotsTxt: boolean;
    followRedirects: boolean;
    maxCrawlDiscovered: number;
    maxFetchBytes: number;
}>;
|
|
11
|
+
/** Key into the preset table: any `SafeMode` value, or the `"__none"` sentinel used when no preset is selected. */
export type SafeModeKey = SafeMode | "__none";
|
|
12
|
+
/**
 * Table of safe-mode presets; each preset switches several safety defaults
 * together. A value set explicitly on AuditOptions takes precedence over the
 * preset's value. The `__none` entry is the sentinel for "no preset
 * selected": it leaves every field undefined, so the `??` chain falls
 * through to the hard-coded defaults.
 */
export declare const SAFE_MODE_PRESETS: Record<SafeModeKey, SafeModePreset>;
|
|
19
|
+
/**
 * Choose which preset key applies to a source and its options.
 *
 * Resolution order:
 *   1. an explicit `safeMode`                          → that preset
 *   2. localhost URL and `autoDevPreset` is not false  → "dev"
 *   3. anything else                                   → "__none"
 */
export declare function resolveSafeModeKey(
    source: string,
    options?: Pick<AuditOptions, "safeMode" | "autoDevPreset">,
): SafeModeKey;
|
|
27
|
+
//# sourceMappingURL=safe-mode-preset.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"safe-mode-preset.d.ts","sourceRoot":"","sources":["../src/safe-mode-preset.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAGzD,MAAM,MAAM,cAAc,GAAG;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAE9C;;;;;GAKG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,WAAW,EAAE,cAAc,CA6BjE,CAAC;AAEF;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,IAAI,CAAC,YAAY,EAAE,UAAU,GAAG,eAAe,CAAC,GACzD,WAAW,CAKb"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { isLocalhostUrl } from "./ssrf-guard.js";
|
|
2
|
+
// 50 * 1024 * 1024 = 52_428_800, the fetch-size cap used by the cli and dev presets.
const FETCH_CAP_50_MIB = 50 * 1024 * 1024;
// Both flags are switched on in every named preset; spread in place so the
// key order of each preset stays the same.
const ROBOTS_AND_REDIRECTS = { respectRobotsTxt: true, followRedirects: true };
/**
 * Safe-mode presets: each one switches several safety defaults together.
 * Options set explicitly on AuditOptions take precedence over the preset.
 * `__none` is the sentinel entry for "no preset selected"; it defines no
 * fields, so the `??` chain falls through to the hard-coded defaults.
 */
export const SAFE_MODE_PRESETS = {
    // Hosted service: user-submitted URLs are assumed to be hostile.
    saas: {
        guardSsrf: true,
        ...ROBOTS_AND_REDIRECTS,
        maxCrawlDiscovered: 2000,
        maxFetchBytes: 10_000_000,
    },
    // Local / dev use: the user is auditing their own or a local site.
    cli: {
        guardSsrf: false,
        ...ROBOTS_AND_REDIRECTS,
        maxCrawlDiscovered: 5000,
        maxFetchBytes: FETCH_CAP_50_MIB,
    },
    // Localhost probe: a cache-cold dev server can turn every crawled URL into
    // a chain of DB queries, so the first pass is kept tiny and reversible.
    dev: {
        concurrency: 1,
        sampleSize: 25,
        guardSsrf: false,
        ...ROBOTS_AND_REDIRECTS,
        maxCrawlDiscovered: 50,
        maxFetchBytes: FETCH_CAP_50_MIB,
    },
    __none: {},
};
|
|
38
|
+
/**
 * Choose which preset key applies to a source and its options.
 *
 * Resolution order:
 *   1. an explicit (truthy) `safeMode`                 → that preset
 *   2. localhost URL and `autoDevPreset` is not false  → "dev"
 *   3. anything else                                   → "__none"
 */
export function resolveSafeModeKey(source, options) {
    const explicitMode = options?.safeMode;
    if (explicitMode) {
        return explicitMode;
    }
    // Short-circuit keeps the localhost check from running when the caller
    // has opted out of the automatic dev preset.
    const autoDevAllowed = options?.autoDevPreset !== false;
    return autoDevAllowed && isLocalhostUrl(source) ? "dev" : "__none";
}
|
|
54
|
+
//# sourceMappingURL=safe-mode-preset.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"safe-mode-preset.js","sourceRoot":"","sources":["../src/safe-mode-preset.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAcjD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAwC;IACpE,IAAI,EAAE;QACJ,mEAAmE;QACnE,SAAS,EAAE,IAAI;QACf,gBAAgB,EAAE,IAAI;QACtB,eAAe,EAAE,IAAI;QACrB,kBAAkB,EAAE,IAAI;QACxB,aAAa,EAAE,UAAU;KAC1B;IACD,GAAG,EAAE;QACH,+DAA+D;QAC/D,SAAS,EAAE,KAAK;QAChB,gBAAgB,EAAE,IAAI;QACtB,eAAe,EAAE,IAAI;QACrB,kBAAkB,EAAE,IAAI;QACxB,aAAa,EAAE,UAAU;KAC1B;IACD,GAAG,EAAE;QACH,yEAAyE;QACzE,wEAAwE;QACxE,WAAW,EAAE,CAAC;QACd,UAAU,EAAE,EAAE;QACd,SAAS,EAAE,KAAK;QAChB,gBAAgB,EAAE,IAAI;QACtB,eAAe,EAAE,IAAI;QACrB,kBAAkB,EAAE,EAAE;QACtB,aAAa,EAAE,UAAU;KAC1B;IACD,MAAM,EAAE,EAAE;CACX,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAc,EACd,OAA0D;IAE1D,IAAI,OAAO,EAAE,QAAQ;QAAE,OAAO,OAAO,CAAC,QAAQ,CAAC;IAC/C,IAAI,OAAO,EAAE,aAAa,KAAK,KAAK;QAAE,OAAO,QAAQ,CAAC;IACtD,IAAI,cAAc,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v0.4 Site Classifier — runs BEFORE rules to gate which rule set applies.
|
|
3
|
+
*
|
|
4
|
+
* Today the engine runs every rule against every audited site regardless of
|
|
5
|
+
* whether the site is actually programmatic-SEO. A 23-page marketing site
|
|
6
|
+
* gets pSEO-targeted findings that don't apply; a 50,000-page directory
|
|
7
|
+
* gets the same audit shape as a small blog. This pre-flight classifier
|
|
8
|
+
* decides which "kind" of site we're auditing and emits a list of ruleIds
|
|
9
|
+
* that the dispatcher will skip when site type is small-marketing/blog.
|
|
10
|
+
*
|
|
11
|
+
* Heuristics (v1, ships in v0.4):
|
|
12
|
+
* - Sitemap URL count: <50 = bias small/blog; ≥1000 = bias programmatic.
|
|
13
|
+
* - URL-pattern clustering: normalize numeric/slug segments; if top-3
|
|
14
|
+
* templates cover ≥60% of URLs → strong programmatic signal.
|
|
15
|
+
* - Framework signal: pass-through from dev-server detection.
|
|
16
|
+
*
|
|
17
|
+
* Deferred to v0.4.1+ (per spec §4.11):
|
|
18
|
+
* - DOM-skeleton hashing across pages.
|
|
19
|
+
* - Per-cluster classification (mixed pSEO + marketing).
|
|
20
|
+
* - Per-page applicability tagging on findings.
|
|
21
|
+
*/
|
|
22
|
+
/** The kinds of site the classifier can report. */
export type SiteType =
    | "programmatic-directory"
    | "small-marketing"
    | "blog"
    | "ecommerce"
    | "unclear";
|
|
23
|
+
/**
 * One observation that fed the classification, discriminated by `kind`:
 * the sitemap URL count, the URL-pattern cluster coverage, or the detected
 * framework.
 */
export type ClassificationSignal =
    | {
          kind: "sitemap-url-count";
          value: number;
      }
    | {
          kind: "url-pattern-cluster-coverage";
          topTemplate: string;
          pages: number;
          ratio: number;
      }
    | {
          kind: "framework-detected";
          value: "nextjs" | "vite" | "astro" | "unknown";
      };
|
|
35
|
+
/** Result produced by the site classifier. */
export interface SiteClassification {
    /** The site kind that was decided on. */
    type: SiteType;
    /** Confidence in the range 0–1. Below 0.6 → treat as `unclear` and run all rules. */
    confidence: number;
    /** The observations behind the classification, in order. The UI surfaces these. */
    signals: ClassificationSignal[];
    /**
     * RuleIds suppressed as a consequence of this classification; the
     * dispatcher consults this list before invoking each rule.
     *
     * The array is empty for `programmatic-directory`, `ecommerce`, and
     * `unclear`, because those types run all rules.
     */
    suppressedRules: string[];
}
|
|
50
|
+
/** RuleIds that are suppressed on non-pSEO sites (small-marketing / blog). */
export declare const PSEO_ONLY_RULE_IDS: ReadonlyArray<string>;
|
|
52
|
+
/**
 * Turn a pathname into a hashable template: path segments that look like
 * values are replaced with type placeholders.
 *
 * Examples:
 *   /                                →  /
 *   /post/12345                      →  /post/:n
 *   /blog/hello-world                →  /blog/:slug
 *   /california/los-angeles/plumbers →  /:slug/:slug/:slug
 */
export declare function normalizePathToTemplate(pathname: string): string;
|
|
63
|
+
/** Compute template-cluster ratios for a list of URLs. The top entries come first. */
export declare function clusterUrlTemplates(
    urls: string[],
): { template: string; count: number; ratio: number }[];
|
|
69
|
+
/** Input accepted by `classifySite`. */
export interface ClassifySiteInput {
    /** Every discovered URL (sitemap + crawl), unfiltered. */
    urls: string[];
    /** The framework detected from dev-server response headers, when there is one. */
    framework?: "nextjs" | "vite" | "astro" | "unknown";
}
|
|
75
|
+
/**
 * Classify a site from its URL list plus the framework signal. Pure function.
 *
 * Contract: pass the FULL discovered URL list (sitemap + crawl), never the
 * post-sample list. The raw size is the signal that tells a 5000-page
 * directory apart from a 25-page sample of one.
 */
export declare function classifySite(input: ClassifySiteInput): SiteClassification;
|
|
83
|
+
//# sourceMappingURL=site-classifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"site-classifier.d.ts","sourceRoot":"","sources":["../src/site-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,MAAM,QAAQ,GAChB,wBAAwB,GACxB,iBAAiB,GACjB,MAAM,GACN,WAAW,GACX,SAAS,CAAC;AAEd,MAAM,MAAM,oBAAoB,GAC5B;IAAE,IAAI,EAAE,mBAAmB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAC5C;IACE,IAAI,EAAE,8BAA8B,CAAC;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf,GACD;IAAE,IAAI,EAAE,oBAAoB,CAAC;IAAC,KAAK,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAA;CAAE,CAAC;AAEnF,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,QAAQ,CAAC;IACf,wEAAwE;IACxE,UAAU,EAAE,MAAM,CAAC;IACnB,2EAA2E;IAC3E,OAAO,EAAE,oBAAoB,EAAE,CAAC;IAChC;;;;;;OAMG;IACH,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,oEAAoE;AACpE,eAAO,MAAM,kBAAkB,EAAE,SAAS,MAAM,EAK/C,CAAC;AAEF;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAiChE;AAiBD,kFAAkF;AAClF,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAiB7G;AAED,MAAM,WAAW,iBAAiB;IAChC,yDAAyD;IACzD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,kEAAkE;IAClE,SAAS,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC;CACrD;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,iBAAiB,GAAG,kBAAkB,CA6FzE"}
|