@pseolint/core 0.6.6 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
- package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
- package/dist/algorithms/authority/commoncrawl.js +17 -0
- package/dist/algorithms/authority/commoncrawl.js.map +1 -0
- package/dist/algorithms/authority/openpagerank.d.ts +19 -0
- package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
- package/dist/algorithms/authority/openpagerank.js +42 -0
- package/dist/algorithms/authority/openpagerank.js.map +1 -0
- package/dist/algorithms/authority/provider.d.ts +16 -0
- package/dist/algorithms/authority/provider.d.ts.map +1 -0
- package/dist/algorithms/authority/provider.js +24 -0
- package/dist/algorithms/authority/provider.js.map +1 -0
- package/dist/algorithms/auto-entity-mask.d.ts +19 -0
- package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
- package/dist/algorithms/auto-entity-mask.js +102 -0
- package/dist/algorithms/auto-entity-mask.js.map +1 -0
- package/dist/algorithms/example-regions.d.ts +22 -0
- package/dist/algorithms/example-regions.d.ts.map +1 -0
- package/dist/algorithms/example-regions.js +32 -0
- package/dist/algorithms/example-regions.js.map +1 -0
- package/dist/algorithms/fact-extraction.d.ts +46 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -0
- package/dist/algorithms/fact-extraction.js +223 -0
- package/dist/algorithms/fact-extraction.js.map +1 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +55 -9
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +9 -8
- package/dist/enrich-findings.js.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/origin-preflight.d.ts +89 -0
- package/dist/origin-preflight.d.ts.map +1 -0
- package/dist/origin-preflight.js +93 -0
- package/dist/origin-preflight.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +1 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
- package/dist/rules/aeo/citable-facts.js +4 -33
- package/dist/rules/aeo/citable-facts.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts +14 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +96 -15
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +4 -3
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/citation-coverage.d.ts +11 -0
- package/dist/rules/content/citation-coverage.d.ts.map +1 -0
- package/dist/rules/content/citation-coverage.js +43 -0
- package/dist/rules/content/citation-coverage.js.map +1 -0
- package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
- package/dist/rules/content/common-phrase-reuse.js +7 -2
- package/dist/rules/content/common-phrase-reuse.js.map +1 -1
- package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
- package/dist/rules/content/regurgitated-content.js +11 -2
- package/dist/rules/content/regurgitated-content.js.map +1 -1
- package/dist/rules/content/translation-no-op.d.ts.map +1 -1
- package/dist/rules/content/translation-no-op.js +5 -1
- package/dist/rules/content/translation-no-op.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +15 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +46 -39
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +3 -1
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/links/cluster-connectivity.d.ts +7 -1
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
- package/dist/rules/links/cluster-connectivity.js +8 -2
- package/dist/rules/links/cluster-connectivity.js.map +1 -1
- package/dist/rules/links/orphan-pages.d.ts +8 -1
- package/dist/rules/links/orphan-pages.d.ts.map +1 -1
- package/dist/rules/links/orphan-pages.js +10 -1
- package/dist/rules/links/orphan-pages.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +33 -21
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +1 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/spam/entity-swap.d.ts.map +1 -1
- package/dist/rules/spam/entity-swap.js +51 -9
- package/dist/rules/spam/entity-swap.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +5 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
- package/dist/rules/tech/canonical-consistency.js +144 -28
- package/dist/rules/tech/canonical-consistency.js.map +1 -1
- package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
- package/dist/rules/tech/sitemap-completeness.js +21 -5
- package/dist/rules/tech/sitemap-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +11 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +47 -5
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +1 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/template-detection.d.ts +1 -0
- package/dist/template-detection.d.ts.map +1 -1
- package/dist/template-detection.js +1 -1
- package/dist/template-detection.js.map +1 -1
- package/dist/types.d.ts +22 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +17 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAE/E,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAEpC,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,mBAAmB,CAAC,IAAgB;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IACrE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACpF,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACnC,IACE,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC;QACtD,KAAK,IAAI,CAAC,CAAC;IACb,OAAO,KAAK,CAAC;AACf,CAAC;AAYD,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;IAED,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,gFAAgF;IAChF,wEAAwE;IACxE,0EAA0E;IAC1E,6EAA6E;IAC7E,yEAAyE;IACzE,qEAAqE;IACrE,uEAAuE;IACvE,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,UAAU,CAAC;IACnC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAAU,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,gBAAgB,CAAC,KAAK,CAAC;YACjC,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -3,5 +3,11 @@ import type { ParsedPage, RuleResult } from "../../types.js";
|
|
|
3
3
|
* Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
|
|
4
4
|
* internal crawl link to another cluster and no inbound from another cluster.
|
|
5
5
|
*/
|
|
6
|
-
export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string
|
|
6
|
+
export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>,
|
|
7
|
+
/**
|
|
8
|
+
* 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
|
|
9
|
+
* that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
|
|
10
|
+
* unreliable. Only run on a full crawl.
|
|
11
|
+
*/
|
|
12
|
+
sampled?: boolean): RuleResult[];
|
|
7
13
|
//# sourceMappingURL=cluster-connectivity.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC;AACtB;;;;GAIG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CA0Dd"}
|
|
@@ -19,8 +19,14 @@ function hasCrossClusterInbound(clusterDir, urlsInCluster, pages, knownUrls) {
|
|
|
19
19
|
* Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
|
|
20
20
|
* internal crawl link to another cluster and no inbound from another cluster.
|
|
21
21
|
*/
|
|
22
|
-
export function clusterConnectivityRule(pages, knownUrls
|
|
23
|
-
|
|
22
|
+
export function clusterConnectivityRule(pages, knownUrls,
|
|
23
|
+
/**
|
|
24
|
+
* 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
|
|
25
|
+
* that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
|
|
26
|
+
* unreliable. Only run on a full crawl.
|
|
27
|
+
*/
|
|
28
|
+
sampled = false) {
|
|
29
|
+
if (sampled || pages.length < 2) {
|
|
24
30
|
return [];
|
|
25
31
|
}
|
|
26
32
|
const clusterPages = new Map();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;
|
|
1
|
+
{"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;AACtB;;;;GAIG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1,3 +1,10 @@
|
|
|
1
1
|
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
-
export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string
|
|
2
|
+
export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string,
|
|
3
|
+
/**
|
|
4
|
+
* 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
|
|
5
|
+
* given URL is often simply not in the fetched subset, so "0 inbound in this
|
|
6
|
+
* crawl" is not evidence of a real orphan. Orphan detection is only reliable
|
|
7
|
+
* on a full crawl — skip it when sampled rather than flag healthy pages.
|
|
8
|
+
*/
|
|
9
|
+
sampled?: boolean): RuleResult[];
|
|
3
10
|
//# sourceMappingURL=orphan-pages.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM;AAChB;;;;;GAKG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CAqBd"}
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
-
export function orphanPagesRule(pages, inboundLinks, rootUrl
|
|
1
|
+
export function orphanPagesRule(pages, inboundLinks, rootUrl,
|
|
2
|
+
/**
|
|
3
|
+
* 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
|
|
4
|
+
* given URL is often simply not in the fetched subset, so "0 inbound in this
|
|
5
|
+
* crawl" is not evidence of a real orphan. Orphan detection is only reliable
|
|
6
|
+
* on a full crawl — skip it when sampled rather than flag healthy pages.
|
|
7
|
+
*/
|
|
8
|
+
sampled = false) {
|
|
9
|
+
if (sampled)
|
|
10
|
+
return [];
|
|
2
11
|
const findings = [];
|
|
3
12
|
for (const page of pages) {
|
|
4
13
|
if (rootUrl && page.url === rootUrl) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;
|
|
1
|
+
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;AAChB;;;;;GAKG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA6DvE"}
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
export function schemaConsistencyRule(pages) {
|
|
2
2
|
const findings = [];
|
|
3
|
-
|
|
3
|
+
// Group pages by structureSignature so we only compare @type within template clusters.
|
|
4
|
+
// A normal site legitimately mixes types across templates (WebSite on home, Article on
|
|
5
|
+
// blog, Product on listings). Variance is only a problem when pages that share the same
|
|
6
|
+
// template (same structureSignature) use different @type values.
|
|
7
|
+
const clustersBySignature = new Map();
|
|
4
8
|
for (const page of pages) {
|
|
5
9
|
const types = new Set();
|
|
6
10
|
for (const entry of page.jsonLd) {
|
|
@@ -15,30 +19,38 @@ export function schemaConsistencyRule(pages) {
|
|
|
15
19
|
types.add(obj["@type"]);
|
|
16
20
|
}
|
|
17
21
|
}
|
|
18
|
-
if (types.size
|
|
19
|
-
|
|
22
|
+
if (types.size === 0) {
|
|
23
|
+
continue;
|
|
20
24
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}
|
|
25
|
-
const allTypes = new Set();
|
|
26
|
-
for (const types of typesByPage.values()) {
|
|
27
|
-
for (const t of types) {
|
|
28
|
-
allTypes.add(t);
|
|
25
|
+
const sig = page.structureSignature;
|
|
26
|
+
if (!clustersBySignature.has(sig)) {
|
|
27
|
+
clustersBySignature.set(sig, []);
|
|
29
28
|
}
|
|
29
|
+
clustersBySignature.get(sig).push({ url: page.url, types });
|
|
30
30
|
}
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
// Within each cluster of ≥2 pages, check whether all pages use the same @type set.
|
|
32
|
+
for (const members of clustersBySignature.values()) {
|
|
33
|
+
if (members.length < 2) {
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
36
|
+
const allTypesInCluster = new Set();
|
|
37
|
+
for (const { types } of members) {
|
|
38
|
+
for (const t of types) {
|
|
39
|
+
allTypesInCluster.add(t);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
if (allTypesInCluster.size <= 1) {
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
const typeList = Array.from(allTypesInCluster).sort().join(", ");
|
|
46
|
+
findings.push({
|
|
47
|
+
ruleId: "schema/consistency",
|
|
48
|
+
severity: "info",
|
|
49
|
+
message: `Template pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
|
|
50
|
+
relatedUrls: members.map((m) => m.url),
|
|
51
|
+
fix: `Use a consistent @type across all pages that share the same template structure.`
|
|
52
|
+
});
|
|
33
53
|
}
|
|
34
|
-
const typeList = Array.from(allTypes).sort().join(", ");
|
|
35
|
-
findings.push({
|
|
36
|
-
ruleId: "schema/consistency",
|
|
37
|
-
severity: "info",
|
|
38
|
-
message: `Pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
|
|
39
|
-
relatedUrls: Array.from(typesByPage.keys()),
|
|
40
|
-
fix: `Use a consistent @type across all template pages, or separate pages into groups with different schema types.`
|
|
41
|
-
});
|
|
42
54
|
return findings;
|
|
43
55
|
}
|
|
44
56
|
//# sourceMappingURL=consistency.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,
|
|
1
|
+
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,mFAAmF;IACnF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAU,CAAC;QAC5C,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;YAChC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,IAAI,iBAAiB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,0CAA0C,QAAQ,6DAA6D;YACxH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,iFAAiF;SACvF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAmEhD,CAAC;AAEF,4GAA4G;AAC5G,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAE3D"}
|
package/dist/rules/scope.js
CHANGED
|
@@ -16,6 +16,7 @@ export const RULE_SCOPE = {
|
|
|
16
16
|
"content/title-uniqueness": "corpus",
|
|
17
17
|
"content/heading-structure": "page",
|
|
18
18
|
"content/image-alt-text": "page",
|
|
19
|
+
"content/citation-coverage": "page",
|
|
19
20
|
// links
|
|
20
21
|
"links/orphan-pages": "corpus",
|
|
21
22
|
"links/dead-ends": "corpus",
|
package/dist/rules/scope.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAC9B,0BAA0B,EAAE,QAAQ;IACpC,2BAA2B,EAAE,MAAM;IACnC,wBAAwB,EAAE,MAAM;
|
|
1
|
+
{"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAC9B,0BAA0B,EAAE,QAAQ;IACpC,2BAA2B,EAAE,MAAM;IACnC,wBAAwB,EAAE,MAAM;IAChC,2BAA2B,EAAE,MAAM;IAEnC,QAAQ;IACR,oBAAoB,EAAE,QAAQ;IAC9B,iBAAiB,EAAE,QAAQ;IAC3B,4BAA4B,EAAE,QAAQ;IACtC,kBAAkB,EAAE,QAAQ;IAC5B,6BAA6B,EAAE,QAAQ;IACvC,+BAA+B,EAAE,QAAQ;IAEzC,OAAO;IACP,4BAA4B,EAAE,QAAQ;IACtC,iCAAiC,EAAE,MAAM;IACzC,8BAA8B,EAAE,QAAQ;IACxC,2BAA2B,EAAE,QAAQ;IACrC,qBAAqB,EAAE,MAAM;IAC7B,eAAe,EAAE,MAAM;IACvB,2BAA2B,EAAE,QAAQ;IACrC,wBAAwB,EAAE,QAAQ;IAClC,8BAA8B,EAAE,QAAQ;IACxC,sBAAsB,EAAE,MAAM;IAE9B,SAAS;IACT,sBAAsB,EAAE,MAAM;IAC9B,wBAAwB,EAAE,MAAM;IAChC,oBAAoB,EAAE,QAAQ;IAE9B,kEAAkE;IAClE,kEAAkE;IAClE,uCAAuC;IACvC,sBAAsB,EAAE,QAAQ;IAEhC,eAAe;IACf,sBAAsB,EAAE,MAAM;IAC9B,6BAA6B,EAAE,QAAQ;IAEvC,iBAAiB;IACjB,qBAAqB,EAAE,QAAQ;IAE/B,oCAAoC;IACpC,cAAc,EAAE,QAAQ;IACxB,oBAAoB,EAAE,QAAQ;IAC9B,uBAAuB,EAAE,MAAM;IAC/B,kBAAkB,EAAE,MAAM;IAC1B,kBAAkB,EAAE,MAAM;IAC1B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,MAAM;IAChC,kBAAkB,EAAE,MAAM;CAC3B,CAAC;AAEF,4GAA4G;AAC5G,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,KAAK,MAAM,CAAC;AACrD,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAsBrD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAgDhD"}
|
|
@@ -1,23 +1,65 @@
|
|
|
1
1
|
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
2
|
import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
|
|
3
|
+
/**
|
|
4
|
+
* Compute masking coverage: fraction of pages where at least one entity token
|
|
5
|
+
* was replaced by a placeholder. A page "benefited" from masking when its
|
|
6
|
+
* masked text differs from the original.
|
|
7
|
+
*
|
|
8
|
+
* ponytail: threshold is <20% of pages masked → low coverage (weak entity signal).
|
|
9
|
+
* Zero patterns supplied is a degenerate case and always yields low coverage.
|
|
10
|
+
*/
|
|
11
|
+
function maskingCoverage(pages, patterns) {
|
|
12
|
+
if (patterns.length === 0 || pages.length === 0)
|
|
13
|
+
return 0;
|
|
14
|
+
let touched = 0;
|
|
15
|
+
for (const page of pages) {
|
|
16
|
+
const masked = maskEntities(page.contentText, patterns);
|
|
17
|
+
if (masked !== page.contentText)
|
|
18
|
+
touched += 1;
|
|
19
|
+
}
|
|
20
|
+
return touched / pages.length;
|
|
21
|
+
}
|
|
22
|
+
const LOW_COVERAGE_THRESHOLD = 0.2; // ponytail: <20% pages masked → low-confidence signal
|
|
3
23
|
export function entitySwapRule(pages, patterns, threshold) {
|
|
4
24
|
const findings = [];
|
|
5
25
|
const pairs = [];
|
|
6
26
|
const hashes = pages.map((page) => simHashFromText(maskEntities(page.contentText, patterns)));
|
|
27
|
+
const coverage = maskingCoverage(pages, patterns);
|
|
28
|
+
const isLowCoverage = coverage < LOW_COVERAGE_THRESHOLD;
|
|
7
29
|
for (let i = 0; i < pages.length; i += 1) {
|
|
8
30
|
for (let j = i + 1; j < pages.length; j += 1) {
|
|
9
31
|
const similarity = similarityFromDistance(hammingDistance(hashes[i], hashes[j]));
|
|
10
32
|
if (similarity >= threshold) {
|
|
11
33
|
pairs.push({ leftUrl: pages[i].url, rightUrl: pages[j].url, similarity });
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
34
|
+
if (isLowCoverage) {
|
|
35
|
+
// Weak/absent entity patterns mean masking barely changed the text;
|
|
36
|
+
// this finding overlaps a plain near-duplicate signal, not a confirmed
|
|
37
|
+
// entity-swap. Downgrade to warning with low confidence.
|
|
38
|
+
findings.push({
|
|
39
|
+
ruleId: "spam/entity-swap",
|
|
40
|
+
severity: "warning",
|
|
41
|
+
confidence: "low",
|
|
42
|
+
message: `${pages[i].url} and ${pages[j].url} are near-identical, but entity masking ` +
|
|
43
|
+
`coverage is too low to confirm an entity-swap pattern (masking touched ` +
|
|
44
|
+
`${Math.round(coverage * 100)}% of pages). ` +
|
|
45
|
+
`Provide entity patterns or treat this as a near-duplicate finding instead.`,
|
|
46
|
+
pageUrl: pages[i].url,
|
|
47
|
+
relatedUrls: [pages[j].url],
|
|
48
|
+
similarity,
|
|
49
|
+
fix: "Supply entity patterns (city names, states, product names) so the rule can confirm whether these pages are entity-swapped templates. If no entity patterns apply, address as near-duplicate spam instead."
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
findings.push({
|
|
54
|
+
ruleId: "spam/entity-swap",
|
|
55
|
+
severity: "critical",
|
|
56
|
+
message: `${pages[i].url} and ${pages[j].url} look structurally identical after entity masking.`,
|
|
57
|
+
pageUrl: pages[i].url,
|
|
58
|
+
relatedUrls: [pages[j].url],
|
|
59
|
+
similarity,
|
|
60
|
+
fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
|
|
61
|
+
});
|
|
62
|
+
}
|
|
21
63
|
}
|
|
22
64
|
}
|
|
23
65
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAmB,EAAE,QAA6B;IACzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1D,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QACxD,IAAI,MAAM,KAAK,IAAI,CAAC,WAAW;YAAE,OAAO,IAAI,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;AAChC,CAAC;AAED,MAAM,sBAAsB,GAAG,GAAG,CAAC,CAAC,sDAAsD;AAE1F,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAClD,MAAM,aAAa,GAAG,QAAQ,GAAG,sBAAsB,CAAC;IAExD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAE1E,IAAI,aAAa,EAAE,CAAC;oBAClB,oEAAoE;oBACpE,uEAAuE;oBACvE,0DAA0D;oBAC1D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,SAAS;wBACnB,UAAU,EAAE,KAAK;wBACjB,OAAO,EACL,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,0CAA0C;4BAC7E,yEAAyE;4BACzE,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,eAAe;4BAC5C,4EAA4E;wBAC9E,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,2MAA2M;qBACjN,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,UAAU;wBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;wBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,gKAAgK;qBACtK,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,
|
|
1
|
+
{"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,CAkC1D"}
|
|
@@ -19,7 +19,11 @@ export function thinContentRule(pages, minWords) {
|
|
|
19
19
|
: "";
|
|
20
20
|
findings.push({
|
|
21
21
|
ruleId: "spam/thin-content",
|
|
22
|
-
|
|
22
|
+
// High confidence (far below the floor) is an error; the medium band — which
|
|
23
|
+
// the rule itself flags as "could legitimately be a short page" — is a
|
|
24
|
+
// warning, not a ship-blocker. The page still joins thinContentUrls either
|
|
25
|
+
// way so spam/doorway-pattern can stack on it.
|
|
26
|
+
severity: confidence === "high" ? "error" : "warning",
|
|
23
27
|
confidence,
|
|
24
28
|
message: `${page.url} has thin content (${words} words).${shortPageNote}`,
|
|
25
29
|
fix: `Add at least ${minWords - words} more words of substantive content relevant to this page's specific topic.`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,QAAQ,EAAE,OAAO;
|
|
1
|
+
{"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,6EAA6E;YAC7E,uEAAuE;YACvE,2EAA2E;YAC3E,+CAA+C;YAC/C,QAAQ,EAAE,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACrD,UAAU;YACV,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,sBAAsB,KAAK,WAAW,aAAa,EAAE;YACzE,GAAG,EAAE,gBAAgB,QAAQ,GAAG,KAAK,4EAA4E;SAClH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;
|
|
1
|
+
{"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;AAWD,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,aAAa,EAAE,mBAAmB,GACjC,UAAU,EAAE,CA+Kd"}
|
|
@@ -16,8 +16,22 @@ export function resolveCanonicalUrl(canonical, pageUrl, normalizeOpts) {
|
|
|
16
16
|
}
|
|
17
17
|
return normalizeAuditUrl(resolve(dirname(pageUrl), raw), normalizeOpts);
|
|
18
18
|
}
|
|
19
|
+
/** Extract the hostname from a URL string, or null if unparseable. */
|
|
20
|
+
function extractHost(url) {
|
|
21
|
+
try {
|
|
22
|
+
return new URL(url).hostname;
|
|
23
|
+
}
|
|
24
|
+
catch {
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
19
28
|
export function canonicalConsistencyRule(pages, knownUrls, normalizeOpts) {
|
|
29
|
+
// ── Pass 1: collect out-of-scope findings per page ──────────────────────────
|
|
30
|
+
// We separate "out-of-scope" (canonical host ≠ page host, and not in knownUrls)
|
|
31
|
+
// from other findings so we can decide whether to collapse them.
|
|
20
32
|
const findings = [];
|
|
33
|
+
// Map from canonical-target-host → array of (pageUrl, canonicalUrl) that had that host
|
|
34
|
+
const outOfScopeByTargetHost = new Map();
|
|
21
35
|
for (const page of pages) {
|
|
22
36
|
if (!page.canonical) {
|
|
23
37
|
findings.push({
|
|
@@ -40,35 +54,137 @@ export function canonicalConsistencyRule(pages, knownUrls, normalizeOpts) {
|
|
|
40
54
|
});
|
|
41
55
|
continue;
|
|
42
56
|
}
|
|
43
|
-
if (canonicalUrl === page.url)
|
|
57
|
+
if (canonicalUrl === page.url) {
|
|
58
|
+
// Self-canonical: still check HTTP header conflict
|
|
59
|
+
if (page.httpMeta?.linkHeader) {
|
|
60
|
+
const linkCanonicalMatch = page.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
|
|
61
|
+
if (linkCanonicalMatch) {
|
|
62
|
+
const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
|
|
63
|
+
const htmlCanonical = resolveCanonicalUrl(page.canonical, page.url, normalizeOpts);
|
|
64
|
+
if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
|
|
65
|
+
findings.push({
|
|
66
|
+
ruleId: "tech/canonical-consistency",
|
|
67
|
+
severity: "error",
|
|
68
|
+
message: `${page.url} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
|
|
69
|
+
pageUrl: page.url,
|
|
70
|
+
relatedUrls: [htmlCanonical, httpCanonical],
|
|
71
|
+
fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
44
76
|
continue;
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
77
|
+
}
|
|
78
|
+
// canonical differs from page.url
|
|
79
|
+
const pageHost = extractHost(page.url);
|
|
80
|
+
const canonicalHost = extractHost(canonicalUrl);
|
|
81
|
+
const isOutOfScope = !knownUrls.has(canonicalUrl);
|
|
82
|
+
const isCrossHost = pageHost !== null && canonicalHost !== null && canonicalHost !== pageHost;
|
|
83
|
+
if (isOutOfScope && isCrossHost) {
|
|
84
|
+
// Candidate for collapsing — defer into the bucket keyed by target host
|
|
85
|
+
const bucket = outOfScopeByTargetHost.get(canonicalHost) ?? [];
|
|
86
|
+
bucket.push({ pageUrl: page.url, canonicalUrl });
|
|
87
|
+
outOfScopeByTargetHost.set(canonicalHost, bucket);
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
// Either within-scope (warning) or same-host out-of-scope — emit per-page
|
|
91
|
+
findings.push({
|
|
92
|
+
ruleId: "tech/canonical-consistency",
|
|
93
|
+
severity: knownUrls.has(canonicalUrl) ? "warning" : "info",
|
|
94
|
+
message: knownUrls.has(canonicalUrl)
|
|
95
|
+
? `${page.url} canonicalizes to another crawled page (${canonicalUrl}).`
|
|
96
|
+
: `${page.url} canonicalizes outside the crawl scope (${canonicalUrl}).`,
|
|
97
|
+
pageUrl: page.url,
|
|
98
|
+
relatedUrls: [canonicalUrl],
|
|
99
|
+
fix: "Verify this canonical target is intentional."
|
|
100
|
+
});
|
|
101
|
+
// HTTP header conflict check (only when we haven't already decided to collapse)
|
|
102
|
+
if (page.httpMeta?.linkHeader) {
|
|
103
|
+
const linkCanonicalMatch = page.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
|
|
104
|
+
if (linkCanonicalMatch) {
|
|
105
|
+
const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
|
|
106
|
+
const htmlCanonical = resolveCanonicalUrl(page.canonical, page.url, normalizeOpts);
|
|
107
|
+
if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
|
|
108
|
+
findings.push({
|
|
109
|
+
ruleId: "tech/canonical-consistency",
|
|
110
|
+
severity: "error",
|
|
111
|
+
message: `${page.url} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
|
|
112
|
+
pageUrl: page.url,
|
|
113
|
+
relatedUrls: [htmlCanonical, httpCanonical],
|
|
114
|
+
fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
|
|
115
|
+
});
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
// ── Pass 2: collapse uniform out-of-scope buckets ───────────────────────────
|
|
122
|
+
// Strategy: if ALL pages point to the SAME alternate host (one bucket with all
|
|
123
|
+
// out-of-scope cross-host pages), emit ONE site-level info. If multiple target
|
|
124
|
+
// hosts exist (inconsistent), keep per-page findings.
|
|
125
|
+
const buckets = [...outOfScopeByTargetHost.entries()];
|
|
126
|
+
if (buckets.length === 0) {
|
|
127
|
+
// Nothing to collapse
|
|
128
|
+
}
|
|
129
|
+
else if (buckets.length === 1) {
|
|
130
|
+
// Every cross-host out-of-scope canonical goes to the same alternate host → collapse
|
|
131
|
+
const [targetHost, entries] = buckets[0];
|
|
132
|
+
const count = entries.length;
|
|
133
|
+
// Infer the crawled host from the first page in the bucket
|
|
134
|
+
const crawledHost = extractHost(entries[0].pageUrl) ?? "the crawled host";
|
|
135
|
+
// If there is only ONE page total pointing to the alternate host,
|
|
136
|
+
// still emit a per-page finding (no "site-level" implication with a single page).
|
|
137
|
+
if (count === 1) {
|
|
138
|
+
const { pageUrl, canonicalUrl } = entries[0];
|
|
139
|
+
findings.push({
|
|
140
|
+
ruleId: "tech/canonical-consistency",
|
|
141
|
+
severity: "info",
|
|
142
|
+
message: `${pageUrl} canonicalizes outside the crawl scope (${canonicalUrl}).`,
|
|
143
|
+
pageUrl,
|
|
144
|
+
relatedUrls: [canonicalUrl],
|
|
145
|
+
fix: "Verify this canonical target is intentional."
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
findings.push({
|
|
150
|
+
ruleId: "tech/canonical-consistency",
|
|
151
|
+
severity: "info",
|
|
152
|
+
message: `${count} pages canonicalize to ${targetHost}, outside the crawled host ${crawledHost} — expected if you crawled a staging/preview origin.`,
|
|
153
|
+
relatedUrls: entries.map((e) => e.canonicalUrl).slice(0, 10),
|
|
154
|
+
fix: "If this site is live at the canonical host, the canonicals are correct. If not, verify the canonical URLs."
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
// Multiple target hosts — inconsistent cross-host canonicals → per-page findings
|
|
160
|
+
for (const [, entries] of buckets) {
|
|
161
|
+
for (const { pageUrl, canonicalUrl } of entries) {
|
|
162
|
+
findings.push({
|
|
163
|
+
ruleId: "tech/canonical-consistency",
|
|
164
|
+
severity: "info",
|
|
165
|
+
message: `${pageUrl} canonicalizes outside the crawl scope (${canonicalUrl}).`,
|
|
166
|
+
pageUrl,
|
|
167
|
+
relatedUrls: [canonicalUrl],
|
|
168
|
+
fix: "Verify this canonical target is intentional."
|
|
169
|
+
});
|
|
170
|
+
// HTTP header conflict check for deferred pages
|
|
171
|
+
const pageDef = pages.find((p) => p.url === pageUrl);
|
|
172
|
+
if (pageDef?.httpMeta?.linkHeader) {
|
|
173
|
+
const linkCanonicalMatch = pageDef.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
|
|
174
|
+
if (linkCanonicalMatch) {
|
|
175
|
+
const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
|
|
176
|
+
const htmlCanonical = resolveCanonicalUrl(pageDef.canonical, pageDef.url, normalizeOpts);
|
|
177
|
+
if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
|
|
178
|
+
findings.push({
|
|
179
|
+
ruleId: "tech/canonical-consistency",
|
|
180
|
+
severity: "error",
|
|
181
|
+
message: `${pageUrl} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
|
|
182
|
+
pageUrl,
|
|
183
|
+
relatedUrls: [htmlCanonical, httpCanonical],
|
|
184
|
+
fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
|
|
185
|
+
});
|
|
186
|
+
}
|
|
187
|
+
}
|
|
72
188
|
}
|
|
73
189
|
}
|
|
74
190
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"canonical-consistency.js","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,MAAM,UAAU,mBAAmB,CACjC,SAAiB,EACjB,OAAe,EACf,aAAkC;IAElC,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,IAAI,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,iBAAiB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;IAE5E,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,iBAAiB,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QACtE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,GAAG,CAAC,EAAE,aAAa,CAAC,CAAC;AAC1E,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,KAAmB,EACnB,SAAsB,EACtB,aAAkC;IAElC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8BAA8B;gBAClD,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,mCAAmC,IAAI,CAAC,GAAG,qBAAqB;aACtE,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAClF,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,kCAAkC,IAAI,CAAC,SAAS,GAAG;gBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,+BAA+B;aACrC,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG;
|
|
1
|
+
{"version":3,"file":"canonical-consistency.js","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,MAAM,UAAU,mBAAmB,CACjC,SAAiB,EACjB,OAAe,EACf,aAAkC;IAElC,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,IAAI,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,iBAAiB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;IAE5E,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,iBAAiB,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QACtE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,GAAG,CAAC,EAAE,aAAa,CAAC,CAAC;AAC1E,CAAC;AAED,sEAAsE;AACtE,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,KAAmB,EACnB,SAAsB,EACtB,aAAkC;IAElC,+EAA+E;IAC/E,gFAAgF;IAChF,iEAAiE;IAEjE,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,MAAM,sBAAsB,GAAG,IAAI,GAAG,EAA4D,CAAC;IAEnG,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8BAA8B;gBAClD,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,mCAAmC,IAAI,CAAC,GAAG,qBAAqB;aACtE,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAClF,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,kCAAkC,IAAI,CAAC,SAAS,GAAG;gBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,+BAA+B;aACrC,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;YAC9B,mDAAmD;YACnD,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;gBAC3F,IAAI,kBAAkB,EAAE,CAAC;oBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;oBAC9E,MAAM,aAAa,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;oBACnF,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;wBACtE,QAAQ,CAAC,IAAI,CAAC;4BACZ,MAAM,EAAE,4BAA4B;4BACpC,QAAQ,EAAE,OAAO;4BACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;4BAC1H,OAAO,EAAE,IAAI,CAAC,GAAG;4BACjB,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;4BAC3C,GAAG,EAAE,yFAAyF;yBAC/F,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YACD,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,aAAa,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;QAChD,MAAM,YAAY,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAClD,MAAM,WAAW,GAAG,QAAQ,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,IAAI,aAAa,KAAK,QAAQ,CAAC;QAE9F,IAAI,YAAY,IAAI,WAAW,EAAE,CAAC;YAChC,wEAAwE;YACxE,MAAM,MAAM,GAAG,sBAAsB,CAAC,GAAG,CAAC,aAAc,CAAC,IAAI,EAAE,CAAC;YAChE,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,YAAY,EAAE,CAAC,CAAC;YACjD,sBAAsB,CAAC,GAAG,CAAC,aAAc,EAAE,MAAM,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,0EAA0E;YAC1E,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM;gBAC1D,OAAO,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC;oBAClC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,2CAA2C,YAAY,IAAI;oBACxE,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,2CAA2C,YAAY,IAAI;gBAC1E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,WAAW,EAAE,CAAC,YAAY,CAAC;gBAC3B,GAAG,EAAE,8CAA8C;aACpD,CAAC,CAAC;YAEH,gFAAgF;YAChF,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;gBAC3F,IAAI,kBAAkB,EAAE,CAAC;oBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;oBAC9E,MAAM,aAAa,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;oBACnF,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;wBACtE,QAAQ,CAAC,IAAI,CAAC;4BACZ,MAAM,EAAE,4BAA4B;4BACpC,QAAQ,EAAE,OAAO;4BACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;4BAC1H,OAAO,EAAE,IAAI,CAAC,GAAG;4BACjB,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;4BAC3C,GAAG,EAAE,yFAAyF;yBAC/F,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,+EAA+E;IAC/E,+EAA+E;IAC/E,+EAA+E;IAC/E,sDAAsD;IAEtD,MAAM,OAAO,GAAG,CAAC,GAAG,sBAAsB,CAAC,OAAO,EAAE,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,sBAAsB;IACxB,CAAC;SAAM,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,qFAAqF;QACrF,MAAM,CAAC,UAAU,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,2DAA2D;QAC3D,MAAM,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,kBAAkB,CAAC;QAE1E,kEAAkE;QAClE,kFAAkF;QAClF,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;YAChB,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,GAAG,OAAO,2CAA2C,YAAY,IAAI;gBAC9E,OAAO;gBACP,WAAW,EAAE,CAAC,YAAY,CAAC;gBAC3B,GAAG,EAAE,8CAA8C;aACpD,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,GAAG,KAAK,0BAA0B,UAAU,8BAA8B,WAAW,sDAAsD;gBACpJ,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;gBAC5D,GAAG,EAAE,4GAA4G;aAClH,CAAC,CAAC;QACL,CAAC;IACH,CAAC;SAAM,CAAC;QACN,iFAAiF;QACjF,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,OAAO,EAAE,CAAC;YAClC,KAAK,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,IAAI,OAAO,EAAE,CAAC;gBAChD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,4BAA4B;oBACpC,QAAQ,EAAE,MAAM;oBAChB,OAAO,EAAE,GAAG,OAAO,2CAA2C,YAAY,IAAI;oBAC9E,OAAO;oBACP,WAAW,EAAE,CAAC,YAAY,CAAC;oBAC3B,GAAG,EAAE,8CAA8C;iBACpD,CAAC,CAAC;gBAEH,gDAAgD;gBAChD,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,OAAO,CAAC,CAAC;gBACrD,IAAI,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;oBAClC,MAAM,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;oBAC9F,IAAI,kBAAkB,EAAE,CAAC;wBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;wBAC9E,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;wBACzF,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;4BACtE,QAAQ,CAAC,IAAI,CAAC;gCACZ,MAAM,EAAE,4BAA4B;gCACpC,QAAQ,EAAE,OAAO;gCACjB,OAAO,EAAE,GAAG,OAAO,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;gCACzH,OAAO;gCACP,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;gCAC3C,GAAG,EAAE,yFAAyF;6BAC/F,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1,3 +1,15 @@
|
|
|
1
|
-
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
-
export
|
|
1
|
+
import type { NormalizeUrlOptions, ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
export interface SitemapCompletenessOptions {
|
|
3
|
+
/**
|
|
4
|
+
* True when the audit ran on a sampled or link-discovery crawl.
|
|
5
|
+
* On sampled crawls it is normal to find pages not listed in the sitemap
|
|
6
|
+
* (they were discovered via links, not sitemap), so the aggregate
|
|
7
|
+
* "missing from sitemap" finding is demoted to `warning`.
|
|
8
|
+
* Wire this from the auditor's `isSampledAudit` flag.
|
|
9
|
+
*/
|
|
10
|
+
sampled?: boolean;
|
|
11
|
+
/** URL normalization options that match what the auditor used when building page.url. */
|
|
12
|
+
normalizeUrlOptions?: NormalizeUrlOptions;
|
|
13
|
+
}
|
|
14
|
+
export declare function sitemapCompletenessRule(pages: ParsedPage[], sitemapUrls: Set<string>, options?: SitemapCompletenessOptions): RuleResult[];
|
|
3
15
|
//# sourceMappingURL=sitemap-completeness.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sitemap-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/sitemap-completeness.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"sitemap-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/sitemap-completeness.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAElF,MAAM,WAAW,0BAA0B;IACzC;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,yFAAyF;IACzF,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;CAC3C;AAED,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EACxB,OAAO,CAAC,EAAE,0BAA0B,GACnC,UAAU,EAAE,CA6Ed"}
|