@pseolint/core 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
- package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
- package/dist/algorithms/authority/commoncrawl.js +17 -0
- package/dist/algorithms/authority/commoncrawl.js.map +1 -0
- package/dist/algorithms/authority/openpagerank.d.ts +19 -0
- package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
- package/dist/algorithms/authority/openpagerank.js +42 -0
- package/dist/algorithms/authority/openpagerank.js.map +1 -0
- package/dist/algorithms/authority/provider.d.ts +16 -0
- package/dist/algorithms/authority/provider.d.ts.map +1 -0
- package/dist/algorithms/authority/provider.js +24 -0
- package/dist/algorithms/authority/provider.js.map +1 -0
- package/dist/algorithms/auto-entity-mask.d.ts +19 -0
- package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
- package/dist/algorithms/auto-entity-mask.js +102 -0
- package/dist/algorithms/auto-entity-mask.js.map +1 -0
- package/dist/algorithms/example-regions.d.ts +22 -0
- package/dist/algorithms/example-regions.d.ts.map +1 -0
- package/dist/algorithms/example-regions.js +32 -0
- package/dist/algorithms/example-regions.js.map +1 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -1
- package/dist/algorithms/fact-extraction.js +6 -0
- package/dist/algorithms/fact-extraction.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +39 -9
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +9 -8
- package/dist/enrich-findings.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts +14 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +96 -15
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +4 -3
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
- package/dist/rules/content/common-phrase-reuse.js +7 -2
- package/dist/rules/content/common-phrase-reuse.js.map +1 -1
- package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
- package/dist/rules/content/regurgitated-content.js +11 -2
- package/dist/rules/content/regurgitated-content.js.map +1 -1
- package/dist/rules/content/translation-no-op.d.ts.map +1 -1
- package/dist/rules/content/translation-no-op.js +5 -1
- package/dist/rules/content/translation-no-op.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +15 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +46 -39
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/links/cluster-connectivity.d.ts +7 -1
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
- package/dist/rules/links/cluster-connectivity.js +8 -2
- package/dist/rules/links/cluster-connectivity.js.map +1 -1
- package/dist/rules/links/orphan-pages.d.ts +8 -1
- package/dist/rules/links/orphan-pages.d.ts.map +1 -1
- package/dist/rules/links/orphan-pages.js +10 -1
- package/dist/rules/links/orphan-pages.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +33 -21
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/spam/entity-swap.d.ts.map +1 -1
- package/dist/rules/spam/entity-swap.js +51 -9
- package/dist/rules/spam/entity-swap.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +5 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
- package/dist/rules/tech/canonical-consistency.js +144 -28
- package/dist/rules/tech/canonical-consistency.js.map +1 -1
- package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
- package/dist/rules/tech/sitemap-completeness.js +21 -5
- package/dist/rules/tech/sitemap-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +11 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +47 -5
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/template-detection.d.ts +1 -0
- package/dist/template-detection.d.ts.map +1 -1
- package/dist/template-detection.js +1 -1
- package/dist/template-detection.js.map +1 -1
- package/dist/types.d.ts +16 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +109 -93
|
@@ -1,51 +1,58 @@
|
|
|
1
1
|
function tokenize(text) {
|
|
2
|
-
//
|
|
3
|
-
//
|
|
4
|
-
// the "unique" count (a word that's shared but happens to carry a trailing
|
|
5
|
-
// comma on one page looked unique) — false precision in the shared/unique
|
|
6
|
-
// split this rule now surfaces.
|
|
2
|
+
// Lowercase, split on whitespace, strip edge punctuation so "word", "word."
|
|
3
|
+
// and "(word)" are one token.
|
|
7
4
|
return text
|
|
8
5
|
.toLowerCase()
|
|
9
6
|
.split(/\s+/)
|
|
10
7
|
.map((t) => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
|
|
11
8
|
.filter(Boolean);
|
|
12
9
|
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
|
|
12
|
+
* token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
|
|
13
|
+
* if on every page — and averaged over the page's distinct tokens. A near-
|
|
14
|
+
* duplicate / boilerplate page scores low regardless of corpus size or length; a
|
|
15
|
+
* large original page stays high. Continuous, so it doesn't shuffle at the margin.
|
|
16
|
+
* Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
|
|
17
|
+
*/
|
|
18
|
+
export function uniqueValueRule(pages, thresholds) {
|
|
19
|
+
const { passBelow, errorBelow } = thresholds;
|
|
20
|
+
const N = pages.length;
|
|
21
|
+
const lnN = Math.log(N);
|
|
22
|
+
if (N <= 1 || lnN === 0)
|
|
23
|
+
return []; // can't measure rarity against a single page
|
|
24
|
+
const df = new Map();
|
|
25
|
+
const pageDistinct = pages.map((p) => new Set(tokenize(p.contentText)));
|
|
26
|
+
for (const distinct of pageDistinct) {
|
|
27
|
+
for (const t of distinct)
|
|
28
|
+
df.set(t, (df.get(t) ?? 0) + 1);
|
|
20
29
|
}
|
|
21
30
|
const findings = [];
|
|
22
|
-
pages.forEach((page,
|
|
23
|
-
const distinct =
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
});
|
|
48
|
-
}
|
|
31
|
+
pages.forEach((page, i) => {
|
|
32
|
+
const distinct = pageDistinct[i];
|
|
33
|
+
if (distinct.size === 0)
|
|
34
|
+
return; // empty page → thin-content handles it
|
|
35
|
+
let mass = 0;
|
|
36
|
+
for (const t of distinct)
|
|
37
|
+
mass += Math.log(N / (df.get(t) ?? 1)) / lnN;
|
|
38
|
+
const density = mass / distinct.size;
|
|
39
|
+
if (density >= passBelow)
|
|
40
|
+
return;
|
|
41
|
+
const severity = density < errorBelow ? "error" : "info";
|
|
42
|
+
const pct = (density * 100).toFixed(1);
|
|
43
|
+
findings.push({
|
|
44
|
+
ruleId: "content/unique-value",
|
|
45
|
+
severity,
|
|
46
|
+
message: `${page.url} has low unique-content density ${density.toFixed(3)} ` +
|
|
47
|
+
`(${pct}% of its ${distinct.size} distinct words are page-distinctive; floor ${passBelow.toFixed(2)}). ` +
|
|
48
|
+
`Most of its vocabulary also appears on other pages.`,
|
|
49
|
+
pageUrl: page.url,
|
|
50
|
+
fix: `Raise originality density: add page-specific text — a distinct lead, this ` +
|
|
51
|
+
`record's own facts, page-specific examples. Content repeated across pages on ` +
|
|
52
|
+
`the same axis (boilerplate, shared legal/spec blocks, per-axis data like a ` +
|
|
53
|
+
`role's regulations across that role's documents) is common vocabulary and ` +
|
|
54
|
+
`does NOT raise density, even when it is useful.`,
|
|
55
|
+
});
|
|
49
56
|
});
|
|
50
57
|
return findings;
|
|
51
58
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AASA,SAAS,QAAQ,CAAC,IAAY;IAC5B,4EAA4E;IAC5E,8BAA8B;IAC9B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,UAAiC;IAEjC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,UAAU,CAAC;IAC7C,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,CAAC,6CAA6C;IAEjF,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;IACxE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACxB,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,CAAC,uCAAuC;QACxE,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QACrC,IAAI,OAAO,IAAI,SAAS;YAAE,OAAO;QAEjC,MAAM,QAAQ,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;QACzD,MAAM,GAAG,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ;YACR,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,mCAAmC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBACnE,IAAI,GAAG,YAAY,QAAQ,CAAC,IAAI,+CAA+C,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACxG,qDAAqD;YACvD,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,+EAA+E;gBAC/E,6EAA6E;gBAC7E,4EAA4E;gBAC5E,iDAAiD;SACpD,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -3,5 +3,11 @@ import type { ParsedPage, RuleResult } from "../../types.js";
|
|
|
3
3
|
* Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
|
|
4
4
|
* internal crawl link to another cluster and no inbound from another cluster.
|
|
5
5
|
*/
|
|
6
|
-
export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string
|
|
6
|
+
export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>,
|
|
7
|
+
/**
|
|
8
|
+
* 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
|
|
9
|
+
* that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
|
|
10
|
+
* unreliable. Only run on a full crawl.
|
|
11
|
+
*/
|
|
12
|
+
sampled?: boolean): RuleResult[];
|
|
7
13
|
//# sourceMappingURL=cluster-connectivity.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC;AACtB;;;;GAIG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CA0Dd"}
|
|
@@ -19,8 +19,14 @@ function hasCrossClusterInbound(clusterDir, urlsInCluster, pages, knownUrls) {
|
|
|
19
19
|
* Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
|
|
20
20
|
* internal crawl link to another cluster and no inbound from another cluster.
|
|
21
21
|
*/
|
|
22
|
-
export function clusterConnectivityRule(pages, knownUrls
|
|
23
|
-
|
|
22
|
+
export function clusterConnectivityRule(pages, knownUrls,
|
|
23
|
+
/**
|
|
24
|
+
* 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
|
|
25
|
+
* that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
|
|
26
|
+
* unreliable. Only run on a full crawl.
|
|
27
|
+
*/
|
|
28
|
+
sampled = false) {
|
|
29
|
+
if (sampled || pages.length < 2) {
|
|
24
30
|
return [];
|
|
25
31
|
}
|
|
26
32
|
const clusterPages = new Map();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;
|
|
1
|
+
{"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;AACtB;;;;GAIG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1,3 +1,10 @@
|
|
|
1
1
|
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
-
export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string
|
|
2
|
+
export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string,
|
|
3
|
+
/**
|
|
4
|
+
* 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
|
|
5
|
+
* given URL is often simply not in the fetched subset, so "0 inbound in this
|
|
6
|
+
* crawl" is not evidence of a real orphan. Orphan detection is only reliable
|
|
7
|
+
* on a full crawl — skip it when sampled rather than flag healthy pages.
|
|
8
|
+
*/
|
|
9
|
+
sampled?: boolean): RuleResult[];
|
|
3
10
|
//# sourceMappingURL=orphan-pages.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM;AAChB;;;;;GAKG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CAqBd"}
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
-
export function orphanPagesRule(pages, inboundLinks, rootUrl
|
|
1
|
+
export function orphanPagesRule(pages, inboundLinks, rootUrl,
|
|
2
|
+
/**
|
|
3
|
+
* 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
|
|
4
|
+
* given URL is often simply not in the fetched subset, so "0 inbound in this
|
|
5
|
+
* crawl" is not evidence of a real orphan. Orphan detection is only reliable
|
|
6
|
+
* on a full crawl — skip it when sampled rather than flag healthy pages.
|
|
7
|
+
*/
|
|
8
|
+
sampled = false) {
|
|
9
|
+
if (sampled)
|
|
10
|
+
return [];
|
|
2
11
|
const findings = [];
|
|
3
12
|
for (const page of pages) {
|
|
4
13
|
if (rootUrl && page.url === rootUrl) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;
|
|
1
|
+
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;AAChB;;;;;GAKG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA6DvE"}
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
export function schemaConsistencyRule(pages) {
|
|
2
2
|
const findings = [];
|
|
3
|
-
|
|
3
|
+
// Group pages by structureSignature so we only compare @type within template clusters.
|
|
4
|
+
// A normal site legitimately mixes types across templates (WebSite on home, Article on
|
|
5
|
+
// blog, Product on listings). Variance is only a problem when pages that share the same
|
|
6
|
+
// template (same structureSignature) use different @type values.
|
|
7
|
+
const clustersBySignature = new Map();
|
|
4
8
|
for (const page of pages) {
|
|
5
9
|
const types = new Set();
|
|
6
10
|
for (const entry of page.jsonLd) {
|
|
@@ -15,30 +19,38 @@ export function schemaConsistencyRule(pages) {
|
|
|
15
19
|
types.add(obj["@type"]);
|
|
16
20
|
}
|
|
17
21
|
}
|
|
18
|
-
if (types.size
|
|
19
|
-
|
|
22
|
+
if (types.size === 0) {
|
|
23
|
+
continue;
|
|
20
24
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}
|
|
25
|
-
const allTypes = new Set();
|
|
26
|
-
for (const types of typesByPage.values()) {
|
|
27
|
-
for (const t of types) {
|
|
28
|
-
allTypes.add(t);
|
|
25
|
+
const sig = page.structureSignature;
|
|
26
|
+
if (!clustersBySignature.has(sig)) {
|
|
27
|
+
clustersBySignature.set(sig, []);
|
|
29
28
|
}
|
|
29
|
+
clustersBySignature.get(sig).push({ url: page.url, types });
|
|
30
30
|
}
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
// Within each cluster of ≥2 pages, check whether all pages use the same @type set.
|
|
32
|
+
for (const members of clustersBySignature.values()) {
|
|
33
|
+
if (members.length < 2) {
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
36
|
+
const allTypesInCluster = new Set();
|
|
37
|
+
for (const { types } of members) {
|
|
38
|
+
for (const t of types) {
|
|
39
|
+
allTypesInCluster.add(t);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
if (allTypesInCluster.size <= 1) {
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
const typeList = Array.from(allTypesInCluster).sort().join(", ");
|
|
46
|
+
findings.push({
|
|
47
|
+
ruleId: "schema/consistency",
|
|
48
|
+
severity: "info",
|
|
49
|
+
message: `Template pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
|
|
50
|
+
relatedUrls: members.map((m) => m.url),
|
|
51
|
+
fix: `Use a consistent @type across all pages that share the same template structure.`
|
|
52
|
+
});
|
|
33
53
|
}
|
|
34
|
-
const typeList = Array.from(allTypes).sort().join(", ");
|
|
35
|
-
findings.push({
|
|
36
|
-
ruleId: "schema/consistency",
|
|
37
|
-
severity: "info",
|
|
38
|
-
message: `Pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
|
|
39
|
-
relatedUrls: Array.from(typesByPage.keys()),
|
|
40
|
-
fix: `Use a consistent @type across all template pages, or separate pages into groups with different schema types.`
|
|
41
|
-
});
|
|
42
54
|
return findings;
|
|
43
55
|
}
|
|
44
56
|
//# sourceMappingURL=consistency.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,
|
|
1
|
+
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,mFAAmF;IACnF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAU,CAAC;QAC5C,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;YAChC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,IAAI,iBAAiB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,0CAA0C,QAAQ,6DAA6D;YACxH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,iFAAiF;SACvF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAsBrD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAgDhD"}
|
|
@@ -1,23 +1,65 @@
|
|
|
1
1
|
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
2
|
import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
|
|
3
|
+
/**
|
|
4
|
+
* Compute masking coverage: fraction of pages where at least one entity token
|
|
5
|
+
* was replaced by a placeholder. A page "benefited" from masking when its
|
|
6
|
+
* masked text differs from the original.
|
|
7
|
+
*
|
|
8
|
+
* ponytail: threshold is <20% of pages masked → low coverage (weak entity signal).
|
|
9
|
+
* Zero patterns supplied is a degenerate case and always yields low coverage.
|
|
10
|
+
*/
|
|
11
|
+
function maskingCoverage(pages, patterns) {
|
|
12
|
+
if (patterns.length === 0 || pages.length === 0)
|
|
13
|
+
return 0;
|
|
14
|
+
let touched = 0;
|
|
15
|
+
for (const page of pages) {
|
|
16
|
+
const masked = maskEntities(page.contentText, patterns);
|
|
17
|
+
if (masked !== page.contentText)
|
|
18
|
+
touched += 1;
|
|
19
|
+
}
|
|
20
|
+
return touched / pages.length;
|
|
21
|
+
}
|
|
22
|
+
const LOW_COVERAGE_THRESHOLD = 0.2; // ponytail: <20% pages masked → low-confidence signal
|
|
3
23
|
export function entitySwapRule(pages, patterns, threshold) {
|
|
4
24
|
const findings = [];
|
|
5
25
|
const pairs = [];
|
|
6
26
|
const hashes = pages.map((page) => simHashFromText(maskEntities(page.contentText, patterns)));
|
|
27
|
+
const coverage = maskingCoverage(pages, patterns);
|
|
28
|
+
const isLowCoverage = coverage < LOW_COVERAGE_THRESHOLD;
|
|
7
29
|
for (let i = 0; i < pages.length; i += 1) {
|
|
8
30
|
for (let j = i + 1; j < pages.length; j += 1) {
|
|
9
31
|
const similarity = similarityFromDistance(hammingDistance(hashes[i], hashes[j]));
|
|
10
32
|
if (similarity >= threshold) {
|
|
11
33
|
pairs.push({ leftUrl: pages[i].url, rightUrl: pages[j].url, similarity });
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
34
|
+
if (isLowCoverage) {
|
|
35
|
+
// Weak/absent entity patterns mean masking barely changed the text;
|
|
36
|
+
// this finding overlaps a plain near-duplicate signal, not a confirmed
|
|
37
|
+
// entity-swap. Downgrade to warning with low confidence.
|
|
38
|
+
findings.push({
|
|
39
|
+
ruleId: "spam/entity-swap",
|
|
40
|
+
severity: "warning",
|
|
41
|
+
confidence: "low",
|
|
42
|
+
message: `${pages[i].url} and ${pages[j].url} are near-identical, but entity masking ` +
|
|
43
|
+
`coverage is too low to confirm an entity-swap pattern (masking touched ` +
|
|
44
|
+
`${Math.round(coverage * 100)}% of pages). ` +
|
|
45
|
+
`Provide entity patterns or treat this as a near-duplicate finding instead.`,
|
|
46
|
+
pageUrl: pages[i].url,
|
|
47
|
+
relatedUrls: [pages[j].url],
|
|
48
|
+
similarity,
|
|
49
|
+
fix: "Supply entity patterns (city names, states, product names) so the rule can confirm whether these pages are entity-swapped templates. If no entity patterns apply, address as near-duplicate spam instead."
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
findings.push({
|
|
54
|
+
ruleId: "spam/entity-swap",
|
|
55
|
+
severity: "critical",
|
|
56
|
+
message: `${pages[i].url} and ${pages[j].url} look structurally identical after entity masking.`,
|
|
57
|
+
pageUrl: pages[i].url,
|
|
58
|
+
relatedUrls: [pages[j].url],
|
|
59
|
+
similarity,
|
|
60
|
+
fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
|
|
61
|
+
});
|
|
62
|
+
}
|
|
21
63
|
}
|
|
22
64
|
}
|
|
23
65
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAmB,EAAE,QAA6B;IACzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1D,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QACxD,IAAI,MAAM,KAAK,IAAI,CAAC,WAAW;YAAE,OAAO,IAAI,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;AAChC,CAAC;AAED,MAAM,sBAAsB,GAAG,GAAG,CAAC,CAAC,sDAAsD;AAE1F,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAClD,MAAM,aAAa,GAAG,QAAQ,GAAG,sBAAsB,CAAC;IAExD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAE1E,IAAI,aAAa,EAAE,CAAC;oBAClB,oEAAoE;oBACpE,uEAAuE;oBACvE,0DAA0D;oBAC1D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,SAAS;wBACnB,UAAU,EAAE,KAAK;wBACjB,OAAO,EACL,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,0CAA0C;4BAC7E,yEAAyE;4BACzE,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,eAAe;4BAC5C,4EAA4E;wBAC9E,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,2MAA2M;qBACjN,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,UAAU;wBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;wBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,gKAAgK;qBACtK,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,
|
|
1
|
+
{"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,CAkC1D"}
|
|
@@ -19,7 +19,11 @@ export function thinContentRule(pages, minWords) {
|
|
|
19
19
|
: "";
|
|
20
20
|
findings.push({
|
|
21
21
|
ruleId: "spam/thin-content",
|
|
22
|
-
|
|
22
|
+
// High confidence (far below the floor) is an error; the medium band — which
|
|
23
|
+
// the rule itself flags as "could legitimately be a short page" — is a
|
|
24
|
+
// warning, not a ship-blocker. The page still joins thinContentUrls either
|
|
25
|
+
// way so spam/doorway-pattern can stack on it.
|
|
26
|
+
severity: confidence === "high" ? "error" : "warning",
|
|
23
27
|
confidence,
|
|
24
28
|
message: `${page.url} has thin content (${words} words).${shortPageNote}`,
|
|
25
29
|
fix: `Add at least ${minWords - words} more words of substantive content relevant to this page's specific topic.`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,QAAQ,EAAE,OAAO;
|
|
1
|
+
{"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,6EAA6E;YAC7E,uEAAuE;YACvE,2EAA2E;YAC3E,+CAA+C;YAC/C,QAAQ,EAAE,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACrD,UAAU;YACV,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,sBAAsB,KAAK,WAAW,aAAa,EAAE;YACzE,GAAG,EAAE,gBAAgB,QAAQ,GAAG,KAAK,4EAA4E;SAClH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;
|
|
1
|
+
{"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;AAWD,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,aAAa,EAAE,mBAAmB,GACjC,UAAU,EAAE,CA+Kd"}
|