@pseolint/core 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
- package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
- package/dist/algorithms/authority/commoncrawl.js +17 -0
- package/dist/algorithms/authority/commoncrawl.js.map +1 -0
- package/dist/algorithms/authority/openpagerank.d.ts +19 -0
- package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
- package/dist/algorithms/authority/openpagerank.js +42 -0
- package/dist/algorithms/authority/openpagerank.js.map +1 -0
- package/dist/algorithms/authority/provider.d.ts +16 -0
- package/dist/algorithms/authority/provider.d.ts.map +1 -0
- package/dist/algorithms/authority/provider.js +24 -0
- package/dist/algorithms/authority/provider.js.map +1 -0
- package/dist/algorithms/auto-entity-mask.d.ts +19 -0
- package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
- package/dist/algorithms/auto-entity-mask.js +102 -0
- package/dist/algorithms/auto-entity-mask.js.map +1 -0
- package/dist/algorithms/example-regions.d.ts +22 -0
- package/dist/algorithms/example-regions.d.ts.map +1 -0
- package/dist/algorithms/example-regions.js +32 -0
- package/dist/algorithms/example-regions.js.map +1 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -1
- package/dist/algorithms/fact-extraction.js +6 -0
- package/dist/algorithms/fact-extraction.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +39 -9
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +9 -8
- package/dist/enrich-findings.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts +14 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +96 -15
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +4 -3
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
- package/dist/rules/content/common-phrase-reuse.js +7 -2
- package/dist/rules/content/common-phrase-reuse.js.map +1 -1
- package/dist/rules/content/eeat-signals.d.ts +13 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -1
- package/dist/rules/content/eeat-signals.js +36 -4
- package/dist/rules/content/eeat-signals.js.map +1 -1
- package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
- package/dist/rules/content/regurgitated-content.js +11 -2
- package/dist/rules/content/regurgitated-content.js.map +1 -1
- package/dist/rules/content/translation-no-op.d.ts.map +1 -1
- package/dist/rules/content/translation-no-op.js +5 -1
- package/dist/rules/content/translation-no-op.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +15 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +46 -39
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/content/value-add.d.ts +8 -2
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +39 -48
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/content/wikipedia-paraphrase.d.ts +12 -7
- package/dist/rules/content/wikipedia-paraphrase.d.ts.map +1 -1
- package/dist/rules/content/wikipedia-paraphrase.js +52 -13
- package/dist/rules/content/wikipedia-paraphrase.js.map +1 -1
- package/dist/rules/links/cluster-connectivity.d.ts +7 -1
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
- package/dist/rules/links/cluster-connectivity.js +8 -2
- package/dist/rules/links/cluster-connectivity.js.map +1 -1
- package/dist/rules/links/orphan-pages.d.ts +8 -1
- package/dist/rules/links/orphan-pages.d.ts.map +1 -1
- package/dist/rules/links/orphan-pages.js +10 -1
- package/dist/rules/links/orphan-pages.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +37 -21
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -1
- package/dist/rules/schema/json-ld-valid.js +8 -1
- package/dist/rules/schema/json-ld-valid.js.map +1 -1
- package/dist/rules/schema/required-fields.d.ts.map +1 -1
- package/dist/rules/schema/required-fields.js +47 -1
- package/dist/rules/schema/required-fields.js.map +1 -1
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -1
- package/dist/rules/spam/boilerplate-ratio.js +36 -22
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -1
- package/dist/rules/spam/entity-swap.d.ts.map +1 -1
- package/dist/rules/spam/entity-swap.js +51 -9
- package/dist/rules/spam/entity-swap.js.map +1 -1
- package/dist/rules/spam/template-diversity.d.ts.map +1 -1
- package/dist/rules/spam/template-diversity.js +37 -2
- package/dist/rules/spam/template-diversity.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +5 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
- package/dist/rules/tech/canonical-consistency.js +144 -28
- package/dist/rules/tech/canonical-consistency.js.map +1 -1
- package/dist/rules/tech/og-completeness.d.ts +8 -3
- package/dist/rules/tech/og-completeness.d.ts.map +1 -1
- package/dist/rules/tech/og-completeness.js +15 -7
- package/dist/rules/tech/og-completeness.js.map +1 -1
- package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
- package/dist/rules/tech/sitemap-completeness.js +21 -5
- package/dist/rules/tech/sitemap-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +11 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +47 -5
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/template-detection.d.ts +1 -0
- package/dist/template-detection.d.ts.map +1 -1
- package/dist/template-detection.js +1 -1
- package/dist/template-detection.js.map +1 -1
- package/dist/types.d.ts +16 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +109 -93
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;
|
|
1
|
+
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;AAChB;;;;;GAKG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgEvE"}
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
export function schemaConsistencyRule(pages) {
|
|
2
2
|
const findings = [];
|
|
3
|
-
|
|
3
|
+
// Group pages by structureSignature so we only compare @type within template clusters.
|
|
4
|
+
// A normal site legitimately mixes types across templates (WebSite on home, Article on
|
|
5
|
+
// blog, Product on listings). Variance is only a problem when pages that share the same
|
|
6
|
+
// template (same structureSignature) use different @type values.
|
|
7
|
+
const clustersBySignature = new Map();
|
|
4
8
|
for (const page of pages) {
|
|
5
9
|
const types = new Set();
|
|
6
10
|
for (const entry of page.jsonLd) {
|
|
@@ -15,30 +19,42 @@ export function schemaConsistencyRule(pages) {
|
|
|
15
19
|
types.add(obj["@type"]);
|
|
16
20
|
}
|
|
17
21
|
}
|
|
18
|
-
if (types.size
|
|
19
|
-
|
|
22
|
+
if (types.size === 0) {
|
|
23
|
+
continue;
|
|
20
24
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}
|
|
25
|
-
const allTypes = new Set();
|
|
26
|
-
for (const types of typesByPage.values()) {
|
|
27
|
-
for (const t of types) {
|
|
28
|
-
allTypes.add(t);
|
|
25
|
+
const sig = page.structureSignature;
|
|
26
|
+
if (!clustersBySignature.has(sig)) {
|
|
27
|
+
clustersBySignature.set(sig, []);
|
|
29
28
|
}
|
|
29
|
+
clustersBySignature.get(sig).push({ url: page.url, types });
|
|
30
30
|
}
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
// Within each cluster of ≥2 pages, fire only when pages carry DIFFERENT @type
|
|
32
|
+
// SETS. A single page legitimately emits several JSON-LD blocks (e.g. Article +
|
|
33
|
+
// FAQPage + Organization) — that multi-type set is not an inconsistency. The
|
|
34
|
+
// problem is two pages on the SAME template disagreeing on their type set
|
|
35
|
+
// (e.g. one Article, one NewsArticle). Comparing per-page set signatures (not
|
|
36
|
+
// the union) avoids the false positive where every page shares the same set.
|
|
37
|
+
const setSignature = (types) => Array.from(types).sort().join("+");
|
|
38
|
+
for (const members of clustersBySignature.values()) {
|
|
39
|
+
if (members.length < 2) {
|
|
40
|
+
continue;
|
|
41
|
+
}
|
|
42
|
+
const distinctSetSignatures = new Set(members.map((m) => setSignature(m.types)));
|
|
43
|
+
if (distinctSetSignatures.size <= 1) {
|
|
44
|
+
continue; // all pages in this template cluster agree on their @type set
|
|
45
|
+
}
|
|
46
|
+
const variants = Array.from(distinctSetSignatures)
|
|
47
|
+
.sort()
|
|
48
|
+
.map((s) => `[${s.split("+").join(", ")}]`)
|
|
49
|
+
.join(" vs ");
|
|
50
|
+
findings.push({
|
|
51
|
+
ruleId: "schema/consistency",
|
|
52
|
+
severity: "info",
|
|
53
|
+
message: `Template pages disagree on schema @type (${variants}). Use a consistent @type across pages that share the same template structure.`,
|
|
54
|
+
relatedUrls: members.map((m) => m.url),
|
|
55
|
+
fix: `Use a consistent @type (or set of @types) across all pages that share the same template structure.`
|
|
56
|
+
});
|
|
33
57
|
}
|
|
34
|
-
const typeList = Array.from(allTypes).sort().join(", ");
|
|
35
|
-
findings.push({
|
|
36
|
-
ruleId: "schema/consistency",
|
|
37
|
-
severity: "info",
|
|
38
|
-
message: `Pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
|
|
39
|
-
relatedUrls: Array.from(typesByPage.keys()),
|
|
40
|
-
fix: `Use a consistent @type across all template pages, or separate pages into groups with different schema types.`
|
|
41
|
-
});
|
|
42
58
|
return findings;
|
|
43
59
|
}
|
|
44
60
|
//# sourceMappingURL=consistency.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,
|
|
1
|
+
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,8EAA8E;IAC9E,gFAAgF;IAChF,6EAA6E;IAC7E,0EAA0E;IAC1E,8EAA8E;IAC9E,6EAA6E;IAC7E,MAAM,YAAY,GAAG,CAAC,KAAkB,EAAU,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,qBAAqB,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,qBAAqB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YACpC,SAAS,CAAC,8DAA8D;QAC1E,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;aAC/C,IAAI,EAAE;aACN,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;aAC1C,IAAI,CAAC,MAAM,CAAC,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,4CAA4C,QAAQ,gFA
AgF;YAC7I,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,oGAAoG;SAC1G,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA8DjE"}
|
|
@@ -30,7 +30,14 @@ export function jsonLdValidRule(pages) {
|
|
|
30
30
|
}
|
|
31
31
|
if (obj["@type"] !== undefined) {
|
|
32
32
|
const typeValue = obj["@type"];
|
|
33
|
-
|
|
33
|
+
const typeIsValid =
|
|
34
|
+
// string: non-empty non-whitespace
|
|
35
|
+
(typeof typeValue === "string" && typeValue.trim() !== "") ||
|
|
36
|
+
// array: non-empty, every element is a non-empty non-whitespace string
|
|
37
|
+
(Array.isArray(typeValue) &&
|
|
38
|
+
typeValue.length > 0 &&
|
|
39
|
+
typeValue.every((t) => typeof t === "string" && t.trim() !== ""));
|
|
40
|
+
if (!typeIsValid) {
|
|
34
41
|
findings.push({
|
|
35
42
|
ruleId: "schema/json-ld-valid",
|
|
36
43
|
severity: "error",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,
|
|
1
|
+
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM,WAAW;gBACf,mCAAmC;gBACnC,CAAC,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;oBAC1D,uEAAuE;oBACvE,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC;wBACvB,SAAS,CAAC,MAAM,GAAG,CAAC;wBACnB,SAAuB,CAAC,KAAK,CAC5B,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAChD,CAAC,CAAC;gBACP,IAAI,CAAC,WAAW,EAAE,CAAC;oBACjB,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyD7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAwDpE"}
|
|
@@ -3,6 +3,47 @@ const REQUIRED_FIELDS = {
|
|
|
3
3
|
Product: ["name"],
|
|
4
4
|
FAQPage: ["mainEntity"]
|
|
5
5
|
};
|
|
6
|
+
/**
|
|
7
|
+
* Returns true when a field value should be treated as "missing" (junk/empty).
|
|
8
|
+
* Accepts non-empty strings, non-empty arrays, and non-empty objects as present.
|
|
9
|
+
*/
|
|
10
|
+
function isMissing(value) {
|
|
11
|
+
if (value === undefined || value === null)
|
|
12
|
+
return true;
|
|
13
|
+
if (typeof value === "string")
|
|
14
|
+
return value.trim() === "";
|
|
15
|
+
if (Array.isArray(value))
|
|
16
|
+
return value.length === 0;
|
|
17
|
+
if (typeof value === "object")
|
|
18
|
+
return Object.keys(value).length === 0;
|
|
19
|
+
// booleans (false/true) and numbers other than checked above
|
|
20
|
+
if (typeof value === "boolean" || typeof value === "number")
|
|
21
|
+
return false;
|
|
22
|
+
return true;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Article `author` is valid when it is:
|
|
26
|
+
* - a non-empty string, OR
|
|
27
|
+
* - an object with a non-empty `name` property (Person/Organization), OR
|
|
28
|
+
* - a non-empty array of the above (co-authored articles — Schema.org allows
|
|
29
|
+
* `author` to be a list). Present if at least one element is a valid author.
|
|
30
|
+
* Returns true when the author value is missing/junk.
|
|
31
|
+
*/
|
|
32
|
+
function isAuthorMissing(value) {
|
|
33
|
+
if (value === undefined || value === null)
|
|
34
|
+
return true;
|
|
35
|
+
if (typeof value === "string")
|
|
36
|
+
return value.trim() === "";
|
|
37
|
+
if (Array.isArray(value)) {
|
|
38
|
+
return value.length === 0 || value.every((item) => isAuthorMissing(item));
|
|
39
|
+
}
|
|
40
|
+
if (typeof value === "object") {
|
|
41
|
+
const obj = value;
|
|
42
|
+
return typeof obj.name !== "string" || obj.name.trim() === "";
|
|
43
|
+
}
|
|
44
|
+
// booleans, numbers — not a valid author shape
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
6
47
|
function hasPrice(obj) {
|
|
7
48
|
if (obj.price !== undefined && obj.price !== null && obj.price !== "") {
|
|
8
49
|
return true;
|
|
@@ -37,7 +78,12 @@ export function requiredFieldsRule(pages) {
|
|
|
37
78
|
}
|
|
38
79
|
const missing = [];
|
|
39
80
|
for (const field of required) {
|
|
40
|
-
if (
|
|
81
|
+
if (field === "author" && schemaType === "Article") {
|
|
82
|
+
if (isAuthorMissing(obj[field])) {
|
|
83
|
+
missing.push(field);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
else if (isMissing(obj[field])) {
|
|
41
87
|
missing.push(field);
|
|
42
88
|
}
|
|
43
89
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,
|
|
1
|
+
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF;;;GAGG;AACH,SAAS,SAAS,CAAC,KAAc;IAC/B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,KAAe,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAChF,6DAA6D;IAC7D,IAAI,OAAO,KAAK,KAAK,SAAS,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC1E,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,OAAO,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,CAAC;IACD,+CAA+C;IAC/C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KA
AK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,KAAK,KAAK,QAAQ,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;oBACnD,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBAChC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;qBAAM,IAAI,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAa7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CA8DxF"}
|
|
@@ -4,11 +4,16 @@ function extractTextBlocks(contentText) {
|
|
|
4
4
|
.map((block) => block.trim().toLowerCase())
|
|
5
5
|
.filter((block) => block.length > 20);
|
|
6
6
|
}
|
|
7
|
+
function wordCount(block) {
|
|
8
|
+
return block.split(/\s+/).length;
|
|
9
|
+
}
|
|
7
10
|
export function boilerplateRatioRule(pages, maxRatio) {
|
|
8
11
|
if (pages.length < 2) {
|
|
9
12
|
return [];
|
|
10
13
|
}
|
|
14
|
+
const N = pages.length;
|
|
11
15
|
const pageBlocks = pages.map((page) => extractTextBlocks(page.contentText));
|
|
16
|
+
// Build per-block document frequency (how many pages contain each block).
|
|
12
17
|
const blockFrequency = new Map();
|
|
13
18
|
for (const blocks of pageBlocks) {
|
|
14
19
|
const unique = new Set(blocks);
|
|
@@ -16,34 +21,43 @@ export function boilerplateRatioRule(pages, maxRatio) {
|
|
|
16
21
|
blockFrequency.set(block, (blockFrequency.get(block) ?? 0) + 1);
|
|
17
22
|
}
|
|
18
23
|
}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
// Continuous weight, min-max normalized over document frequency: a block
|
|
25
|
+
// unique to ONE page is not boilerplate at all (weight 0); a block on EVERY
|
|
26
|
+
// page is full boilerplate (weight 1); mid-frequency blocks scale linearly
|
|
27
|
+
// between. (freq-1)/(N-1) — not freq/N — so unique content never inflates the
|
|
28
|
+
// ratio (which freq/N does, giving every block at least 1/N). N>=2 here, so
|
|
29
|
+
// N-1>=1: no division by zero. Removes the binary skeleton cliff entirely.
|
|
30
|
+
const blockWeight = (block) => {
|
|
31
|
+
const freq = blockFrequency.get(block) ?? 0;
|
|
32
|
+
return (freq - 1) / (N - 1);
|
|
33
|
+
};
|
|
26
34
|
const findings = [];
|
|
27
35
|
pages.forEach((page, index) => {
|
|
28
36
|
const blocks = pageBlocks[index];
|
|
29
|
-
if (blocks.length === 0)
|
|
37
|
+
if (blocks.length === 0)
|
|
30
38
|
return;
|
|
31
|
-
|
|
32
|
-
const totalWords = blocks.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
|
|
33
|
-
const boilerplateWords = blocks
|
|
34
|
-
.filter((b) => skeleton.has(b))
|
|
35
|
-
.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
|
|
39
|
+
const totalWords = blocks.reduce((sum, b) => sum + wordCount(b), 0);
|
|
36
40
|
if (totalWords === 0)
|
|
37
41
|
return;
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
// Weighted boilerplate word count: each block contributes (weight * its word count).
|
|
43
|
+
const weightedBoilerplateWords = blocks.reduce((sum, b) => {
|
|
44
|
+
return sum + blockWeight(b) * wordCount(b);
|
|
45
|
+
}, 0);
|
|
46
|
+
const ratio = weightedBoilerplateWords / totalWords;
|
|
47
|
+
if (ratio <= maxRatio)
|
|
48
|
+
return;
|
|
49
|
+
// 2-band severity: clearly over (≥ threshold + 0.1) → error; just over → warning.
|
|
50
|
+
const clearlyOver = ratio >= maxRatio + 0.1;
|
|
51
|
+
const severity = clearlyOver ? "error" : "warning";
|
|
52
|
+
const confidence = clearlyOver ? "high" : "medium";
|
|
53
|
+
findings.push({
|
|
54
|
+
ruleId: "spam/boilerplate-ratio",
|
|
55
|
+
severity,
|
|
56
|
+
confidence,
|
|
57
|
+
pageUrl: page.url,
|
|
58
|
+
message: `${page.url} has boilerplate ratio ${(ratio * 100).toFixed(1)}% (max ${(maxRatio * 100).toFixed(1)}%).`,
|
|
59
|
+
fix: `${(ratio * 100).toFixed(1)}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${(maxRatio * 100).toFixed(1)}%.`
|
|
60
|
+
});
|
|
47
61
|
});
|
|
48
62
|
return findings;
|
|
49
63
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACnC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,0EAA0E;IAC1E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,4EAA4E;IAC5E,2EAA2E;IAC3E,8EAA8E;IAC9E,4EAA4E;IAC5E,2EAA2E;IAC3E,MAAM,WAAW,GAAG,CAAC,KAAa,EAAU,EAAE;QAC5C,MAAM,IAAI,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEhC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpE,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,qFAAqF;QACrF,MAAM,wBAAwB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YACxD,OAAO,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC,EAAE,CAAC,CAAC,CAAC;QAEN,MAAM,KAAK,GAAG,wBAAwB,GAAG,UAAU,CAAC;QAEpD,IAAI,KAAK,IAAI,QAAQ;YAAE,OAAO;QAE9B,kFAAkF;QAClF,MAAM,WAAW,GAAG,KAAK,IAAI,QAAQ,GAAG,GAAG,CAAC;QAC5
C,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QACnD,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEnD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,wBAAwB;YAChC,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;SACnQ,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAsBrD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAgDhD"}
|
|
@@ -1,23 +1,65 @@
|
|
|
1
1
|
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
2
|
import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
|
|
3
|
+
/**
 * Measure how much of the corpus entity masking actually affected.
 *
 * A page counts as "touched" when running maskEntities over its contentText
 * yields a string that differs from the original text.
 *
 * @param pages    Pages to inspect; each page's `contentText` is read.
 * @param patterns Entity patterns forwarded unchanged to maskEntities.
 * @returns Fraction (0..1) of pages whose text changed under masking. Returns
 *          0 when there are no patterns or no pages, so an empty pattern list
 *          always reads as zero coverage.
 */
function maskingCoverage(pages, patterns) {
    const nothingToMeasure = patterns.length === 0 || pages.length === 0;
    if (nothingToMeasure) {
        return 0;
    }
    // Count the pages whose masked text is not identical to the raw text.
    const touchedPages = pages.filter((page) => maskEntities(page.contentText, patterns) !== page.contentText);
    return touchedPages.length / pages.length;
}
|
|
22
|
+
const LOW_COVERAGE_THRESHOLD = 0.2; // ponytail: masking coverage below this fraction (<20% of pages masked) → low-confidence signal
|
|
3
23
|
export function entitySwapRule(pages, patterns, threshold) {
|
|
4
24
|
const findings = [];
|
|
5
25
|
const pairs = [];
|
|
6
26
|
const hashes = pages.map((page) => simHashFromText(maskEntities(page.contentText, patterns)));
|
|
27
|
+
const coverage = maskingCoverage(pages, patterns);
|
|
28
|
+
const isLowCoverage = coverage < LOW_COVERAGE_THRESHOLD;
|
|
7
29
|
for (let i = 0; i < pages.length; i += 1) {
|
|
8
30
|
for (let j = i + 1; j < pages.length; j += 1) {
|
|
9
31
|
const similarity = similarityFromDistance(hammingDistance(hashes[i], hashes[j]));
|
|
10
32
|
if (similarity >= threshold) {
|
|
11
33
|
pairs.push({ leftUrl: pages[i].url, rightUrl: pages[j].url, similarity });
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
34
|
+
if (isLowCoverage) {
|
|
35
|
+
// Weak/absent entity patterns mean masking barely changed the text;
|
|
36
|
+
// this finding overlaps a plain near-duplicate signal, not a confirmed
|
|
37
|
+
// entity-swap. Downgrade to warning with low confidence.
|
|
38
|
+
findings.push({
|
|
39
|
+
ruleId: "spam/entity-swap",
|
|
40
|
+
severity: "warning",
|
|
41
|
+
confidence: "low",
|
|
42
|
+
message: `${pages[i].url} and ${pages[j].url} are near-identical, but entity masking ` +
|
|
43
|
+
`coverage is too low to confirm an entity-swap pattern (masking touched ` +
|
|
44
|
+
`${Math.round(coverage * 100)}% of pages). ` +
|
|
45
|
+
`Provide entity patterns or treat this as a near-duplicate finding instead.`,
|
|
46
|
+
pageUrl: pages[i].url,
|
|
47
|
+
relatedUrls: [pages[j].url],
|
|
48
|
+
similarity,
|
|
49
|
+
fix: "Supply entity patterns (city names, states, product names) so the rule can confirm whether these pages are entity-swapped templates. If no entity patterns apply, address as near-duplicate spam instead."
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
findings.push({
|
|
54
|
+
ruleId: "spam/entity-swap",
|
|
55
|
+
severity: "critical",
|
|
56
|
+
message: `${pages[i].url} and ${pages[j].url} look structurally identical after entity masking.`,
|
|
57
|
+
pageUrl: pages[i].url,
|
|
58
|
+
relatedUrls: [pages[j].url],
|
|
59
|
+
similarity,
|
|
60
|
+
fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
|
|
61
|
+
});
|
|
62
|
+
}
|
|
21
63
|
}
|
|
22
64
|
}
|
|
23
65
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAmB,EAAE,QAA6B;IACzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1D,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QACxD,IAAI,MAAM,KAAK,IAAI,CAAC,WAAW;YAAE,OAAO,IAAI,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;AAChC,CAAC;AAED,MAAM,sBAAsB,GAAG,GAAG,CAAC,CAAC,sDAAsD;AAE1F,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAClD,MAAM,aAAa,GAAG,QAAQ,GAAG,sBAAsB,CAAC;IAExD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAE1E,IAAI,aAAa,EAAE,CAAC;oBAClB,oEAAoE;oBACpE,uEAAuE;oBACvE,0DAA0D;oBAC1D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,SAAS;wBACnB,UAAU,EAAE,KAAK;wBACjB,OAAO,EACL,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,0CAA0C;4BAC7E,yEAAyE;4BACzE,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,eAAe;4BAC5C,4EAA4E;wBAC9E,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,2MAA2M;qBACjN,CAAC,CA
AC;gBACL,CAAC;qBAAM,CAAC;oBACN,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,UAAU;wBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;wBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,gKAAgK;qBACtK,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AA8BzE,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,UAAU,EAAE,CA0Bd"}
|
|
@@ -1,17 +1,52 @@
|
|
|
1
|
+
/**
 * Coarsen a structureSignature ("tag:count|tag:count|...") by replacing each
 * tag's count with a logarithmic bucket, floor(log2(count + 1)). Pages that
 * differ only by trivial chrome — one extra ad `<div>`, a conditional nav
 * item — tend to collapse to the SAME coarse signature, so a genuinely
 * single-template site is no longer read as "diverse" from count noise.
 *
 * Bucket boundaries: 0→0, 1-2→1, 3-6→2, 7-14→3, 15-30→4, 31-62→5, 63-126→6.
 *
 * The raw exact-count signature is not modified; this function only derives a
 * new string, so the coarsening stays local to the caller.
 *
 * @param {string} signature Pipe-separated "tag:count" pairs.
 * @returns {string} The signature with every parseable, non-negative count
 *   replaced by its bucket. Falsy input is returned unchanged, as are pairs
 *   with no ":" separator or with a count that is not a finite,
 *   non-negative number.
 */
function coarsenSignature(signature) {
    if (!signature) {
        return signature;
    }
    return signature
        .split("|")
        .map((pair) => {
            // Split on the LAST colon so tags that themselves contain ":" survive.
            const idx = pair.lastIndexOf(":");
            if (idx < 0) {
                return pair;
            }
            const tag = pair.slice(0, idx);
            const count = Number(pair.slice(idx + 1));
            // A negative count would make log2(count + 1) evaluate to -Infinity
            // or NaN; treat it like any other unparseable count and keep the
            // pair verbatim instead of emitting "tag:NaN".
            if (!Number.isFinite(count) || count < 0) {
                return pair;
            }
            // Trivial count differences land in the same bucket; an
            // order-of-magnitude change does not.
            return `${tag}:${Math.floor(Math.log2(count + 1))}`;
        })
        .join("|");
}
|
|
1
31
|
export function templateDiversityRule(pages, minUniqueRatio) {
|
|
2
32
|
if (pages.length === 0) {
|
|
3
33
|
return [];
|
|
4
34
|
}
|
|
5
|
-
const unique = new Set(pages.map((page) => page.structureSignature)).size;
|
|
35
|
+
const unique = new Set(pages.map((page) => coarsenSignature(page.structureSignature))).size;
|
|
6
36
|
const ratio = unique / pages.length;
|
|
7
37
|
if (ratio >= minUniqueRatio) {
|
|
8
38
|
return [];
|
|
9
39
|
}
|
|
40
|
+
// Confidence band: a ratio far below the floor is a stronger single-template
|
|
41
|
+
// signal than one hovering just under it.
|
|
42
|
+
const confidence = ratio < minUniqueRatio / 2 ? "high" : "medium";
|
|
10
43
|
return [
|
|
11
44
|
{
|
|
12
45
|
ruleId: "spam/template-diversity",
|
|
13
46
|
severity: "warning",
|
|
14
|
-
|
|
47
|
+
confidence,
|
|
48
|
+
message: `Template diversity ratio is ${ratio.toFixed(2)} (min ${minUniqueRatio.toFixed(2)}); ` +
|
|
49
|
+
`the ${pages.length} pages collapse to ${unique} distinct structural shapes after ignoring minor chrome variation.`,
|
|
15
50
|
fix: "Vary the HTML structure across pages. Add conditional sections, different layouts, or page-specific components. Identical-structure corpora are a primary scaled-content-abuse signal that the March 27, 2026 core update reinforced."
|
|
16
51
|
}
|
|
17
52
|
];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;GAUG;AACH,SAAS,gBAAgB,CAAC,SAAiB;IACzC,IAAI,CAAC,SAAS;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,SAAS;SACb,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzC,oEAAoE;QACpE,8EAA8E;QAC9E,OAAO,GAAG,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,CAAC,CAAC;SACD,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5F,MAAM,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;QAC5B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,6EAA6E;IAC7E,0CAA0C;IAC1C,MAAM,UAAU,GAAe,KAAK,GAAG,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE9E,OAAO;QACL;YACE,MAAM,EAAE,yBAAyB;YACjC,QAAQ,EAAE,SAAS;YACnB,UAAU;YACV,OAAO,EACL,+BAA+B,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACtF,OAAO,KAAK,CAAC,MAAM,sBAAsB,MAAM,oEAAoE;YACrH,GAAG,EAAE,uOAAuO;SAC7O;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,
|
|
1
|
+
{"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,CAkC1D"}
|
|
@@ -19,7 +19,11 @@ export function thinContentRule(pages, minWords) {
|
|
|
19
19
|
: "";
|
|
20
20
|
findings.push({
|
|
21
21
|
ruleId: "spam/thin-content",
|
|
22
|
-
|
|
22
|
+
// High confidence (far below the floor) is an error; the medium band — which
|
|
23
|
+
// the rule itself flags as "could legitimately be a short page" — is a
|
|
24
|
+
// warning, not a ship-blocker. The page still joins thinContentUrls either
|
|
25
|
+
// way so spam/doorway-pattern can stack on it.
|
|
26
|
+
severity: confidence === "high" ? "error" : "warning",
|
|
23
27
|
confidence,
|
|
24
28
|
message: `${page.url} has thin content (${words} words).${shortPageNote}`,
|
|
25
29
|
fix: `Add at least ${minWords - words} more words of substantive content relevant to this page's specific topic.`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,QAAQ,EAAE,OAAO;
|
|
1
|
+
{"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,6EAA6E;YAC7E,uEAAuE;YACvE,2EAA2E;YAC3E,+CAA+C;YAC/C,QAAQ,EAAE,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACrD,UAAU;YACV,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,sBAAsB,KAAK,WAAW,aAAa,EAAE;YACzE,GAAG,EAAE,gBAAgB,QAAQ,GAAG,KAAK,4EAA4E;SAClH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;
|
|
1
|
+
{"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;AAWD,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,aAAa,EAAE,mBAAmB,GACjC,UAAU,EAAE,CA+Kd"}
|