@pseolint/core 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +27 -4
- package/dist/algorithms/content-effort/cache.d.ts +5 -0
- package/dist/algorithms/content-effort/cache.d.ts.map +1 -0
- package/dist/algorithms/content-effort/cache.js +23 -0
- package/dist/algorithms/content-effort/cache.js.map +1 -0
- package/dist/algorithms/content-effort/index.d.ts +4 -0
- package/dist/algorithms/content-effort/index.d.ts.map +1 -0
- package/dist/algorithms/content-effort/index.js +4 -0
- package/dist/algorithms/content-effort/index.js.map +1 -0
- package/dist/algorithms/content-effort/judge.d.ts +36 -0
- package/dist/algorithms/content-effort/judge.d.ts.map +1 -0
- package/dist/algorithms/content-effort/judge.js +69 -0
- package/dist/algorithms/content-effort/judge.js.map +1 -0
- package/dist/algorithms/content-effort/schema.d.ts +13 -0
- package/dist/algorithms/content-effort/schema.d.ts.map +1 -0
- package/dist/algorithms/content-effort/schema.js +20 -0
- package/dist/algorithms/content-effort/schema.js.map +1 -0
- package/dist/auditor.d.ts +18 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +155 -16
- package/dist/auditor.js.map +1 -1
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +18 -3
- package/dist/cache.js.map +1 -1
- package/dist/formatters/template-cards.js +32 -32
- package/dist/framework-detect.d.ts +6 -0
- package/dist/framework-detect.d.ts.map +1 -0
- package/dist/framework-detect.js +22 -0
- package/dist/framework-detect.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +1 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/content/eeat-signals.d.ts +13 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -1
- package/dist/rules/content/eeat-signals.js +36 -4
- package/dist/rules/content/eeat-signals.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +2 -2
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +8 -2
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/content/value-add.d.ts +8 -2
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +39 -48
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/content/wikipedia-paraphrase.d.ts +12 -7
- package/dist/rules/content/wikipedia-paraphrase.d.ts.map +1 -1
- package/dist/rules/content/wikipedia-paraphrase.js +52 -13
- package/dist/rules/content/wikipedia-paraphrase.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +16 -12
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -1
- package/dist/rules/schema/json-ld-valid.js +8 -1
- package/dist/rules/schema/json-ld-valid.js.map +1 -1
- package/dist/rules/schema/required-fields.d.ts.map +1 -1
- package/dist/rules/schema/required-fields.js +47 -1
- package/dist/rules/schema/required-fields.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +1 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -1
- package/dist/rules/spam/boilerplate-ratio.js +36 -22
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -1
- package/dist/rules/spam/template-diversity.d.ts.map +1 -1
- package/dist/rules/spam/template-diversity.js +37 -2
- package/dist/rules/spam/template-diversity.js.map +1 -1
- package/dist/rules/tech/csr-bailout.d.ts +8 -0
- package/dist/rules/tech/csr-bailout.d.ts.map +1 -0
- package/dist/rules/tech/csr-bailout.js +48 -0
- package/dist/rules/tech/csr-bailout.js.map +1 -0
- package/dist/rules/tech/og-completeness.d.ts +8 -3
- package/dist/rules/tech/og-completeness.d.ts.map +1 -1
- package/dist/rules/tech/og-completeness.js +15 -7
- package/dist/rules/tech/og-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +6 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +23 -0
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/schemas/audit-summary.schema.json +300 -300
- package/dist/rules/aeo/non-replicable-value.d.ts +0 -9
- package/dist/rules/aeo/non-replicable-value.d.ts.map +0 -1
- package/dist/rules/aeo/non-replicable-value.js +0 -95
- package/dist/rules/aeo/non-replicable-value.js.map +0 -1
- package/dist/rules/cannibal/keyword-collision.d.ts +0 -3
- package/dist/rules/cannibal/keyword-collision.d.ts.map +0 -1
- package/dist/rules/cannibal/keyword-collision.js +0 -25
- package/dist/rules/cannibal/keyword-collision.js.map +0 -1
- package/dist/rules/cannibal/title-overlap.d.ts +0 -3
- package/dist/rules/cannibal/title-overlap.d.ts.map +0 -1
- package/dist/rules/cannibal/title-overlap.js +0 -43
- package/dist/rules/cannibal/title-overlap.js.map +0 -1
- package/dist/rules/content/heading-uniqueness.d.ts +0 -3
- package/dist/rules/content/heading-uniqueness.d.ts.map +0 -1
- package/dist/rules/content/heading-uniqueness.js +0 -56
- package/dist/rules/content/heading-uniqueness.js.map +0 -1
- package/dist/rules/links/hub-pages.d.ts +0 -7
- package/dist/rules/links/hub-pages.d.ts.map +0 -1
- package/dist/rules/links/hub-pages.js +0 -73
- package/dist/rules/links/hub-pages.js.map +0 -1
|
@@ -1,23 +1,61 @@
|
|
|
1
1
|
import { wikipediaParaphraseRate } from "../../algorithms/wikipedia-paraphrase.js";
|
|
2
2
|
const RULE_ID = "content/wikipedia-paraphrase";
|
|
3
|
-
const THRESHOLD = 0.4;
|
|
4
3
|
/**
|
|
5
|
-
*
|
|
4
|
+
* ponytail: MIN_TRIGRAM_COUNT = 200
|
|
6
5
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
6
|
+
* The bloom filter has a ~5% per-query false-positive rate. On a page with
|
|
7
|
+
* N trigrams the expected bloom-noise hit count is 0.05 * N. For a short page
|
|
8
|
+
* (~48 trigrams) that alone produces ~2.4 expected FP hits; with a threshold
|
|
9
|
+
* of 40% (19/48) the noise alone can exceed the threshold on short pages.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
11
|
+
* Setting a floor of 200 trigrams (~202 words) means bloom noise contributes
|
|
12
|
+
* at most 10 / 200 = 5% of trigrams, far below the raised THRESHOLD, so noise
|
|
13
|
+
* cannot trigger the rule on its own.
|
|
14
|
+
*/
|
|
15
|
+
const MIN_TRIGRAM_COUNT = 200;
|
|
16
|
+
/**
|
|
17
|
+
* ponytail: THRESHOLD = 0.55
|
|
18
|
+
*
|
|
19
|
+
* Raised from 0.40 to 0.55 to account for the bloom filter's ~5% per-query
|
|
20
|
+
* FP rate and the "topic overlap" effect: legal/medical/geography pSEO pages
|
|
21
|
+
* share many encyclopedic trigrams ("the united states", "in the state of")
|
|
22
|
+
* purely through topical proximity, not paraphrase. A 55% overlap is
|
|
23
|
+
* substantially above both the noise floor (~5%) and the expected topic-
|
|
24
|
+
* overlap baseline, making the signal meaningfully indicative of genuine
|
|
25
|
+
* encyclopedic reuse. At this level the rule remains advisory (confidence:
|
|
26
|
+
* "low") because trigram overlap cannot distinguish paraphrase from topic
|
|
27
|
+
* proximity — it is a weak signal, not a verdict.
|
|
28
|
+
*/
|
|
29
|
+
const THRESHOLD = 0.55;
|
|
30
|
+
/**
|
|
31
|
+
* content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
|
|
32
|
+
*
|
|
33
|
+
* Detects pages whose contentText has unusually high trigram overlap with the
|
|
34
|
+
* bundled Wikipedia reference corpus. This is a weak, advisory signal only:
|
|
35
|
+
* trigram overlap cannot distinguish actual paraphrase from legitimate topical
|
|
36
|
+
* proximity (e.g. a legal-template page naturally shares many encyclopedic
|
|
37
|
+
* trigrams with Wikipedia articles on the same topic).
|
|
13
38
|
*
|
|
14
|
-
*
|
|
39
|
+
* Two guards reduce false positives:
|
|
40
|
+
* 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
|
|
41
|
+
* words) are skipped entirely — bloom noise alone dominates on short pages.
|
|
42
|
+
* 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
|
|
43
|
+
* (~5%) and typical topical-proximity baseline.
|
|
44
|
+
*
|
|
45
|
+
* Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
|
|
15
46
|
*/
|
|
16
47
|
export function wikipediaParaphraseRule(pages) {
|
|
17
48
|
const findings = [];
|
|
18
49
|
for (const page of pages) {
|
|
19
50
|
if (!page.contentText || page.contentText.trim().length === 0)
|
|
20
51
|
continue;
|
|
52
|
+
// Estimate trigram count without re-implementing extractTrigrams: count
|
|
53
|
+
// whitespace-separated tokens then subtract 2 (trigrams = tokens - 2).
|
|
54
|
+
// This is a cheap proxy; the algorithm file does the accurate extraction.
|
|
55
|
+
const tokenCount = page.contentText.trim().split(/\s+/).length;
|
|
56
|
+
const estimatedTrigrams = Math.max(0, tokenCount - 2);
|
|
57
|
+
if (estimatedTrigrams < MIN_TRIGRAM_COUNT)
|
|
58
|
+
continue;
|
|
21
59
|
const rate = wikipediaParaphraseRate(page.contentText);
|
|
22
60
|
if (rate < THRESHOLD)
|
|
23
61
|
continue;
|
|
@@ -27,11 +65,12 @@ export function wikipediaParaphraseRule(pages) {
|
|
|
27
65
|
severity: "warning",
|
|
28
66
|
confidence: "low",
|
|
29
67
|
pageUrl: page.url,
|
|
30
|
-
message: `${page.url}
|
|
31
|
-
`reference corpus.
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"
|
|
68
|
+
message: `${page.url} has high trigram overlap (${pct}%) with the bundled Wikipedia ` +
|
|
69
|
+
`reference corpus. This is an advisory signal — trigram overlap can reflect ` +
|
|
70
|
+
`topical proximity as well as copied content and cannot distinguish the two.`,
|
|
71
|
+
fix: "Review for borrowed encyclopedic phrasing and replace with original analysis " +
|
|
72
|
+
"specific to this page's subject. Even if attributed, high paraphrase rates " +
|
|
73
|
+
"correlate with low value-add by SpamBrain's helpful-content metric.",
|
|
35
74
|
});
|
|
36
75
|
}
|
|
37
76
|
return findings;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;;;;;GAWG;AACH,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;;;;;;;;;;GAYG;AACH,MAAM,SAAS,GAAG,IAAI,CAAC;AAEvB;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAExE,wEAAwE;QACxE,uEAAuE;QACvE,0EAA0E;QAC1E,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;QACtD,IAAI,iBAAiB,GAAG,iBAAiB;YAAE,SAAS;QAEpD,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,IAAI,IAAI,GAAG,SAAS;YAAE,SAAS;QAE/B,MAAM,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,8BAA8B,GAAG,gCAAgC;gBAC5E,6EAA6E;gBAC7E,6EAA6E;YAC/E,GAAG,EACD,+EAA+E;gBAC/E,6EAA6E;gBAC7E,qEAAqE;SACxE,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgEvE"}
|
|
@@ -28,27 +28,31 @@ export function schemaConsistencyRule(pages) {
|
|
|
28
28
|
}
|
|
29
29
|
clustersBySignature.get(sig).push({ url: page.url, types });
|
|
30
30
|
}
|
|
31
|
-
// Within each cluster of ≥2 pages,
|
|
31
|
+
// Within each cluster of ≥2 pages, fire only when pages carry DIFFERENT @type
|
|
32
|
+
// SETS. A single page legitimately emits several JSON-LD blocks (e.g. Article +
|
|
33
|
+
// FAQPage + Organization) — that multi-type set is not an inconsistency. The
|
|
34
|
+
// problem is two pages on the SAME template disagreeing on their type set
|
|
35
|
+
// (e.g. one Article, one NewsArticle). Comparing per-page set signatures (not
|
|
36
|
+
// the union) avoids the false positive where every page shares the same set.
|
|
37
|
+
const setSignature = (types) => Array.from(types).sort().join("+");
|
|
32
38
|
for (const members of clustersBySignature.values()) {
|
|
33
39
|
if (members.length < 2) {
|
|
34
40
|
continue;
|
|
35
41
|
}
|
|
36
|
-
const
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
allTypesInCluster.add(t);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
if (allTypesInCluster.size <= 1) {
|
|
43
|
-
continue;
|
|
42
|
+
const distinctSetSignatures = new Set(members.map((m) => setSignature(m.types)));
|
|
43
|
+
if (distinctSetSignatures.size <= 1) {
|
|
44
|
+
continue; // all pages in this template cluster agree on their @type set
|
|
44
45
|
}
|
|
45
|
-
const
|
|
46
|
+
const variants = Array.from(distinctSetSignatures)
|
|
47
|
+
.sort()
|
|
48
|
+
.map((s) => `[${s.split("+").join(", ")}]`)
|
|
49
|
+
.join(" vs ");
|
|
46
50
|
findings.push({
|
|
47
51
|
ruleId: "schema/consistency",
|
|
48
52
|
severity: "info",
|
|
49
|
-
message: `Template pages
|
|
53
|
+
message: `Template pages disagree on schema @type (${variants}). Use a consistent @type across pages that share the same template structure.`,
|
|
50
54
|
relatedUrls: members.map((m) => m.url),
|
|
51
|
-
fix: `Use a consistent @type across all pages that share the same template structure.`
|
|
55
|
+
fix: `Use a consistent @type (or set of @types) across all pages that share the same template structure.`
|
|
52
56
|
});
|
|
53
57
|
}
|
|
54
58
|
return findings;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,
|
|
1
|
+
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,8EAA8E;IAC9E,gFAAgF;IAChF,6EAA6E;IAC7E,0EAA0E;IAC1E,8EAA8E;IAC9E,6EAA6E;IAC7E,MAAM,YAAY,GAAG,CAAC,KAAkB,EAAU,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,qBAAqB,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,qBAAqB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YACpC,SAAS,CAAC,8DAA8D;QAC1E,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;aAC/C,IAAI,EAAE;aACN,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;aAC1C,IAAI,CAAC,MAAM,CAAC,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,4CAA4C,QAAQ,gFA
AgF;YAC7I,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,oGAAoG;SAC1G,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA8DjE"}
|
|
@@ -30,7 +30,14 @@ export function jsonLdValidRule(pages) {
|
|
|
30
30
|
}
|
|
31
31
|
if (obj["@type"] !== undefined) {
|
|
32
32
|
const typeValue = obj["@type"];
|
|
33
|
-
|
|
33
|
+
const typeIsValid =
|
|
34
|
+
// string: non-empty non-whitespace
|
|
35
|
+
(typeof typeValue === "string" && typeValue.trim() !== "") ||
|
|
36
|
+
// array: non-empty, every element is a non-empty non-whitespace string
|
|
37
|
+
(Array.isArray(typeValue) &&
|
|
38
|
+
typeValue.length > 0 &&
|
|
39
|
+
typeValue.every((t) => typeof t === "string" && t.trim() !== ""));
|
|
40
|
+
if (!typeIsValid) {
|
|
34
41
|
findings.push({
|
|
35
42
|
ruleId: "schema/json-ld-valid",
|
|
36
43
|
severity: "error",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,
|
|
1
|
+
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM,WAAW;gBACf,mCAAmC;gBACnC,CAAC,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;oBAC1D,uEAAuE;oBACvE,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC;wBACvB,SAAS,CAAC,MAAM,GAAG,CAAC;wBACnB,SAAuB,CAAC,KAAK,CAC5B,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAChD,CAAC,CAAC;gBACP,IAAI,CAAC,WAAW,EAAE,CAAC;oBACjB,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyD7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAwDpE"}
|
|
@@ -3,6 +3,47 @@ const REQUIRED_FIELDS = {
|
|
|
3
3
|
Product: ["name"],
|
|
4
4
|
FAQPage: ["mainEntity"]
|
|
5
5
|
};
|
|
6
|
+
/**
|
|
7
|
+
* Returns true when a field value should be treated as "missing" (junk/empty).
|
|
8
|
+
* Accepts non-empty strings, non-empty arrays, and non-empty objects as present.
|
|
9
|
+
*/
|
|
10
|
+
function isMissing(value) {
|
|
11
|
+
if (value === undefined || value === null)
|
|
12
|
+
return true;
|
|
13
|
+
if (typeof value === "string")
|
|
14
|
+
return value.trim() === "";
|
|
15
|
+
if (Array.isArray(value))
|
|
16
|
+
return value.length === 0;
|
|
17
|
+
if (typeof value === "object")
|
|
18
|
+
return Object.keys(value).length === 0;
|
|
19
|
+
// booleans (false/true) and numbers other than checked above
|
|
20
|
+
if (typeof value === "boolean" || typeof value === "number")
|
|
21
|
+
return false;
|
|
22
|
+
return true;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Article `author` is valid when it is:
|
|
26
|
+
* - a non-empty string, OR
|
|
27
|
+
* - an object with a non-empty `name` property (Person/Organization), OR
|
|
28
|
+
* - a non-empty array of the above (co-authored articles — Schema.org allows
|
|
29
|
+
* `author` to be a list). Present if at least one element is a valid author.
|
|
30
|
+
* Returns true when the author value is missing/junk.
|
|
31
|
+
*/
|
|
32
|
+
function isAuthorMissing(value) {
|
|
33
|
+
if (value === undefined || value === null)
|
|
34
|
+
return true;
|
|
35
|
+
if (typeof value === "string")
|
|
36
|
+
return value.trim() === "";
|
|
37
|
+
if (Array.isArray(value)) {
|
|
38
|
+
return value.length === 0 || value.every((item) => isAuthorMissing(item));
|
|
39
|
+
}
|
|
40
|
+
if (typeof value === "object") {
|
|
41
|
+
const obj = value;
|
|
42
|
+
return typeof obj.name !== "string" || obj.name.trim() === "";
|
|
43
|
+
}
|
|
44
|
+
// booleans, numbers — not a valid author shape
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
6
47
|
function hasPrice(obj) {
|
|
7
48
|
if (obj.price !== undefined && obj.price !== null && obj.price !== "") {
|
|
8
49
|
return true;
|
|
@@ -37,7 +78,12 @@ export function requiredFieldsRule(pages) {
|
|
|
37
78
|
}
|
|
38
79
|
const missing = [];
|
|
39
80
|
for (const field of required) {
|
|
40
|
-
if (
|
|
81
|
+
if (field === "author" && schemaType === "Article") {
|
|
82
|
+
if (isAuthorMissing(obj[field])) {
|
|
83
|
+
missing.push(field);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
else if (isMissing(obj[field])) {
|
|
41
87
|
missing.push(field);
|
|
42
88
|
}
|
|
43
89
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,
|
|
1
|
+
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF;;;GAGG;AACH,SAAS,SAAS,CAAC,KAAc;IAC/B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,KAAe,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAChF,6DAA6D;IAC7D,IAAI,OAAO,KAAK,KAAK,SAAS,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC1E,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,OAAO,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,CAAC;IACD,+CAA+C;IAC/C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KA
AK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,KAAK,KAAK,QAAQ,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;oBACnD,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBAChC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;qBAAM,IAAI,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAoEhD,CAAC;AAEF,4GAA4G;AAC5G,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAE3D"}
|
package/dist/rules/scope.js
CHANGED
|
@@ -35,6 +35,7 @@ export const RULE_SCOPE = {
|
|
|
35
35
|
"tech/robots-compliance": "corpus",
|
|
36
36
|
"tech/robots-sitemap-presence": "corpus",
|
|
37
37
|
"tech/og-completeness": "page",
|
|
38
|
+
"tech/csr-bailout": "page",
|
|
38
39
|
// schema
|
|
39
40
|
"schema/json-ld-valid": "page",
|
|
40
41
|
"schema/required-fields": "page",
|
package/dist/rules/scope.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAC9B,0BAA0B,EAAE,QAAQ;IACpC,2BAA2B,EAAE,MAAM;IACnC,wBAAwB,EAAE,MAAM;IAChC,2BAA2B,EAAE,MAAM;IAEnC,QAAQ;IACR,oBAAoB,EAAE,QAAQ;IAC9B,iBAAiB,EAAE,QAAQ;IAC3B,4BAA4B,EAAE,QAAQ;IACtC,kBAAkB,EAAE,QAAQ;IAC5B,6BAA6B,EAAE,QAAQ;IACvC,+BAA+B,EAAE,QAAQ;IAEzC,OAAO;IACP,4BAA4B,EAAE,QAAQ;IACtC,iCAAiC,EAAE,MAAM;IACzC,8BAA8B,EAAE,QAAQ;IACxC,2BAA2B,EAAE,QAAQ;IACrC,qBAAqB,EAAE,MAAM;IAC7B,eAAe,EAAE,MAAM;IACvB,2BAA2B,EAAE,QAAQ;IACrC,wBAAwB,EAAE,QAAQ;IAClC,8BAA8B,EAAE,QAAQ;IACxC,sBAAsB,EAAE,MAAM;
|
|
1
|
+
{"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAC9B,0BAA0B,EAAE,QAAQ;IACpC,2BAA2B,EAAE,MAAM;IACnC,wBAAwB,EAAE,MAAM;IAChC,2BAA2B,EAAE,MAAM;IAEnC,QAAQ;IACR,oBAAoB,EAAE,QAAQ;IAC9B,iBAAiB,EAAE,QAAQ;IAC3B,4BAA4B,EAAE,QAAQ;IACtC,kBAAkB,EAAE,QAAQ;IAC5B,6BAA6B,EAAE,QAAQ;IACvC,+BAA+B,EAAE,QAAQ;IAEzC,OAAO;IACP,4BAA4B,EAAE,QAAQ;IACtC,iCAAiC,EAAE,MAAM;IACzC,8BAA8B,EAAE,QAAQ;IACxC,2BAA2B,EAAE,QAAQ;IACrC,qBAAqB,EAAE,MAAM;IAC7B,eAAe,EAAE,MAAM;IACvB,2BAA2B,EAAE,QAAQ;IACrC,wBAAwB,EAAE,QAAQ;IAClC,8BAA8B,EAAE,QAAQ;IACxC,sBAAsB,EAAE,MAAM;IAC9B,kBAAkB,EAAE,MAAM;IAE1B,SAAS;IACT,sBAAsB,EAAE,MAAM;IAC9B,wBAAwB,EAAE,MAAM;IAChC,oBAAoB,EAAE,QAAQ;IAE9B,kEAAkE;IAClE,kEAAkE;IAClE,uCAAuC;IACvC,sBAAsB,EAAE,QAAQ;IAEhC,eAAe;IACf,sBAAsB,EAAE,MAAM;IAC9B,6BAA6B,EAAE,QAAQ;IAEvC,iBAAiB;IACjB,qBAAqB,EAAE,QAAQ;IAE/B,oCAAoC;IACpC,cAAc,EAAE,QAAQ;IACxB,oBAAoB,EAAE,QAAQ;IAC9B,uBAAuB,EAAE,MAAM;IAC/B,kBAAkB,EAAE,MAAM;IAC1B,kBAAkB,EAAE,MAAM;IAC1B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,MAAM;IAChC,kBAAkB,EAAE,MAAM;CAC3B,CAAC;AAEF,4GAA4G;AAC5G,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,KAAK,MAAM,CAAC;AACrD,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAa7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CA8DxF"}
|
|
@@ -4,11 +4,16 @@ function extractTextBlocks(contentText) {
|
|
|
4
4
|
.map((block) => block.trim().toLowerCase())
|
|
5
5
|
.filter((block) => block.length > 20);
|
|
6
6
|
}
|
|
7
|
+
function wordCount(block) {
|
|
8
|
+
return block.split(/\s+/).length;
|
|
9
|
+
}
|
|
7
10
|
export function boilerplateRatioRule(pages, maxRatio) {
|
|
8
11
|
if (pages.length < 2) {
|
|
9
12
|
return [];
|
|
10
13
|
}
|
|
14
|
+
const N = pages.length;
|
|
11
15
|
const pageBlocks = pages.map((page) => extractTextBlocks(page.contentText));
|
|
16
|
+
// Build per-block document frequency (how many pages contain each block).
|
|
12
17
|
const blockFrequency = new Map();
|
|
13
18
|
for (const blocks of pageBlocks) {
|
|
14
19
|
const unique = new Set(blocks);
|
|
@@ -16,34 +21,43 @@ export function boilerplateRatioRule(pages, maxRatio) {
|
|
|
16
21
|
blockFrequency.set(block, (blockFrequency.get(block) ?? 0) + 1);
|
|
17
22
|
}
|
|
18
23
|
}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
// Continuous weight, min-max normalized over document frequency: a block
|
|
25
|
+
// unique to ONE page is not boilerplate at all (weight 0); a block on EVERY
|
|
26
|
+
// page is full boilerplate (weight 1); mid-frequency blocks scale linearly
|
|
27
|
+
// between. (freq-1)/(N-1) — not freq/N — so unique content never inflates the
|
|
28
|
+
// ratio (which freq/N does, giving every block at least 1/N). N>=2 here, so
|
|
29
|
+
// N-1>=1: no division by zero. Removes the binary skeleton cliff entirely.
|
|
30
|
+
const blockWeight = (block) => {
|
|
31
|
+
const freq = blockFrequency.get(block) ?? 0;
|
|
32
|
+
return (freq - 1) / (N - 1);
|
|
33
|
+
};
|
|
26
34
|
const findings = [];
|
|
27
35
|
pages.forEach((page, index) => {
|
|
28
36
|
const blocks = pageBlocks[index];
|
|
29
|
-
if (blocks.length === 0)
|
|
37
|
+
if (blocks.length === 0)
|
|
30
38
|
return;
|
|
31
|
-
|
|
32
|
-
const totalWords = blocks.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
|
|
33
|
-
const boilerplateWords = blocks
|
|
34
|
-
.filter((b) => skeleton.has(b))
|
|
35
|
-
.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
|
|
39
|
+
const totalWords = blocks.reduce((sum, b) => sum + wordCount(b), 0);
|
|
36
40
|
if (totalWords === 0)
|
|
37
41
|
return;
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
// Weighted boilerplate word count: each block contributes (weight * its word count).
|
|
43
|
+
const weightedBoilerplateWords = blocks.reduce((sum, b) => {
|
|
44
|
+
return sum + blockWeight(b) * wordCount(b);
|
|
45
|
+
}, 0);
|
|
46
|
+
const ratio = weightedBoilerplateWords / totalWords;
|
|
47
|
+
if (ratio <= maxRatio)
|
|
48
|
+
return;
|
|
49
|
+
// 2-band severity: clearly over (≥ threshold + 0.1) → error; just over → warning.
|
|
50
|
+
const clearlyOver = ratio >= maxRatio + 0.1;
|
|
51
|
+
const severity = clearlyOver ? "error" : "warning";
|
|
52
|
+
const confidence = clearlyOver ? "high" : "medium";
|
|
53
|
+
findings.push({
|
|
54
|
+
ruleId: "spam/boilerplate-ratio",
|
|
55
|
+
severity,
|
|
56
|
+
confidence,
|
|
57
|
+
pageUrl: page.url,
|
|
58
|
+
message: `${page.url} has boilerplate ratio ${(ratio * 100).toFixed(1)}% (max ${(maxRatio * 100).toFixed(1)}%).`,
|
|
59
|
+
fix: `${(ratio * 100).toFixed(1)}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${(maxRatio * 100).toFixed(1)}%.`
|
|
60
|
+
});
|
|
47
61
|
});
|
|
48
62
|
return findings;
|
|
49
63
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACnC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,0EAA0E;IAC1E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,4EAA4E;IAC5E,2EAA2E;IAC3E,8EAA8E;IAC9E,4EAA4E;IAC5E,2EAA2E;IAC3E,MAAM,WAAW,GAAG,CAAC,KAAa,EAAU,EAAE;QAC5C,MAAM,IAAI,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEhC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpE,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,qFAAqF;QACrF,MAAM,wBAAwB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YACxD,OAAO,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC,EAAE,CAAC,CAAC,CAAC;QAEN,MAAM,KAAK,GAAG,wBAAwB,GAAG,UAAU,CAAC;QAEpD,IAAI,KAAK,IAAI,QAAQ;YAAE,OAAO;QAE9B,kFAAkF;QAClF,MAAM,WAAW,GAAG,KAAK,IAAI,QAAQ,GAAG,GAAG,CAAC;QAC5
C,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QACnD,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEnD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,wBAAwB;YAChC,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;SACnQ,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AA8BzE,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,UAAU,EAAE,CA0Bd"}
|
|
@@ -1,17 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coarsen a structureSignature ("tag:count|tag:count|...") by bucketing each
|
|
3
|
+
* tag's count logarithmically. Pages that differ only by trivial chrome — one
|
|
4
|
+
* extra ad `<div>`, a conditional nav item — collapse to the SAME coarse
|
|
5
|
+
* signature, so a genuinely single-template site is no longer read as "diverse"
|
|
6
|
+
* from count noise (the false negative the exact-count fingerprint caused).
|
|
7
|
+
*
|
|
8
|
+
* The raw exact-count signature (parser.buildStructureSignature) is SHARED with
|
|
9
|
+
* spam/near-duplicate and spam/doorway-pattern and is deliberately left
|
|
10
|
+
* untouched — this coarsening is local to the diversity measure.
|
|
11
|
+
*/
|
|
12
|
+
function coarsenSignature(signature) {
|
|
13
|
+
if (!signature)
|
|
14
|
+
return signature;
|
|
15
|
+
return signature
|
|
16
|
+
.split("|")
|
|
17
|
+
.map((pair) => {
|
|
18
|
+
const idx = pair.lastIndexOf(":");
|
|
19
|
+
if (idx < 0)
|
|
20
|
+
return pair;
|
|
21
|
+
const tag = pair.slice(0, idx);
|
|
22
|
+
const count = Number(pair.slice(idx + 1));
|
|
23
|
+
if (!Number.isFinite(count))
|
|
24
|
+
return pair;
|
|
25
|
+
// log2(count + 1) bucket: 0→0, 1-2→1, 3-6→2 … 31-62→5, 63-126→6. Trivial count
|
|
26
|
+
// differences land in the same bucket; an order-of-magnitude change does not.
|
|
27
|
+
return `${tag}:${Math.floor(Math.log2(count + 1))}`;
|
|
28
|
+
})
|
|
29
|
+
.join("|");
|
|
30
|
+
}
|
|
1
31
|
export function templateDiversityRule(pages, minUniqueRatio) {
|
|
2
32
|
if (pages.length === 0) {
|
|
3
33
|
return [];
|
|
4
34
|
}
|
|
5
|
-
const unique = new Set(pages.map((page) => page.structureSignature)).size;
|
|
35
|
+
const unique = new Set(pages.map((page) => coarsenSignature(page.structureSignature))).size;
|
|
6
36
|
const ratio = unique / pages.length;
|
|
7
37
|
if (ratio >= minUniqueRatio) {
|
|
8
38
|
return [];
|
|
9
39
|
}
|
|
40
|
+
// Confidence band: a ratio far below the floor is a stronger single-template
|
|
41
|
+
// signal than one hovering just under it.
|
|
42
|
+
const confidence = ratio < minUniqueRatio / 2 ? "high" : "medium";
|
|
10
43
|
return [
|
|
11
44
|
{
|
|
12
45
|
ruleId: "spam/template-diversity",
|
|
13
46
|
severity: "warning",
|
|
14
|
-
|
|
47
|
+
confidence,
|
|
48
|
+
message: `Template diversity ratio is ${ratio.toFixed(2)} (min ${minUniqueRatio.toFixed(2)}); ` +
|
|
49
|
+
`the ${pages.length} pages collapse to ${unique} distinct structural shapes after ignoring minor chrome variation.`,
|
|
15
50
|
fix: "Vary the HTML structure across pages. Add conditional sections, different layouts, or page-specific components. Identical-structure corpora are a primary scaled-content-abuse signal that the March 27, 2026 core update reinforced."
|
|
16
51
|
}
|
|
17
52
|
];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;GAUG;AACH,SAAS,gBAAgB,CAAC,SAAiB;IACzC,IAAI,CAAC,SAAS;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,SAAS;SACb,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzC,oEAAoE;QACpE,8EAA8E;QAC9E,OAAO,GAAG,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,CAAC,CAAC;SACD,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5F,MAAM,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;QAC5B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,6EAA6E;IAC7E,0CAA0C;IAC1C,MAAM,UAAU,GAAe,KAAK,GAAG,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE9E,OAAO;QACL;YACE,MAAM,EAAE,yBAAyB;YACjC,QAAQ,EAAE,SAAS;YACnB,UAAU;YACV,OAAO,EACL,+BAA+B,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACtF,OAAO,KAAK,CAAC,MAAM,sBAAsB,MAAM,oEAAoE;YACrH,GAAG,EAAE,uOAAuO;SAC7O;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Flags pages whose interactive value (or substantive content) exists in the
|
|
4
|
+
* rendered DOM but not the raw server HTML — invisible to crawlers that don't
|
|
5
|
+
* run JS. Requires --render (no-op when page.renderedHtml is absent).
|
|
6
|
+
*/
|
|
7
|
+
export declare function csrBailoutRule(pages: ParsedPage[]): RuleResult[];
|
|
8
|
+
//# sourceMappingURL=csr-bailout.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csr-bailout.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/csr-bailout.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAc,MAAM,gBAAgB,CAAC;AAgBzE;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAkChE"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
import { countInteractive, detectClientFrameworkFromHtml } from "../../framework-detect.js";
|
|
3
|
+
// ponytail: render-diff thresholds, tuned to the paperforge case (0 raw / 44
|
|
4
|
+
// rendered interactive). Surface via rules options only if real audits need it.
|
|
5
|
+
const MIN_INTERACTIVE = 3;
|
|
6
|
+
const RATIO_FLOOR = 0.1;
|
|
7
|
+
const MIN_WORD_DELTA = 250;
|
|
8
|
+
const CONTENT_RATIO_FLOOR = 0.5;
|
|
9
|
+
function visibleWordCount(html) {
|
|
10
|
+
const $ = load(html);
|
|
11
|
+
$("script, style, noscript, template").remove();
|
|
12
|
+
return ($("body").text() || "").split(/\s+/).filter(Boolean).length;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Flags pages whose interactive value (or substantive content) exists in the
|
|
16
|
+
* rendered DOM but not the raw server HTML — invisible to crawlers that don't
|
|
17
|
+
* run JS. Requires --render (no-op when page.renderedHtml is absent).
|
|
18
|
+
*/
|
|
19
|
+
export function csrBailoutRule(pages) {
|
|
20
|
+
const findings = [];
|
|
21
|
+
for (const page of pages) {
|
|
22
|
+
if (!page.renderedHtml)
|
|
23
|
+
continue;
|
|
24
|
+
const rawI = countInteractive(page.html);
|
|
25
|
+
const rendI = countInteractive(page.renderedHtml);
|
|
26
|
+
const rawW = visibleWordCount(page.html);
|
|
27
|
+
const rendW = visibleWordCount(page.renderedHtml);
|
|
28
|
+
const interactiveBail = rendI >= MIN_INTERACTIVE && (rawI === 0 || rawI / rendI <= RATIO_FLOOR);
|
|
29
|
+
const contentBail = rendW - rawW >= MIN_WORD_DELTA && rawW / Math.max(rendW, 1) <= CONTENT_RATIO_FLOOR;
|
|
30
|
+
if (!interactiveBail && !contentBail)
|
|
31
|
+
continue;
|
|
32
|
+
const confidence = interactiveBail ? "high" : "medium";
|
|
33
|
+
const nextHint = detectClientFrameworkFromHtml(page.html) === "nextjs"
|
|
34
|
+
? " Next.js: keep useSearchParams()/dynamic hooks inside a <Suspense> boundary, and move new Date()/Math.random() out of client render paths under cacheComponents (into useEffect). Verify with `next build && next start`, not `next dev`."
|
|
35
|
+
: "";
|
|
36
|
+
findings.push({
|
|
37
|
+
ruleId: "tech/csr-bailout",
|
|
38
|
+
severity: "warning",
|
|
39
|
+
confidence,
|
|
40
|
+
pageUrl: page.url,
|
|
41
|
+
message: `${page.url} exposes ${rendI} interactive elements after hydration but ${rawI} in the server HTML ` +
|
|
42
|
+
`(${rawW}→${rendW} words). Crawlers and Google's first pass see an incomplete shell, making the page look thin or duplicate.${nextHint}`,
|
|
43
|
+
fix: "Server-render or prerender the interactive content so it is present in the raw HTML.",
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
return findings;
|
|
47
|
+
}
|
|
48
|
+
//# sourceMappingURL=csr-bailout.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csr-bailout.js","sourceRoot":"","sources":["../../../src/rules/tech/csr-bailout.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAE/B,OAAO,EAAE,gBAAgB,EAAE,6BAA6B,EAAE,MAAM,2BAA2B,CAAC;AAE5F,6EAA6E;AAC7E,gFAAgF;AAChF,MAAM,eAAe,GAAG,CAAC,CAAC;AAC1B,MAAM,WAAW,GAAG,GAAG,CAAC;AACxB,MAAM,cAAc,GAAG,GAAG,CAAC;AAC3B,MAAM,mBAAmB,GAAG,GAAG,CAAC;AAEhC,SAAS,gBAAgB,CAAC,IAAY;IACpC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC,CAAC,mCAAmC,CAAC,CAAC,MAAM,EAAE,CAAC;IAChD,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AACtE,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,KAAmB;IAChD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,YAAY;YAAE,SAAS;QAEjC,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAClD,MAAM,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAElD,MAAM,eAAe,GACnB,KAAK,IAAI,eAAe,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,GAAG,KAAK,IAAI,WAAW,CAAC,CAAC;QAC1E,MAAM,WAAW,GACf,KAAK,GAAG,IAAI,IAAI,cAAc,IAAI,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,mBAAmB,CAAC;QACrF,IAAI,CAAC,eAAe,IAAI,CAAC,WAAW;YAAE,SAAS;QAE/C,MAAM,UAAU,GAAe,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACnE,MAAM,QAAQ,GACZ,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,QAAQ;YACnD,CAAC,CAAC,2OAA2O;YAC7O,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,QAAQ,EAAE,SAAS;YACnB,UAAU;YACV,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,YAAY,KAAK,6CAA6C,IAAI,sBAAsB;gBACnG,IAAI,IAAI,IAAI,KAAK,6GAA6G,QAAQ,EAAE;YAC1I,GAAG,EAAE,sFAAsF;SAC5F,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -6,9 +6,14 @@ import type { ParsedPage, RuleResult } from "../../types.js";
|
|
|
6
6
|
*
|
|
7
7
|
* Required: og:title, og:description, og:image.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
9
|
+
* Severity gradation:
|
|
10
|
+
* - warning: og:title or og:description is missing (core social-card identity
|
|
11
|
+
* fields that affect how a link appears in feeds and AI summaries).
|
|
12
|
+
* - info: only og:image is missing (cosmetic — the card still has a title
|
|
13
|
+
* and description; the missing image is low-priority).
|
|
14
|
+
*
|
|
15
|
+
* Presence check: a field is considered MISSING when it is absent, empty, or
|
|
16
|
+
* whitespace-only (value is trimmed before evaluation).
|
|
12
17
|
*/
|
|
13
18
|
export declare function ogCompletenessRule(pages: ParsedPage[]): RuleResult[];
|
|
14
19
|
//# sourceMappingURL=og-completeness.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"og-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D
|
|
1
|
+
{"version":3,"file":"og-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBpE"}
|