@pseolint/core 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/rules/content/eeat-signals.d.ts +13 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -1
- package/dist/rules/content/eeat-signals.js +36 -4
- package/dist/rules/content/eeat-signals.js.map +1 -1
- package/dist/rules/content/value-add.d.ts +8 -2
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +39 -48
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/content/wikipedia-paraphrase.d.ts +12 -7
- package/dist/rules/content/wikipedia-paraphrase.d.ts.map +1 -1
- package/dist/rules/content/wikipedia-paraphrase.js +52 -13
- package/dist/rules/content/wikipedia-paraphrase.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +16 -12
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -1
- package/dist/rules/schema/json-ld-valid.js +8 -1
- package/dist/rules/schema/json-ld-valid.js.map +1 -1
- package/dist/rules/schema/required-fields.d.ts.map +1 -1
- package/dist/rules/schema/required-fields.js +47 -1
- package/dist/rules/schema/required-fields.js.map +1 -1
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -1
- package/dist/rules/spam/boilerplate-ratio.js +36 -22
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -1
- package/dist/rules/spam/template-diversity.d.ts.map +1 -1
- package/dist/rules/spam/template-diversity.js +37 -2
- package/dist/rules/spam/template-diversity.js.map +1 -1
- package/dist/rules/tech/og-completeness.d.ts +8 -3
- package/dist/rules/tech/og-completeness.d.ts.map +1 -1
- package/dist/rules/tech/og-completeness.js +15 -7
- package/dist/rules/tech/og-completeness.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,3 +1,16 @@
|
|
|
1
1
|
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Count how many of the 4 E-E-A-T signal categories the page satisfies.
|
|
4
|
+
*
|
|
5
|
+
* Categories:
|
|
6
|
+
* 1. About-page link — a same-host href containing /about
|
|
7
|
+
* 2. Author identity — any authorSignals field present
|
|
8
|
+
* 3. Published date — page.publishedDate set
|
|
9
|
+
* 4. Transparency text — "sources:", "references:", "last updated", etc.
|
|
10
|
+
* in page.contentText (NOT raw HTML, to avoid footer/JS false positives)
|
|
11
|
+
*
|
|
12
|
+
* Exported so value-add can reuse this without duplicating logic.
|
|
13
|
+
*/
|
|
14
|
+
export declare function countSignalCategories(page: ParsedPage): number;
|
|
2
15
|
export declare function eeatSignalsRule(pages: ParsedPage[]): RuleResult[];
|
|
3
16
|
//# sourceMappingURL=eeat-signals.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eeat-signals.d.ts","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"eeat-signals.d.ts","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAU7D;;;;;;;;;;;GAWG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAiC9D;AAED,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBjE"}
|
|
@@ -1,23 +1,55 @@
|
|
|
1
|
-
const
|
|
1
|
+
const EEAT_TEXT_PATTERNS = [
|
|
2
2
|
/last\s+updated/i,
|
|
3
3
|
/last\s+modified/i,
|
|
4
4
|
/reviewed\s+by/i,
|
|
5
5
|
/\bsources:/i,
|
|
6
6
|
/\breferences:/i
|
|
7
7
|
];
|
|
8
|
-
|
|
8
|
+
/**
|
|
9
|
+
* Count how many of the 4 E-E-A-T signal categories the page satisfies.
|
|
10
|
+
*
|
|
11
|
+
* Categories:
|
|
12
|
+
* 1. About-page link — a same-host href containing /about
|
|
13
|
+
* 2. Author identity — any authorSignals field present
|
|
14
|
+
* 3. Published date — page.publishedDate set
|
|
15
|
+
* 4. Transparency text — "sources:", "references:", "last updated", etc.
|
|
16
|
+
* in page.contentText (NOT raw HTML, to avoid footer/JS false positives)
|
|
17
|
+
*
|
|
18
|
+
* Exported so value-add can reuse this without duplicating logic.
|
|
19
|
+
*/
|
|
20
|
+
export function countSignalCategories(page) {
|
|
9
21
|
let count = 0;
|
|
10
|
-
|
|
22
|
+
// 1. About-page: same-host link with /about in the path
|
|
23
|
+
const pageHost = (() => {
|
|
24
|
+
try {
|
|
25
|
+
return new URL(page.url).host;
|
|
26
|
+
}
|
|
27
|
+
catch {
|
|
28
|
+
return "";
|
|
29
|
+
}
|
|
30
|
+
})();
|
|
31
|
+
if (page.resolvedHrefs.some((href) => {
|
|
32
|
+
try {
|
|
33
|
+
const u = new URL(href);
|
|
34
|
+
return u.host === pageHost && /\/about\b/i.test(u.pathname);
|
|
35
|
+
}
|
|
36
|
+
catch {
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
})) {
|
|
11
40
|
count += 1;
|
|
12
41
|
}
|
|
42
|
+
// 2. Author identity
|
|
13
43
|
const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
|
|
14
44
|
if (metaAuthor !== "" || schemaAuthor || bylineElement || relAuthorLink) {
|
|
15
45
|
count += 1;
|
|
16
46
|
}
|
|
47
|
+
// 3. Published date
|
|
17
48
|
if (page.publishedDate) {
|
|
18
49
|
count += 1;
|
|
19
50
|
}
|
|
20
|
-
|
|
51
|
+
// 4. Transparency text — check contentText (parsed visible text), not raw HTML
|
|
52
|
+
if (EEAT_TEXT_PATTERNS.some((pattern) => pattern.test(page.contentText))) {
|
|
21
53
|
count += 1;
|
|
22
54
|
}
|
|
23
55
|
return count;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eeat-signals.js","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAEA,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,
|
|
1
|
+
{"version":3,"file":"eeat-signals.js","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAEA,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAgB;IACpD,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,wDAAwD;IACxD,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YAAC,OAAO,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,EAAE,CAAC;QAAC,CAAC;IAC7D,CAAC,CAAC,EAAE,CAAC;IACL,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;QACnC,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;YACxB,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAC9D,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,KAAK,CAAC;QAAC,CAAC;IAC3B,CAAC,CAAC,EAAE,CAAC;QACH,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,qBAAqB;IACrB,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa,EAAE,CAAC;QACxE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,oBAAoB;IACpB,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QACvB,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,+EAA+E;IAC/E,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,EAAE,CAAC;QACzE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAExE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxD,OAAO,CAAC;gBACN,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,OAAO,OAAO,CAAC,MAAM,mGAAmG;gBACjI,GAAG,EAAE,oHAAoH;gBACzH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;aAC9C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,MAAe;QACzB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;QAC
3E,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,0GAA0G;KAChH,CAAC,CAAC,CAAC;AACN,CAAC"}
|
|
@@ -6,8 +6,14 @@ import type { ParsedPage, RuleResult } from "../../types.js";
|
|
|
6
6
|
* Aggregates 7 per-page signal scores (originality, freshness, facts,
|
|
7
7
|
* E-E-A-T, translation, cliché-reuse, wikipedia-paraphrase) into a
|
|
8
8
|
* single 0-1 quality score. Each signal weighted equally at 1/7 ≈ 14.3%.
|
|
9
|
-
*
|
|
10
|
-
*
|
|
9
|
+
*
|
|
10
|
+
* E-E-A-T sub-score is a continuous fraction (categoriesPresent/4), not
|
|
11
|
+
* a 3-step value. Reuses countSignalCategories from eeat-signals to avoid
|
|
12
|
+
* logic drift between the two rules.
|
|
13
|
+
*
|
|
14
|
+
* Fires ONE finding per page when score < 0.5:
|
|
15
|
+
* - warning (score ∈ [0.35, 0.5)) — borderline, low confidence
|
|
16
|
+
* - error (score < 0.35) — clearly low value-add
|
|
11
17
|
*/
|
|
12
18
|
export declare function valueAddRule(pages: ParsedPage[], findings: RuleResult[]): RuleResult[];
|
|
13
19
|
//# sourceMappingURL=value-add.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AA0HvE;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoBtF"}
|
|
@@ -1,26 +1,6 @@
|
|
|
1
1
|
import { hasAuthoritativeCitation } from "../../algorithms/fact-extraction.js";
|
|
2
|
+
import { countSignalCategories } from "./eeat-signals.js";
|
|
2
3
|
const RULE_ID = "content/value-add";
|
|
3
|
-
const EEAT_HTML_PATTERNS = [
|
|
4
|
-
/last\s+updated/i,
|
|
5
|
-
/last\s+modified/i,
|
|
6
|
-
/reviewed\s+by/i,
|
|
7
|
-
/\bsources:/i,
|
|
8
|
-
/\breferences:/i,
|
|
9
|
-
];
|
|
10
|
-
function countEeatCategories(page) {
|
|
11
|
-
let count = 0;
|
|
12
|
-
if (page.resolvedHrefs.some((h) => /\/about\b/i.test(h)))
|
|
13
|
-
count += 1;
|
|
14
|
-
const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
|
|
15
|
-
if (metaAuthor !== "" || schemaAuthor || bylineElement || relAuthorLink)
|
|
16
|
-
count += 1;
|
|
17
|
-
if (page.publishedDate)
|
|
18
|
-
count += 1;
|
|
19
|
-
if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html)) ||
|
|
20
|
-
hasAuthoritativeCitation(page.resolvedHrefs, page.url))
|
|
21
|
-
count += 1;
|
|
22
|
-
return count;
|
|
23
|
-
}
|
|
24
4
|
function computeSignals(page, allFindings) {
|
|
25
5
|
const pageFindings = allFindings.filter((f) => f.pageUrl === page.url);
|
|
26
6
|
// Originality: 1.0 if regurgitated-content doesn't fire, 0.0 if it does
|
|
@@ -50,18 +30,14 @@ function computeSignals(page, allFindings) {
|
|
|
50
30
|
else {
|
|
51
31
|
facts = 0.0;
|
|
52
32
|
}
|
|
53
|
-
// E-E-A-T:
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
}
|
|
62
|
-
else {
|
|
63
|
-
eeat = 0.0;
|
|
64
|
-
}
|
|
33
|
+
// E-E-A-T: continuous fraction of 4 categories present.
|
|
34
|
+
// Reuses countSignalCategories from eeat-signals (no duplicate logic).
|
|
35
|
+
// Also grants the "sources" credit for authoritative outbound citations.
|
|
36
|
+
const eeatCount = countSignalCategories(page);
|
|
37
|
+
const hasCitation = hasAuthoritativeCitation(page.resolvedHrefs, page.url);
|
|
38
|
+
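// Add one extra credit for an authoritative citation, capped at 4. Note: this credit is added whenever eeatCount < 4, so it can stack with the transparency-text category already counted by countSignalCategories.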
|
|
39
|
+
const effectiveEeatCount = Math.min(4, eeatCount + (hasCitation && eeatCount < 4 ? 1 : 0));
|
|
40
|
+
const eeat = effectiveEeatCount / 4;
|
|
65
41
|
// Translation: 1.0 unless translation-no-op lists this page
|
|
66
42
|
const hasTranslationNoOp = allFindings.some((f) => f.ruleId === "content/translation-no-op" &&
|
|
67
43
|
(f.pageUrl === page.url || (f.relatedUrls ?? []).includes(page.url)));
|
|
@@ -69,13 +45,7 @@ function computeSignals(page, allFindings) {
|
|
|
69
45
|
// Cliché reuse (signal 6): 1.0 if common-phrase-reuse doesn't fire, 0.0 if it does
|
|
70
46
|
const hasClicheReuse = pageFindings.some((f) => f.ruleId === "content/common-phrase-reuse");
|
|
71
47
|
const clicheReuse = hasClicheReuse ? 0.0 : 1.0;
|
|
72
|
-
// Wikipedia paraphrase (signal 7
|
|
73
|
-
// fire on this page, 0.0 if it does. The rule fires at warning/low when
|
|
74
|
-
// page text overlaps ≥40% with the bundled trigram corpus — a real signal
|
|
75
|
-
// for "content lifted from Wikipedia," orthogonal to the other 6 originality
|
|
76
|
-
// proxies. Adding it shifts each signal's weight from 1/6 (16.7%) to 1/7
|
|
77
|
-
// (14.3%) — boundary cases at score=0.30 and score=0.50 may shift by
|
|
78
|
-
// ±0.024 per signal, which is below the granularity of severity bands.
|
|
48
|
+
// Wikipedia paraphrase (signal 7): 1.0 if wikipedia-paraphrase doesn't fire, 0.0 if it does
|
|
79
49
|
const hasWikipediaParaphrase = pageFindings.some((f) => f.ruleId === "content/wikipedia-paraphrase");
|
|
80
50
|
const wikipediaParaphrase = hasWikipediaParaphrase ? 0.0 : 1.0;
|
|
81
51
|
return { originality, freshness, facts, eeat, translation, clicheReuse, wikipediaParaphrase };
|
|
@@ -92,10 +62,24 @@ function meanScore(signals) {
|
|
|
92
62
|
];
|
|
93
63
|
return values.reduce((a, b) => a + b, 0) / values.length;
|
|
94
64
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
65
|
+
/**
|
|
66
|
+
* Two-band severity for the composite score:
|
|
67
|
+
* - score in [0.35, 0.5) → "warning" (borderline: page is weak but not egregiously thin)
|
|
68
|
+
* - score < 0.35 → "error" (clearly low value-add)
|
|
69
|
+
*
|
|
70
|
+
* Confidence scales with distance from the fire threshold:
|
|
71
|
+
* - score < 0.2 → "high"
|
|
72
|
+
* - score in [0.2, 0.35) → "medium"
|
|
73
|
+
* - score in [0.35, 0.5) → "low" (borderline warning)
|
|
74
|
+
*/
|
|
75
|
+
function severityAndConfidence(score) {
|
|
76
|
+
if (score >= 0.35) {
|
|
77
|
+
return { severity: "warning", confidence: "low" };
|
|
78
|
+
}
|
|
79
|
+
if (score < 0.2) {
|
|
80
|
+
return { severity: "error", confidence: "high" };
|
|
81
|
+
}
|
|
82
|
+
return { severity: "error", confidence: "medium" };
|
|
99
83
|
}
|
|
100
84
|
function buildMessage(page, score, signals) {
|
|
101
85
|
const pct = (v) => `${(v * 100).toFixed(0)}%`;
|
|
@@ -119,8 +103,14 @@ function buildMessage(page, score, signals) {
|
|
|
119
103
|
* Aggregates 7 per-page signal scores (originality, freshness, facts,
|
|
120
104
|
* E-E-A-T, translation, cliché-reuse, wikipedia-paraphrase) into a
|
|
121
105
|
* single 0-1 quality score. Each signal weighted equally at 1/7 ≈ 14.3%.
|
|
122
|
-
*
|
|
123
|
-
*
|
|
106
|
+
*
|
|
107
|
+
* E-E-A-T sub-score is a continuous fraction (categoriesPresent/4), not
|
|
108
|
+
* a 3-step value. Reuses countSignalCategories from eeat-signals to avoid
|
|
109
|
+
* logic drift between the two rules.
|
|
110
|
+
*
|
|
111
|
+
* Fires ONE finding per page when score < 0.5:
|
|
112
|
+
* - warning (score ∈ [0.35, 0.5)) — borderline, low confidence
|
|
113
|
+
* - error (score < 0.35) — clearly low value-add
|
|
124
114
|
*/
|
|
125
115
|
export function valueAddRule(pages, findings) {
|
|
126
116
|
const results = [];
|
|
@@ -129,10 +119,11 @@ export function valueAddRule(pages, findings) {
|
|
|
129
119
|
const score = meanScore(signals);
|
|
130
120
|
if (score >= 0.5)
|
|
131
121
|
continue;
|
|
122
|
+
const { severity, confidence } = severityAndConfidence(score);
|
|
132
123
|
results.push({
|
|
133
124
|
ruleId: RULE_ID,
|
|
134
|
-
severity
|
|
135
|
-
confidence
|
|
125
|
+
severity,
|
|
126
|
+
confidence,
|
|
136
127
|
message: buildMessage(page, score, signals),
|
|
137
128
|
fix: "Add proprietary content (original analysis, primary-source data, expert commentary, original imagery) to lift the value-add score above 0.5. Score is a composite — improve any underweight signal.",
|
|
138
129
|
pageUrl: page.url,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;
|
|
1
|
+
{"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAE1D,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAYpC,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,wDAAwD;IACxD,uEAAuE;IACvE,yEAAyE;IACzE,MAAM,SAAS,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3E,0GAA0G;IAC1G,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,WAAW,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3F,MAAM,IAAI,GAAG,kBAAkB,GAAG,CAAC,CAAC;IAEpC,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAAc,
GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,4FAA4F;IAC5F,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,qBAAqB,CAAC,KAAa;IAC1C,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;IACpD,CAAC;IACD,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC;QAChB,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IACnD,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AACrD,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAAU,
iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC9D,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -1,15 +1,20 @@
|
|
|
1
1
|
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
2
|
/**
|
|
3
|
-
* content/wikipedia-paraphrase —
|
|
3
|
+
* content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
|
|
4
4
|
*
|
|
5
|
-
* Detects pages whose contentText has high trigram overlap with the
|
|
6
|
-
* Wikipedia reference corpus.
|
|
7
|
-
*
|
|
5
|
+
* Detects pages whose contentText has unusually high trigram overlap with the
|
|
6
|
+
* bundled Wikipedia reference corpus. This is a weak, advisory signal only:
|
|
7
|
+
* trigram overlap cannot distinguish actual paraphrase from legitimate topical
|
|
8
|
+
* proximity (e.g. a legal-template page naturally shares many encyclopedic
|
|
9
|
+
* trigrams with Wikipedia articles on the same topic).
|
|
8
10
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
+
* Two guards reduce false positives:
|
|
12
|
+
* 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
|
|
13
|
+
* words) are skipped entirely — bloom noise alone dominates on short pages.
|
|
14
|
+
* 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
|
|
15
|
+
* (~5%) and typical topical-proximity baseline.
|
|
11
16
|
*
|
|
12
|
-
* Fires: one warning/low-confidence finding per qualifying page (rate >= 0.
|
|
17
|
+
* Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
|
|
13
18
|
*/
|
|
14
19
|
export declare function wikipediaParaphraseRule(pages: ParsedPage[]): RuleResult[];
|
|
15
20
|
//# sourceMappingURL=wikipedia-paraphrase.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wikipedia-paraphrase.d.ts","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"wikipedia-paraphrase.d.ts","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAkC7D;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgCzE"}
|
|
@@ -1,23 +1,61 @@
|
|
|
1
1
|
import { wikipediaParaphraseRate } from "../../algorithms/wikipedia-paraphrase.js";
|
|
2
2
|
const RULE_ID = "content/wikipedia-paraphrase";
|
|
3
|
-
const THRESHOLD = 0.4;
|
|
4
3
|
/**
|
|
5
|
-
*
|
|
4
|
+
* ponytail: MIN_TRIGRAM_COUNT = 200
|
|
6
5
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
6
|
+
* The bloom filter has a ~5% per-query false-positive rate. On a page with
|
|
7
|
+
* N trigrams the expected bloom-noise hit count is 0.05 * N. For a short page
|
|
8
|
+
* (~48 trigrams) that alone produces ~2.4 expected FP hits; with a threshold
|
|
9
|
+
* of 40% (19/48) the noise alone can exceed the threshold on short pages.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
11
|
+
* Setting a floor of 200 trigrams (~202 words) means bloom noise contributes
|
|
12
|
+
* about 10 / 200 = 5% of trigrams in expectation, far below the raised THRESHOLD, so noise
|
|
13
|
+
* cannot trigger the rule on its own.
|
|
14
|
+
*/
|
|
15
|
+
const MIN_TRIGRAM_COUNT = 200;
|
|
16
|
+
/**
|
|
17
|
+
* ponytail: THRESHOLD = 0.55
|
|
18
|
+
*
|
|
19
|
+
* Raised from 0.40 to 0.55 to account for the bloom filter's ~5% per-query
|
|
20
|
+
* FP rate and the "topic overlap" effect: legal/medical/geography pSEO pages
|
|
21
|
+
* share many encyclopedic trigrams ("the united states", "in the state of")
|
|
22
|
+
* purely through topical proximity, not paraphrase. A 55% overlap is
|
|
23
|
+
* substantially above both the noise floor (~5%) and the expected topic-
|
|
24
|
+
* overlap baseline, making the signal meaningfully indicative of genuine
|
|
25
|
+
* encyclopedic reuse. At this level the rule remains advisory (confidence:
|
|
26
|
+
* "low") because trigram overlap cannot distinguish paraphrase from topic
|
|
27
|
+
* proximity — it is a weak signal, not a verdict.
|
|
28
|
+
*/
|
|
29
|
+
const THRESHOLD = 0.55;
|
|
30
|
+
/**
|
|
31
|
+
* content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
|
|
32
|
+
*
|
|
33
|
+
* Detects pages whose contentText has unusually high trigram overlap with the
|
|
34
|
+
* bundled Wikipedia reference corpus. This is a weak, advisory signal only:
|
|
35
|
+
* trigram overlap cannot distinguish actual paraphrase from legitimate topical
|
|
36
|
+
* proximity (e.g. a legal-template page naturally shares many encyclopedic
|
|
37
|
+
* trigrams with Wikipedia articles on the same topic).
|
|
13
38
|
*
|
|
14
|
-
*
|
|
39
|
+
* Two guards reduce false positives:
|
|
40
|
+
* 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
|
|
41
|
+
* words) are skipped entirely — bloom noise alone dominates on short pages.
|
|
42
|
+
* 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
|
|
43
|
+
* (~5%) and typical topical-proximity baseline.
|
|
44
|
+
*
|
|
45
|
+
* Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
|
|
15
46
|
*/
|
|
16
47
|
export function wikipediaParaphraseRule(pages) {
|
|
17
48
|
const findings = [];
|
|
18
49
|
for (const page of pages) {
|
|
19
50
|
if (!page.contentText || page.contentText.trim().length === 0)
|
|
20
51
|
continue;
|
|
52
|
+
// Estimate trigram count without re-implementing extractTrigrams: count
|
|
53
|
+
// whitespace-separated tokens then subtract 2 (trigrams = tokens - 2).
|
|
54
|
+
// This is a cheap proxy; the algorithm file does the accurate extraction.
|
|
55
|
+
const tokenCount = page.contentText.trim().split(/\s+/).length;
|
|
56
|
+
const estimatedTrigrams = Math.max(0, tokenCount - 2);
|
|
57
|
+
if (estimatedTrigrams < MIN_TRIGRAM_COUNT)
|
|
58
|
+
continue;
|
|
21
59
|
const rate = wikipediaParaphraseRate(page.contentText);
|
|
22
60
|
if (rate < THRESHOLD)
|
|
23
61
|
continue;
|
|
@@ -27,11 +65,12 @@ export function wikipediaParaphraseRule(pages) {
|
|
|
27
65
|
severity: "warning",
|
|
28
66
|
confidence: "low",
|
|
29
67
|
pageUrl: page.url,
|
|
30
|
-
message: `${page.url}
|
|
31
|
-
`reference corpus.
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"
|
|
68
|
+
message: `${page.url} has high trigram overlap (${pct}%) with the bundled Wikipedia ` +
|
|
69
|
+
`reference corpus. This is an advisory signal — trigram overlap can reflect ` +
|
|
70
|
+
`topical proximity as well as copied content and cannot distinguish the two.`,
|
|
71
|
+
fix: "Review for borrowed encyclopedic phrasing and replace with original analysis " +
|
|
72
|
+
"specific to this page's subject. Even if attributed, high paraphrase rates " +
|
|
73
|
+
"correlate with low value-add by SpamBrain's helpful-content metric.",
|
|
35
74
|
});
|
|
36
75
|
}
|
|
37
76
|
return findings;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;;;;;GAWG;AACH,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;;;;;;;;;;GAYG;AACH,MAAM,SAAS,GAAG,IAAI,CAAC;AAEvB;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAExE,wEAAwE;QACxE,uEAAuE;QACvE,0EAA0E;QAC1E,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;QACtD,IAAI,iBAAiB,GAAG,iBAAiB;YAAE,SAAS;QAEpD,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,IAAI,IAAI,GAAG,SAAS;YAAE,SAAS;QAE/B,MAAM,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,8BAA8B,GAAG,gCAAgC;gBAC5E,6EAA6E;gBAC7E,6EAA6E;YAC/E,GAAG,EACD,+EAA+E;gBAC/E,6EAA6E;gBAC7E,qEAAqE;SACxE,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgEvE"}
|
|
@@ -28,27 +28,31 @@ export function schemaConsistencyRule(pages) {
|
|
|
28
28
|
}
|
|
29
29
|
clustersBySignature.get(sig).push({ url: page.url, types });
|
|
30
30
|
}
|
|
31
|
-
// Within each cluster of ≥2 pages,
|
|
31
|
+
// Within each cluster of ≥2 pages, fire only when pages carry DIFFERENT @type
|
|
32
|
+
// SETS. A single page legitimately emits several JSON-LD blocks (e.g. Article +
|
|
33
|
+
// FAQPage + Organization) — that multi-type set is not an inconsistency. The
|
|
34
|
+
// problem is two pages on the SAME template disagreeing on their type set
|
|
35
|
+
// (e.g. one Article, one NewsArticle). Comparing per-page set signatures (not
|
|
36
|
+
// the union) avoids the false positive where every page shares the same set.
|
|
37
|
+
const setSignature = (types) => Array.from(types).sort().join("+");
|
|
32
38
|
for (const members of clustersBySignature.values()) {
|
|
33
39
|
if (members.length < 2) {
|
|
34
40
|
continue;
|
|
35
41
|
}
|
|
36
|
-
const
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
allTypesInCluster.add(t);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
if (allTypesInCluster.size <= 1) {
|
|
43
|
-
continue;
|
|
42
|
+
const distinctSetSignatures = new Set(members.map((m) => setSignature(m.types)));
|
|
43
|
+
if (distinctSetSignatures.size <= 1) {
|
|
44
|
+
continue; // all pages in this template cluster agree on their @type set
|
|
44
45
|
}
|
|
45
|
-
const
|
|
46
|
+
const variants = Array.from(distinctSetSignatures)
|
|
47
|
+
.sort()
|
|
48
|
+
.map((s) => `[${s.split("+").join(", ")}]`)
|
|
49
|
+
.join(" vs ");
|
|
46
50
|
findings.push({
|
|
47
51
|
ruleId: "schema/consistency",
|
|
48
52
|
severity: "info",
|
|
49
|
-
message: `Template pages
|
|
53
|
+
message: `Template pages disagree on schema @type (${variants}). Use a consistent @type across pages that share the same template structure.`,
|
|
50
54
|
relatedUrls: members.map((m) => m.url),
|
|
51
|
-
fix: `Use a consistent @type across all pages that share the same template structure.`
|
|
55
|
+
fix: `Use a consistent @type (or set of @types) across all pages that share the same template structure.`
|
|
52
56
|
});
|
|
53
57
|
}
|
|
54
58
|
return findings;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,
|
|
1
|
+
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,8EAA8E;IAC9E,gFAAgF;IAChF,6EAA6E;IAC7E,0EAA0E;IAC1E,8EAA8E;IAC9E,6EAA6E;IAC7E,MAAM,YAAY,GAAG,CAAC,KAAkB,EAAU,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,qBAAqB,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,qBAAqB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YACpC,SAAS,CAAC,8DAA8D;QAC1E,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;aAC/C,IAAI,EAAE;aACN,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;aAC1C,IAAI,CAAC,MAAM,CAAC,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,4CAA4C,QAAQ,gFA
AgF;YAC7I,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,oGAAoG;SAC1G,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA8DjE"}
|
|
@@ -30,7 +30,14 @@ export function jsonLdValidRule(pages) {
|
|
|
30
30
|
}
|
|
31
31
|
if (obj["@type"] !== undefined) {
|
|
32
32
|
const typeValue = obj["@type"];
|
|
33
|
-
|
|
33
|
+
const typeIsValid =
|
|
34
|
+
// string: non-empty non-whitespace
|
|
35
|
+
(typeof typeValue === "string" && typeValue.trim() !== "") ||
|
|
36
|
+
// array: non-empty, every element is a non-empty non-whitespace string
|
|
37
|
+
(Array.isArray(typeValue) &&
|
|
38
|
+
typeValue.length > 0 &&
|
|
39
|
+
typeValue.every((t) => typeof t === "string" && t.trim() !== ""));
|
|
40
|
+
if (!typeIsValid) {
|
|
34
41
|
findings.push({
|
|
35
42
|
ruleId: "schema/json-ld-valid",
|
|
36
43
|
severity: "error",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,
|
|
1
|
+
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM,WAAW;gBACf,mCAAmC;gBACnC,CAAC,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;oBAC1D,uEAAuE;oBACvE,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC;wBACvB,SAAS,CAAC,MAAM,GAAG,CAAC;wBACnB,SAAuB,CAAC,KAAK,CAC5B,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAChD,CAAC,CAAC;gBACP,IAAI,CAAC,WAAW,EAAE,CAAC;oBACjB,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyD7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAwDpE"}
|
|
@@ -3,6 +3,47 @@ const REQUIRED_FIELDS = {
|
|
|
3
3
|
Product: ["name"],
|
|
4
4
|
FAQPage: ["mainEntity"]
|
|
5
5
|
};
|
|
6
|
+
/**
|
|
7
|
+
* Returns true when a field value should be treated as "missing" (junk/empty).
|
|
8
|
+
* Treats non-blank strings, non-empty arrays, non-empty objects, booleans, and numbers as present.
|
|
9
|
+
*/
|
|
10
|
+
function isMissing(value) {
|
|
11
|
+
if (value === undefined || value === null)
|
|
12
|
+
return true;
|
|
13
|
+
if (typeof value === "string")
|
|
14
|
+
return value.trim() === "";
|
|
15
|
+
if (Array.isArray(value))
|
|
16
|
+
return value.length === 0;
|
|
17
|
+
if (typeof value === "object")
|
|
18
|
+
return Object.keys(value).length === 0;
|
|
19
|
+
// booleans and numbers always count as present (including false and 0)
|
|
20
|
+
if (typeof value === "boolean" || typeof value === "number")
|
|
21
|
+
return false;
|
|
22
|
+
return true;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Article `author` is valid when it is:
|
|
26
|
+
* - a non-empty string, OR
|
|
27
|
+
* - an object with a non-empty `name` property (Person/Organization), OR
|
|
28
|
+
* - a non-empty array of the above (co-authored articles — Schema.org allows
|
|
29
|
+
* `author` to be a list). Present if at least one element is a valid author.
|
|
30
|
+
* Returns true when the author value is missing/junk.
|
|
31
|
+
*/
|
|
32
|
+
function isAuthorMissing(value) {
|
|
33
|
+
if (value === undefined || value === null)
|
|
34
|
+
return true;
|
|
35
|
+
if (typeof value === "string")
|
|
36
|
+
return value.trim() === "";
|
|
37
|
+
if (Array.isArray(value)) {
|
|
38
|
+
return value.length === 0 || value.every((item) => isAuthorMissing(item));
|
|
39
|
+
}
|
|
40
|
+
if (typeof value === "object") {
|
|
41
|
+
const obj = value;
|
|
42
|
+
return typeof obj.name !== "string" || obj.name.trim() === "";
|
|
43
|
+
}
|
|
44
|
+
// booleans, numbers — not a valid author shape
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
6
47
|
function hasPrice(obj) {
|
|
7
48
|
if (obj.price !== undefined && obj.price !== null && obj.price !== "") {
|
|
8
49
|
return true;
|
|
@@ -37,7 +78,12 @@ export function requiredFieldsRule(pages) {
|
|
|
37
78
|
}
|
|
38
79
|
const missing = [];
|
|
39
80
|
for (const field of required) {
|
|
40
|
-
if (
|
|
81
|
+
if (field === "author" && schemaType === "Article") {
|
|
82
|
+
if (isAuthorMissing(obj[field])) {
|
|
83
|
+
missing.push(field);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
else if (isMissing(obj[field])) {
|
|
41
87
|
missing.push(field);
|
|
42
88
|
}
|
|
43
89
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,
|
|
1
|
+
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF;;;GAGG;AACH,SAAS,SAAS,CAAC,KAAc;IAC/B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,KAAe,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAChF,6DAA6D;IAC7D,IAAI,OAAO,KAAK,KAAK,SAAS,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC1E,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,OAAO,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,CAAC;IACD,+CAA+C;IAC/C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KA
AK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,KAAK,KAAK,QAAQ,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;oBACnD,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBAChC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;qBAAM,IAAI,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAa7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CA8DxF"}
|
|
@@ -4,11 +4,16 @@ function extractTextBlocks(contentText) {
|
|
|
4
4
|
.map((block) => block.trim().toLowerCase())
|
|
5
5
|
.filter((block) => block.length > 20);
|
|
6
6
|
}
|
|
7
|
+
function wordCount(block) {
|
|
8
|
+
return block.split(/\s+/).length;
|
|
9
|
+
}
|
|
7
10
|
export function boilerplateRatioRule(pages, maxRatio) {
|
|
8
11
|
if (pages.length < 2) {
|
|
9
12
|
return [];
|
|
10
13
|
}
|
|
14
|
+
const N = pages.length;
|
|
11
15
|
const pageBlocks = pages.map((page) => extractTextBlocks(page.contentText));
|
|
16
|
+
// Build per-block document frequency (how many pages contain each block).
|
|
12
17
|
const blockFrequency = new Map();
|
|
13
18
|
for (const blocks of pageBlocks) {
|
|
14
19
|
const unique = new Set(blocks);
|
|
@@ -16,34 +21,43 @@ export function boilerplateRatioRule(pages, maxRatio) {
|
|
|
16
21
|
blockFrequency.set(block, (blockFrequency.get(block) ?? 0) + 1);
|
|
17
22
|
}
|
|
18
23
|
}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
// Continuous weight, min-max normalized over document frequency: a block
|
|
25
|
+
// unique to ONE page is not boilerplate at all (weight 0); a block on EVERY
|
|
26
|
+
// page is full boilerplate (weight 1); mid-frequency blocks scale linearly
|
|
27
|
+
// between. (freq-1)/(N-1) — not freq/N — so unique content never inflates the
|
|
28
|
+
// ratio (which freq/N does, giving every block at least 1/N). N>=2 here, so
|
|
29
|
+
// N-1>=1: no division by zero. Removes the binary skeleton cliff entirely.
|
|
30
|
+
const blockWeight = (block) => {
|
|
31
|
+
const freq = blockFrequency.get(block) ?? 0;
|
|
32
|
+
return (freq - 1) / (N - 1);
|
|
33
|
+
};
|
|
26
34
|
const findings = [];
|
|
27
35
|
pages.forEach((page, index) => {
|
|
28
36
|
const blocks = pageBlocks[index];
|
|
29
|
-
if (blocks.length === 0)
|
|
37
|
+
if (blocks.length === 0)
|
|
30
38
|
return;
|
|
31
|
-
|
|
32
|
-
const totalWords = blocks.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
|
|
33
|
-
const boilerplateWords = blocks
|
|
34
|
-
.filter((b) => skeleton.has(b))
|
|
35
|
-
.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
|
|
39
|
+
const totalWords = blocks.reduce((sum, b) => sum + wordCount(b), 0);
|
|
36
40
|
if (totalWords === 0)
|
|
37
41
|
return;
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
// Weighted boilerplate word count: each block contributes (weight * its word count).
|
|
43
|
+
const weightedBoilerplateWords = blocks.reduce((sum, b) => {
|
|
44
|
+
return sum + blockWeight(b) * wordCount(b);
|
|
45
|
+
}, 0);
|
|
46
|
+
const ratio = weightedBoilerplateWords / totalWords;
|
|
47
|
+
if (ratio <= maxRatio)
|
|
48
|
+
return;
|
|
49
|
+
// 2-band severity: clearly over (≥ threshold + 0.1) → error; just over → warning.
|
|
50
|
+
const clearlyOver = ratio >= maxRatio + 0.1;
|
|
51
|
+
const severity = clearlyOver ? "error" : "warning";
|
|
52
|
+
const confidence = clearlyOver ? "high" : "medium";
|
|
53
|
+
findings.push({
|
|
54
|
+
ruleId: "spam/boilerplate-ratio",
|
|
55
|
+
severity,
|
|
56
|
+
confidence,
|
|
57
|
+
pageUrl: page.url,
|
|
58
|
+
message: `${page.url} has boilerplate ratio ${(ratio * 100).toFixed(1)}% (max ${(maxRatio * 100).toFixed(1)}%).`,
|
|
59
|
+
fix: `${(ratio * 100).toFixed(1)}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${(maxRatio * 100).toFixed(1)}%.`
|
|
60
|
+
});
|
|
47
61
|
});
|
|
48
62
|
return findings;
|
|
49
63
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACnC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,0EAA0E;IAC1E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,4EAA4E;IAC5E,2EAA2E;IAC3E,8EAA8E;IAC9E,4EAA4E;IAC5E,2EAA2E;IAC3E,MAAM,WAAW,GAAG,CAAC,KAAa,EAAU,EAAE;QAC5C,MAAM,IAAI,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEhC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpE,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,qFAAqF;QACrF,MAAM,wBAAwB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YACxD,OAAO,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC,EAAE,CAAC,CAAC,CAAC;QAEN,MAAM,KAAK,GAAG,wBAAwB,GAAG,UAAU,CAAC;QAEpD,IAAI,KAAK,IAAI,QAAQ;YAAE,OAAO;QAE9B,kFAAkF;QAClF,MAAM,WAAW,GAAG,KAAK,IAAI,QAAQ,GAAG,GAAG,CAAC;QAC5
C,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QACnD,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEnD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,wBAAwB;YAChC,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;SACnQ,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AA8BzE,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,UAAU,EAAE,CA0Bd"}
|
|
@@ -1,17 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coarsen a structureSignature ("tag:count|tag:count|...") by bucketing each
|
|
3
|
+
* tag's count logarithmically. Pages that differ only by trivial chrome — one
|
|
4
|
+
* extra ad `<div>`, a conditional nav item — collapse to the SAME coarse
|
|
5
|
+
* signature, so a genuinely single-template site is no longer read as "diverse"
|
|
6
|
+
* from count noise (the false negative the exact-count fingerprint caused).
|
|
7
|
+
*
|
|
8
|
+
* The raw exact-count signature (parser.buildStructureSignature) is SHARED with
|
|
9
|
+
* spam/near-duplicate and spam/doorway-pattern and is deliberately left
|
|
10
|
+
* untouched — this coarsening is local to the diversity measure.
|
|
11
|
+
*/
|
|
12
|
+
function coarsenSignature(signature) {
|
|
13
|
+
if (!signature)
|
|
14
|
+
return signature;
|
|
15
|
+
return signature
|
|
16
|
+
.split("|")
|
|
17
|
+
.map((pair) => {
|
|
18
|
+
const idx = pair.lastIndexOf(":");
|
|
19
|
+
if (idx < 0)
|
|
20
|
+
return pair;
|
|
21
|
+
const tag = pair.slice(0, idx);
|
|
22
|
+
const count = Number(pair.slice(idx + 1));
|
|
23
|
+
if (!Number.isFinite(count))
|
|
24
|
+
return pair;
|
|
25
|
+
// log2 bucket of (count + 1): 0→0, 1-2→1, 3-6→2 … 31-62→5, 63-126→6. Trivial count
|
|
26
|
+
// differences land in the same bucket; an order-of-magnitude change does not.
|
|
27
|
+
return `${tag}:${Math.floor(Math.log2(count + 1))}`;
|
|
28
|
+
})
|
|
29
|
+
.join("|");
|
|
30
|
+
}
|
|
1
31
|
export function templateDiversityRule(pages, minUniqueRatio) {
|
|
2
32
|
if (pages.length === 0) {
|
|
3
33
|
return [];
|
|
4
34
|
}
|
|
5
|
-
const unique = new Set(pages.map((page) => page.structureSignature)).size;
|
|
35
|
+
const unique = new Set(pages.map((page) => coarsenSignature(page.structureSignature))).size;
|
|
6
36
|
const ratio = unique / pages.length;
|
|
7
37
|
if (ratio >= minUniqueRatio) {
|
|
8
38
|
return [];
|
|
9
39
|
}
|
|
40
|
+
// Confidence band: a ratio far below the floor is a stronger single-template
|
|
41
|
+
// signal than one hovering just under it.
|
|
42
|
+
const confidence = ratio < minUniqueRatio / 2 ? "high" : "medium";
|
|
10
43
|
return [
|
|
11
44
|
{
|
|
12
45
|
ruleId: "spam/template-diversity",
|
|
13
46
|
severity: "warning",
|
|
14
|
-
|
|
47
|
+
confidence,
|
|
48
|
+
message: `Template diversity ratio is ${ratio.toFixed(2)} (min ${minUniqueRatio.toFixed(2)}); ` +
|
|
49
|
+
`the ${pages.length} pages collapse to ${unique} distinct structural shapes after ignoring minor chrome variation.`,
|
|
15
50
|
fix: "Vary the HTML structure across pages. Add conditional sections, different layouts, or page-specific components. Identical-structure corpora are a primary scaled-content-abuse signal that the March 27, 2026 core update reinforced."
|
|
16
51
|
}
|
|
17
52
|
];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;GAUG;AACH,SAAS,gBAAgB,CAAC,SAAiB;IACzC,IAAI,CAAC,SAAS;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,SAAS;SACb,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzC,oEAAoE;QACpE,8EAA8E;QAC9E,OAAO,GAAG,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,CAAC,CAAC;SACD,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5F,MAAM,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;QAC5B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,6EAA6E;IAC7E,0CAA0C;IAC1C,MAAM,UAAU,GAAe,KAAK,GAAG,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE9E,OAAO;QACL;YACE,MAAM,EAAE,yBAAyB;YACjC,QAAQ,EAAE,SAAS;YACnB,UAAU;YACV,OAAO,EACL,+BAA+B,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACtF,OAAO,KAAK,CAAC,MAAM,sBAAsB,MAAM,oEAAoE;YACrH,GAAG,EAAE,uOAAuO;SAC7O;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -6,9 +6,14 @@ import type { ParsedPage, RuleResult } from "../../types.js";
|
|
|
6
6
|
*
|
|
7
7
|
* Required: og:title, og:description, og:image.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
9
|
+
* Severity gradation:
|
|
10
|
+
* - warning: og:title or og:description is missing (core social-card identity
|
|
11
|
+
* fields that affect how a link appears in feeds and AI summaries).
|
|
12
|
+
* - info: only og:image is missing (cosmetic — the card still has a title
|
|
13
|
+
* and description; the missing image is low-priority).
|
|
14
|
+
*
|
|
15
|
+
* Presence check: a field is considered MISSING when it is absent, empty, or
|
|
16
|
+
* whitespace-only (value is trimmed before evaluation).
|
|
12
17
|
*/
|
|
13
18
|
export declare function ogCompletenessRule(pages: ParsedPage[]): RuleResult[];
|
|
14
19
|
//# sourceMappingURL=og-completeness.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"og-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D
|
|
1
|
+
{"version":3,"file":"og-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBpE"}
|
|
@@ -5,25 +5,33 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Required: og:title, og:description, og:image.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* Severity gradation:
|
|
9
|
+
* - warning: og:title or og:description is missing (core social-card identity
|
|
10
|
+
* fields that affect how a link appears in feeds and AI summaries).
|
|
11
|
+
* - info: only og:image is missing (cosmetic — the card still has a title
|
|
12
|
+
* and description; the missing image is low-priority).
|
|
13
|
+
*
|
|
14
|
+
* Presence check: a field is considered MISSING when it is absent, empty, or
|
|
15
|
+
* whitespace-only (value is trimmed before evaluation).
|
|
11
16
|
*/
|
|
12
17
|
export function ogCompletenessRule(pages) {
|
|
13
18
|
const findings = [];
|
|
14
19
|
for (const page of pages) {
|
|
15
20
|
const missing = [];
|
|
16
|
-
if (!page.og.title)
|
|
21
|
+
if (!page.og.title.trim())
|
|
17
22
|
missing.push("og:title");
|
|
18
|
-
if (!page.og.description)
|
|
23
|
+
if (!page.og.description.trim())
|
|
19
24
|
missing.push("og:description");
|
|
20
|
-
if (!page.og.image)
|
|
25
|
+
if (!page.og.image.trim())
|
|
21
26
|
missing.push("og:image");
|
|
22
27
|
if (missing.length === 0)
|
|
23
28
|
continue;
|
|
29
|
+
const missingCore = missing.some((f) => f === "og:title" || f === "og:description");
|
|
30
|
+
const severity = missingCore ? "warning" : "info";
|
|
24
31
|
findings.push({
|
|
25
32
|
ruleId: "tech/og-completeness",
|
|
26
|
-
severity
|
|
33
|
+
severity,
|
|
34
|
+
confidence: missingCore ? "high" : "medium",
|
|
27
35
|
message: `${page.url} is missing Open Graph tags: ${missing.join(", ")}.`,
|
|
28
36
|
pageUrl: page.url,
|
|
29
37
|
fix: `Add the missing meta tags inside <head>: ${missing.map((tag) => `<meta property="${tag}" content="...">`).join(" ")}.`,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"og-completeness.js","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAEA
|
|
1
|
+
{"version":3,"file":"og-completeness.js","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACpD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAChE,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACpD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEnC,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,UAAU,IAAI,CAAC,KAAK,gBAAgB,CAAC,CAAC;QACpF,MAAM,QAAQ,GAA2B,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC;QAE1E,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ;YACR,UAAU,EAAE,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ;YAC3C,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,gCAAgC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACzE,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,4CAA4C,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,mBAAmB,GAAG,kBAAkB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG;SAC7H,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pseolint/core",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.2",
|
|
4
4
|
"description": "Programmatic SEO audit engine — 32 rules across 4 categories (integrity, discoverability, citation, data) for SpamBrain risk + AI Overview citability. v0.4 verdict ladder + site classifier.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Ouranos Labs <contact@ouranos-labs.dev>",
|