@pseolint/core 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/rules/content/eeat-signals.d.ts +13 -0
  2. package/dist/rules/content/eeat-signals.d.ts.map +1 -1
  3. package/dist/rules/content/eeat-signals.js +36 -4
  4. package/dist/rules/content/eeat-signals.js.map +1 -1
  5. package/dist/rules/content/value-add.d.ts +8 -2
  6. package/dist/rules/content/value-add.d.ts.map +1 -1
  7. package/dist/rules/content/value-add.js +39 -48
  8. package/dist/rules/content/value-add.js.map +1 -1
  9. package/dist/rules/content/wikipedia-paraphrase.d.ts +12 -7
  10. package/dist/rules/content/wikipedia-paraphrase.d.ts.map +1 -1
  11. package/dist/rules/content/wikipedia-paraphrase.js +52 -13
  12. package/dist/rules/content/wikipedia-paraphrase.js.map +1 -1
  13. package/dist/rules/schema/consistency.d.ts.map +1 -1
  14. package/dist/rules/schema/consistency.js +16 -12
  15. package/dist/rules/schema/consistency.js.map +1 -1
  16. package/dist/rules/schema/json-ld-valid.d.ts.map +1 -1
  17. package/dist/rules/schema/json-ld-valid.js +8 -1
  18. package/dist/rules/schema/json-ld-valid.js.map +1 -1
  19. package/dist/rules/schema/required-fields.d.ts.map +1 -1
  20. package/dist/rules/schema/required-fields.js +47 -1
  21. package/dist/rules/schema/required-fields.js.map +1 -1
  22. package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -1
  23. package/dist/rules/spam/boilerplate-ratio.js +36 -22
  24. package/dist/rules/spam/boilerplate-ratio.js.map +1 -1
  25. package/dist/rules/spam/template-diversity.d.ts.map +1 -1
  26. package/dist/rules/spam/template-diversity.js +37 -2
  27. package/dist/rules/spam/template-diversity.js.map +1 -1
  28. package/dist/rules/tech/og-completeness.d.ts +8 -3
  29. package/dist/rules/tech/og-completeness.d.ts.map +1 -1
  30. package/dist/rules/tech/og-completeness.js +15 -7
  31. package/dist/rules/tech/og-completeness.js.map +1 -1
  32. package/package.json +1 -1
@@ -1,3 +1,16 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
+ /**
3
+ * Count how many of the 4 E-E-A-T signal categories the page satisfies.
4
+ *
5
+ * Categories:
6
+ * 1. About-page link — a same-host href containing /about
7
+ * 2. Author identity — any authorSignals field present
8
+ * 3. Published date — page.publishedDate set
9
+ * 4. Transparency text — "sources:", "references:", "last updated", etc.
10
+ * in page.contentText (NOT raw HTML, to avoid footer/JS false positives)
11
+ *
12
+ * Exported so value-add can reuse this without duplicating logic.
13
+ */
14
+ export declare function countSignalCategories(page: ParsedPage): number;
2
15
  export declare function eeatSignalsRule(pages: ParsedPage[]): RuleResult[];
3
16
  //# sourceMappingURL=eeat-signals.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"eeat-signals.d.ts","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAiC7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBjE"}
1
+ {"version":3,"file":"eeat-signals.d.ts","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAU7D;;;;;;;;;;;GAWG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAiC9D;AAED,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBjE"}
@@ -1,23 +1,55 @@
1
- const EEAT_HTML_PATTERNS = [
1
+ const EEAT_TEXT_PATTERNS = [
2
2
  /last\s+updated/i,
3
3
  /last\s+modified/i,
4
4
  /reviewed\s+by/i,
5
5
  /\bsources:/i,
6
6
  /\breferences:/i
7
7
  ];
8
- function countSignalCategories(page) {
8
+ /**
9
+ * Count how many of the 4 E-E-A-T signal categories the page satisfies.
10
+ *
11
+ * Categories:
12
+ * 1. About-page link — a same-host href containing /about
13
+ * 2. Author identity — any authorSignals field present
14
+ * 3. Published date — page.publishedDate set
15
+ * 4. Transparency text — "sources:", "references:", "last updated", etc.
16
+ * in page.contentText (NOT raw HTML, to avoid footer/JS false positives)
17
+ *
18
+ * Exported so value-add can reuse this without duplicating logic.
19
+ */
20
+ export function countSignalCategories(page) {
9
21
  let count = 0;
10
- if (page.resolvedHrefs.some((href) => /\/about\b/i.test(href))) {
22
+ // 1. About-page: same-host link with /about in the path
23
+ const pageHost = (() => {
24
+ try {
25
+ return new URL(page.url).host;
26
+ }
27
+ catch {
28
+ return "";
29
+ }
30
+ })();
31
+ if (page.resolvedHrefs.some((href) => {
32
+ try {
33
+ const u = new URL(href);
34
+ return u.host === pageHost && /\/about\b/i.test(u.pathname);
35
+ }
36
+ catch {
37
+ return false;
38
+ }
39
+ })) {
11
40
  count += 1;
12
41
  }
42
+ // 2. Author identity
13
43
  const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
14
44
  if (metaAuthor !== "" || schemaAuthor || bylineElement || relAuthorLink) {
15
45
  count += 1;
16
46
  }
47
+ // 3. Published date
17
48
  if (page.publishedDate) {
18
49
  count += 1;
19
50
  }
20
- if (EEAT_HTML_PATTERNS.some((pattern) => pattern.test(page.html))) {
51
+ // 4. Transparency text — check contentText (parsed visible text), not raw HTML
52
+ if (EEAT_TEXT_PATTERNS.some((pattern) => pattern.test(page.contentText))) {
21
53
  count += 1;
22
54
  }
23
55
  return count;
@@ -1 +1 @@
1
- {"version":3,"file":"eeat-signals.js","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAEA,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,qBAAqB,CAAC,IAAgB;IAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QAC/D,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa,EAAE,CAAC;QACxE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QACvB,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QAClE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAExE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxD,OAAO,CAAC;gBACN,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,OAAO,OAAO,CAAC,MAAM,mGAAmG;gBACjI,GAAG,EAAE,oHAAoH;gBACzH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;aAC9C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,MAAe;QACzB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;QAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,0GAA0G;KAChH,CAAC,CAAC,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"eeat-signals.js","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAEA,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAgB;IACpD,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,wDAAwD;IACxD,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YAAC,OAAO,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,EAAE,CAAC;QAAC,CAAC;IAC7D,CAAC,CAAC,EAAE,CAAC;IACL,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;QACnC,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;YACxB,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAC9D,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,KAAK,CAAC;QAAC,CAAC;IAC3B,CAAC,CAAC,EAAE,CAAC;QACH,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,qBAAqB;IACrB,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa,EAAE,CAAC;QACxE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,oBAAoB;IACpB,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QACvB,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,+EAA+E;IAC/E,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,EAAE,CAAC;QACzE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAExE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxD,OAAO,CAAC;gBACN,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,OAAO,OAAO,CAAC,MAAM,mGAAmG;gBACjI,GAAG,EAAE,oHAAoH;gBACzH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;aAC9C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,MAAe;QACzB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;Q
AC3E,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,0GAA0G;KAChH,CAAC,CAAC,CAAC;AACN,CAAC"}
@@ -6,8 +6,14 @@ import type { ParsedPage, RuleResult } from "../../types.js";
6
6
  * Aggregates 7 per-page signal scores (originality, freshness, facts,
7
7
  * E-E-A-T, translation, cliché-reuse, wikipedia-paraphrase) into a
8
8
  * single 0-1 quality score. Each signal weighted equally at 1/7 ≈ 14.3%.
9
- * Fires ONE critical/error finding per page when score < 0.5
10
- * (critical < 0.3, error otherwise).
9
+ *
10
+ * E-E-A-T sub-score is a continuous fraction (categoriesPresent/4), not
11
+ * a 3-step value. Reuses countSignalCategories from eeat-signals to avoid
12
+ * logic drift between the two rules.
13
+ *
14
+ * Fires ONE finding per page when score < 0.5:
15
+ * - warning (score ∈ [0.35, 0.5)) — borderline, low confidence
16
+ * - error (score < 0.35) — clearly low value-add
11
17
  */
12
18
  export declare function valueAddRule(pages: ParsedPage[], findings: RuleResult[]): RuleResult[];
13
19
  //# sourceMappingURL=value-add.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AAuIvE;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAmBtF"}
1
+ {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AA0HvE;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoBtF"}
@@ -1,26 +1,6 @@
1
1
  import { hasAuthoritativeCitation } from "../../algorithms/fact-extraction.js";
2
+ import { countSignalCategories } from "./eeat-signals.js";
2
3
  const RULE_ID = "content/value-add";
3
- const EEAT_HTML_PATTERNS = [
4
- /last\s+updated/i,
5
- /last\s+modified/i,
6
- /reviewed\s+by/i,
7
- /\bsources:/i,
8
- /\breferences:/i,
9
- ];
10
- function countEeatCategories(page) {
11
- let count = 0;
12
- if (page.resolvedHrefs.some((h) => /\/about\b/i.test(h)))
13
- count += 1;
14
- const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
15
- if (metaAuthor !== "" || schemaAuthor || bylineElement || relAuthorLink)
16
- count += 1;
17
- if (page.publishedDate)
18
- count += 1;
19
- if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html)) ||
20
- hasAuthoritativeCitation(page.resolvedHrefs, page.url))
21
- count += 1;
22
- return count;
23
- }
24
4
  function computeSignals(page, allFindings) {
25
5
  const pageFindings = allFindings.filter((f) => f.pageUrl === page.url);
26
6
  // Originality: 1.0 if regurgitated-content doesn't fire, 0.0 if it does
@@ -50,18 +30,14 @@ function computeSignals(page, allFindings) {
50
30
  else {
51
31
  facts = 0.0;
52
32
  }
53
- // E-E-A-T: based on signal count
54
- const eeatCount = countEeatCategories(page);
55
- let eeat;
56
- if (eeatCount >= 4) {
57
- eeat = 1.0;
58
- }
59
- else if (eeatCount >= 2) {
60
- eeat = 0.5;
61
- }
62
- else {
63
- eeat = 0.0;
64
- }
33
+ // E-E-A-T: continuous fraction of 4 categories present.
34
+ // Reuses countSignalCategories from eeat-signals (no duplicate logic).
35
+ // Also grants the "sources" credit for authoritative outbound citations.
36
+ const eeatCount = countSignalCategories(page);
37
+ const hasCitation = hasAuthoritativeCitation(page.resolvedHrefs, page.url);
38
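+ // Add one citation credit while fewer than 4 categories are counted, then clamp to 4. NOTE: the credit is added even when the transparency-text ("sources") category already matched, so sources can be counted twice below the cap.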
39
+ const effectiveEeatCount = Math.min(4, eeatCount + (hasCitation && eeatCount < 4 ? 1 : 0));
40
+ const eeat = effectiveEeatCount / 4;
65
41
  // Translation: 1.0 unless translation-no-op lists this page
66
42
  const hasTranslationNoOp = allFindings.some((f) => f.ruleId === "content/translation-no-op" &&
67
43
  (f.pageUrl === page.url || (f.relatedUrls ?? []).includes(page.url)));
@@ -69,13 +45,7 @@ function computeSignals(page, allFindings) {
69
45
  // Cliché reuse (signal 6): 1.0 if common-phrase-reuse doesn't fire, 0.0 if it does
70
46
  const hasClicheReuse = pageFindings.some((f) => f.ruleId === "content/common-phrase-reuse");
71
47
  const clicheReuse = hasClicheReuse ? 0.0 : 1.0;
72
- // Wikipedia paraphrase (signal 7, v0.5.14): 1.0 if wikipedia-paraphrase doesn't
73
- // fire on this page, 0.0 if it does. The rule fires at warning/low when
74
- // page text overlaps ≥40% with the bundled trigram corpus — a real signal
75
- // for "content lifted from Wikipedia," orthogonal to the other 6 originality
76
- // proxies. Adding it shifts each signal's weight from 1/6 (16.7%) to 1/7
77
- // (14.3%) — boundary cases at score=0.30 and score=0.50 may shift by
78
- // ±0.024 per signal, which is below the granularity of severity bands.
48
+ // Wikipedia paraphrase (signal 7): 1.0 if wikipedia-paraphrase doesn't fire, 0.0 if it does
79
49
  const hasWikipediaParaphrase = pageFindings.some((f) => f.ruleId === "content/wikipedia-paraphrase");
80
50
  const wikipediaParaphrase = hasWikipediaParaphrase ? 0.0 : 1.0;
81
51
  return { originality, freshness, facts, eeat, translation, clicheReuse, wikipediaParaphrase };
@@ -92,10 +62,24 @@ function meanScore(signals) {
92
62
  ];
93
63
  return values.reduce((a, b) => a + b, 0) / values.length;
94
64
  }
95
- function severityForScore(score) {
96
- if (score < 0.3)
97
- return "critical";
98
- return "error";
65
+ /**
66
+ * Two-band severity for the composite score:
67
+ * - score in [0.35, 0.5) → "warning" (borderline: page is weak but not egregiously thin)
68
+ * - score < 0.35 → "error" (clearly low value-add)
69
+ *
70
+ * Confidence scales with distance from the fire threshold:
71
+ * - score < 0.2 → "high"
72
+ * - score in [0.2, 0.35) → "medium"
73
+ * - score in [0.35, 0.5) → "low" (borderline warning)
74
+ */
75
+ function severityAndConfidence(score) {
76
+ if (score >= 0.35) {
77
+ return { severity: "warning", confidence: "low" };
78
+ }
79
+ if (score < 0.2) {
80
+ return { severity: "error", confidence: "high" };
81
+ }
82
+ return { severity: "error", confidence: "medium" };
99
83
  }
100
84
  function buildMessage(page, score, signals) {
101
85
  const pct = (v) => `${(v * 100).toFixed(0)}%`;
@@ -119,8 +103,14 @@ function buildMessage(page, score, signals) {
119
103
  * Aggregates 7 per-page signal scores (originality, freshness, facts,
120
104
  * E-E-A-T, translation, cliché-reuse, wikipedia-paraphrase) into a
121
105
  * single 0-1 quality score. Each signal weighted equally at 1/7 ≈ 14.3%.
122
- * Fires ONE critical/error finding per page when score < 0.5
123
- * (critical < 0.3, error otherwise).
106
+ *
107
+ * E-E-A-T sub-score is a continuous fraction (categoriesPresent/4), not
108
+ * a 3-step value. Reuses countSignalCategories from eeat-signals to avoid
109
+ * logic drift between the two rules.
110
+ *
111
+ * Fires ONE finding per page when score < 0.5:
112
+ * - warning (score ∈ [0.35, 0.5)) — borderline, low confidence
113
+ * - error (score < 0.35) — clearly low value-add
124
114
  */
125
115
  export function valueAddRule(pages, findings) {
126
116
  const results = [];
@@ -129,10 +119,11 @@ export function valueAddRule(pages, findings) {
129
119
  const score = meanScore(signals);
130
120
  if (score >= 0.5)
131
121
  continue;
122
+ const { severity, confidence } = severityAndConfidence(score);
132
123
  results.push({
133
124
  ruleId: RULE_ID,
134
- severity: severityForScore(score),
135
- confidence: "medium",
125
+ severity,
126
+ confidence,
136
127
  message: buildMessage(page, score, signals),
137
128
  fix: "Add proprietary content (original analysis, primary-source data, expert commentary, original imagery) to lift the value-add score above 0.5. Score is a composite — improve any underweight signal.",
138
129
  pageUrl: page.url,
@@ -1 +1 @@
1
- {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAE/E,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAEpC,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,mBAAmB,CAAC,IAAgB;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IACrE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACpF,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACnC,IACE,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC;QACtD,KAAK,IAAI,CAAC,CAAC;IACb,OAAO,KAAK,CAAC;AACf,CAAC;AAYD,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI
,CAAC,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;IAED,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,gFAAgF;IAChF,wEAAwE;IACxE,0EAA0E;IAC1E,6EAA6E;IAC7E,yEAAyE;IACzE,qEAAqE;IACrE,uEAAuE;IACvE,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,UAAU,CAAC;IACnC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CA
AC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAAU,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,gBAAgB,CAAC,KAAK,CAAC;YACjC,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAE1D,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAYpC,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,wDAAwD;IACxD,uEAAuE;IACvE,yEAAyE;IACzE,MAAM,SAAS,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3E,0GAA0G;IAC1G,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,WAAW,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3F,MAAM,IAAI,GAAG,kBAAkB,GAAG,CAAC,CAAC;IAEpC,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAA
c,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,4FAA4F;IAC5F,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,qBAAqB,CAAC,KAAa;IAC1C,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;IACpD,CAAC;IACD,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC;QAChB,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IACnD,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AACrD,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAA
U,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC9D,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -1,15 +1,20 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
2
  /**
3
- * content/wikipedia-paraphrase — standalone originality signal (v0.5.14).
3
+ * content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
4
4
  *
5
- * Detects pages whose contentText has high trigram overlap with the curated
6
- * Wikipedia reference corpus. High overlap indicates paraphrased or verbatim
7
- * encyclopedic content that adds no proprietary value.
5
+ * Detects pages whose contentText has unusually high trigram overlap with the
6
+ * bundled Wikipedia reference corpus. This is a weak, advisory signal only:
7
+ * trigram overlap cannot distinguish actual paraphrase from legitimate topical
8
+ * proximity (e.g. a legal-template page naturally shares many encyclopedic
9
+ * trigrams with Wikipedia articles on the same topic).
8
10
  *
9
- * Composite integration into content/value-add is deferred to v0.5.15 to
10
- * avoid test-math recalibration in this release.
11
+ * Two guards reduce false positives:
12
+ * 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
13
+ * words) are skipped entirely — bloom noise alone dominates on short pages.
14
+ * 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
15
+ * (~5%) and typical topical-proximity baseline.
11
16
  *
12
- * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.4).
17
+ * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
13
18
  */
14
19
  export declare function wikipediaParaphraseRule(pages: ParsedPage[]): RuleResult[];
15
20
  //# sourceMappingURL=wikipedia-paraphrase.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"wikipedia-paraphrase.d.ts","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAM7D;;;;;;;;;;;GAWG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBzE"}
1
+ {"version":3,"file":"wikipedia-paraphrase.d.ts","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAkC7D;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgCzE"}
@@ -1,23 +1,61 @@
1
1
  import { wikipediaParaphraseRate } from "../../algorithms/wikipedia-paraphrase.js";
2
2
  const RULE_ID = "content/wikipedia-paraphrase";
3
- const THRESHOLD = 0.4;
4
3
  /**
5
- * content/wikipedia-paraphrase — standalone originality signal (v0.5.14).
4
+ * ponytail: MIN_TRIGRAM_COUNT = 200
6
5
  *
7
- * Detects pages whose contentText has high trigram overlap with the curated
8
- * Wikipedia reference corpus. High overlap indicates paraphrased or verbatim
9
- * encyclopedic content that adds no proprietary value.
6
+ * The bloom filter has a ~5% per-query false-positive rate. On a page with
7
+ * N trigrams the expected bloom-noise hit count is 0.05 * N. For a short page
8
+ * (~48 trigrams) that alone produces ~2.4 expected FP hits; with a threshold
9
+ * of 40% (19/48) the noise alone can exceed the threshold on short pages.
10
10
  *
11
- * Composite integration into content/value-add is deferred to v0.5.15 to
12
- * avoid test-math recalibration in this release.
11
+ * Setting a floor of 200 trigrams (~202 words) means bloom noise contributes
12
+ * an expected 10 / 200 = 5% of trigrams, far below the raised THRESHOLD, so noise
12
+ * is extremely unlikely to trigger the rule on its own.
14
+ */
15
+ const MIN_TRIGRAM_COUNT = 200;
16
+ /**
17
+ * ponytail: THRESHOLD = 0.55
18
+ *
19
+ * Raised from 0.40 to 0.55 to account for the bloom filter's ~5% per-query
20
+ * FP rate and the "topic overlap" effect: legal/medical/geography pSEO pages
21
+ * share many encyclopedic trigrams ("the united states", "in the state of")
22
+ * purely through topical proximity, not paraphrase. A 55% overlap is
23
+ * substantially above both the noise floor (~5%) and the expected topic-
24
+ * overlap baseline, making the signal meaningfully indicative of genuine
25
+ * encyclopedic reuse. At this level the rule remains advisory (confidence:
26
+ * "low") because trigram overlap cannot distinguish paraphrase from topic
27
+ * proximity — it is a weak signal, not a verdict.
28
+ */
29
+ const THRESHOLD = 0.55;
30
+ /**
31
+ * content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
32
+ *
33
+ * Detects pages whose contentText has unusually high trigram overlap with the
34
+ * bundled Wikipedia reference corpus. This is a weak, advisory signal only:
35
+ * trigram overlap cannot distinguish actual paraphrase from legitimate topical
36
+ * proximity (e.g. a legal-template page naturally shares many encyclopedic
37
+ * trigrams with Wikipedia articles on the same topic).
13
38
  *
14
- * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.4).
39
+ * Two guards reduce false positives:
40
+ * 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
41
+ * words) are skipped entirely — bloom noise alone dominates on short pages.
42
+ * 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
43
+ * (~5%) and typical topical-proximity baseline.
44
+ *
45
+ * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
15
46
  */
16
47
  export function wikipediaParaphraseRule(pages) {
17
48
  const findings = [];
18
49
  for (const page of pages) {
19
50
  if (!page.contentText || page.contentText.trim().length === 0)
20
51
  continue;
52
+ // Estimate trigram count without re-implementing extractTrigrams: count
53
+ // whitespace-separated tokens then subtract 2 (trigrams = tokens - 2).
54
+ // This is a cheap proxy; the algorithm file does the accurate extraction.
55
+ const tokenCount = page.contentText.trim().split(/\s+/).length;
56
+ const estimatedTrigrams = Math.max(0, tokenCount - 2);
57
+ if (estimatedTrigrams < MIN_TRIGRAM_COUNT)
58
+ continue;
21
59
  const rate = wikipediaParaphraseRate(page.contentText);
22
60
  if (rate < THRESHOLD)
23
61
  continue;
@@ -27,11 +65,12 @@ export function wikipediaParaphraseRule(pages) {
27
65
  severity: "warning",
28
66
  confidence: "low",
29
67
  pageUrl: page.url,
30
- message: `${page.url} contains content with high trigram overlap (${pct}%) against the Wikipedia ` +
31
- `reference corpus. May indicate paraphrased or copy-pasted Wikipedia content.`,
32
- fix: "Replace borrowed encyclopedic phrasing with original analysis specific to this page's " +
33
- "subject. Even if attributed, high paraphrase rates correlate with low value-add by " +
34
- "SpamBrain's helpful-content metric.",
68
+ message: `${page.url} has high trigram overlap (${pct}%) with the bundled Wikipedia ` +
69
+ `reference corpus. This is an advisory signal trigram overlap can reflect ` +
70
+ `topical proximity as well as copied content and cannot distinguish the two.`,
71
+ fix: "Review for borrowed encyclopedic phrasing and replace with original analysis " +
72
+ "specific to this page's subject. Even if attributed, high paraphrase rates " +
73
+ "correlate with low value-add by SpamBrain's helpful-content metric.",
35
74
  });
36
75
  }
37
76
  return findings;
@@ -1 +1 @@
1
- {"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAC/C,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QACxE,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,IAAI,IAAI,GAAG,SAAS;YAAE,SAAS;QAC/B,MAAM,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,gDAAgD,GAAG,2BAA2B;gBACzF,8EAA8E;YAChF,GAAG,EACD,wFAAwF;gBACxF,qFAAqF;gBACrF,qCAAqC;SACxC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;;;;;GAWG;AACH,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;;;;;;;;;;GAYG;AACH,MAAM,SAAS,GAAG,IAAI,CAAC;AAEvB;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAExE,wEAAwE;QACxE,uEAAuE;QACvE,0EAA0E;QAC1E,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;QACtD,IAAI,iBAAiB,GAAG,iBAAiB;YAAE,SAAS;QAEpD,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,IAAI,IAAI,GAAG,SAAS;YAAE,SAAS;QAE/B,MAAM,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,8BAA8B,GAAG,gCAAgC;gBAC5E,6EAA6E;gBAC7E,6EAA6E;YAC/E,GAAG,EACD,+EAA+E;gBAC/E,6EAA6E;gBAC7E,qEAAqE;SACxE,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA6DvE"}
1
+ {"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgEvE"}
@@ -28,27 +28,31 @@ export function schemaConsistencyRule(pages) {
28
28
  }
29
29
  clustersBySignature.get(sig).push({ url: page.url, types });
30
30
  }
31
- // Within each cluster of ≥2 pages, check whether all pages use the same @type set.
31
+ // Within each cluster of ≥2 pages, fire only when pages carry DIFFERENT @type
32
+ // SETS. A single page legitimately emits several JSON-LD blocks (e.g. Article +
33
+ // FAQPage + Organization) — that multi-type set is not an inconsistency. The
34
+ // problem is two pages on the SAME template disagreeing on their type set
35
+ // (e.g. one Article, one NewsArticle). Comparing per-page set signatures (not
36
+ // the union) avoids the false positive where every page shares the same set.
37
+ const setSignature = (types) => Array.from(types).sort().join("+");
32
38
  for (const members of clustersBySignature.values()) {
33
39
  if (members.length < 2) {
34
40
  continue;
35
41
  }
36
- const allTypesInCluster = new Set();
37
- for (const { types } of members) {
38
- for (const t of types) {
39
- allTypesInCluster.add(t);
40
- }
41
- }
42
- if (allTypesInCluster.size <= 1) {
43
- continue;
42
+ const distinctSetSignatures = new Set(members.map((m) => setSignature(m.types)));
43
+ if (distinctSetSignatures.size <= 1) {
44
+ continue; // all pages in this template cluster agree on their @type set
44
45
  }
45
- const typeList = Array.from(allTypesInCluster).sort().join(", ");
46
+ const variants = Array.from(distinctSetSignatures)
47
+ .sort()
48
+ .map((s) => `[${s.split("+").join(", ")}]`)
49
+ .join(" vs ");
46
50
  findings.push({
47
51
  ruleId: "schema/consistency",
48
52
  severity: "info",
49
- message: `Template pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
53
+ message: `Template pages disagree on schema @type (${variants}). Use a consistent @type across pages that share the same template structure.`,
50
54
  relatedUrls: members.map((m) => m.url),
51
- fix: `Use a consistent @type across all pages that share the same template structure.`
55
+ fix: `Use a consistent @type (or set of @types) across all pages that share the same template structure.`
52
56
  });
53
57
  }
54
58
  return findings;
@@ -1 +1 @@
1
- {"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,mFAAmF;IACnF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAU,CAAC;QAC5C,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;YAChC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,IAAI,iBAAiB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,0CAA0C,QAAQ,6DAA6D;YACxH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,iFAAiF;SACvF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,8EAA8E;IAC9E,gFAAgF;IAChF,6EAA6E;IAC7E,0EAA0E;IAC1E,8EAA8E;IAC9E,6EAA6E;IAC7E,MAAM,YAAY,GAAG,CAAC,KAAkB,EAAU,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,qBAAqB,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,qBAAqB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YACpC,SAAS,CAAC,8DAA8D;QAC1E,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;aAC/C,IAAI,EAAE;aACN,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;aAC1C,IAAI,CAAC,MAAM,CAAC,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,4CAA4C,QAAQ,g
FAAgF;YAC7I,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,oGAAoG;SAC1G,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAqDjE"}
1
+ {"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA8DjE"}
@@ -30,7 +30,14 @@ export function jsonLdValidRule(pages) {
30
30
  }
31
31
  if (obj["@type"] !== undefined) {
32
32
  const typeValue = obj["@type"];
33
- if (typeof typeValue !== "string" || typeValue.trim() === "") {
33
+ const typeIsValid =
34
+ // string: non-empty non-whitespace
35
+ (typeof typeValue === "string" && typeValue.trim() !== "") ||
36
+ // array: non-empty, every element is a non-empty non-whitespace string
37
+ (Array.isArray(typeValue) &&
38
+ typeValue.length > 0 &&
39
+ typeValue.every((t) => typeof t === "string" && t.trim() !== ""));
40
+ if (!typeIsValid) {
34
41
  findings.push({
35
42
  ruleId: "schema/json-ld-valid",
36
43
  severity: "error",
@@ -1 +1 @@
1
- {"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;oBAC7D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,MAAM,WAAW;gBACf,mCAAmC;gBACnC,CAAC,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;oBAC1D,uEAAuE;oBACvE,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC;wBACvB,SAAS,CAAC,MAAM,GAAG,CAAC;wBACnB,SAAuB,CAAC,KAAK,CAC5B,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAChD,CAAC,CAAC;gBACP,IAAI,CAAC,WAAW,EAAE,CAAC;oBACjB,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAqB7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoDpE"}
1
+ {"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyD7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAwDpE"}
@@ -3,6 +3,47 @@ const REQUIRED_FIELDS = {
3
3
  Product: ["name"],
4
4
  FAQPage: ["mainEntity"]
5
5
  };
6
+ /**
7
+ * Returns true when a field value should be treated as "missing" (junk/empty).
8
+ * Accepts non-empty strings, non-empty arrays, and non-empty objects as present.
9
+ */
10
+ function isMissing(value) {
11
+ if (value === undefined || value === null)
12
+ return true;
13
+ if (typeof value === "string")
14
+ return value.trim() === "";
15
+ if (Array.isArray(value))
16
+ return value.length === 0;
17
+ if (typeof value === "object")
18
+ return Object.keys(value).length === 0;
19
+ // booleans (true or false) and numbers (including 0 and NaN) always count as present
20
+ if (typeof value === "boolean" || typeof value === "number")
21
+ return false;
22
+ return true;
23
+ }
24
+ /**
25
+ * Article `author` is valid when it is:
26
+ * - a non-empty string, OR
27
+ * - an object with a non-empty `name` property (Person/Organization), OR
28
+ * - a non-empty array of the above (co-authored articles — Schema.org allows
29
+ * `author` to be a list). Present if at least one element is a valid author.
30
+ * Returns true when the author value is missing/junk.
31
+ */
32
+ function isAuthorMissing(value) {
33
+ if (value === undefined || value === null)
34
+ return true;
35
+ if (typeof value === "string")
36
+ return value.trim() === "";
37
+ if (Array.isArray(value)) {
38
+ return value.length === 0 || value.every((item) => isAuthorMissing(item));
39
+ }
40
+ if (typeof value === "object") {
41
+ const obj = value;
42
+ return typeof obj.name !== "string" || obj.name.trim() === "";
43
+ }
44
+ // booleans, numbers — not a valid author shape
45
+ return true;
46
+ }
6
47
  function hasPrice(obj) {
7
48
  if (obj.price !== undefined && obj.price !== null && obj.price !== "") {
8
49
  return true;
@@ -37,7 +78,12 @@ export function requiredFieldsRule(pages) {
37
78
  }
38
79
  const missing = [];
39
80
  for (const field of required) {
40
- if (obj[field] === undefined || obj[field] === null || obj[field] === "") {
81
+ if (field === "author" && schemaType === "Article") {
82
+ if (isAuthorMissing(obj[field])) {
83
+ missing.push(field);
84
+ }
85
+ }
86
+ else if (isMissing(obj[field])) {
41
87
  missing.push(field);
42
88
  }
43
89
  }
@@ -1 +1 @@
1
- {"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC;oBACzE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,
EAAE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF;;;GAGG;AACH,SAAS,SAAS,CAAC,KAAc;IAC/B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,KAAe,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;IAChF,6DAA6D;IAC7D,IAAI,OAAO,KAAK,KAAK,SAAS,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC1E,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,OAAO,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,CAAC;IACD,+CAA+C;IAC/C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,
KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,KAAK,KAAK,QAAQ,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;oBACnD,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBAChC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;qBAAM,IAAI,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAS7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CAoDxF"}
1
+ {"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAa7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CA8DxF"}
@@ -4,11 +4,16 @@ function extractTextBlocks(contentText) {
4
4
  .map((block) => block.trim().toLowerCase())
5
5
  .filter((block) => block.length > 20);
6
6
  }
7
+ function wordCount(block) {
8
+ return block.split(/\s+/).length;
9
+ }
7
10
  export function boilerplateRatioRule(pages, maxRatio) {
8
11
  if (pages.length < 2) {
9
12
  return [];
10
13
  }
14
+ const N = pages.length;
11
15
  const pageBlocks = pages.map((page) => extractTextBlocks(page.contentText));
16
+ // Build per-block document frequency (how many pages contain each block).
12
17
  const blockFrequency = new Map();
13
18
  for (const blocks of pageBlocks) {
14
19
  const unique = new Set(blocks);
@@ -16,34 +21,43 @@ export function boilerplateRatioRule(pages, maxRatio) {
16
21
  blockFrequency.set(block, (blockFrequency.get(block) ?? 0) + 1);
17
22
  }
18
23
  }
19
- const skeletonCutoff = Math.max(2, Math.floor(pages.length * 0.8) + 1);
20
- const skeleton = new Set(Array.from(blockFrequency.entries())
21
- .filter(([, count]) => count >= skeletonCutoff)
22
- .map(([block]) => block));
23
- if (skeleton.size === 0) {
24
- return [];
25
- }
24
+ // Continuous weight, min-max normalized over document frequency: a block
25
+ // unique to ONE page is not boilerplate at all (weight 0); a block on EVERY
26
+ // page is full boilerplate (weight 1); mid-frequency blocks scale linearly
27
+ // between. (freq-1)/(N-1) not freq/N — so unique content never inflates the
28
+ // ratio (which freq/N does, giving every block at least 1/N). N>=2 here, so
29
+ // N-1>=1: no division by zero. Removes the binary skeleton cliff entirely.
30
+ const blockWeight = (block) => {
31
+ const freq = blockFrequency.get(block) ?? 0;
32
+ return (freq - 1) / (N - 1);
33
+ };
26
34
  const findings = [];
27
35
  pages.forEach((page, index) => {
28
36
  const blocks = pageBlocks[index];
29
- if (blocks.length === 0) {
37
+ if (blocks.length === 0)
30
38
  return;
31
- }
32
- const totalWords = blocks.reduce((sum, b) => sum + b.split(/\s+/).length, 0);
33
- const boilerplateWords = blocks
34
- .filter((b) => skeleton.has(b))
35
- .reduce((sum, b) => sum + b.split(/\s+/).length, 0);
39
+ const totalWords = blocks.reduce((sum, b) => sum + wordCount(b), 0);
36
40
  if (totalWords === 0)
37
41
  return;
38
- const ratio = boilerplateWords / totalWords;
39
- if (ratio > maxRatio) {
40
- findings.push({
41
- ruleId: "spam/boilerplate-ratio",
42
- severity: "error",
43
- message: `${page.url} has boilerplate ratio ${(ratio * 100).toFixed(1)}% (max ${(maxRatio * 100).toFixed(1)}%).`,
44
- fix: `${(ratio * 100).toFixed(1)}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${(maxRatio * 100).toFixed(1)}%.`
45
- });
46
- }
42
+ // Weighted boilerplate word count: each block contributes (weight * its word count).
43
+ const weightedBoilerplateWords = blocks.reduce((sum, b) => {
44
+ return sum + blockWeight(b) * wordCount(b);
45
+ }, 0);
46
+ const ratio = weightedBoilerplateWords / totalWords;
47
+ if (ratio <= maxRatio)
48
+ return;
49
+ // 2-band severity: clearly over (≥ threshold + 0.1) → error; just over → warning.
50
+ const clearlyOver = ratio >= maxRatio + 0.1;
51
+ const severity = clearlyOver ? "error" : "warning";
52
+ const confidence = clearlyOver ? "high" : "medium";
53
+ findings.push({
54
+ ruleId: "spam/boilerplate-ratio",
55
+ severity,
56
+ confidence,
57
+ pageUrl: page.url,
58
+ message: `${page.url} has boilerplate ratio ${(ratio * 100).toFixed(1)}% (max ${(maxRatio * 100).toFixed(1)}%).`,
59
+ fix: `${(ratio * 100).toFixed(1)}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${(maxRatio * 100).toFixed(1)}%.`
60
+ });
47
61
  });
48
62
  return findings;
49
63
  }
@@ -1 +1 @@
1
- {"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IACvE,MAAM,QAAQ,GAAG,IAAI,GAAG,CACtB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;SACjC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,KAAK,IAAI,cAAc,CAAC;SAC9C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAC3B,CAAC;IAEF,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO;QACT,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC7E,MAAM,gBAAgB,GAAG,MAAM;aAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aAC9B,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAEtD,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,MAAM,KAAK,GAAG,gBAAgB,GAAG,UAAU,CAAC;QAC5C,IAAI,KAAK
,GAAG,QAAQ,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,wBAAwB;gBAChC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;aACnQ,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;AACnC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,0EAA0E;IAC1E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,4EAA4E;IAC5E,2EAA2E;IAC3E,8EAA8E;IAC9E,4EAA4E;IAC5E,2EAA2E;IAC3E,MAAM,WAAW,GAAG,CAAC,KAAa,EAAU,EAAE;QAC5C,MAAM,IAAI,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEhC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpE,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,qFAAqF;QACrF,MAAM,wBAAwB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YACxD,OAAO,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC,EAAE,CAAC,CAAC,CAAC;QAEN,MAAM,KAAK,GAAG,wBAAwB,GAAG,UAAU,CAAC;QAEpD,IAAI,KAAK,IAAI,QAAQ;YAAE,OAAO;QAE9B,kFAAkF;QAClF,MAAM,WAAW,GAAG,KAAK,IAAI,QAAQ,GAAG,GAAG,CAAC;QA
C5C,MAAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;QACnD,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEnD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,wBAAwB;YAChC,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;SACnQ,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,UAAU,EAAE,CAmBd"}
1
+ {"version":3,"file":"template-diversity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AA8BzE,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,UAAU,EAAE,CA0Bd"}
@@ -1,17 +1,52 @@
1
+ /**
2
+ * Coarsen a structureSignature ("tag:count|tag:count|...") by bucketing each
3
+ * tag's count logarithmically. Pages that differ only by trivial chrome — one
4
+ * extra ad `<div>`, a conditional nav item — collapse to the SAME coarse
5
+ * signature, so a genuinely single-template site is no longer read as "diverse"
6
+ * from count noise (the false negative the exact-count fingerprint caused).
7
+ *
8
+ * The raw exact-count signature (parser.buildStructureSignature) is SHARED with
9
+ * spam/near-duplicate and spam/doorway-pattern and is deliberately left
10
+ * untouched — this coarsening is local to the diversity measure.
11
+ */
12
+ function coarsenSignature(signature) {
13
+ if (!signature)
14
+ return signature;
15
+ return signature
16
+ .split("|")
17
+ .map((pair) => {
18
+ const idx = pair.lastIndexOf(":");
19
+ if (idx < 0)
20
+ return pair;
21
+ const tag = pair.slice(0, idx);
22
+ const count = Number(pair.slice(idx + 1));
23
+ if (!Number.isFinite(count))
24
+ return pair;
25
+ // log2(count + 1) bucket: 0→0, 1-2→1, 3-6→2 … 31-62→5, 63-126→6. Trivial count
26
+ // differences land in the same bucket; an order-of-magnitude change does not.
27
+ return `${tag}:${Math.floor(Math.log2(count + 1))}`;
28
+ })
29
+ .join("|");
30
+ }
1
31
  export function templateDiversityRule(pages, minUniqueRatio) {
2
32
  if (pages.length === 0) {
3
33
  return [];
4
34
  }
5
- const unique = new Set(pages.map((page) => page.structureSignature)).size;
35
+ const unique = new Set(pages.map((page) => coarsenSignature(page.structureSignature))).size;
6
36
  const ratio = unique / pages.length;
7
37
  if (ratio >= minUniqueRatio) {
8
38
  return [];
9
39
  }
40
+ // Confidence band: a ratio far below the floor is a stronger single-template
41
+ // signal than one hovering just under it.
42
+ const confidence = ratio < minUniqueRatio / 2 ? "high" : "medium";
10
43
  return [
11
44
  {
12
45
  ruleId: "spam/template-diversity",
13
46
  severity: "warning",
14
- message: `Template diversity ratio is ${ratio.toFixed(2)} (min ${minUniqueRatio.toFixed(2)}).`,
47
+ confidence,
48
+ message: `Template diversity ratio is ${ratio.toFixed(2)} (min ${minUniqueRatio.toFixed(2)}); ` +
49
+ `the ${pages.length} pages collapse to ${unique} distinct structural shapes after ignoring minor chrome variation.`,
15
50
  fix: "Vary the HTML structure across pages. Add conditional sections, different layouts, or page-specific components. Identical-structure corpora are a primary scaled-content-abuse signal that the March 27, 2026 core update reinforced."
16
51
  }
17
52
  ];
@@ -1 +1 @@
1
- {"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1E,MAAM,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;QAC5B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO;QACL;YACE,MAAM,EAAE,yBAAyB;YACjC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,+BAA+B,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YAC9F,GAAG,EAAE,uOAAuO;SAC7O;KACF,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"template-diversity.js","sourceRoot":"","sources":["../../../src/rules/spam/template-diversity.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;GAUG;AACH,SAAS,gBAAgB,CAAC,SAAiB;IACzC,IAAI,CAAC,SAAS;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,SAAS;SACb,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QACzC,oEAAoE;QACpE,8EAA8E;QAC9E,OAAO,GAAG,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtD,CAAC,CAAC;SACD,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,cAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5F,MAAM,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;QAC5B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,6EAA6E;IAC7E,0CAA0C;IAC1C,MAAM,UAAU,GAAe,KAAK,GAAG,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE9E,OAAO;QACL;YACE,MAAM,EAAE,yBAAyB;YACjC,QAAQ,EAAE,SAAS;YACnB,UAAU;YACV,OAAO,EACL,+BAA+B,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACtF,OAAO,KAAK,CAAC,MAAM,sBAAsB,MAAM,oEAAoE;YACrH,GAAG,EAAE,uOAAuO;SAC7O;KACF,CAAC;AACJ,CAAC"}
@@ -6,9 +6,14 @@ import type { ParsedPage, RuleResult } from "../../types.js";
6
6
  *
7
7
  * Required: og:title, og:description, og:image.
8
8
  *
9
- * The rule was referenced in the v0.4.x README without ever shipping. The
10
- * 2026-05-03 blind-spot audit surfaced it as a tier-1 gap; this is the
11
- * v0.5.2 fix.
9
+ * Severity gradation:
10
+ * - warning: og:title or og:description is missing (core social-card identity
11
+ * fields that affect how a link appears in feeds and AI summaries).
12
+ * - info: only og:image is missing (cosmetic — the card still has a title
13
+ * and description; the missing image is low-priority).
14
+ *
15
+ * Presence check: a field is considered MISSING when it is absent, empty, or
16
+ * whitespace-only (value is trimmed before evaluation).
12
17
  */
13
18
  export declare function ogCompletenessRule(pages: ParsedPage[]): RuleResult[];
14
19
  //# sourceMappingURL=og-completeness.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"og-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAiBpE"}
1
+ {"version":3,"file":"og-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBpE"}
@@ -5,25 +5,33 @@
5
5
  *
6
6
  * Required: og:title, og:description, og:image.
7
7
  *
8
- * The rule was referenced in the v0.4.x README without ever shipping. The
9
- * 2026-05-03 blind-spot audit surfaced it as a tier-1 gap; this is the
10
- * v0.5.2 fix.
8
+ * Severity gradation:
9
+ * - warning: og:title or og:description is missing (core social-card identity
10
+ * fields that affect how a link appears in feeds and AI summaries).
11
+ * - info: only og:image is missing (cosmetic — the card still has a title
12
+ * and description; the missing image is low-priority).
13
+ *
14
+ * Presence check: a field is considered MISSING when it is absent, empty, or
15
+ * whitespace-only (value is trimmed before evaluation).
11
16
  */
12
17
  export function ogCompletenessRule(pages) {
13
18
  const findings = [];
14
19
  for (const page of pages) {
15
20
  const missing = [];
16
- if (!page.og.title)
21
+ if (!page.og.title.trim())
17
22
  missing.push("og:title");
18
- if (!page.og.description)
23
+ if (!page.og.description.trim())
19
24
  missing.push("og:description");
20
- if (!page.og.image)
25
+ if (!page.og.image.trim())
21
26
  missing.push("og:image");
22
27
  if (missing.length === 0)
23
28
  continue;
29
+ const missingCore = missing.some((f) => f === "og:title" || f === "og:description");
30
+ const severity = missingCore ? "warning" : "info";
24
31
  findings.push({
25
32
  ruleId: "tech/og-completeness",
26
- severity: "warning",
33
+ severity,
34
+ confidence: missingCore ? "high" : "medium",
27
35
  message: `${page.url} is missing Open Graph tags: ${missing.join(", ")}.`,
28
36
  pageUrl: page.url,
29
37
  fix: `Add the missing meta tags inside <head>: ${missing.map((tag) => `<meta property="${tag}" content="...">`).join(" ")}.`,
@@ -1 +1 @@
1
- {"version":3,"file":"og-completeness.js","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;GAUG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK;YAAE,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,WAAW;YAAE,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QACzD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK;YAAE,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QACnC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,gCAAgC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACzE,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,4CAA4C,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,mBAAmB,GAAG,kBAAkB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG;SAC7H,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"og-completeness.js","sourceRoot":"","sources":["../../../src/rules/tech/og-completeness.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACpD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAChE,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE;YAAE,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACpD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEnC,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,UAAU,IAAI,CAAC,KAAK,gBAAgB,CAAC,CAAC;QACpF,MAAM,QAAQ,GAA2B,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC;QAE1E,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ;YACR,UAAU,EAAE,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ;YAC3C,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,gCAAgC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACzE,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,4CAA4C,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,mBAAmB,GAAG,kBAAkB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG;SAC7H,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pseolint/core",
3
- "version": "0.7.1",
3
+ "version": "0.7.2",
4
4
  "description": "Programmatic SEO audit engine — 32 rules across 4 categories (integrity, discoverability, citation, data) for SpamBrain risk + AI Overview citability. v0.4 verdict ladder + site classifier.",
5
5
  "license": "MIT",
6
6
  "author": "Ouranos Labs <contact@ouranos-labs.dev>",