@pseolint/core 0.6.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +3 -3
  2. package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
  3. package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
  4. package/dist/algorithms/authority/commoncrawl.js +17 -0
  5. package/dist/algorithms/authority/commoncrawl.js.map +1 -0
  6. package/dist/algorithms/authority/openpagerank.d.ts +19 -0
  7. package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
  8. package/dist/algorithms/authority/openpagerank.js +42 -0
  9. package/dist/algorithms/authority/openpagerank.js.map +1 -0
  10. package/dist/algorithms/authority/provider.d.ts +16 -0
  11. package/dist/algorithms/authority/provider.d.ts.map +1 -0
  12. package/dist/algorithms/authority/provider.js +24 -0
  13. package/dist/algorithms/authority/provider.js.map +1 -0
  14. package/dist/algorithms/auto-entity-mask.d.ts +19 -0
  15. package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
  16. package/dist/algorithms/auto-entity-mask.js +102 -0
  17. package/dist/algorithms/auto-entity-mask.js.map +1 -0
  18. package/dist/algorithms/example-regions.d.ts +22 -0
  19. package/dist/algorithms/example-regions.d.ts.map +1 -0
  20. package/dist/algorithms/example-regions.js +32 -0
  21. package/dist/algorithms/example-regions.js.map +1 -0
  22. package/dist/algorithms/fact-extraction.d.ts +46 -0
  23. package/dist/algorithms/fact-extraction.d.ts.map +1 -0
  24. package/dist/algorithms/fact-extraction.js +223 -0
  25. package/dist/algorithms/fact-extraction.js.map +1 -0
  26. package/dist/auditor.d.ts.map +1 -1
  27. package/dist/auditor.js +55 -9
  28. package/dist/auditor.js.map +1 -1
  29. package/dist/enrich-findings.d.ts.map +1 -1
  30. package/dist/enrich-findings.js +9 -8
  31. package/dist/enrich-findings.js.map +1 -1
  32. package/dist/index.d.ts +11 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +9 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/origin-preflight.d.ts +89 -0
  37. package/dist/origin-preflight.d.ts.map +1 -0
  38. package/dist/origin-preflight.js +93 -0
  39. package/dist/origin-preflight.js.map +1 -0
  40. package/dist/rule-references.d.ts.map +1 -1
  41. package/dist/rule-references.js +1 -0
  42. package/dist/rule-references.js.map +1 -1
  43. package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
  44. package/dist/rules/aeo/citable-facts.js +4 -33
  45. package/dist/rules/aeo/citable-facts.js.map +1 -1
  46. package/dist/rules/aeo/crawler-access.d.ts +14 -0
  47. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  48. package/dist/rules/aeo/crawler-access.js +96 -15
  49. package/dist/rules/aeo/crawler-access.js.map +1 -1
  50. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  51. package/dist/rules/aeo/summary-bait.js +4 -3
  52. package/dist/rules/aeo/summary-bait.js.map +1 -1
  53. package/dist/rules/content/citation-coverage.d.ts +11 -0
  54. package/dist/rules/content/citation-coverage.d.ts.map +1 -0
  55. package/dist/rules/content/citation-coverage.js +43 -0
  56. package/dist/rules/content/citation-coverage.js.map +1 -0
  57. package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
  58. package/dist/rules/content/common-phrase-reuse.js +7 -2
  59. package/dist/rules/content/common-phrase-reuse.js.map +1 -1
  60. package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
  61. package/dist/rules/content/regurgitated-content.js +11 -2
  62. package/dist/rules/content/regurgitated-content.js.map +1 -1
  63. package/dist/rules/content/translation-no-op.d.ts.map +1 -1
  64. package/dist/rules/content/translation-no-op.js +5 -1
  65. package/dist/rules/content/translation-no-op.js.map +1 -1
  66. package/dist/rules/content/unique-value.d.ts +15 -1
  67. package/dist/rules/content/unique-value.d.ts.map +1 -1
  68. package/dist/rules/content/unique-value.js +46 -39
  69. package/dist/rules/content/unique-value.js.map +1 -1
  70. package/dist/rules/content/value-add.d.ts.map +1 -1
  71. package/dist/rules/content/value-add.js +3 -1
  72. package/dist/rules/content/value-add.js.map +1 -1
  73. package/dist/rules/links/cluster-connectivity.d.ts +7 -1
  74. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
  75. package/dist/rules/links/cluster-connectivity.js +8 -2
  76. package/dist/rules/links/cluster-connectivity.js.map +1 -1
  77. package/dist/rules/links/orphan-pages.d.ts +8 -1
  78. package/dist/rules/links/orphan-pages.d.ts.map +1 -1
  79. package/dist/rules/links/orphan-pages.js +10 -1
  80. package/dist/rules/links/orphan-pages.js.map +1 -1
  81. package/dist/rules/schema/consistency.d.ts.map +1 -1
  82. package/dist/rules/schema/consistency.js +33 -21
  83. package/dist/rules/schema/consistency.js.map +1 -1
  84. package/dist/rules/scope.d.ts.map +1 -1
  85. package/dist/rules/scope.js +1 -0
  86. package/dist/rules/scope.js.map +1 -1
  87. package/dist/rules/spam/entity-swap.d.ts.map +1 -1
  88. package/dist/rules/spam/entity-swap.js +51 -9
  89. package/dist/rules/spam/entity-swap.js.map +1 -1
  90. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  91. package/dist/rules/spam/thin-content.js +5 -1
  92. package/dist/rules/spam/thin-content.js.map +1 -1
  93. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
  94. package/dist/rules/tech/canonical-consistency.js +144 -28
  95. package/dist/rules/tech/canonical-consistency.js.map +1 -1
  96. package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
  97. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
  98. package/dist/rules/tech/sitemap-completeness.js +21 -5
  99. package/dist/rules/tech/sitemap-completeness.js.map +1 -1
  100. package/dist/rules/tech/soft-404.d.ts +11 -0
  101. package/dist/rules/tech/soft-404.d.ts.map +1 -1
  102. package/dist/rules/tech/soft-404.js +47 -5
  103. package/dist/rules/tech/soft-404.js.map +1 -1
  104. package/dist/site-classifier.d.ts.map +1 -1
  105. package/dist/site-classifier.js +1 -0
  106. package/dist/site-classifier.js.map +1 -1
  107. package/dist/template-detection.d.ts +1 -0
  108. package/dist/template-detection.d.ts.map +1 -1
  109. package/dist/template-detection.js +1 -1
  110. package/dist/template-detection.js.map +1 -1
  111. package/dist/types.d.ts +22 -1
  112. package/dist/types.d.ts.map +1 -1
  113. package/package.json +17 -1
@@ -56,55 +56,125 @@ export function parseRobotsByUserAgent(robotsTxt) {
56
56
  }
57
57
  return result;
58
58
  }
59
+ /**
60
+ * Parse robots.txt into a map of user-agent -> list of Allow patterns.
61
+ * Mirrors parseRobotsByUserAgent but captures Allow directives.
62
+ */
63
+ export function parseRobotsAllowByUserAgent(robotsTxt) {
64
+ const lines = robotsTxt.split(/\r?\n/);
65
+ const result = new Map();
66
+ let currentAgents = [];
67
+ let expectingRules = false;
68
+ for (const raw of lines) {
69
+ const line = raw.trim();
70
+ if (!line || line.startsWith("#"))
71
+ continue;
72
+ if (/^user-agent\s*:/i.test(line)) {
73
+ const ua = line.replace(/^user-agent\s*:\s*/i, "").trim().toLowerCase();
74
+ if (!expectingRules) {
75
+ currentAgents.push(ua);
76
+ }
77
+ else {
78
+ currentAgents = [ua];
79
+ expectingRules = false;
80
+ }
81
+ if (!result.has(ua))
82
+ result.set(ua, []);
83
+ continue;
84
+ }
85
+ if (/^(allow|disallow|crawl-delay|sitemap)\s*:/i.test(line)) {
86
+ expectingRules = true;
87
+ }
88
+ if (/^allow\s*:/i.test(line)) {
89
+ const value = line.replace(/^allow\s*:\s*/i, "").trim();
90
+ if (!value)
91
+ continue;
92
+ for (const agent of currentAgents) {
93
+ const bucket = result.get(agent);
94
+ if (bucket)
95
+ bucket.push(value);
96
+ }
97
+ }
98
+ }
99
+ return result;
100
+ }
59
101
  /** True if the Disallow list includes a root block (`/`). */
60
102
  export function isFullyDisallowed(patterns) {
61
103
  if (!patterns)
62
104
  return false;
63
105
  return patterns.some((p) => p === "/" || p === "/*");
64
106
  }
107
+ /**
108
+ * RFC 9309 block status for an agent given its disallow and allow patterns.
109
+ * Returns:
110
+ * "none" — not blocked (no root disallow, or root disallow overridden by Allow: /)
111
+ * "partial" — root disallow with some Allow paths that reopen part of the site (but not all)
112
+ * "full" — root disallow with no overriding Allow
113
+ */
114
+ export function blockStatus(disallowPatterns, allowPatterns) {
115
+ if (!isFullyDisallowed(disallowPatterns))
116
+ return "none";
117
+ const allows = allowPatterns ?? [];
118
+ // Allow: / (or Allow: /*) reopens everything — not blocked at all.
119
+ if (allows.some((p) => p === "/" || p === "/*"))
120
+ return "none";
121
+ // Any Allow directive at all means partial access remains.
122
+ if (allows.length > 0)
123
+ return "partial";
124
+ return "full";
125
+ }
65
126
  /**
66
127
  * Warn per blocked AI crawler; escalate to error when all configured crawlers are blocked.
67
128
  * Wildcard blocks (`User-agent: *` + `Disallow: /`) also count as blocking each named crawler
68
129
  * unless the crawler has its own more-permissive block.
130
+ * Per RFC 9309, Allow directives override Disallow when more specific (or equal length).
69
131
  */
70
132
  export function crawlerAccessRule(robotsTxtContent, options) {
71
133
  if (!robotsTxtContent)
72
134
  return [];
73
135
  const crawlers = options?.crawlers ?? DEFAULT_AI_CRAWLERS;
74
- const byAgent = parseRobotsByUserAgent(robotsTxtContent);
75
- const wildcardBlocked = isFullyDisallowed(byAgent.get("*"));
76
- const blocked = [];
136
+ const byAgentDisallow = parseRobotsByUserAgent(robotsTxtContent);
137
+ const byAgentAllow = parseRobotsAllowByUserAgent(robotsTxtContent);
138
+ const wildcardStatus = blockStatus(byAgentDisallow.get("*"), byAgentAllow.get("*"));
139
+ // Categorize each crawler as "full", "partial", or "none".
140
+ const fullyBlocked = [];
141
+ const partiallyBlocked = [];
77
142
  for (const crawler of crawlers) {
78
143
  const key = crawler.toLowerCase();
79
- const ownBlock = byAgent.get(key);
80
- if (ownBlock === undefined) {
81
- // No explicit block for this agent — it falls back to the wildcard block.
82
- if (wildcardBlocked)
83
- blocked.push(crawler);
144
+ const hasOwnGroup = byAgentDisallow.has(key) || byAgentAllow.has(key);
145
+ if (!hasOwnGroup) {
146
+ // No explicit group — inherit the wildcard status.
147
+ if (wildcardStatus === "full")
148
+ fullyBlocked.push(crawler);
149
+ else if (wildcardStatus === "partial")
150
+ partiallyBlocked.push(crawler);
84
151
  continue;
85
152
  }
86
- if (isFullyDisallowed(ownBlock))
87
- blocked.push(crawler);
153
+ const status = blockStatus(byAgentDisallow.get(key), byAgentAllow.get(key));
154
+ if (status === "full")
155
+ fullyBlocked.push(crawler);
156
+ else if (status === "partial")
157
+ partiallyBlocked.push(crawler);
88
158
  }
89
- if (blocked.length === 0)
159
+ if (fullyBlocked.length === 0 && partiallyBlocked.length === 0)
90
160
  return [];
91
161
  const findings = [];
92
- const allBlocked = blocked.length === crawlers.length;
93
- if (allBlocked) {
162
+ const allFullyBlocked = fullyBlocked.length === crawlers.length && partiallyBlocked.length === 0;
163
+ if (allFullyBlocked) {
94
164
  findings.push({
95
165
  ruleId: "aeo/crawler-access",
96
166
  severity: "error",
97
167
  // High: blocking ALL crawlers is either deliberate (clear intent) or a clear
98
168
  // mistake — either way the finding is unambiguous.
99
169
  confidence: "high",
100
- message: `robots.txt blocks all ${crawlers.length} configured AI crawlers: ${blocked.join(", ")}.`,
170
+ message: `robots.txt blocks all ${crawlers.length} configured AI crawlers: ${fullyBlocked.join(", ")}.`,
101
171
  fix: `Blocking every AI crawler makes your pages invisible to answer engines. ` +
102
172
  `Sites uncited in AI Overviews lose ~68% of traffic vs ~12% for cited sites. ` +
103
173
  `Remove the Disallow rules for these crawlers unless you have a specific legal or competitive reason to block them.`,
104
174
  });
105
175
  return findings;
106
176
  }
107
- for (const crawler of blocked) {
177
+ for (const crawler of fullyBlocked) {
108
178
  findings.push({
109
179
  ruleId: "aeo/crawler-access",
110
180
  severity: "warning",
@@ -117,6 +187,17 @@ export function crawlerAccessRule(robotsTxtContent, options) {
117
187
  `If selective blocking is intentional (e.g. admin routes only), narrow the Disallow pattern instead of blocking the whole site.`,
118
188
  });
119
189
  }
190
+ for (const crawler of partiallyBlocked) {
191
+ findings.push({
192
+ ruleId: "aeo/crawler-access",
193
+ severity: "warning",
194
+ confidence: "medium",
195
+ message: `robots.txt partially blocks ${crawler} (Disallow: / with Allow override).`,
196
+ fix: `Your robots.txt has "Disallow: /" for ${crawler} with some Allow paths that reopen specific routes. ` +
197
+ `While this is a partial block, crawlers may still miss large parts of your site. ` +
198
+ `Consider narrowing the Disallow directive to only the paths you actually want to restrict.`,
199
+ });
200
+ }
120
201
  return findings;
121
202
  }
122
203
  //# sourceMappingURL=crawler-access.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"crawler-access.js","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,cAAc;IACd,WAAW;IACX,eAAe;IACf,YAAY;IACZ,iBAAiB;IACjB,OAAO;IACP,mBAAmB;CACX,CAAC;AAEX;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,8EAA8E;gBAC9E,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,iBAAiB,CAAC,QAA8B;IAC9D,IAAI,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;AACvD,CAAC;AAOD;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAC/B,gBAAwB,EACxB,OAA8B;IAE9B,IAAI,CAAC,gBAAgB;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,mBAAmB,CAAC;IAC1D,MAAM,OAAO,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IACzD,MAAM,eAAe,GAAG,iBAAiB,CAAC,OA
AO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAE5D,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,0EAA0E;YAC1E,IAAI,eAAe;gBAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC3C,SAAS;QACX,CAAC;QACD,IAAI,iBAAiB,CAAC,QAAQ,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM,CAAC;IAEtD,IAAI,UAAU,EAAE,CAAC;QACf,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,OAAO;YACjB,6EAA6E;YAC7E,mDAAmD;YACnD,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,yBAAyB,QAAQ,CAAC,MAAM,4BAA4B,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YAClG,GAAG,EACD,0EAA0E;gBAC1E,8EAA8E;gBAC9E,oHAAoH;SACvH,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;QAC9B,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,SAAS;YACnB,4EAA4E;YAC5E,gDAAgD;YAChD,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,qBAAqB,OAAO,GAAG;YACxC,GAAG,EACD,sDAAsD,OAAO,uBAAuB;gBACpF,YAAY,OAAO,8DAA8D;gBACjF,gIAAgI;SACnI,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"crawler-access.js","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,cAAc;IACd,WAAW;IACX,eAAe;IACf,YAAY;IACZ,iBAAiB;IACjB,OAAO;IACP,mBAAmB;CACX,CAAC;AAEX;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,8EAA8E;gBAC9E,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,2BAA2B,CAAC,SAAiB;IAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAA
C,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACxD,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,iBAAiB,CAAC,QAA8B;IAC9D,IAAI,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;AACvD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CACzB,gBAAsC,EACtC,aAAmC;IAEnC,IAAI,CAAC,iBAAiB,CAAC,gBAAgB,CAAC;QAAE,OAAO,MAAM,CAAC;IAExD,MAAM,MAAM,GAAG,aAAa,IAAI,EAAE,CAAC;IAEnC,mEAAmE;IACnE,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAE/D,2DAA2D;IAC3D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IAExC,OAAO,MAAM,CAAC;AAChB,CAAC;AAOD;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAC/B,gBAAwB,EACxB,OAA8B;IAE9B,IAAI,CAAC,gBAAgB;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,mBAAmB,CAAC;IAC1D,MAAM,eAAe,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IACjE,MAAM,YAAY,GAAG,2BAA2B,CAAC,gBAAgB,CAAC,CAAC;IAEnE,MAAM,cAAc,GAAG,WAAW,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAEpF,2DAA2D;IAC3D,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,gBAAgB,GAAa,EAAE,CAAC;IAEtC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,
YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEtE,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,mDAAmD;YACnD,IAAI,cAAc,KAAK,MAAM;gBAAE,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBACrD,IAAI,cAAc,KAAK,SAAS;gBAAE,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtE,SAAS;QACX,CAAC;QAED,MAAM,MAAM,GAAG,WAAW,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5E,IAAI,MAAM,KAAK,MAAM;YAAE,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;aAC7C,IAAI,MAAM,KAAK,SAAS;YAAE,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE1E,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GACnB,YAAY,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,CAAC;IAE3E,IAAI,eAAe,EAAE,CAAC;QACpB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,OAAO;YACjB,6EAA6E;YAC7E,mDAAmD;YACnD,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,yBAAyB,QAAQ,CAAC,MAAM,4BAA4B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACvG,GAAG,EACD,0EAA0E;gBAC1E,8EAA8E;gBAC9E,oHAAoH;SACvH,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,SAAS;YACnB,4EAA4E;YAC5E,gDAAgD;YAChD,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,qBAAqB,OAAO,GAAG;YACxC,GAAG,EACD,sDAAsD,OAAO,uBAAuB;gBACpF,YAAY,OAAO,8DAA8D;gBACjF,gIAAgI;SACnI,CAAC,CAAC;IACL,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,+BAA+B,OAAO,qCAAqC;YACpF,GAAG,EACD,yCAAyC,OAAO,sDAAsD;gBACtG,mFAAmF;gBACnF,4FAA4F;SAC/F,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"summary-bait.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGhF,MAAM,WAAW,kBAAkB;IACjC,kFAAkF;IAClF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oGAAoG;IACpG,gCAAgC,CAAC,EAAE,MAAM,CAAC;IAC1C,yFAAyF;IACzF,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AA8FD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,kBAAkB,GAC3B,UAAU,EAAE,CA8Cd"}
1
+ {"version":3,"file":"summary-bait.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGhF,MAAM,WAAW,kBAAkB;IACjC,kFAAkF;IAClF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oGAAoG;IACpG,gCAAgC,CAAC,EAAE,MAAM,CAAC;IAC1C,yFAAyF;IACzF,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AA8FD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,kBAAkB,GAC3B,UAAU,EAAE,CA+Cd"}
@@ -125,9 +125,10 @@ export function summaryBaitRule(pages, entityPatterns, options) {
125
125
  continue;
126
126
  findings.push({
127
127
  ruleId: "aeo/summary-bait",
128
- severity: "error",
129
- // Always medium: this is a forecast — we measure what AI MIGHT do (cite without
130
- // sending the click), not what it WILL do for any given page.
128
+ // Warning, not error: this is a forecast — we measure what AI MIGHT do (cite
129
+ // without sending the click), not what it WILL do for any given page. An
130
+ // error severity would overstate a probabilistic, page-shape signal.
131
+ severity: "warning",
131
132
  confidence: "medium",
132
133
  message: `${page.url} is optimized for summarization, not retention. ` +
133
134
  `${Math.round(concentration * 100)}% of citable facts (${openerFacts.length}/${fullFacts.length}) ` +
@@ -1 +1 @@
1
- {"version":3,"file":"summary-bait.js","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAE/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAW1D,+EAA+E;AAC/E,+EAA+E;AAC/E,sDAAsD;AACtD,MAAM,aAAa,GAAa;IAC9B,qBAAqB;IACrB,oBAAoB;IACpB,qFAAqF;IACrF,uHAAuH;IACvH,wBAAwB;IACxB,gCAAgC;CACjC,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,MAAM;IACN,0BAA0B;IAC1B,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,qBAAqB;IACrB,QAAQ;IACR,oBAAoB;IACpB,mBAAmB;IACnB,aAAa;IACb,eAAe;IACf,UAAU;IACV,aAAa;IACb,OAAO;IACP,UAAU;IACV,YAAY;IACZ,cAAc;IACd,SAAS;IACT,aAAa;CACd,CAAC;AACF,MAAM,gBAAgB,GAAG,8CAA8C,CAAC;AAExE,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC/B,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY;IAC9C,4EAA4E;IAC5E,4EAA4E;IAC5E,6EAA6E;IAC7E,sEAAsE;IACtE,MAAM,MAAM,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE;QACxC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC;QACjB,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IACH,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;IAC9F,OAAO,OAAO,IAAI,kBAAkB,CAAC;AACvC,CAAC;AAED,SAAS,yBAAyB,CAAC,IAAY,EAAE,WAAmB;IAClE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,KAAK,MAAM,GAAG,IAAI,qBAAqB,EAAE,CAAC;QACxC,IAAI,CAAC;YACH,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC;QACrC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IACpC,CAAC;IACD,IAAI,sEAAsE,C
AAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnG,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,IAAI,WAAW;YAAE,OAAO,KAAK,CAAC;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACpF,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,SAAS,EAAE,CAAC;YACzC,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC,CAAC;IACH,IAAI,WAAW;QAAE,OAAO,IAAI,CAAC;IAC7B,OAAO,0EAA0E,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACtG,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,cAAmC,EACnC,OAA4B;IAE5B,MAAM,WAAW,GAAG,OAAO,EAAE,eAAe,IAAI,GAAG,CAAC;IACpD,MAAM,SAAS,GAAG,OAAO,EAAE,gCAAgC,IAAI,GAAG,CAAC;IACnE,MAAM,QAAQ,GAAG,OAAO,EAAE,iBAAiB,IAAI,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACrD,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC;YAAE,SAAS;QAErE,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACrE,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3E,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,IAAI,SAAS,CAAC,MAAM,GAAG,QAAQ;YAAE,SAAS;QAE1C,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;QAC5D,IAAI,aAAa,GAAG,SAAS;YAAE,SAAS;QAExC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,QAAQ,EAAE,OAAO;YACjB,gFAAgF;YAChF,8DAA8D;YAC9D,UAAU,EAAE,QAAQ;YACpB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,kDAAkD;gBAC7D,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,uBAAuB,WAAW,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,IAAI;gBACnG,oBAAoB,WAAW,mEAAmE;gBAClG,gGAAgG;gBAChG,sFAAsF;YACxF,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,wFAAwF;gBACxF,8FAA8F;gBAC9F,+FAA+F;gBAC/F,8BAA8B;gBAC9B,qGAAqG;gBACrG,oBAAoB;gBACpB,wEAAwE;SAC3E,CA
AC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"summary-bait.js","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAE/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAW1D,+EAA+E;AAC/E,+EAA+E;AAC/E,sDAAsD;AACtD,MAAM,aAAa,GAAa;IAC9B,qBAAqB;IACrB,oBAAoB;IACpB,qFAAqF;IACrF,uHAAuH;IACvH,wBAAwB;IACxB,gCAAgC;CACjC,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,MAAM;IACN,0BAA0B;IAC1B,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,qBAAqB;IACrB,QAAQ;IACR,oBAAoB;IACpB,mBAAmB;IACnB,aAAa;IACb,eAAe;IACf,UAAU;IACV,aAAa;IACb,OAAO;IACP,UAAU;IACV,YAAY;IACZ,cAAc;IACd,SAAS;IACT,aAAa;CACd,CAAC;AACF,MAAM,gBAAgB,GAAG,8CAA8C,CAAC;AAExE,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC/B,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY;IAC9C,4EAA4E;IAC5E,4EAA4E;IAC5E,6EAA6E;IAC7E,sEAAsE;IACtE,MAAM,MAAM,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE;QACxC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC;QACjB,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IACH,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;IAC9F,OAAO,OAAO,IAAI,kBAAkB,CAAC;AACvC,CAAC;AAED,SAAS,yBAAyB,CAAC,IAAY,EAAE,WAAmB;IAClE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,KAAK,MAAM,GAAG,IAAI,qBAAqB,EAAE,CAAC;QACxC,IAAI,CAAC;YACH,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC;QACrC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IACpC,CAAC;IACD,IAAI,sEAAsE,C
AAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnG,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,IAAI,WAAW;YAAE,OAAO,KAAK,CAAC;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACpF,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,SAAS,EAAE,CAAC;YACzC,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC,CAAC;IACH,IAAI,WAAW;QAAE,OAAO,IAAI,CAAC;IAC7B,OAAO,0EAA0E,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACtG,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,cAAmC,EACnC,OAA4B;IAE5B,MAAM,WAAW,GAAG,OAAO,EAAE,eAAe,IAAI,GAAG,CAAC;IACpD,MAAM,SAAS,GAAG,OAAO,EAAE,gCAAgC,IAAI,GAAG,CAAC;IACnE,MAAM,QAAQ,GAAG,OAAO,EAAE,iBAAiB,IAAI,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACrD,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC;YAAE,SAAS;QAErE,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACrE,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3E,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,IAAI,SAAS,CAAC,MAAM,GAAG,QAAQ;YAAE,SAAS;QAE1C,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;QAC5D,IAAI,aAAa,GAAG,SAAS;YAAE,SAAS;QAExC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,6EAA6E;YAC7E,yEAAyE;YACzE,qEAAqE;YACrE,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,kDAAkD;gBAC7D,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,uBAAuB,WAAW,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,IAAI;gBACnG,oBAAoB,WAAW,mEAAmE;gBAClG,gGAAgG;gBAChG,sFAAsF;YACxF,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,wFAAwF;gBACxF,8FAA8F;gBAC9F,+FAA+F;gBAC/F,8BAA8B;gBAC9B,qGAAqG;gBACrG,oBAAoB;gBACpB,wE
AAwE;SAC3E,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { EntityMaskPattern, ParsedPage, RuleResult } from "../../types.js";
2
+ export interface CitationCoverageOptions {
3
+ /** Quantified-claim count at/above which an authoritative citation is expected. Default: 4. */
4
+ minClaims?: number;
5
+ /** Authoritative citations below which the rule fires (when claims >= minClaims). Default: 1. */
6
+ minAuthoritative?: number;
7
+ /** Extra authoritative domains, merged with the extractor default allowlist. */
8
+ allowlist?: readonly string[];
9
+ }
10
+ export declare function citationCoverageRule(pages: ParsedPage[], entityPatterns: EntityMaskPattern[], options?: CitationCoverageOptions): RuleResult[];
11
+ //# sourceMappingURL=citation-coverage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citation-coverage.d.ts","sourceRoot":"","sources":["../../../src/rules/content/citation-coverage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGhF,MAAM,WAAW,uBAAuB;IACtC,+FAA+F;IAC/F,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iGAAiG;IACjG,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,gFAAgF;IAChF,SAAS,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CAC/B;AAED,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,uBAAuB,GAChC,UAAU,EAAE,CA6Cd"}
@@ -0,0 +1,43 @@
1
+ import { extractPageFacts, DEFAULT_CITATION_ALLOWLIST } from "../../algorithms/fact-extraction.js";
2
+ export function citationCoverageRule(pages, entityPatterns, options) {
3
+ const minClaims = options?.minClaims ?? 4;
4
+ const minAuthoritative = options?.minAuthoritative ?? 1;
5
+ // Merge caller-supplied domains with the default allowlist (additive, per the
6
+ // option contract) rather than replacing it.
7
+ const allowlist = options?.allowlist
8
+ ? [...DEFAULT_CITATION_ALLOWLIST, ...options.allowlist]
9
+ : undefined;
10
+ const findings = [];
11
+ for (const page of pages) {
12
+ const facts = extractPageFacts(page, entityPatterns, allowlist);
13
+ // "Quantified claims": distinct numeric facts + measurements + grounded claims.
14
+ const quantified = new Set([
15
+ ...facts.citableFacts,
16
+ ...facts.measurements.map((m) => m.value),
17
+ ]);
18
+ const statClaims = quantified.size + facts.groundedClaims.length;
19
+ const authoritative = facts.citations.filter((c) => c.authority === "authoritative").length;
20
+ if (statClaims < minClaims)
21
+ continue;
22
+ if (authoritative >= minAuthoritative)
23
+ continue;
24
+ const entityNames = facts.namedEntities.slice(0, 4).map((e) => e.value).join(", ");
25
+ const entityNote = entityNames ? ` (${facts.namedEntities.length} named entities: ${entityNames})` : "";
26
+ findings.push({
27
+ ruleId: "content/citation-coverage",
28
+ severity: "warning",
29
+ // Low in general; the grounded-claim portion is speculative. A page can
30
+ // legitimately make claims without citing (opinion, first-party data).
31
+ confidence: "low",
32
+ message: `${page.url} makes ${statClaims} quantified claim${statClaims === 1 ? "" : "s"} ` +
33
+ `but cites ${authoritative} authoritative source${authoritative === 1 ? "" : "s"}${entityNote}.`,
34
+ pageUrl: page.url,
35
+ fix: "Cite the primary sources behind your numbers — link the statute, standard, dataset, " +
36
+ ".gov/.edu page, or research that backs each statistic. AI Overviews and Google's " +
37
+ "helpful-content systems weight pages that ground claims in authoritative references. " +
38
+ "Note: this rule detects statistic+citation co-occurrence, not semantic correctness.",
39
+ });
40
+ }
41
+ return findings;
42
+ }
43
+ //# sourceMappingURL=citation-coverage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citation-coverage.js","sourceRoot":"","sources":["../../../src/rules/content/citation-coverage.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,0BAA0B,EAAE,MAAM,qCAAqC,CAAC;AAWnG,MAAM,UAAU,oBAAoB,CAClC,KAAmB,EACnB,cAAmC,EACnC,OAAiC;IAEjC,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,CAAC,CAAC;IAC1C,MAAM,gBAAgB,GAAG,OAAO,EAAE,gBAAgB,IAAI,CAAC,CAAC;IACxD,8EAA8E;IAC9E,6CAA6C;IAC7C,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS;QAClC,CAAC,CAAC,CAAC,GAAG,0BAA0B,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC;QACvD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,EAAE,cAAc,EAAE,SAAS,CAAC,CAAC;QAChE,gFAAgF;QAChF,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS;YACjC,GAAG,KAAK,CAAC,YAAY;YACrB,GAAG,KAAK,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;SAC1C,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,GAAG,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC;QACjE,MAAM,aAAa,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;QAE5F,IAAI,UAAU,GAAG,SAAS;YAAE,SAAS;QACrC,IAAI,aAAa,IAAI,gBAAgB;YAAE,SAAS;QAEhD,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnF,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,aAAa,CAAC,MAAM,oBAAoB,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAExG,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,QAAQ,EAAE,SAAS;YACnB,wEAAwE;YACxE,uEAAuE;YACvE,UAAU,EAAE,KAAK;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oBAAoB,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG;gBACjF,aAAa,aAAa,wBAAwB,aAAa,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,UAAU,GAAG;YAClG,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,sFAAsF;gBACtF,mFAAmF;gBACnF,uFAAuF;gBACvF,qFAAqF;SACxF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"common-phrase-reuse.d.ts","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAwH7D;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoBvE"}
1
+ {"version":3,"file":"common-phrase-reuse.d.ts","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyH7D;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAwBvE"}
@@ -1,3 +1,4 @@
1
+ import { proseTextExcludingExamples } from "../../algorithms/example-regions.js";
1
2
  const RULE_ID = "content/common-phrase-reuse";
2
3
  /**
3
4
  * Detects overuse of pSEO marketing clichés in page body content.
@@ -113,9 +114,13 @@ function buildMessage(url, count, matchedPhrases) {
113
114
  export function commonPhraseReuseRule(pages) {
114
115
  const results = [];
115
116
  for (const page of pages) {
116
- if (!page.contentText)
117
+ // Judge the page's OWN prose: strip quoted-example/code regions so a page
118
+ // that *teaches* about clichés (an explainer or style guide) isn't flagged
119
+ // for the examples it quotes. Falls back to contentText when html is absent.
120
+ const prose = proseTextExcludingExamples(page);
121
+ if (!prose)
117
122
  continue;
118
- const matched = findMatchedPhrases(page.contentText);
123
+ const matched = findMatchedPhrases(prose);
119
124
  if (matched.length < FIRE_THRESHOLD)
120
125
  continue;
121
126
  results.push({
@@ -1 +1 @@
1
- {"version":3,"file":"common-phrase-reuse.js","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,GAAG,6BAA6B,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,mBAAmB;AACnB,MAAM,gBAAgB,GAAG;IACvB,iBAAiB;IACjB,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,qBAAqB;IACrB,aAAa;IACb,oBAAoB;IACpB,uBAAuB;IACvB,cAAc;IACd,iBAAiB;CACT,CAAC;AAEX,yBAAyB;AACzB,MAAM,iBAAiB,GAAG;IACxB,mBAAmB;IACnB,sBAAsB;IACtB,kBAAkB;IAClB,oBAAoB;IACpB,yBAAyB;IACzB,qBAAqB;IACrB,qBAAqB;IACrB,aAAa;IACb,cAAc;IACd,gBAAgB;CACR,CAAC;AAEX,6BAA6B;AAC7B,MAAM,kBAAkB,GAAG;IACzB,WAAW;IACX,kBAAkB;IAClB,kBAAkB;IAClB,qBAAqB;IACrB,mBAAmB;IACnB,kBAAkB;IAClB,mBAAmB;IACnB,sBAAsB;CACd,CAAC;AAEX,iBAAiB;AACjB,MAAM,cAAc,GAAG;IACrB,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,mBAAmB;IACnB,oBAAoB;IACpB,gBAAgB;IAChB,mBAAmB;CACX,CAAC;AAEX,gBAAgB;AAChB,MAAM,aAAa,GAAG;IACpB,qBAAqB;IACrB,cAAc;IACd,uBAAuB;IACvB,iBAAiB;IACjB,iBAAiB;IACjB,aAAa;IACb,aAAa;CACL,CAAC;AAEX,MAAM,WAAW,GAAsB;IACrC,GAAG,gBAAgB;IACnB,GAAG,iBAAiB;IACpB,GAAG,kBAAkB;IACrB,GAAG,cAAc;IACjB,GAAG,aAAa;CACjB,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,SAAS,kBAAkB,CAAC,WAAmB;IAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IACxC,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,cAAwB;IACxE,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5E,OAAO,CACL,GAAG,GAAG,WAAW,KAAK,wCAAwC,QAAQ,KAAK;QAC3E,0FAA0F,CAC3F,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW;YAAE,SAAS;QAEhC,MAAM,OAAO,GAAG,kBAAkB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACrD,IAAI,OAAO,CAAC,MAAM,GAAG,cAAc;YAAE,SAAS;QAE9C,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC;YAC
xD,GAAG,EAAE,yLAAyL;YAC9L,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"common-phrase-reuse.js","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,0BAA0B,EAAE,MAAM,qCAAqC,CAAC;AAEjF,MAAM,OAAO,GAAG,6BAA6B,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,mBAAmB;AACnB,MAAM,gBAAgB,GAAG;IACvB,iBAAiB;IACjB,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,qBAAqB;IACrB,aAAa;IACb,oBAAoB;IACpB,uBAAuB;IACvB,cAAc;IACd,iBAAiB;CACT,CAAC;AAEX,yBAAyB;AACzB,MAAM,iBAAiB,GAAG;IACxB,mBAAmB;IACnB,sBAAsB;IACtB,kBAAkB;IAClB,oBAAoB;IACpB,yBAAyB;IACzB,qBAAqB;IACrB,qBAAqB;IACrB,aAAa;IACb,cAAc;IACd,gBAAgB;CACR,CAAC;AAEX,6BAA6B;AAC7B,MAAM,kBAAkB,GAAG;IACzB,WAAW;IACX,kBAAkB;IAClB,kBAAkB;IAClB,qBAAqB;IACrB,mBAAmB;IACnB,kBAAkB;IAClB,mBAAmB;IACnB,sBAAsB;CACd,CAAC;AAEX,iBAAiB;AACjB,MAAM,cAAc,GAAG;IACrB,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,mBAAmB;IACnB,oBAAoB;IACpB,gBAAgB;IAChB,mBAAmB;CACX,CAAC;AAEX,gBAAgB;AAChB,MAAM,aAAa,GAAG;IACpB,qBAAqB;IACrB,cAAc;IACd,uBAAuB;IACvB,iBAAiB;IACjB,iBAAiB;IACjB,aAAa;IACb,aAAa;CACL,CAAC;AAEX,MAAM,WAAW,GAAsB;IACrC,GAAG,gBAAgB;IACnB,GAAG,iBAAiB;IACpB,GAAG,kBAAkB;IACrB,GAAG,cAAc;IACjB,GAAG,aAAa;CACjB,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,SAAS,kBAAkB,CAAC,WAAmB;IAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IACxC,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,cAAwB;IACxE,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5E,OAAO,CACL,GAAG,GAAG,WAAW,KAAK,wCAAwC,QAAQ,KAAK;QAC3E,0FAA0F,CAC3F,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,0EAA0E;QAC1E,2EAA2E;QAC3E,6EAA6E;QAC7E,MAAM,KAAK,GAAG,0BAA0B,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,OAAO,CAAC,MAAM,GAAG,cAAc;YAAE,SAAS;QAE9C,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAA
Q,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC;YACxD,GAAG,EAAE,yLAAyL;YAC9L,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"regurgitated-content.d.ts","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAiG7D,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA8BzE"}
1
+ {"version":3,"file":"regurgitated-content.d.ts","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAkG7D,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsCzE"}
@@ -1,4 +1,5 @@
1
1
  import * as cheerio from "cheerio";
2
+ import { EXAMPLE_REGION_SELECTOR } from "../../algorithms/example-regions.js";
2
3
  const RULE_ID = "content/regurgitated-content";
3
4
  /**
4
5
  * Detects the Google Places API regurgitation pattern: sites that lift
@@ -86,12 +87,20 @@ export function regurgitatedContentRule(pages) {
86
87
  if (!html)
87
88
  continue;
88
89
  const $ = cheerio.load(html);
90
+ // Drop quoted-example/code regions before scanning so an explainer page that
91
+ // *documents* the regurgitation patterns (e.g. /rules/regurgitated-content,
92
+ // which quotes "powered by Google", the Static Maps URL, and the Places API
93
+ // JS marker as code) isn't flagged for teaching them. We deliberately keep
94
+ // <script>/<style> in place: a real Places-scraping site carries the JS in a
95
+ // live <script>, which must still trip the detector.
96
+ $(EXAMPLE_REGION_SELECTOR).remove();
97
+ const cleanedHtml = $.html();
89
98
  const eeat = eeatSignalCount(page);
90
99
  const signals = [
91
100
  checkGoogleAttribution($),
92
101
  checkGoogleImagesDominate($),
93
- checkStaticMapsEmbed($, html),
94
- checkPlacesApiJs(html),
102
+ checkStaticMapsEmbed($, cleanedHtml),
103
+ checkPlacesApiJs(cleanedHtml),
95
104
  checkAggregatorFootprint($, eeat),
96
105
  ];
97
106
  const fired = signals.filter((s) => s.fired);
@@ -1 +1 @@
1
- {"version":3,"file":"regurgitated-content.js","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;GAOG;AAEH,+DAA+D;AAC/D,MAAM,qBAAqB,GAAG,oBAAoB,CAAC;AAEnD,MAAM,gBAAgB,GAAG;IACvB,uBAAuB;IACvB,2BAA2B;IAC3B,0CAA0C;IAC1C,oCAAoC;CACrC,CAAC;AAEF,MAAM,iBAAiB,GAAG,8CAA8C,CAAC;AACzE,MAAM,iBAAiB,GAAG,iDAAiD,CAAC;AAE5E,MAAM,gBAAgB,GACpB,6FAA6F,CAAC;AAEhG,+EAA+E;AAC/E,kEAAkE;AAClE,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,0BAA0B,GAAG,CAAC,CAAC;AACrC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AACvC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAO5B,SAAS,sBAAsB,CAAC,CAAqB;IACnD,MAAM,KAAK,GAAG,2BAA2B,CAAC;IAC1C,IAAI,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACxE,MAAM,SAAS,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC9E,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,yBAAyB,CAAC,CAAqB;IACtD,MAAM,KAAK,GAAG,uCAAuC,CAAC;IACtD,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,IAAI,IAAI,CAAC,MAAM,GAAG,0BAA0B;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC7E,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC,MAAM,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;AACnF,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAqB,EAAE,IAAY;IAC/D,MAAM,KAAK,GAAG,kCAAkC,CAAC;IACjD,IAAI,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,MAAM,cAAc,GAClB,CAAC,CAAC,aAAa,CAAC;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,OAAO,E
AAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,EAAE,KAAK,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;AAChF,CAAC;AAED,SAAS,eAAe,CAAC,IAAgB;IACvC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAChF,IAAI,IAAI,CAAC,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,IAAI,CAAC,CAAC;IACjE,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,wBAAwB,CAAC,CAAqB,EAAE,IAAY;IACnE,MAAM,KAAK,GAAG,2DAA2D,CAAC;IAC1E,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC;SACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACpD,MAAM,CAAC;IACV,OAAO,EAAE,KAAK,EAAE,KAAK,IAAI,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAmB;YAC9B,sBAAsB,CAAC,CAAC,CAAC;YACzB,yBAAyB,CAAC,CAAC,CAAC;YAC5B,oBAAoB,CAAC,CAAC,EAAE,IAAI,CAAC;YAC7B,gBAAgB,CAAC,IAAI,CAAC;YACtB,wBAAwB,CAAC,CAAC,EAAE,IAAI,CAAC;SAClC,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,2CAA2C,UAAU,GAAG;YAC7F,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,8TAA8T;YACnU,UAAU,EAAE,aAAa;SAC1B,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,
CAAC"}
1
+ {"version":3,"file":"regurgitated-content.js","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,qCAAqC,CAAC;AAE9E,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;GAOG;AAEH,+DAA+D;AAC/D,MAAM,qBAAqB,GAAG,oBAAoB,CAAC;AAEnD,MAAM,gBAAgB,GAAG;IACvB,uBAAuB;IACvB,2BAA2B;IAC3B,0CAA0C;IAC1C,oCAAoC;CACrC,CAAC;AAEF,MAAM,iBAAiB,GAAG,8CAA8C,CAAC;AACzE,MAAM,iBAAiB,GAAG,iDAAiD,CAAC;AAE5E,MAAM,gBAAgB,GACpB,6FAA6F,CAAC;AAEhG,+EAA+E;AAC/E,kEAAkE;AAClE,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,0BAA0B,GAAG,CAAC,CAAC;AACrC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AACvC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAO5B,SAAS,sBAAsB,CAAC,CAAqB;IACnD,MAAM,KAAK,GAAG,2BAA2B,CAAC;IAC1C,IAAI,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACxE,MAAM,SAAS,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC9E,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,yBAAyB,CAAC,CAAqB;IACtD,MAAM,KAAK,GAAG,uCAAuC,CAAC;IACtD,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,IAAI,IAAI,CAAC,MAAM,GAAG,0BAA0B;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC7E,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC,MAAM,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;AACnF,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAqB,EAAE,IAAY;IAC/D,MAAM,KAAK,GAAG,kCAAkC,CAAC;IACjD,IAAI,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,MAAM,cAAc,GAClB,CAAC,CAAC,aAAa,CAAC;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,iBAAiB,CA
AC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,EAAE,KAAK,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;AAChF,CAAC;AAED,SAAS,eAAe,CAAC,IAAgB;IACvC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAChF,IAAI,IAAI,CAAC,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,IAAI,CAAC,CAAC;IACjE,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,wBAAwB,CAAC,CAAqB,EAAE,IAAY;IACnE,MAAM,KAAK,GAAG,2DAA2D,CAAC;IAC1E,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC;SACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACpD,MAAM,CAAC;IACV,OAAO,EAAE,KAAK,EAAE,KAAK,IAAI,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,6EAA6E;QAC7E,4EAA4E;QAC5E,4EAA4E;QAC5E,2EAA2E;QAC3E,6EAA6E;QAC7E,qDAAqD;QACrD,CAAC,CAAC,uBAAuB,CAAC,CAAC,MAAM,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAmB;YAC9B,sBAAsB,CAAC,CAAC,CAAC;YACzB,yBAAyB,CAAC,CAAC,CAAC;YAC5B,oBAAoB,CAAC,CAAC,EAAE,WAAW,CAAC;YACpC,gBAAgB,CAAC,WAAW,CAAC;YAC7B,wBAAwB,CAAC,CAAC,EAAE,IAAI,CAAC;SAClC,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YAC
f,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,2CAA2C,UAAU,GAAG;YAC7F,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,8TAA8T;YACnU,UAAU,EAAE,aAAa;SAC1B,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"translation-no-op.d.ts","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAoC7D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgErE"}
1
+ {"version":3,"file":"translation-no-op.d.ts","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAoC7D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoErE"}
@@ -88,7 +88,11 @@ export function translationNoOpRule(pages) {
88
88
  : `${(minSim * 100).toFixed(0)}%--${(maxSim * 100).toFixed(0)}%`;
89
89
  findings.push({
90
90
  ruleId: "content/translation-no-op",
91
- severity: "error",
91
+ // Warning, not error: an untranslated locale variant is a real duplicate-
92
+ // content gap but a should-fix, not a ship-blocker — and multilingual sites
93
+ // can legitimately share some body text (disclaimers, spec tables).
94
+ severity: "warning",
95
+ confidence: "medium",
92
96
  message: `${members.length} locale variants of "${basePath}" share identical content ` +
93
97
  `(similarity ${simLabel}). Translate the body or consolidate to the canonical version.`,
94
98
  pageUrl: urls[0],
@@ -1 +1 @@
1
- {"version":3,"file":"translation-no-op.js","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGvG,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC;;;;;;;GAOG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C;;;GAGG;AACH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,oCAAoC;IACpC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuD,CAAC;IAE9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEjC,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,+BAA+B,CAChG,CAAC;QACF,IAAI,WAAW;YAAE,SAAS;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QACvE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,KAAK,CAA
C;QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,MAAM,GAAG,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1E,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,IAAI,oBAAoB;oBAAE,QAAQ,GAAG,IAAI,CAAC;YACnD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,QAAQ,GACZ,MAAM,KAAK,MAAM;YACf,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACjC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAErE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,QAAQ,EAAE,OAAO;YACjB,OAAO,EACL,GAAG,OAAO,CAAC,MAAM,wBAAwB,QAAQ,4BAA4B;gBAC7E,eAAe,QAAQ,gEAAgE;YACzF,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;YAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC7B,GAAG,EAAE,qKAAqK;SAC3K,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"translation-no-op.js","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGvG,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC;;;;;;;GAOG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C;;;GAGG;AACH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,oCAAoC;IACpC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuD,CAAC;IAE9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEjC,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,+BAA+B,CAChG,CAAC;QACF,IAAI,WAAW;YAAE,SAAS;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QACvE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,KAAK,CAA
C;QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,MAAM,GAAG,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1E,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,IAAI,oBAAoB;oBAAE,QAAQ,GAAG,IAAI,CAAC;YACnD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,QAAQ,GACZ,MAAM,KAAK,MAAM;YACf,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACjC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAErE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,0EAA0E;YAC1E,4EAA4E;YAC5E,oEAAoE;YACpE,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EACL,GAAG,OAAO,CAAC,MAAM,wBAAwB,QAAQ,4BAA4B;gBAC7E,eAAe,QAAQ,gEAAgE;YACzF,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;YAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC7B,GAAG,EAAE,qKAAqK;SAC3K,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1,3 +1,17 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
- export declare function uniqueValueRule(pages: ParsedPage[], minUniqueWords: number): RuleResult[];
2
+ export interface UniqueValueThresholds {
3
+ /** Unique-content density below this fires (info). Default 0.20. */
4
+ passBelow: number;
5
+ /** Density below this escalates to error. Default 0.12. */
6
+ errorBelow: number;
7
+ }
8
+ /**
9
+ * Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
10
+ * token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
11
+ * if on every page — and averaged over the page's distinct tokens. A near-
12
+ * duplicate / boilerplate page scores low regardless of corpus size or length; a
13
+ * large original page stays high. Continuous, so it doesn't shuffle at the margin.
14
+ * Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
15
+ */
16
+ export declare function uniqueValueRule(pages: ParsedPage[], thresholds: UniqueValueThresholds): RuleResult[];
3
17
  //# sourceMappingURL=unique-value.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAe7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,cAAc,EAAE,MAAM,GAAG,UAAU,EAAE,CAuCzF"}
1
+ {"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,qBAAqB;IACpC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;CACpB;AAYD;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,UAAU,EAAE,qBAAqB,GAChC,UAAU,EAAE,CAwCd"}
@@ -1,51 +1,58 @@
1
1
  function tokenize(text) {
2
- // Strip leading/trailing punctuation so "word", "word." and "(word)" count as
3
- // the SAME token. Without this, surrounding punctuation spuriously inflated
4
- // the "unique" count (a word that's shared but happens to carry a trailing
5
- // comma on one page looked unique) — false precision in the shared/unique
6
- // split this rule now surfaces.
2
+ // Lowercase, split on whitespace, strip edge punctuation so "word", "word."
3
+ // and "(word)" are one token.
7
4
  return text
8
5
  .toLowerCase()
9
6
  .split(/\s+/)
10
7
  .map((t) => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
11
8
  .filter(Boolean);
12
9
  }
13
- export function uniqueValueRule(pages, minUniqueWords) {
14
- const frequencies = new Map();
15
- const pageTokens = pages.map((page) => tokenize(page.contentText));
16
- for (const tokens of pageTokens) {
17
- for (const token of new Set(tokens)) {
18
- frequencies.set(token, (frequencies.get(token) ?? 0) + 1);
19
- }
10
+ /**
11
+ * Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
12
+ * token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
13
+ * if on every page — and averaged over the page's distinct tokens. A near-
14
+ * duplicate / boilerplate page scores low regardless of corpus size or length; a
15
+ * large original page stays high. Continuous, so it doesn't shuffle at the margin.
16
+ * Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
17
+ */
18
+ export function uniqueValueRule(pages, thresholds) {
19
+ const { passBelow, errorBelow } = thresholds;
20
+ const N = pages.length;
21
+ const lnN = Math.log(N);
22
+ if (N <= 1 || lnN === 0)
23
+ return []; // can't measure rarity against a single page
24
+ const df = new Map();
25
+ const pageDistinct = pages.map((p) => new Set(tokenize(p.contentText)));
26
+ for (const distinct of pageDistinct) {
27
+ for (const t of distinct)
28
+ df.set(t, (df.get(t) ?? 0) + 1);
20
29
  }
21
30
  const findings = [];
22
- pages.forEach((page, idx) => {
23
- const distinct = new Set(pageTokens[idx]);
24
- let uniqueCount = 0;
25
- let sharedCount = 0;
26
- for (const token of distinct) {
27
- if ((frequencies.get(token) ?? 0) === 1)
28
- uniqueCount += 1;
29
- else
30
- sharedCount += 1;
31
- }
32
- if (uniqueCount < minUniqueWords) {
33
- const needed = minUniqueWords - uniqueCount;
34
- findings.push({
35
- ruleId: "content/unique-value",
36
- severity: "error",
37
- // Surface the shared-vs-unique split so the author can see that most of
38
- // the page's words already appear elsewhere (the "name the overlap"
39
- // signal) — not just a bare unique-word count.
40
- message: `${page.url} has only ${uniqueCount} page-unique words (min ${minUniqueWords}); ${sharedCount} of its ${distinct.size} distinct words also appear on other pages.`,
41
- pageUrl: page.url,
42
- // Axis-aware guidance: the #1 trap on pSEO sites is adding real, useful,
43
- // but per-axis-SHARED data (a role's regulations repeated across that
44
- // role's documents; a state's statutes across its pages) which doesn't
45
- // count. Spell that out so authors don't burn effort on it.
46
- fix: `Add ~${needed} more words that appear on NO other page. Content repeated across pages on the same entity axis — boilerplate, shared legal/spec blocks, or per-axis data (e.g. a role's regulations across that role's documents, a state's statutes across its pages) — does NOT count toward uniqueness, even when it's useful. Only page-specific text (a unique lead, this record's distinct facts, page-specific examples) moves this metric.`
47
- });
48
- }
31
+ pages.forEach((page, i) => {
32
+ const distinct = pageDistinct[i];
33
+ if (distinct.size === 0)
34
+ return; // empty page → thin-content handles it
35
+ let mass = 0;
36
+ for (const t of distinct)
37
+ mass += Math.log(N / (df.get(t) ?? 1)) / lnN;
38
+ const density = mass / distinct.size;
39
+ if (density >= passBelow)
40
+ return;
41
+ const severity = density < errorBelow ? "error" : "info";
42
+ const pct = (density * 100).toFixed(1);
43
+ findings.push({
44
+ ruleId: "content/unique-value",
45
+ severity,
46
+ message: `${page.url} has low unique-content density ${density.toFixed(3)} ` +
47
+ `(${pct}% of its ${distinct.size} distinct words are page-distinctive; floor ${passBelow.toFixed(2)}). ` +
48
+ `Most of its vocabulary also appears on other pages.`,
49
+ pageUrl: page.url,
50
+ fix: `Raise originality density: add page-specific text — a distinct lead, this ` +
51
+ `record's own facts, page-specific examples. Content repeated across pages on ` +
52
+ `the same axis (boilerplate, shared legal/spec blocks, per-axis data like a ` +
53
+ `role's regulations across that role's documents) is common vocabulary and ` +
54
+ `does NOT raise density, even when it is useful.`,
55
+ });
49
56
  });
50
57
  return findings;
51
58
  }
@@ -1 +1 @@
1
- {"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAEA,SAAS,QAAQ,CAAC,IAAY;IAC5B,8EAA8E;IAC9E,4EAA4E;IAC5E,2EAA2E;IAC3E,0EAA0E;IAC1E,gCAAgC;IAChC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB,EAAE,cAAsB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnE,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;QAC1B,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1C,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC;gBAAE,WAAW,IAAI,CAAC,CAAC;;gBACrD,WAAW,IAAI,CAAC,CAAC;QACxB,CAAC;QACD,IAAI,WAAW,GAAG,cAAc,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,cAAc,GAAG,WAAW,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,OAAO;gBACjB,wEAAwE;gBACxE,oEAAoE;gBACpE,+CAA+C;gBAC/C,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,aAAa,WAAW,2BAA2B,cAAc,MAAM,WAAW,WAAW,QAAQ,CAAC,IAAI,6CAA6C;gBAC3K,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,yEAAyE;gBACzE,sEAAsE;gBACtE,uEAAuE;gBACvE,4DAA4D;gBAC5D,GAAG,EAAE,QAAQ,MAAM,qaAAqa;aACzb,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AASA,SAAS,QAAQ,CAAC,IAAY;IAC5B,4EAA4E;IAC5E,8BAA8B;IAC9B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,UAAiC;IAEjC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,UAAU,CAAC;IAC7C,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,CAAC,6CAA6C;IAEjF,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;IACxE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACxB,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,CAAC,uCAAuC;QACxE,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QACrC,IAAI,OAAO,IAAI,SAAS;YAAE,OAAO;QAEjC,MAAM,QAAQ,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;QACzD,MAAM,GAAG,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ;YACR,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,mCAAmC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBACnE,IAAI,GAAG,YAAY,QAAQ,CAAC,IAAI,+CAA+C,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACxG,qDAAqD;YACvD,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,+EAA+E;gBAC/E,6EAA6E;gBAC7E,4EAA4E;gBAC5E,iDAAiD;SACpD,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;
IACH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AAmIvE;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAmBtF"}
1
+ {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AAuIvE;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAmBtF"}
@@ -1,3 +1,4 @@
1
+ import { hasAuthoritativeCitation } from "../../algorithms/fact-extraction.js";
1
2
  const RULE_ID = "content/value-add";
2
3
  const EEAT_HTML_PATTERNS = [
3
4
  /last\s+updated/i,
@@ -15,7 +16,8 @@ function countEeatCategories(page) {
15
16
  count += 1;
16
17
  if (page.publishedDate)
17
18
  count += 1;
18
- if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html)))
19
+ if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html)) ||
20
+ hasAuthoritativeCitation(page.resolvedHrefs, page.url))
19
21
  count += 1;
20
22
  return count;
21
23
  }