@pseolint/core 0.6.6 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
- package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
- package/dist/algorithms/authority/commoncrawl.js +17 -0
- package/dist/algorithms/authority/commoncrawl.js.map +1 -0
- package/dist/algorithms/authority/openpagerank.d.ts +19 -0
- package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
- package/dist/algorithms/authority/openpagerank.js +42 -0
- package/dist/algorithms/authority/openpagerank.js.map +1 -0
- package/dist/algorithms/authority/provider.d.ts +16 -0
- package/dist/algorithms/authority/provider.d.ts.map +1 -0
- package/dist/algorithms/authority/provider.js +24 -0
- package/dist/algorithms/authority/provider.js.map +1 -0
- package/dist/algorithms/auto-entity-mask.d.ts +19 -0
- package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
- package/dist/algorithms/auto-entity-mask.js +102 -0
- package/dist/algorithms/auto-entity-mask.js.map +1 -0
- package/dist/algorithms/example-regions.d.ts +22 -0
- package/dist/algorithms/example-regions.d.ts.map +1 -0
- package/dist/algorithms/example-regions.js +32 -0
- package/dist/algorithms/example-regions.js.map +1 -0
- package/dist/algorithms/fact-extraction.d.ts +46 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -0
- package/dist/algorithms/fact-extraction.js +223 -0
- package/dist/algorithms/fact-extraction.js.map +1 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +55 -9
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +9 -8
- package/dist/enrich-findings.js.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/origin-preflight.d.ts +89 -0
- package/dist/origin-preflight.d.ts.map +1 -0
- package/dist/origin-preflight.js +93 -0
- package/dist/origin-preflight.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +1 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
- package/dist/rules/aeo/citable-facts.js +4 -33
- package/dist/rules/aeo/citable-facts.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts +14 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +96 -15
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +4 -3
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/citation-coverage.d.ts +11 -0
- package/dist/rules/content/citation-coverage.d.ts.map +1 -0
- package/dist/rules/content/citation-coverage.js +43 -0
- package/dist/rules/content/citation-coverage.js.map +1 -0
- package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
- package/dist/rules/content/common-phrase-reuse.js +7 -2
- package/dist/rules/content/common-phrase-reuse.js.map +1 -1
- package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
- package/dist/rules/content/regurgitated-content.js +11 -2
- package/dist/rules/content/regurgitated-content.js.map +1 -1
- package/dist/rules/content/translation-no-op.d.ts.map +1 -1
- package/dist/rules/content/translation-no-op.js +5 -1
- package/dist/rules/content/translation-no-op.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +15 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +46 -39
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +3 -1
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/links/cluster-connectivity.d.ts +7 -1
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
- package/dist/rules/links/cluster-connectivity.js +8 -2
- package/dist/rules/links/cluster-connectivity.js.map +1 -1
- package/dist/rules/links/orphan-pages.d.ts +8 -1
- package/dist/rules/links/orphan-pages.d.ts.map +1 -1
- package/dist/rules/links/orphan-pages.js +10 -1
- package/dist/rules/links/orphan-pages.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +33 -21
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +1 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/spam/entity-swap.d.ts.map +1 -1
- package/dist/rules/spam/entity-swap.js +51 -9
- package/dist/rules/spam/entity-swap.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +5 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
- package/dist/rules/tech/canonical-consistency.js +144 -28
- package/dist/rules/tech/canonical-consistency.js.map +1 -1
- package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
- package/dist/rules/tech/sitemap-completeness.js +21 -5
- package/dist/rules/tech/sitemap-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +11 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +47 -5
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +1 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/template-detection.d.ts +1 -0
- package/dist/template-detection.d.ts.map +1 -1
- package/dist/template-detection.js +1 -1
- package/dist/template-detection.js.map +1 -1
- package/dist/types.d.ts +22 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +17 -1
|
@@ -56,55 +56,125 @@ export function parseRobotsByUserAgent(robotsTxt) {
|
|
|
56
56
|
}
|
|
57
57
|
return result;
|
|
58
58
|
}
|
|
59
|
+
/**
 * Parse robots.txt into a map of user-agent -> list of Allow patterns.
 * Mirrors parseRobotsByUserAgent but captures Allow directives.
 *
 * Grouping rules implemented here:
 *  - consecutive `User-agent:` lines accumulate into one group that shares
 *    the rule lines following them;
 *  - the first `User-agent:` line seen after any rule line (Allow, Disallow,
 *    Crawl-delay, Sitemap) starts a fresh group;
 *  - every user-agent encountered gets a map entry, even when it has no
 *    Allow lines, so callers can tell "group exists" from "no group".
 *
 * User-agent keys are trimmed and lower-cased. Allow values are trimmed and
 * any trailing `# comment` is removed; empty Allow values are ignored.
 *
 * @param {string} robotsTxt Raw robots.txt content.
 * @returns {Map<string, string[]>} Lower-cased user-agent -> Allow patterns in file order.
 */
export function parseRobotsAllowByUserAgent(robotsTxt) {
    const result = new Map();
    let currentAgents = [];
    let expectingRules = false;
    for (const raw of robotsTxt.split(/\r?\n/)) {
        const line = raw.trim();
        // Blank lines and whole-line comments never affect grouping.
        if (!line || line.startsWith("#"))
            continue;
        if (/^user-agent\s*:/i.test(line)) {
            // The token is deliberately parsed exactly as before (no comment
            // stripping) so keys stay comparable with the Disallow map.
            // NOTE(review): assumes parseRobotsByUserAgent derives its keys the
            // same way — confirm before changing either side.
            const ua = line.replace(/^user-agent\s*:\s*/i, "").trim().toLowerCase();
            if (expectingRules) {
                // A rule line was seen since the last User-agent: new group.
                currentAgents = [ua];
                expectingRules = false;
            }
            else {
                currentAgents.push(ua);
            }
            if (!result.has(ua))
                result.set(ua, []);
            continue;
        }
        if (/^(allow|disallow|crawl-delay|sitemap)\s*:/i.test(line)) {
            expectingRules = true;
        }
        if (/^allow\s*:/i.test(line)) {
            // RFC 9309 allows a `# comment` at the end of any line and excludes
            // "#" from path patterns, so everything from "#" onward is dropped.
            // Without this, `Allow: / # note` would be stored as "/ # note" and
            // never recognised as a root Allow.
            const value = line
                .replace(/^allow\s*:\s*/i, "")
                .replace(/#.*$/, "")
                .trim();
            if (!value)
                continue;
            for (const agent of currentAgents) {
                const bucket = result.get(agent);
                if (bucket)
                    bucket.push(value);
            }
        }
    }
    return result;
}
|
|
59
101
|
/**
 * Reports whether a Disallow pattern list contains a site-wide block,
 * i.e. an entry that is exactly `/` or `/*`.
 *
 * @param {string[] | undefined | null} patterns Disallow patterns for one user-agent.
 * @returns {boolean} `false` for a missing list or when no root block is present.
 */
export function isFullyDisallowed(patterns) {
    if (!patterns) {
        return false;
    }
    // A root block exists unless every entry differs from both root forms.
    const noRootBlock = patterns.every((entry) => entry !== "/" && entry !== "/*");
    return !noRootBlock;
}
|
|
107
|
+
/**
 * Classifies how strongly one user-agent group blocks crawling, based on its
 * Disallow and Allow pattern lists.
 *
 * Result:
 *   "none"    — no root Disallow, or a root Disallow reopened by `Allow: /` / `Allow: /*`
 *   "partial" — root Disallow plus at least one Allow pattern that is not a root Allow
 *   "full"    — root Disallow and no Allow patterns at all
 *
 * @param {string[] | undefined} disallowPatterns Disallow patterns for the agent.
 * @param {string[] | undefined} allowPatterns Allow patterns for the agent.
 * @returns {"none" | "partial" | "full"}
 */
export function blockStatus(disallowPatterns, allowPatterns) {
    const rootBlocked = isFullyDisallowed(disallowPatterns);
    if (!rootBlocked) {
        return "none";
    }
    const allowList = allowPatterns ?? [];
    // A root Allow cancels the root Disallow entirely.
    const reopensRoot = allowList.some((pattern) => pattern === "/" || pattern === "/*");
    if (reopensRoot) {
        return "none";
    }
    // Any other Allow entry leaves part of the site reachable.
    return allowList.length > 0 ? "partial" : "full";
}
|
|
65
126
|
/**
|
|
66
127
|
* Warn per blocked AI crawler; escalate to error when all configured crawlers are blocked.
|
|
67
128
|
* Wildcard blocks (`User-agent: *` + `Disallow: /`) also count as blocking each named crawler
|
|
68
129
|
* unless the crawler has its own more-permissive block.
|
|
130
|
+
* Per RFC 9309, Allow directives override Disallow when more specific (or equal length).
|
|
69
131
|
*/
|
|
70
132
|
export function crawlerAccessRule(robotsTxtContent, options) {
|
|
71
133
|
if (!robotsTxtContent)
|
|
72
134
|
return [];
|
|
73
135
|
const crawlers = options?.crawlers ?? DEFAULT_AI_CRAWLERS;
|
|
74
|
-
const
|
|
75
|
-
const
|
|
76
|
-
const
|
|
136
|
+
const byAgentDisallow = parseRobotsByUserAgent(robotsTxtContent);
|
|
137
|
+
const byAgentAllow = parseRobotsAllowByUserAgent(robotsTxtContent);
|
|
138
|
+
const wildcardStatus = blockStatus(byAgentDisallow.get("*"), byAgentAllow.get("*"));
|
|
139
|
+
// Categorize each crawler as "full", "partial", or "none".
|
|
140
|
+
const fullyBlocked = [];
|
|
141
|
+
const partiallyBlocked = [];
|
|
77
142
|
for (const crawler of crawlers) {
|
|
78
143
|
const key = crawler.toLowerCase();
|
|
79
|
-
const
|
|
80
|
-
if (
|
|
81
|
-
// No explicit
|
|
82
|
-
if (
|
|
83
|
-
|
|
144
|
+
const hasOwnGroup = byAgentDisallow.has(key) || byAgentAllow.has(key);
|
|
145
|
+
if (!hasOwnGroup) {
|
|
146
|
+
// No explicit group — inherit the wildcard status.
|
|
147
|
+
if (wildcardStatus === "full")
|
|
148
|
+
fullyBlocked.push(crawler);
|
|
149
|
+
else if (wildcardStatus === "partial")
|
|
150
|
+
partiallyBlocked.push(crawler);
|
|
84
151
|
continue;
|
|
85
152
|
}
|
|
86
|
-
|
|
87
|
-
|
|
153
|
+
const status = blockStatus(byAgentDisallow.get(key), byAgentAllow.get(key));
|
|
154
|
+
if (status === "full")
|
|
155
|
+
fullyBlocked.push(crawler);
|
|
156
|
+
else if (status === "partial")
|
|
157
|
+
partiallyBlocked.push(crawler);
|
|
88
158
|
}
|
|
89
|
-
if (
|
|
159
|
+
if (fullyBlocked.length === 0 && partiallyBlocked.length === 0)
|
|
90
160
|
return [];
|
|
91
161
|
const findings = [];
|
|
92
|
-
const
|
|
93
|
-
if (
|
|
162
|
+
const allFullyBlocked = fullyBlocked.length === crawlers.length && partiallyBlocked.length === 0;
|
|
163
|
+
if (allFullyBlocked) {
|
|
94
164
|
findings.push({
|
|
95
165
|
ruleId: "aeo/crawler-access",
|
|
96
166
|
severity: "error",
|
|
97
167
|
// High: blocking ALL crawlers is either deliberate (clear intent) or a clear
|
|
98
168
|
// mistake — either way the finding is unambiguous.
|
|
99
169
|
confidence: "high",
|
|
100
|
-
message: `robots.txt blocks all ${crawlers.length} configured AI crawlers: ${
|
|
170
|
+
message: `robots.txt blocks all ${crawlers.length} configured AI crawlers: ${fullyBlocked.join(", ")}.`,
|
|
101
171
|
fix: `Blocking every AI crawler makes your pages invisible to answer engines. ` +
|
|
102
172
|
`Sites uncited in AI Overviews lose ~68% of traffic vs ~12% for cited sites. ` +
|
|
103
173
|
`Remove the Disallow rules for these crawlers unless you have a specific legal or competitive reason to block them.`,
|
|
104
174
|
});
|
|
105
175
|
return findings;
|
|
106
176
|
}
|
|
107
|
-
for (const crawler of
|
|
177
|
+
for (const crawler of fullyBlocked) {
|
|
108
178
|
findings.push({
|
|
109
179
|
ruleId: "aeo/crawler-access",
|
|
110
180
|
severity: "warning",
|
|
@@ -117,6 +187,17 @@ export function crawlerAccessRule(robotsTxtContent, options) {
|
|
|
117
187
|
`If selective blocking is intentional (e.g. admin routes only), narrow the Disallow pattern instead of blocking the whole site.`,
|
|
118
188
|
});
|
|
119
189
|
}
|
|
190
|
+
for (const crawler of partiallyBlocked) {
|
|
191
|
+
findings.push({
|
|
192
|
+
ruleId: "aeo/crawler-access",
|
|
193
|
+
severity: "warning",
|
|
194
|
+
confidence: "medium",
|
|
195
|
+
message: `robots.txt partially blocks ${crawler} (Disallow: / with Allow override).`,
|
|
196
|
+
fix: `Your robots.txt has "Disallow: /" for ${crawler} with some Allow paths that reopen specific routes. ` +
|
|
197
|
+
`While this is a partial block, crawlers may still miss large parts of your site. ` +
|
|
198
|
+
`Consider narrowing the Disallow directive to only the paths you actually want to restrict.`,
|
|
199
|
+
});
|
|
200
|
+
}
|
|
120
201
|
return findings;
|
|
121
202
|
}
|
|
122
203
|
//# sourceMappingURL=crawler-access.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler-access.js","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,cAAc;IACd,WAAW;IACX,eAAe;IACf,YAAY;IACZ,iBAAiB;IACjB,OAAO;IACP,mBAAmB;CACX,CAAC;AAEX;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,8EAA8E;gBAC9E,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,iBAAiB,CAAC,QAA8B;IAC9D,IAAI,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;AACvD,CAAC;AAOD
|
|
1
|
+
{"version":3,"file":"crawler-access.js","sourceRoot":"","sources":["../../../src/rules/aeo/crawler-access.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,QAAQ;IACR,cAAc;IACd,WAAW;IACX,eAAe;IACf,YAAY;IACZ,iBAAiB;IACjB,OAAO;IACP,mBAAmB;CACX,CAAC;AAEX;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAiB;IACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,8EAA8E;gBAC9E,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3D,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,2BAA2B,CAAC,SAAiB;IAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC3C,IAAI,aAAa,GAAa,EAAE,CAAC;IACjC,IAAI,cAAc,GAAG,KAAK,CAAC;IAE3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAE5C,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,EAAE,GAAG,IAAI,CAAC,
OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACxE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrB,cAAc,GAAG,KAAK,CAAC;YACzB,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,IAAI,4CAA4C,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5D,cAAc,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACxD,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACjC,IAAI,MAAM;oBAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,iBAAiB,CAAC,QAA8B;IAC9D,IAAI,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;AACvD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CACzB,gBAAsC,EACtC,aAAmC;IAEnC,IAAI,CAAC,iBAAiB,CAAC,gBAAgB,CAAC;QAAE,OAAO,MAAM,CAAC;IAExD,MAAM,MAAM,GAAG,aAAa,IAAI,EAAE,CAAC;IAEnC,mEAAmE;IACnE,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAE/D,2DAA2D;IAC3D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IAExC,OAAO,MAAM,CAAC;AAChB,CAAC;AAOD;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAC/B,gBAAwB,EACxB,OAA8B;IAE9B,IAAI,CAAC,gBAAgB;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,mBAAmB,CAAC;IAC1D,MAAM,eAAe,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IACjE,MAAM,YAAY,GAAG,2BAA2B,CAAC,gBAAgB,CAAC,CAAC;IAEnE,MAAM,cAAc,GAAG,WAAW,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAEpF,2DAA2D;IAC3D,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,gBAAgB,GAAa,EAAE,CAAC;IAEtC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,YA
AY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEtE,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,mDAAmD;YACnD,IAAI,cAAc,KAAK,MAAM;gBAAE,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBACrD,IAAI,cAAc,KAAK,SAAS;gBAAE,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtE,SAAS;QACX,CAAC;QAED,MAAM,MAAM,GAAG,WAAW,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5E,IAAI,MAAM,KAAK,MAAM;YAAE,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;aAC7C,IAAI,MAAM,KAAK,SAAS;YAAE,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE1E,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GACnB,YAAY,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,CAAC;IAE3E,IAAI,eAAe,EAAE,CAAC;QACpB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,OAAO;YACjB,6EAA6E;YAC7E,mDAAmD;YACnD,UAAU,EAAE,MAAM;YAClB,OAAO,EAAE,yBAAyB,QAAQ,CAAC,MAAM,4BAA4B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;YACvG,GAAG,EACD,0EAA0E;gBAC1E,8EAA8E;gBAC9E,oHAAoH;SACvH,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,SAAS;YACnB,4EAA4E;YAC5E,gDAAgD;YAChD,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,qBAAqB,OAAO,GAAG;YACxC,GAAG,EACD,sDAAsD,OAAO,uBAAuB;gBACpF,YAAY,OAAO,8DAA8D;gBACjF,gIAAgI;SACnI,CAAC,CAAC;IACL,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,+BAA+B,OAAO,qCAAqC;YACpF,GAAG,EACD,yCAAyC,OAAO,sDAAsD;gBACtG,mFAAmF;gBACnF,4FAA4F;SAC/F,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"summary-bait.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGhF,MAAM,WAAW,kBAAkB;IACjC,kFAAkF;IAClF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oGAAoG;IACpG,gCAAgC,CAAC,EAAE,MAAM,CAAC;IAC1C,yFAAyF;IACzF,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AA8FD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,kBAAkB,GAC3B,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"summary-bait.d.ts","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGhF,MAAM,WAAW,kBAAkB;IACjC,kFAAkF;IAClF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,oGAAoG;IACpG,gCAAgC,CAAC,EAAE,MAAM,CAAC;IAC1C,yFAAyF;IACzF,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AA8FD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,kBAAkB,GAC3B,UAAU,EAAE,CA+Cd"}
|
|
@@ -125,9 +125,10 @@ export function summaryBaitRule(pages, entityPatterns, options) {
|
|
|
125
125
|
continue;
|
|
126
126
|
findings.push({
|
|
127
127
|
ruleId: "aeo/summary-bait",
|
|
128
|
-
|
|
129
|
-
//
|
|
130
|
-
//
|
|
128
|
+
// Warning, not error: this is a forecast — we measure what AI MIGHT do (cite
|
|
129
|
+
// without sending the click), not what it WILL do for any given page. An
|
|
130
|
+
// error severity would overstate a probabilistic, page-shape signal.
|
|
131
|
+
severity: "warning",
|
|
131
132
|
confidence: "medium",
|
|
132
133
|
message: `${page.url} is optimized for summarization, not retention. ` +
|
|
133
134
|
`${Math.round(concentration * 100)}% of citable facts (${openerFacts.length}/${fullFacts.length}) ` +
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"summary-bait.js","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAE/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAW1D,+EAA+E;AAC/E,+EAA+E;AAC/E,sDAAsD;AACtD,MAAM,aAAa,GAAa;IAC9B,qBAAqB;IACrB,oBAAoB;IACpB,qFAAqF;IACrF,uHAAuH;IACvH,wBAAwB;IACxB,gCAAgC;CACjC,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,MAAM;IACN,0BAA0B;IAC1B,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,qBAAqB;IACrB,QAAQ;IACR,oBAAoB;IACpB,mBAAmB;IACnB,aAAa;IACb,eAAe;IACf,UAAU;IACV,aAAa;IACb,OAAO;IACP,UAAU;IACV,YAAY;IACZ,cAAc;IACd,SAAS;IACT,aAAa;CACd,CAAC;AACF,MAAM,gBAAgB,GAAG,8CAA8C,CAAC;AAExE,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC/B,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY;IAC9C,4EAA4E;IAC5E,4EAA4E;IAC5E,6EAA6E;IAC7E,sEAAsE;IACtE,MAAM,MAAM,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE;QACxC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC;QACjB,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IACH,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;IAC9F,OAAO,OAAO,IAAI,kBAAkB,CAAC;AACvC,CAAC;AAED,SAAS,yBAAyB,CAAC,IAAY,EAAE,WAAmB;IAClE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,KAAK,MAAM,GAAG,IAAI,qBAAqB,EAAE,CAAC;QACxC,IAAI,CAAC;YACH,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC;QACrC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IACpC,CAAC;IACD,IAAI,sEAAsE,CAA
C,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnG,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,IAAI,WAAW;YAAE,OAAO,KAAK,CAAC;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACpF,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,SAAS,EAAE,CAAC;YACzC,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC,CAAC;IACH,IAAI,WAAW;QAAE,OAAO,IAAI,CAAC;IAC7B,OAAO,0EAA0E,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACtG,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,cAAmC,EACnC,OAA4B;IAE5B,MAAM,WAAW,GAAG,OAAO,EAAE,eAAe,IAAI,GAAG,CAAC;IACpD,MAAM,SAAS,GAAG,OAAO,EAAE,gCAAgC,IAAI,GAAG,CAAC;IACnE,MAAM,QAAQ,GAAG,OAAO,EAAE,iBAAiB,IAAI,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACrD,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC;YAAE,SAAS;QAErE,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACrE,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3E,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,IAAI,SAAS,CAAC,MAAM,GAAG,QAAQ;YAAE,SAAS;QAE1C,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;QAC5D,IAAI,aAAa,GAAG,SAAS;YAAE,SAAS;QAExC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,
|
|
1
|
+
{"version":3,"file":"summary-bait.js","sourceRoot":"","sources":["../../../src/rules/aeo/summary-bait.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAE/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAW1D,+EAA+E;AAC/E,+EAA+E;AAC/E,sDAAsD;AACtD,MAAM,aAAa,GAAa;IAC9B,qBAAqB;IACrB,oBAAoB;IACpB,qFAAqF;IACrF,uHAAuH;IACvH,wBAAwB;IACxB,gCAAgC;CACjC,CAAC;AAEF,MAAM,qBAAqB,GAAG;IAC5B,MAAM;IACN,0BAA0B;IAC1B,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,qBAAqB;IACrB,QAAQ;IACR,oBAAoB;IACpB,mBAAmB;IACnB,aAAa;IACb,eAAe;IACf,UAAU;IACV,aAAa;IACb,OAAO;IACP,UAAU;IACV,YAAY;IACZ,cAAc;IACd,SAAS;IACT,aAAa;CACd,CAAC;AACF,MAAM,gBAAgB,GAAG,8CAA8C,CAAC;AAExE,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC/B,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,QAA6B;IAClE,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,QAAQ;QAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY;IAC9C,4EAA4E;IAC5E,4EAA4E;IAC5E,6EAA6E;IAC7E,sEAAsE;IACtE,MAAM,MAAM,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC3C,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE;QACxC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAC;QACjB,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;IACH,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;IAC9F,OAAO,OAAO,IAAI,kBAAkB,CAAC;AACvC,CAAC;AAED,SAAS,yBAAyB,CAAC,IAAY,EAAE,WAAmB;IAClE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,KAAK,MAAM,GAAG,IAAI,qBAAqB,EAAE,CAAC;QACxC,IAAI,CAAC;YACH,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC;QACrC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IACpC,CAAC;IACD,IAAI,sEAAsE,CAA
C,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACnG,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,IAAI,WAAW;YAAE,OAAO,KAAK,CAAC;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACpF,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,SAAS,EAAE,CAAC;YACzC,WAAW,GAAG,IAAI,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC,CAAC,CAAC;IACH,IAAI,WAAW;QAAE,OAAO,IAAI,CAAC;IAC7B,OAAO,0EAA0E,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AACtG,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,cAAmC,EACnC,OAA4B;IAE5B,MAAM,WAAW,GAAG,OAAO,EAAE,eAAe,IAAI,GAAG,CAAC;IACpD,MAAM,SAAS,GAAG,OAAO,EAAE,gCAAgC,IAAI,GAAG,CAAC;IACnE,MAAM,QAAQ,GAAG,OAAO,EAAE,iBAAiB,IAAI,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QACrD,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC;YAAE,SAAS;QAErE,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACrE,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3E,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,IAAI,SAAS,CAAC,MAAM,GAAG,QAAQ;YAAE,SAAS;QAE1C,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;QAC5D,IAAI,aAAa,GAAG,SAAS;YAAE,SAAS;QAExC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,kBAAkB;YAC1B,6EAA6E;YAC7E,yEAAyE;YACzE,qEAAqE;YACrE,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,kDAAkD;gBAC7D,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,GAAG,CAAC,uBAAuB,WAAW,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,IAAI;gBACnG,oBAAoB,WAAW,mEAAmE;gBAClG,gGAAgG;gBAChG,sFAAsF;YACxF,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,wFAAwF;gBACxF,8FAA8F;gBAC9F,+FAA+F;gBAC/F,8BAA8B;gBAC9B,qGAAqG;gBACrG,oBAAoB;gBACpB,wEAA
wE;SAC3E,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { EntityMaskPattern, ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
export interface CitationCoverageOptions {
|
|
3
|
+
/** Quantified-claim count at/above which an authoritative citation is expected. Default: 4. */
|
|
4
|
+
minClaims?: number;
|
|
5
|
+
/** Authoritative citations below which the rule fires (when claims >= minClaims). Default: 1. */
|
|
6
|
+
minAuthoritative?: number;
|
|
7
|
+
/** Extra authoritative domains, merged with the extractor default allowlist. */
|
|
8
|
+
allowlist?: readonly string[];
|
|
9
|
+
}
|
|
10
|
+
export declare function citationCoverageRule(pages: ParsedPage[], entityPatterns: EntityMaskPattern[], options?: CitationCoverageOptions): RuleResult[];
|
|
11
|
+
//# sourceMappingURL=citation-coverage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation-coverage.d.ts","sourceRoot":"","sources":["../../../src/rules/content/citation-coverage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGhF,MAAM,WAAW,uBAAuB;IACtC,+FAA+F;IAC/F,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iGAAiG;IACjG,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,gFAAgF;IAChF,SAAS,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CAC/B;AAED,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,cAAc,EAAE,iBAAiB,EAAE,EACnC,OAAO,CAAC,EAAE,uBAAuB,GAChC,UAAU,EAAE,CA6Cd"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { extractPageFacts, DEFAULT_CITATION_ALLOWLIST } from "../../algorithms/fact-extraction.js";
|
|
2
|
+
export function citationCoverageRule(pages, entityPatterns, options) {
|
|
3
|
+
const minClaims = options?.minClaims ?? 4;
|
|
4
|
+
const minAuthoritative = options?.minAuthoritative ?? 1;
|
|
5
|
+
// Merge caller-supplied domains with the default allowlist (additive, per the
|
|
6
|
+
// option contract) rather than replacing it.
|
|
7
|
+
const allowlist = options?.allowlist
|
|
8
|
+
? [...DEFAULT_CITATION_ALLOWLIST, ...options.allowlist]
|
|
9
|
+
: undefined;
|
|
10
|
+
const findings = [];
|
|
11
|
+
for (const page of pages) {
|
|
12
|
+
const facts = extractPageFacts(page, entityPatterns, allowlist);
|
|
13
|
+
// "Quantified claims": distinct numeric facts + measurements + grounded claims.
|
|
14
|
+
const quantified = new Set([
|
|
15
|
+
...facts.citableFacts,
|
|
16
|
+
...facts.measurements.map((m) => m.value),
|
|
17
|
+
]);
|
|
18
|
+
const statClaims = quantified.size + facts.groundedClaims.length;
|
|
19
|
+
const authoritative = facts.citations.filter((c) => c.authority === "authoritative").length;
|
|
20
|
+
if (statClaims < minClaims)
|
|
21
|
+
continue;
|
|
22
|
+
if (authoritative >= minAuthoritative)
|
|
23
|
+
continue;
|
|
24
|
+
const entityNames = facts.namedEntities.slice(0, 4).map((e) => e.value).join(", ");
|
|
25
|
+
const entityNote = entityNames ? ` (${facts.namedEntities.length} named entities: ${entityNames})` : "";
|
|
26
|
+
findings.push({
|
|
27
|
+
ruleId: "content/citation-coverage",
|
|
28
|
+
severity: "warning",
|
|
29
|
+
// Low in general; the grounded-claim portion is speculative. A page can
|
|
30
|
+
// legitimately make claims without citing (opinion, first-party data).
|
|
31
|
+
confidence: "low",
|
|
32
|
+
message: `${page.url} makes ${statClaims} quantified claim${statClaims === 1 ? "" : "s"} ` +
|
|
33
|
+
`but cites ${authoritative} authoritative source${authoritative === 1 ? "" : "s"}${entityNote}.`,
|
|
34
|
+
pageUrl: page.url,
|
|
35
|
+
fix: "Cite the primary sources behind your numbers — link the statute, standard, dataset, " +
|
|
36
|
+
".gov/.edu page, or research that backs each statistic. AI Overviews and Google's " +
|
|
37
|
+
"helpful-content systems weight pages that ground claims in authoritative references. " +
|
|
38
|
+
"Note: this rule detects statistic+citation co-occurrence, not semantic correctness.",
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
return findings;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=citation-coverage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation-coverage.js","sourceRoot":"","sources":["../../../src/rules/content/citation-coverage.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,0BAA0B,EAAE,MAAM,qCAAqC,CAAC;AAWnG,MAAM,UAAU,oBAAoB,CAClC,KAAmB,EACnB,cAAmC,EACnC,OAAiC;IAEjC,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,CAAC,CAAC;IAC1C,MAAM,gBAAgB,GAAG,OAAO,EAAE,gBAAgB,IAAI,CAAC,CAAC;IACxD,8EAA8E;IAC9E,6CAA6C;IAC7C,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS;QAClC,CAAC,CAAC,CAAC,GAAG,0BAA0B,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC;QACvD,CAAC,CAAC,SAAS,CAAC;IACd,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,EAAE,cAAc,EAAE,SAAS,CAAC,CAAC;QAChE,gFAAgF;QAChF,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS;YACjC,GAAG,KAAK,CAAC,YAAY;YACrB,GAAG,KAAK,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;SAC1C,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,GAAG,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC;QACjE,MAAM,aAAa,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAC,MAAM,CAAC;QAE5F,IAAI,UAAU,GAAG,SAAS;YAAE,SAAS;QACrC,IAAI,aAAa,IAAI,gBAAgB;YAAE,SAAS;QAEhD,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnF,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,aAAa,CAAC,MAAM,oBAAoB,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAExG,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,QAAQ,EAAE,SAAS;YACnB,wEAAwE;YACxE,uEAAuE;YACvE,UAAU,EAAE,KAAK;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oBAAoB,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG;gBACjF,aAAa,aAAa,wBAAwB,aAAa,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,UAAU,GAAG;YAClG,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,sFAAsF;gBACtF,mFAAmF;gBACnF,uFAAuF;gBACvF,qFAAqF;SACxF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"common-phrase-reuse.d.ts","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"common-phrase-reuse.d.ts","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyH7D;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAwBvE"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { proseTextExcludingExamples } from "../../algorithms/example-regions.js";
|
|
1
2
|
const RULE_ID = "content/common-phrase-reuse";
|
|
2
3
|
/**
|
|
3
4
|
* Detects overuse of pSEO marketing clichés in page body content.
|
|
@@ -113,9 +114,13 @@ function buildMessage(url, count, matchedPhrases) {
|
|
|
113
114
|
export function commonPhraseReuseRule(pages) {
|
|
114
115
|
const results = [];
|
|
115
116
|
for (const page of pages) {
|
|
116
|
-
|
|
117
|
+
// Judge the page's OWN prose: strip quoted-example/code regions so a page
|
|
118
|
+
// that *teaches* about clichés (an explainer or style guide) isn't flagged
|
|
119
|
+
// for the examples it quotes. Falls back to contentText when html is absent.
|
|
120
|
+
const prose = proseTextExcludingExamples(page);
|
|
121
|
+
if (!prose)
|
|
117
122
|
continue;
|
|
118
|
-
const matched = findMatchedPhrases(
|
|
123
|
+
const matched = findMatchedPhrases(prose);
|
|
119
124
|
if (matched.length < FIRE_THRESHOLD)
|
|
120
125
|
continue;
|
|
121
126
|
results.push({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"common-phrase-reuse.js","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"common-phrase-reuse.js","sourceRoot":"","sources":["../../../src/rules/content/common-phrase-reuse.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,0BAA0B,EAAE,MAAM,qCAAqC,CAAC;AAEjF,MAAM,OAAO,GAAG,6BAA6B,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,mBAAmB;AACnB,MAAM,gBAAgB,GAAG;IACvB,iBAAiB;IACjB,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,qBAAqB;IACrB,aAAa;IACb,oBAAoB;IACpB,uBAAuB;IACvB,cAAc;IACd,iBAAiB;CACT,CAAC;AAEX,yBAAyB;AACzB,MAAM,iBAAiB,GAAG;IACxB,mBAAmB;IACnB,sBAAsB;IACtB,kBAAkB;IAClB,oBAAoB;IACpB,yBAAyB;IACzB,qBAAqB;IACrB,qBAAqB;IACrB,aAAa;IACb,cAAc;IACd,gBAAgB;CACR,CAAC;AAEX,6BAA6B;AAC7B,MAAM,kBAAkB,GAAG;IACzB,WAAW;IACX,kBAAkB;IAClB,kBAAkB;IAClB,qBAAqB;IACrB,mBAAmB;IACnB,kBAAkB;IAClB,mBAAmB;IACnB,sBAAsB;CACd,CAAC;AAEX,iBAAiB;AACjB,MAAM,cAAc,GAAG;IACrB,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,mBAAmB;IACnB,oBAAoB;IACpB,gBAAgB;IAChB,mBAAmB;CACX,CAAC;AAEX,gBAAgB;AAChB,MAAM,aAAa,GAAG;IACpB,qBAAqB;IACrB,cAAc;IACd,uBAAuB;IACvB,iBAAiB;IACjB,iBAAiB;IACjB,aAAa;IACb,aAAa;CACL,CAAC;AAEX,MAAM,WAAW,GAAsB;IACrC,GAAG,gBAAgB;IACnB,GAAG,iBAAiB;IACpB,GAAG,kBAAkB;IACrB,GAAG,cAAc;IACjB,GAAG,aAAa;CACjB,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,SAAS,kBAAkB,CAAC,WAAmB;IAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IACxC,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,cAAwB;IACxE,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5E,OAAO,CACL,GAAG,GAAG,WAAW,KAAK,wCAAwC,QAAQ,KAAK;QAC3E,0FAA0F,CAC3F,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,0EAA0E;QAC1E,2EAA2E;QAC3E,6EAA6E;QAC7E,MAAM,KAAK,GAAG,0BAA0B,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,OAAO,CAAC,MAAM,GAAG,cAAc;YAAE,SAAS;QAE9C,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,
EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC;YACxD,GAAG,EAAE,yLAAyL;YAC9L,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"regurgitated-content.d.ts","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"regurgitated-content.d.ts","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAkG7D,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsCzE"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as cheerio from "cheerio";
|
|
2
|
+
import { EXAMPLE_REGION_SELECTOR } from "../../algorithms/example-regions.js";
|
|
2
3
|
const RULE_ID = "content/regurgitated-content";
|
|
3
4
|
/**
|
|
4
5
|
* Detects the Google Places API regurgitation pattern: sites that lift
|
|
@@ -86,12 +87,20 @@ export function regurgitatedContentRule(pages) {
|
|
|
86
87
|
if (!html)
|
|
87
88
|
continue;
|
|
88
89
|
const $ = cheerio.load(html);
|
|
90
|
+
// Drop quoted-example/code regions before scanning so an explainer page that
|
|
91
|
+
// *documents* the regurgitation patterns (e.g. /rules/regurgitated-content,
|
|
92
|
+
// which quotes "powered by Google", the Static Maps URL, and the Places API
|
|
93
|
+
// JS marker as code) isn't flagged for teaching them. We deliberately keep
|
|
94
|
+
// <script>/<style> in place: a real Places-scraping site carries the JS in a
|
|
95
|
+
// live <script>, which must still trip the detector.
|
|
96
|
+
$(EXAMPLE_REGION_SELECTOR).remove();
|
|
97
|
+
const cleanedHtml = $.html();
|
|
89
98
|
const eeat = eeatSignalCount(page);
|
|
90
99
|
const signals = [
|
|
91
100
|
checkGoogleAttribution($),
|
|
92
101
|
checkGoogleImagesDominate($),
|
|
93
|
-
checkStaticMapsEmbed($,
|
|
94
|
-
checkPlacesApiJs(
|
|
102
|
+
checkStaticMapsEmbed($, cleanedHtml),
|
|
103
|
+
checkPlacesApiJs(cleanedHtml),
|
|
95
104
|
checkAggregatorFootprint($, eeat),
|
|
96
105
|
];
|
|
97
106
|
const fired = signals.filter((s) => s.fired);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"regurgitated-content.js","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"regurgitated-content.js","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,qCAAqC,CAAC;AAE9E,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;GAOG;AAEH,+DAA+D;AAC/D,MAAM,qBAAqB,GAAG,oBAAoB,CAAC;AAEnD,MAAM,gBAAgB,GAAG;IACvB,uBAAuB;IACvB,2BAA2B;IAC3B,0CAA0C;IAC1C,oCAAoC;CACrC,CAAC;AAEF,MAAM,iBAAiB,GAAG,8CAA8C,CAAC;AACzE,MAAM,iBAAiB,GAAG,iDAAiD,CAAC;AAE5E,MAAM,gBAAgB,GACpB,6FAA6F,CAAC;AAEhG,+EAA+E;AAC/E,kEAAkE;AAClE,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,0BAA0B,GAAG,CAAC,CAAC;AACrC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AACvC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAO5B,SAAS,sBAAsB,CAAC,CAAqB;IACnD,MAAM,KAAK,GAAG,2BAA2B,CAAC;IAC1C,IAAI,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACxE,MAAM,SAAS,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC9E,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,yBAAyB,CAAC,CAAqB;IACtD,MAAM,KAAK,GAAG,uCAAuC,CAAC;IACtD,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,IAAI,IAAI,CAAC,MAAM,GAAG,0BAA0B;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC7E,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC,MAAM,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;AACnF,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAqB,EAAE,IAAY;IAC/D,MAAM,KAAK,GAAG,kCAAkC,CAAC;IACjD,IAAI,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,MAAM,cAAc,GAClB,CAAC,CAAC,aAAa,CAAC;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,iBAAiB,CAAC
,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,EAAE,KAAK,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;AAChF,CAAC;AAED,SAAS,eAAe,CAAC,IAAgB;IACvC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAChF,IAAI,IAAI,CAAC,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,IAAI,CAAC,CAAC;IACjE,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,wBAAwB,CAAC,CAAqB,EAAE,IAAY;IACnE,MAAM,KAAK,GAAG,2DAA2D,CAAC;IAC1E,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC;SACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACpD,MAAM,CAAC;IACV,OAAO,EAAE,KAAK,EAAE,KAAK,IAAI,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,6EAA6E;QAC7E,4EAA4E;QAC5E,4EAA4E;QAC5E,2EAA2E;QAC3E,6EAA6E;QAC7E,qDAAqD;QACrD,CAAC,CAAC,uBAAuB,CAAC,CAAC,MAAM,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAmB;YAC9B,sBAAsB,CAAC,CAAC,CAAC;YACzB,yBAAyB,CAAC,CAAC,CAAC;YAC5B,oBAAoB,CAAC,CAAC,EAAE,WAAW,CAAC;YACpC,gBAAgB,CAAC,WAAW,CAAC;YAC7B,wBAAwB,CAAC,CAAC,EAAE,IAAI,CAAC;SAClC,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,
QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,2CAA2C,UAAU,GAAG;YAC7F,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,8TAA8T;YACnU,UAAU,EAAE,aAAa;SAC1B,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"translation-no-op.d.ts","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAoC7D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,
|
|
1
|
+
{"version":3,"file":"translation-no-op.d.ts","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAoC7D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoErE"}
|
|
@@ -88,7 +88,11 @@ export function translationNoOpRule(pages) {
|
|
|
88
88
|
: `${(minSim * 100).toFixed(0)}%--${(maxSim * 100).toFixed(0)}%`;
|
|
89
89
|
findings.push({
|
|
90
90
|
ruleId: "content/translation-no-op",
|
|
91
|
-
|
|
91
|
+
// Warning, not error: an untranslated locale variant is a real duplicate-
|
|
92
|
+
// content gap but a should-fix, not a ship-blocker — and multilingual sites
|
|
93
|
+
// can legitimately share some body text (disclaimers, spec tables).
|
|
94
|
+
severity: "warning",
|
|
95
|
+
confidence: "medium",
|
|
92
96
|
message: `${members.length} locale variants of "${basePath}" share identical content ` +
|
|
93
97
|
`(similarity ${simLabel}). Translate the body or consolidate to the canonical version.`,
|
|
94
98
|
pageUrl: urls[0],
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"translation-no-op.js","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGvG,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC;;;;;;;GAOG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C;;;GAGG;AACH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,oCAAoC;IACpC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuD,CAAC;IAE9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEjC,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,+BAA+B,CAChG,CAAC;QACF,IAAI,WAAW;YAAE,SAAS;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QACvE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,KAAK,CAAC;
QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,MAAM,GAAG,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1E,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,IAAI,oBAAoB;oBAAE,QAAQ,GAAG,IAAI,CAAC;YACnD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,QAAQ,GACZ,MAAM,KAAK,MAAM;YACf,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACjC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAErE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"translation-no-op.js","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGvG,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC;;;;;;;GAOG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C;;;GAGG;AACH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,oCAAoC;IACpC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuD,CAAC;IAE9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEjC,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,+BAA+B,CAChG,CAAC;QACF,IAAI,WAAW;YAAE,SAAS;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QACvE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,KAAK,CAAC;
QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,MAAM,GAAG,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1E,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,IAAI,oBAAoB;oBAAE,QAAQ,GAAG,IAAI,CAAC;YACnD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,QAAQ,GACZ,MAAM,KAAK,MAAM;YACf,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACjC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAErE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,0EAA0E;YAC1E,4EAA4E;YAC5E,oEAAoE;YACpE,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EACL,GAAG,OAAO,CAAC,MAAM,wBAAwB,QAAQ,4BAA4B;gBAC7E,eAAe,QAAQ,gEAAgE;YACzF,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;YAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC7B,GAAG,EAAE,qKAAqK;SAC3K,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1,3 +1,17 @@
|
|
|
1
1
|
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
-
export
|
|
2
|
+
export interface UniqueValueThresholds {
|
|
3
|
+
/** Unique-content density below this fires (info). Default 0.20. */
|
|
4
|
+
passBelow: number;
|
|
5
|
+
/** Density below this escalates to error. Default 0.12. */
|
|
6
|
+
errorBelow: number;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
|
|
10
|
+
* token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
|
|
11
|
+
* if on every page — and averaged over the page's distinct tokens. A near-
|
|
12
|
+
* duplicate / boilerplate page scores low regardless of corpus size or length; a
|
|
13
|
+
* large original page stays high. Continuous, so it doesn't shuffle at the margin.
|
|
14
|
+
* Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
|
|
15
|
+
*/
|
|
16
|
+
export declare function uniqueValueRule(pages: ParsedPage[], thresholds: UniqueValueThresholds): RuleResult[];
|
|
3
17
|
//# sourceMappingURL=unique-value.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,qBAAqB;IACpC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;CACpB;AAYD;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,UAAU,EAAE,qBAAqB,GAChC,UAAU,EAAE,CAwCd"}
|
|
@@ -1,51 +1,58 @@
|
|
|
1
1
|
function tokenize(text) {
|
|
2
|
-
//
|
|
3
|
-
//
|
|
4
|
-
// the "unique" count (a word that's shared but happens to carry a trailing
|
|
5
|
-
// comma on one page looked unique) — false precision in the shared/unique
|
|
6
|
-
// split this rule now surfaces.
|
|
2
|
+
// Lowercase, split on whitespace, strip edge punctuation so "word", "word."
|
|
3
|
+
// and "(word)" are one token.
|
|
7
4
|
return text
|
|
8
5
|
.toLowerCase()
|
|
9
6
|
.split(/\s+/)
|
|
10
7
|
.map((t) => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
|
|
11
8
|
.filter(Boolean);
|
|
12
9
|
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
|
|
12
|
+
* token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
|
|
13
|
+
* if on every page — and averaged over the page's distinct tokens. A near-
|
|
14
|
+
* duplicate / boilerplate page scores low regardless of corpus size or length; a
|
|
15
|
+
* large original page stays high. Continuous, so it doesn't shuffle at the margin.
|
|
16
|
+
* Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
|
|
17
|
+
*/
|
|
18
|
+
export function uniqueValueRule(pages, thresholds) {
|
|
19
|
+
const { passBelow, errorBelow } = thresholds;
|
|
20
|
+
const N = pages.length;
|
|
21
|
+
const lnN = Math.log(N);
|
|
22
|
+
if (N <= 1 || lnN === 0)
|
|
23
|
+
return []; // can't measure rarity against a single page
|
|
24
|
+
const df = new Map();
|
|
25
|
+
const pageDistinct = pages.map((p) => new Set(tokenize(p.contentText)));
|
|
26
|
+
for (const distinct of pageDistinct) {
|
|
27
|
+
for (const t of distinct)
|
|
28
|
+
df.set(t, (df.get(t) ?? 0) + 1);
|
|
20
29
|
}
|
|
21
30
|
const findings = [];
|
|
22
|
-
pages.forEach((page,
|
|
23
|
-
const distinct =
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
});
|
|
48
|
-
}
|
|
31
|
+
pages.forEach((page, i) => {
|
|
32
|
+
const distinct = pageDistinct[i];
|
|
33
|
+
if (distinct.size === 0)
|
|
34
|
+
return; // empty page → thin-content handles it
|
|
35
|
+
let mass = 0;
|
|
36
|
+
for (const t of distinct)
|
|
37
|
+
mass += Math.log(N / (df.get(t) ?? 1)) / lnN;
|
|
38
|
+
const density = mass / distinct.size;
|
|
39
|
+
if (density >= passBelow)
|
|
40
|
+
return;
|
|
41
|
+
const severity = density < errorBelow ? "error" : "info";
|
|
42
|
+
const pct = (density * 100).toFixed(1);
|
|
43
|
+
findings.push({
|
|
44
|
+
ruleId: "content/unique-value",
|
|
45
|
+
severity,
|
|
46
|
+
message: `${page.url} has low unique-content density ${density.toFixed(3)} ` +
|
|
47
|
+
`(${pct}% of its ${distinct.size} distinct words are page-distinctive; floor ${passBelow.toFixed(2)}). ` +
|
|
48
|
+
`Most of its vocabulary also appears on other pages.`,
|
|
49
|
+
pageUrl: page.url,
|
|
50
|
+
fix: `Raise originality density: add page-specific text — a distinct lead, this ` +
|
|
51
|
+
`record's own facts, page-specific examples. Content repeated across pages on ` +
|
|
52
|
+
`the same axis (boilerplate, shared legal/spec blocks, per-axis data like a ` +
|
|
53
|
+
`role's regulations across that role's documents) is common vocabulary and ` +
|
|
54
|
+
`does NOT raise density, even when it is useful.`,
|
|
55
|
+
});
|
|
49
56
|
});
|
|
50
57
|
return findings;
|
|
51
58
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AASA,SAAS,QAAQ,CAAC,IAAY;IAC5B,4EAA4E;IAC5E,8BAA8B;IAC9B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,UAAiC;IAEjC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,UAAU,CAAC;IAC7C,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,CAAC,6CAA6C;IAEjF,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;IACxE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACxB,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,CAAC,uCAAuC;QACxE,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QACrC,IAAI,OAAO,IAAI,SAAS;YAAE,OAAO;QAEjC,MAAM,QAAQ,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;QACzD,MAAM,GAAG,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ;YACR,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,mCAAmC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBACnE,IAAI,GAAG,YAAY,QAAQ,CAAC,IAAI,+CAA+C,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACxG,qDAAqD;YACvD,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,+EAA+E;gBAC/E,6EAA6E;gBAC7E,4EAA4E;gBAC5E,iDAAiD;SACpD,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IA
CH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AAuIvE;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAmBtF"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { hasAuthoritativeCitation } from "../../algorithms/fact-extraction.js";
|
|
1
2
|
const RULE_ID = "content/value-add";
|
|
2
3
|
const EEAT_HTML_PATTERNS = [
|
|
3
4
|
/last\s+updated/i,
|
|
@@ -15,7 +16,8 @@ function countEeatCategories(page) {
|
|
|
15
16
|
count += 1;
|
|
16
17
|
if (page.publishedDate)
|
|
17
18
|
count += 1;
|
|
18
|
-
if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html))
|
|
19
|
+
if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html)) ||
|
|
20
|
+
hasAuthoritativeCitation(page.resolvedHrefs, page.url))
|
|
19
21
|
count += 1;
|
|
20
22
|
return count;
|
|
21
23
|
}
|