@pseolint/core 0.6.6 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
- package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
- package/dist/algorithms/authority/commoncrawl.js +17 -0
- package/dist/algorithms/authority/commoncrawl.js.map +1 -0
- package/dist/algorithms/authority/openpagerank.d.ts +19 -0
- package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
- package/dist/algorithms/authority/openpagerank.js +42 -0
- package/dist/algorithms/authority/openpagerank.js.map +1 -0
- package/dist/algorithms/authority/provider.d.ts +16 -0
- package/dist/algorithms/authority/provider.d.ts.map +1 -0
- package/dist/algorithms/authority/provider.js +24 -0
- package/dist/algorithms/authority/provider.js.map +1 -0
- package/dist/algorithms/auto-entity-mask.d.ts +19 -0
- package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
- package/dist/algorithms/auto-entity-mask.js +102 -0
- package/dist/algorithms/auto-entity-mask.js.map +1 -0
- package/dist/algorithms/example-regions.d.ts +22 -0
- package/dist/algorithms/example-regions.d.ts.map +1 -0
- package/dist/algorithms/example-regions.js +32 -0
- package/dist/algorithms/example-regions.js.map +1 -0
- package/dist/algorithms/fact-extraction.d.ts +46 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -0
- package/dist/algorithms/fact-extraction.js +223 -0
- package/dist/algorithms/fact-extraction.js.map +1 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +55 -9
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +9 -8
- package/dist/enrich-findings.js.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/origin-preflight.d.ts +89 -0
- package/dist/origin-preflight.d.ts.map +1 -0
- package/dist/origin-preflight.js +93 -0
- package/dist/origin-preflight.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +1 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
- package/dist/rules/aeo/citable-facts.js +4 -33
- package/dist/rules/aeo/citable-facts.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts +14 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +96 -15
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +4 -3
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/citation-coverage.d.ts +11 -0
- package/dist/rules/content/citation-coverage.d.ts.map +1 -0
- package/dist/rules/content/citation-coverage.js +43 -0
- package/dist/rules/content/citation-coverage.js.map +1 -0
- package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
- package/dist/rules/content/common-phrase-reuse.js +7 -2
- package/dist/rules/content/common-phrase-reuse.js.map +1 -1
- package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
- package/dist/rules/content/regurgitated-content.js +11 -2
- package/dist/rules/content/regurgitated-content.js.map +1 -1
- package/dist/rules/content/translation-no-op.d.ts.map +1 -1
- package/dist/rules/content/translation-no-op.js +5 -1
- package/dist/rules/content/translation-no-op.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +15 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +46 -39
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +3 -1
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/links/cluster-connectivity.d.ts +7 -1
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
- package/dist/rules/links/cluster-connectivity.js +8 -2
- package/dist/rules/links/cluster-connectivity.js.map +1 -1
- package/dist/rules/links/orphan-pages.d.ts +8 -1
- package/dist/rules/links/orphan-pages.d.ts.map +1 -1
- package/dist/rules/links/orphan-pages.js +10 -1
- package/dist/rules/links/orphan-pages.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +33 -21
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +1 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/spam/entity-swap.d.ts.map +1 -1
- package/dist/rules/spam/entity-swap.js +51 -9
- package/dist/rules/spam/entity-swap.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +5 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
- package/dist/rules/tech/canonical-consistency.js +144 -28
- package/dist/rules/tech/canonical-consistency.js.map +1 -1
- package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
- package/dist/rules/tech/sitemap-completeness.js +21 -5
- package/dist/rules/tech/sitemap-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +11 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +47 -5
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +1 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/template-detection.d.ts +1 -0
- package/dist/template-detection.d.ts.map +1 -1
- package/dist/template-detection.js +1 -1
- package/dist/template-detection.js.map +1 -1
- package/dist/types.d.ts +22 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +17 -1
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
import { maskEntities } from "./entity-mask.js";
|
|
3
|
+
// --- Numeric "citable" facts: the frozen subset aeo/citable-facts counts. ---
|
|
4
|
+
// These six patterns are lifted verbatim from rules/aeo/citable-facts.ts and
|
|
5
|
+
// MUST stay byte-identical to preserve the calibration corpus.
|
|
6
|
+
// Table of the numeric "citable fact" shapes. Every regex is global so that
// String.prototype.match() yields all occurrences. The regex bodies are part of
// the calibrated behaviour and are reproduced unchanged.
const CITABLE_FACT_PATTERNS = [
    // Dollar amounts: "$1,200", "$9.99".
    { name: "dollar", regex: /\$[\d,]+(\.\d{2})?/g },
    // Percentages, integer or decimal, optional whitespace before "%".
    { name: "percent", regex: /\b\d+(\.\d+)?\s*%/g },
    // Durations: "30 days", "3-5 business days", "2 years" (case-insensitive).
    {
        name: "timeframe",
        regex: /\b\d+(?:-\d+)?\s*(business\s+days?|days?|weeks?|months?|years?|hours?|minutes?)\b/gi,
    },
    // English month-name dates: "April 15", "April 15, 2024" (case-insensitive).
    {
        name: "date",
        regex: /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}(?:,\s*\d{4})?\b/gi,
    },
    // YYYY-MM-DD shaped dates.
    { name: "isoDate", regex: /\b\d{4}-\d{2}-\d{2}\b/g },
    // Form identifiers: "Form W-9", "Form 1040".
    { name: "form", regex: /\bForm\s+[A-Z0-9][A-Z0-9-]*\b/g },
];
/**
 * Collect the distinct citable facts found in `text`.
 *
 * Patterns are applied in table order; each hit is trimmed and lower-cased,
 * and duplicates are dropped while keeping first-seen order.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Unique normalised matches, in order of first appearance.
 */
export function extractCitableFacts(text) {
    const normalisedHits = CITABLE_FACT_PATTERNS
        .flatMap(({ regex }) => text.match(regex) ?? [])
        .map((hit) => hit.trim().toLowerCase());
    // A Set preserves insertion order, which gives first-seen ordering for free.
    return [...new Set(normalisedHits)];
}
|
|
31
|
+
// --- Measurements: NEW numeric kinds, deliberately separate from citableFacts. ---
|
|
32
|
+
// Alternation of unit tokens spliced into the measurement regex below.
// Matching is case-sensitive because that regex has no `i` flag.
const MEASUREMENT_UNITS = "kg|g|lb|lbs|oz|mi|km|cm|mm|ft|in|MB|GB|TB|KB|ms|fps|mph|kWh";
// Patterns are tried in this order; the first pattern to produce a given
// normalised value decides that value's `kind`.
const MEASUREMENT_PATTERNS = [
    // "3 out of 4", "1 in 5".
    { kind: "ratio", regex: /\b\d+(?:\.\d+)?\s*(?:out of|in)\s*\d+\b/gi },
    // "3:1".
    { kind: "ratio", regex: /\b\d+\s*:\s*\d+\b/g },
    // Number followed by one of MEASUREMENT_UNITS: "5 kg", "10 GB".
    { kind: "measurement", regex: new RegExp(`\\b\\d+(?:\\.\\d+)?\\s*(?:${MEASUREMENT_UNITS})\\b`, "g") },
];
/**
 * Extract ratio and unit-measurement mentions from text.
 *
 * Each hit has its whitespace runs collapsed to a single space and is then
 * trimmed and lower-cased. A value is reported once, tagged with the kind of
 * the first pattern that produced it.
 *
 * @param {string} maskedText - Text to scan (callers pass entity-masked text).
 * @returns {{ value: string, kind: string }[]} Unique measurements in first-seen order.
 */
export function extractMeasurements(maskedText) {
    const firstByValue = new Map();
    for (const pattern of MEASUREMENT_PATTERNS) {
        for (const hit of maskedText.match(pattern.regex) ?? []) {
            const value = hit.replace(/\s+/g, " ").trim().toLowerCase();
            if (!firstByValue.has(value)) {
                firstByValue.set(value, { value, kind: pattern.kind });
            }
        }
    }
    return [...firstByValue.values()];
}
|
|
55
|
+
// Matches runs of two or more capitalised words (one uppercase ASCII letter
// followed by lowercase ASCII letters). Each following word may be preceded by a
// lowercase connector "of", "de", "and" or "the", e.g. "Bank of England".
// Global, so String.prototype.match() returns every run.
const MULTI_WORD_PROPER_NOUN = /\b[A-Z][a-z]+(?:\s+(?:of\s+|de\s+|and\s+|the\s+)?[A-Z][a-z]+)+\b/g;
|
|
56
|
+
// Fixed, case-sensitive list of acronyms matched as whole words. Hits are
// recorded by extractNamedEntities with source "cue-word".
const ACRONYM = /\b(?:ISO|GDPR|HIPAA|FDA|SEC|FTC|EPA|W3C|IETF|RFC|NIST|OSHA|IRS|EU|UN|WHO|CCPA|PCI)\b/g;
|
|
57
|
+
// Whole-word cue terms (company suffixes, legal-instrument and institution
// words). extractNamedEntities tests each multi-word proper noun against this to
// choose between source "cue-word" and "proper-noun". The regex has no `g` flag,
// so .test() keeps no lastIndex state between calls.
const CUE_WORD = /\b(?:Inc|LLC|Ltd|Corp|GmbH|Act|Regulation|Directive|Agency|Department|Bureau|Commission|Authority|University|Institute|Association|Standard|Protocol)\b/;
|
|
58
|
+
// schema.org types whose `name` is treated as a named entity.
const JSON_LD_ENTITY_TYPES = new Set([
    "Organization", "GovernmentOrganization", "Corporation", "NGO",
    "Person", "Product", "Brand",
]);
/**
 * Recursively walk parsed JSON-LD and collect the `name` of every node whose
 * `@type` is (or includes) one of JSON_LD_ENTITY_TYPES.
 *
 * `@type` may be a single string or an array of strings; JSON-LD permits both,
 * and a node typed `["Organization", "LocalBusiness"]` is still an entity. A
 * node is reported at most once even when several of its types qualify.
 *
 * Names are trimmed and lower-cased. Every result carries
 * `type: "organization"` regardless of which entity type matched.
 *
 * @param {unknown} nodes - Parsed JSON-LD: normally an array of documents, but
 *   a single object, `null` or `undefined` is tolerated and yields no throw.
 * @returns {{ value: string, source: "json-ld", type: "organization" }[]}
 *   Entities in depth-first document order (duplicates are not removed here).
 */
function jsonLdEntities(nodes) {
    const out = [];
    const visit = (node) => {
        if (Array.isArray(node)) {
            for (const item of node)
                visit(item);
            return;
        }
        // Primitives and null carry no typed nodes.
        if (typeof node !== "object" || node === null)
            return;
        const rawType = node["@type"];
        const types = Array.isArray(rawType) ? rawType : [rawType];
        const name = node["name"];
        const isEntity = types.some((t) => typeof t === "string" && JSON_LD_ENTITY_TYPES.has(t));
        if (typeof name === "string" && isEntity) {
            out.push({ value: name.trim().toLowerCase(), source: "json-ld", type: "organization" });
        }
        // Descend into every property so nested nodes (author, publisher,
        // @graph members, ...) are visited too.
        for (const child of Object.values(node))
            visit(child);
    };
    // visit() handles arrays, single objects and nullish input uniformly.
    visit(nodes);
    return out;
}
|
|
83
|
+
/**
 * Gather named entities from JSON-LD and from the page text.
 *
 * Sources are consulted in a fixed order: JSON-LD names first, then acronym
 * hits, then multi-word proper nouns. Each candidate has its whitespace
 * collapsed and is trimmed and lower-cased; candidates shorter than two
 * characters or already recorded are ignored, so the first source to produce a
 * value wins.
 *
 * @param {string} maskedText - Text to scan (callers pass entity-masked text).
 * @param {unknown[]} [jsonLd=[]] - Parsed JSON-LD documents for the page.
 * @returns {{ value: string, source: string }[]} Unique entities in first-seen
 *   order; `source` is "json-ld", "cue-word" or "proper-noun".
 */
export function extractNamedEntities(maskedText, jsonLd = []) {
    const byValue = new Map();
    const record = (raw, source) => {
        const key = raw.replace(/\s+/g, " ").trim().toLowerCase();
        if (key.length >= 2 && !byValue.has(key)) {
            byValue.set(key, { value: key, source });
        }
    };
    jsonLdEntities(jsonLd).forEach((entity) => record(entity.value, "json-ld"));
    const acronymHits = maskedText.match(ACRONYM) ?? [];
    acronymHits.forEach((hit) => record(hit, "cue-word"));
    const properNounHits = maskedText.match(MULTI_WORD_PROPER_NOUN) ?? [];
    properNounHits.forEach((hit) => {
        // A cue word inside the phrase ("... University", "... Act") upgrades the label.
        const source = CUE_WORD.test(hit) ? "cue-word" : "proper-noun";
        record(hit, source);
    });
    return [...byValue.values()];
}
|
|
102
|
+
// Hosts credited as authoritative citation targets when the caller supplies no
// allowlist of its own. classifyCitations matches an entry against the link
// host exactly or as a parent domain.
export const DEFAULT_CITATION_ALLOWLIST = [
    "wikipedia.org",
    "w3.org",
    "iso.org",
    "ietf.org",
    "rfc-editor.org",
    "doi.org",
    "nih.gov",
    "ncbi.nlm.nih.gov",
    "who.int",
    "schema.org",
    "oecd.org",
    "worldbank.org",
    "europa.eu",
    // Google's published documentation is the primary source for statements
    // about its ranking and spam systems (Search Essentials, spam policies,
    // helpful-content guidance) and, via web.dev, for Core Web Vitals. Only the
    // docs subdomain is listed: a plain google.com link (Maps, a results page)
    // is intentionally not treated as authoritative.
    "developers.google.com",
    "web.dev",
];
|
|
113
|
+
// Two-label public suffixes recognised by registrableDomain. Under these the
// registrable domain is the last THREE labels rather than the last two.
// This is a small hand-maintained list, not the full Public Suffix List.
const MULTI_PART_SUFFIXES = new Set([
    "co.uk", "ac.uk", "gov.uk", "org.uk", "com.au", "gov.au", "edu.au",
    "co.jp", "co.nz", "co.za", "com.br",
]);
/**
 * Parse `url` and return its host name, lower-cased and without a trailing
 * root dot ("example.com." becomes "example.com").
 *
 * @param {string} url - Absolute URL.
 * @returns {string | null} The host; `null` when `url` cannot be parsed. URLs
 *   that have no host (e.g. `mailto:`) yield an empty string.
 */
function hostOf(url) {
    try {
        // The trailing dot of a fully-qualified name is kept by the URL parser;
        // drop it so suffix and allowlist checks see the same host either way.
        return new URL(url).hostname.toLowerCase().replace(/\.$/, "");
    }
    catch {
        return null;
    }
}
/**
 * Reduce a host name to its registrable domain.
 *
 * The host is trimmed, lower-cased, stripped of a trailing root dot and of a
 * leading "www." before the labels are counted, so "Shop.Example.CO.UK" and
 * "shop.example.co.uk." both yield "example.co.uk". Hosts with one or two
 * labels are returned as-is (after normalisation).
 *
 * @param {string} host - Host name such as "blog.example.co.uk".
 * @returns {string} The last two labels, or the last three when the last two
 *   form a known multi-part suffix.
 */
export function registrableDomain(host) {
    const normalised = host
        .trim()
        .toLowerCase()
        .replace(/\.$/, "")
        .replace(/^www\./, "");
    const labels = normalised.split(".");
    if (labels.length <= 2)
        return labels.join(".");
    const lastTwo = labels.slice(-2).join(".");
    return MULTI_PART_SUFFIXES.has(lastTwo) ? labels.slice(-3).join(".") : lastTwo;
}
|
|
134
|
+
/**
 * Report whether a host sits under a TLD treated as authoritative.
 *
 * Two shapes qualify: a host ending in ".gov", ".edu", ".mil" or ".int", or a
 * host ending in ".gov", ".edu" or ".ac" followed by a two-letter lowercase
 * label (e.g. "ox.ac.uk", "health.gov.au"). Matching is case-sensitive.
 *
 * @param {string} host - Host name to test.
 * @returns {boolean} True when either shape matches.
 */
function isAuthoritativeTld(host) {
    const genericAuthority = /\.(?:gov|edu|mil|int)$/;
    if (genericAuthority.test(host)) {
        return true;
    }
    const countryScopedAuthority = /\.(?:gov|edu|ac)\.[a-z]{2}$/;
    return countryScopedAuthority.test(host);
}
|
|
137
|
+
/**
 * Classify a page's outbound links as authoritative or general citations.
 *
 * Links whose registrable domain equals the page's own are treated as internal
 * and skipped, as are hrefs that do not parse or have no host. Each distinct
 * href is reported once. A link is "authoritative" when its host is under an
 * authoritative TLD (reason "tld") or equals / is a subdomain of an allowlist
 * entry (reason "allowlist"); the TLD check takes precedence.
 *
 * Allowlist entries are trimmed, lower-cased and stripped of leading dots
 * before matching, because link hosts are compared in lower case; without this
 * a configured "Example.ORG" or ".example.org" could never match.
 *
 * @param {Iterable<string>} resolvedHrefs - Absolute link URLs; nullish is
 *   treated as empty.
 * @param {string} pageUrl - URL of the page the links came from.
 * @param {string[]} [allowlist=DEFAULT_CITATION_ALLOWLIST] - Authoritative
 *   hosts; `null` falls back to the default just like `undefined`.
 * @returns {{ href: string, domain: string, authority: "authoritative" | "general", reason?: "tld" | "allowlist" }[]}
 *   One entry per distinct external href, in input order.
 */
export function classifyCitations(resolvedHrefs, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    // Normalise the allowlist once instead of on every link.
    const allowedHosts = (allowlist ?? DEFAULT_CITATION_ALLOWLIST)
        .map((entry) => String(entry).trim().toLowerCase().replace(/^\.+/, ""))
        .filter((entry) => entry.length > 0);
    const isAllowlisted = (host) => allowedHosts.some((d) => host === d || host.endsWith(`.${d}`));
    const pageHost = hostOf(pageUrl);
    const pageDomain = pageHost ? registrableDomain(pageHost) : null;
    const seen = new Set();
    const out = [];
    for (const href of resolvedHrefs ?? []) {
        const host = hostOf(href);
        // null = unparseable, "" = scheme without a host (mailto:, tel:, ...).
        if (!host)
            continue;
        const domain = registrableDomain(host);
        if (pageDomain && domain === pageDomain)
            continue; // internal link
        if (seen.has(href))
            continue;
        seen.add(href);
        if (isAuthoritativeTld(host)) {
            out.push({ href, domain, authority: "authoritative", reason: "tld" });
        }
        else if (isAllowlisted(host)) {
            out.push({ href, domain, authority: "authoritative", reason: "allowlist" });
        }
        else {
            out.push({ href, domain, authority: "general" });
        }
    }
    return out;
}
|
|
164
|
+
/**
 * Tell whether any outbound link on the page classifies as authoritative.
 *
 * @param {Iterable<string>} resolvedHrefs - Absolute link URLs.
 * @param {string} pageUrl - URL of the page the links came from.
 * @param {string[]} [allowlist=DEFAULT_CITATION_ALLOWLIST] - Authoritative hosts.
 * @returns {boolean} True when classifyCitations reports at least one
 *   citation with authority "authoritative".
 */
export function hasAuthoritativeCitation(resolvedHrefs, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    const citations = classifyCitations(resolvedHrefs, pageUrl, allowlist);
    for (const citation of citations) {
        if (citation.authority === "authoritative") {
            return true;
        }
    }
    return false;
}
|
|
167
|
+
// Sentence boundary: whitespace that follows ".", "!" or "?" and precedes an
// uppercase ASCII letter, a digit, a quote or "(". Lookbehind and lookahead are
// used, so String.prototype.split() consumes only the whitespace and each
// sentence keeps its closing punctuation.
const SENTENCE_SPLIT = /(?<=[.!?])\s+(?=[A-Z0-9"'(])/;
|
|
168
|
+
/**
 * Turn raw href attribute values into absolute URLs.
 *
 * Each href is resolved against `base`; values that cannot be resolved are
 * dropped without error, so the result may be shorter than the input.
 *
 * @param {Iterable<string>} hrefs - Raw href values, relative or absolute.
 * @param {string} base - URL used to resolve relative hrefs.
 * @returns {string[]} Serialised absolute URLs, in input order.
 */
function resolveHrefs(hrefs, base) {
    // Returns a zero- or one-element list so the caller can spread it.
    const toAbsolute = (href) => {
        try {
            return [new URL(href, base).href];
        }
        catch {
            return []; // skip unparseable
        }
    };
    const resolved = [];
    for (const href of hrefs) {
        resolved.push(...toAbsolute(href));
    }
    return resolved;
}
|
|
178
|
+
/**
 * Deterministic approximation of "a verifiable claim": a block (<p>/<li>) that
 * contains a statistic AND an outbound citation. Approximated at block level,
 * not exact sentence level — documented limitation. Detects co-occurrence, not
 * semantic truth. Consume at `speculative` confidence.
 *
 * Navigation, header, footer, aside, script, style and noscript elements are
 * removed first. Blocks are then taken from the first non-empty match of
 * <article>, <main>, <body>. At most one claim is emitted per block: the first
 * sentence that contains a citable fact or measurement.
 *
 * Nested blocks (`<li><p>…</p></li>`, nested lists) are each matched by the
 * "p, li" selector. To keep the one-claim-per-block guarantee, a block that
 * contains other blocks is evaluated on its OWN content only; text and links
 * of the nested blocks are attributed to those inner blocks.
 *
 * @param {string} html - Page HTML.
 * @param {string} pageUrl - Page URL; used to resolve relative links and to
 *   recognise internal links.
 * @param {string[]} [allowlist=DEFAULT_CITATION_ALLOWLIST] - Authoritative hosts
 *   passed through to classifyCitations.
 * @returns {{ sentence: string, facts: string[], citations: string[] }[]}
 *   One entry per qualifying block; `sentence` is capped at 240 characters and
 *   `citations` lists every external link of the block.
 */
export function extractGroundedClaims(html, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    const $ = load(html);
    $("nav, header, footer, aside, script, style, noscript").remove();
    const claims = [];
    const scope = $("article").length > 0 ? $("article") : $("main").length > 0 ? $("main") : $("body");
    scope.find("p, li").each((_i, el) => {
        let $block = $(el);
        if ($block.find("p, li").length > 0) {
            // Work on a detached copy so the document itself is not altered
            // while it is still being iterated.
            $block = $block.clone();
            $block.find("p, li").remove();
        }
        const rawLinks = $block.find("a[href]").map((_j, a) => String($(a).attr("href") ?? "")).get();
        const citations = classifyCitations(resolveHrefs(rawLinks, pageUrl), pageUrl, allowlist);
        // No external link in this block means nothing grounds its statistics.
        if (citations.length === 0)
            return;
        const text = $block.text().replace(/\s+/g, " ").trim();
        for (const sentence of text.split(SENTENCE_SPLIT)) {
            const facts = [
                ...extractCitableFacts(sentence),
                ...extractMeasurements(sentence).map((m) => m.value),
            ];
            if (facts.length === 0)
                continue;
            claims.push({
                sentence: sentence.trim().slice(0, 240),
                facts,
                citations: citations.map((c) => c.href),
            });
            break; // one grounded claim per block is enough; avoids over-counting
        }
    });
    return claims;
}
|
|
213
|
+
/**
 * Run every fact extractor over one page and bundle the results.
 *
 * The text-based extractors (citable facts, measurements, named entities) see
 * the page's content text after entity masking; citation classification and
 * grounded-claim extraction work from the page's links and raw HTML.
 *
 * @param {{ contentText: string, jsonLd: unknown[], resolvedHrefs: string[], url: string, html: string }} page
 *   Page record; only the listed properties are read.
 * @param {unknown} entityPatterns - Forwarded unchanged to maskEntities.
 * @param {string[]} [allowlist=DEFAULT_CITATION_ALLOWLIST] - Authoritative hosts.
 * @returns {{ citableFacts: string[], measurements: object[], namedEntities: object[], citations: object[], groundedClaims: object[] }}
 */
export function extractPageFacts(page, entityPatterns, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    const { contentText, jsonLd, resolvedHrefs, url, html } = page;
    const masked = maskEntities(contentText, entityPatterns);
    const citableFacts = extractCitableFacts(masked);
    const measurements = extractMeasurements(masked);
    const namedEntities = extractNamedEntities(masked, jsonLd);
    const citations = classifyCitations(resolvedHrefs, url, allowlist);
    const groundedClaims = extractGroundedClaims(html, url, allowlist);
    return { citableFacts, measurements, namedEntities, citations, groundedClaims };
}
|
|
223
|
+
//# sourceMappingURL=fact-extraction.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact-extraction.js","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAwChD,+EAA+E;AAC/E,6EAA6E;AAC7E,+DAA+D;AAC/D,MAAM,qBAAqB,GAA2C;IACpE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,qBAAqB,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,oFAAoF;AACpF,MAAM,iBAAiB,GACrB,6DAA6D,CAAC;AAChE,MAAM,oBAAoB,GAA6C;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,2CAA2C,EAAE;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAC9C,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,MAAM,CAAC,6BAA6B,iBAAiB,MAAM,EAAE,GAAG,CAAC,EAAE;CACtG,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,UAAkB;IACpD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACnD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC1D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC9B,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,sBAAsB,GAAG,mEAAmE,CAAC;AACnG,MAAM,OAAO,GAAG,uFAAuF,CAAC;AACxG,MAAM,QAAQ,GAAG,yJAAyJ,CAAC;AAE3K,MAAM,oBAAoB,GAAG,IAAI
,GAAG,CAAC;IACnC,cAAc,EAAE,wBAAwB,EAAE,aAAa,EAAE,KAAK;IAC9D,QAAQ,EAAE,SAAS,EAAE,OAAO;CAC7B,CAAC,CAAC;AAEH,SAAS,cAAc,CAAC,KAAgB;IACtC,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,CAAC,IAAa,EAAQ,EAAE;QACpC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAAC,OAAO;QAAC,CAAC;QACzD,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI;YAAE,OAAO;QACtD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3F,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1F,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC;IACF,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,SAAoB,EAAE;IAC7E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,MAA6B,EAAQ,EAAE;QAClE,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO;QACxC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACZ,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACjC,CAAC,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,cAAc,CAAC,MAAM,CAAC;QAAE,IAAI,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,sBAAsB,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/D,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAsB;IAC3D,eAAe,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,gBAAgB;IAClE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,SAAS,EAAE,YAAY;IACjE,UAAU,EAAE,eAAe,EAAE,WAAW;IACxC,+EAA+E;
IAC/E,0EAA0E;IAC1E,yEAAyE;IACzE,+EAA+E;IAC/E,iDAAiD;IACjD,uBAAuB,EAAE,SAAS;CACnC,CAAC;AAEF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IAClE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;CACpC,CAAC,CAAC;AAEH,SAAS,MAAM,CAAC,GAAW;IACzB,IAAI,CAAC;QAAC,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,IAAI,CAAC;IAAC,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,IAAI,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,wBAAwB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzF,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IACjC,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,UAAU,IAAI,MAAM,KAAK,UAAU;YAAE,SAAS,CAAC,gBAAgB;QACnE,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACf,IAAI,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACxE,CAAC;aAAM,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACvE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC;QAC9E,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAA
C,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,OAAO,iBAAiB,CAAC,aAAa,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAC;AAC3G,CAAC;AAED,MAAM,cAAc,GAAG,8BAA8B,CAAC;AAEtD,SAAS,YAAY,CAAC,KAAe,EAAE,IAAY;IACjD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC;YAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CACnC,IAAY,EACZ,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC,CAAC,qDAAqD,CAAC,CAAC,MAAM,EAAE,CAAC;IAClE,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACpG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAC3F,MAAM,SAAS,GAAG,iBAAiB,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;QACzF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACpD,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG;gBACZ,GAAG,mBAAmB,CAAC,QAAQ,CAAC;gBAChC,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;aACrD,CAAC;YACF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YACjC,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBACvC,KAAK;g
BACL,SAAS,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;aACxC,CAAC,CAAC;YACH,MAAM,CAAC,+DAA+D;QACxE,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAmF,EACnF,cAAmC,EACnC,YAA+B,0BAA0B;IAEzD,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC9D,OAAO;QACL,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,aAAa,EAAE,oBAAoB,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;QACxD,SAAS,EAAE,iBAAiB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;QACrE,cAAc,EAAE,qBAAqB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;KACtE,CAAC;AACJ,CAAC"}
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAoEA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAGZ,WAAW,EAUX,UAAU,EAIX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAA8D,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAqE1I,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAEvE;AA2yBD;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAoYD,wBAAgB,2BAA2B,CAAC,GAAG,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAgBjG;AA+pBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA4yC/F"}
|
package/dist/auditor.js
CHANGED
|
@@ -5,6 +5,7 @@ import { parseHtmlPage } from "./parser.js";
|
|
|
5
5
|
import { pageSkipReason } from "./page-filter.js";
|
|
6
6
|
import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
|
|
7
7
|
import { eeatSignalsRule } from "./rules/content/eeat-signals.js";
|
|
8
|
+
import { citationCoverageRule } from "./rules/content/citation-coverage.js";
|
|
8
9
|
import { metaUniquenessRule } from "./rules/content/meta-uniqueness.js";
|
|
9
10
|
import { missingAuthorRule } from "./rules/content/missing-author.js";
|
|
10
11
|
import { uniqueValueRule } from "./rules/content/unique-value.js";
|
|
@@ -72,6 +73,10 @@ import { CORE_RULESET_VERSION } from "./ruleset-version.js";
|
|
|
72
73
|
import { planScrapeStrategy, DEFAULT_AGE_FLOOR_DAYS } from "./scrape-strategy.js";
|
|
73
74
|
import { detectTemplates, buildUrlToTemplateMap, shouldActivateTemplateScoring } from "./template-detection.js";
|
|
74
75
|
import { scoreTemplates, siteVerdictFromTemplates } from "./per-template-scoring.js";
|
|
76
|
+
import { deriveEntityPatterns } from "./algorithms/auto-entity-mask.js";
|
|
77
|
+
import { CompositeAuthorityProvider } from "./algorithms/authority/provider.js";
|
|
78
|
+
import { OpenPageRankProvider } from "./algorithms/authority/openpagerank.js";
|
|
79
|
+
import { registrableDomain } from "./algorithms/fact-extraction.js";
|
|
75
80
|
const DEFAULTS = {
|
|
76
81
|
nearDuplicateThreshold: 0.85,
|
|
77
82
|
entitySwapThreshold: 0.95,
|
|
@@ -80,10 +85,12 @@ const DEFAULTS = {
|
|
|
80
85
|
publicationVelocityMaxPerDayCorpusFraction: 0.10,
|
|
81
86
|
boilerplateMaxRatio: 0.7,
|
|
82
87
|
templateDiversityMinUniqueRatio: 0.35,
|
|
83
|
-
|
|
88
|
+
uniqueValueDensity: { passBelow: 0.20, errorBelow: 0.12 },
|
|
84
89
|
metaUniquenessMinJaccard: 0.9,
|
|
85
90
|
linkDepthMaxClicks: 3,
|
|
86
91
|
templateCoverageMinPages: 5,
|
|
92
|
+
citationCoverageMinClaims: 4,
|
|
93
|
+
citationCoverageMinAuthoritative: 1,
|
|
87
94
|
answerFirstMaxWords: 100,
|
|
88
95
|
citableFactsMin: 3,
|
|
89
96
|
citableFactsTarget: 8,
|
|
@@ -209,7 +216,7 @@ const SCORING_PROFILES = {
|
|
|
209
216
|
// first-principles analysis predicts will false-positive on catalog-
|
|
210
217
|
// shaped sites (Zapier integrations, G2 categories, Wise currency pairs,
|
|
211
218
|
// etc.). A reputable-pSEO calibration corpus + runner has been added
|
|
212
|
-
// (scripts/calibration-
|
|
219
|
+
// (scripts/calibration-corpus.ts); these overrides will be
|
|
213
220
|
// tightened or loosened based on actual fire-rates measured against
|
|
214
221
|
// sites that demonstrably win in production. See
|
|
215
222
|
// docs/superpowers/specs/2026-05-03-calibration-against-reputable-pseo.md.
|
|
@@ -414,6 +421,9 @@ const RULE_IMPACTS = {
|
|
|
414
421
|
"content/title-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 25 }, // 2026-05-03 round 11: title is high-impact but the original 50-cap was disproportionate to other content rules and tipped Typeform into critical on a 6-finding cluster. Keep the rule at native error severity (duplicate titles ARE real bugs); just don't let one rule dominate the integrity bucket.
|
|
415
422
|
"content/heading-structure": { baseImpact: 5, perInstance: 1, maxImpact: 20 },
|
|
416
423
|
"content/image-alt-text": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
|
|
424
|
+
// Citation coverage is low-confidence (block-level grounded-claim heuristic);
|
|
425
|
+
// keep its impact modest so it nudges rather than dominates the score.
|
|
426
|
+
"content/citation-coverage": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
|
|
417
427
|
"content/translation-no-op": { baseImpact: 30, perInstance: 10, maxImpact: 60 },
|
|
418
428
|
// v1 warning-severity heuristic; lower than translation-no-op since it's speculative
|
|
419
429
|
"content/regurgitated-content": { baseImpact: 15, perInstance: 5, maxImpact: 35 },
|
|
@@ -642,7 +652,7 @@ sampled = false) {
|
|
|
642
652
|
}
|
|
643
653
|
// Content rules
|
|
644
654
|
if (isEnabled("content/unique-value") && modeOk("content/unique-value")) {
|
|
645
|
-
pushAll(findings, tag(uniqueValueRule(pages, resolvedRules.
|
|
655
|
+
pushAll(findings, tag(uniqueValueRule(pages, resolvedRules.uniqueValueDensity)));
|
|
646
656
|
}
|
|
647
657
|
if (isEnabled("content/meta-uniqueness") && modeOk("content/meta-uniqueness")) {
|
|
648
658
|
pushAll(findings, tag(metaUniquenessRule(pages, entityPatterns, resolvedRules.metaUniquenessMinJaccard)));
|
|
@@ -653,6 +663,13 @@ sampled = false) {
|
|
|
653
663
|
if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
|
|
654
664
|
pushAll(findings, tag(eeatSignalsRule(pages)));
|
|
655
665
|
}
|
|
666
|
+
if (isEnabled("content/citation-coverage") && modeOk("content/citation-coverage")) {
|
|
667
|
+
pushAll(findings, tag(citationCoverageRule(pages, entityPatterns, {
|
|
668
|
+
minClaims: resolvedRules.citationCoverageMinClaims,
|
|
669
|
+
minAuthoritative: resolvedRules.citationCoverageMinAuthoritative,
|
|
670
|
+
allowlist: resolvedRules.citationAllowlist,
|
|
671
|
+
})));
|
|
672
|
+
}
|
|
656
673
|
// 2026-05-03 v0.5.2 blind-spot fixes — title uniqueness + heading
|
|
657
674
|
// structure + image alt-text were tier-1 gaps in the blind-spot audit.
|
|
658
675
|
if (isEnabled("content/title-uniqueness") && modeOk("content/title-uniqueness")) {
|
|
@@ -678,7 +695,7 @@ sampled = false) {
|
|
|
678
695
|
}
|
|
679
696
|
// Link rules — use the global link graph
|
|
680
697
|
if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
|
|
681
|
-
pushAll(findings, tag(orphanPagesRule(pages, inbound, rootUrl)));
|
|
698
|
+
pushAll(findings, tag(orphanPagesRule(pages, inbound, rootUrl, sampled)));
|
|
682
699
|
}
|
|
683
700
|
if (isEnabled("links/dead-ends") && modeOk("links/dead-ends")) {
|
|
684
701
|
pushAll(findings, tag(deadEndsRule(pages, knownUrls, rootUrl)));
|
|
@@ -689,7 +706,7 @@ sampled = false) {
|
|
|
689
706
|
}
|
|
690
707
|
}
|
|
691
708
|
if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
|
|
692
|
-
pushAll(findings, tag(clusterConnectivityRule(pages, knownUrls)));
|
|
709
|
+
pushAll(findings, tag(clusterConnectivityRule(pages, knownUrls, sampled)));
|
|
693
710
|
}
|
|
694
711
|
if (isEnabled("links/host-section-divergence") && modeOk("links/host-section-divergence")) {
|
|
695
712
|
pushAll(findings, tag(hostSectionDivergenceRule(pages, adjacency)));
|
|
@@ -1883,10 +1900,13 @@ export async function auditSource(source, options) {
|
|
|
1883
1900
|
?? DEFAULTS.publicationVelocityMaxPerDayCorpusFraction,
|
|
1884
1901
|
boilerplateMaxRatio: options?.rules?.boilerplateMaxRatio ?? DEFAULTS.boilerplateMaxRatio,
|
|
1885
1902
|
templateDiversityMinUniqueRatio: options?.rules?.templateDiversityMinUniqueRatio ?? DEFAULTS.templateDiversityMinUniqueRatio,
|
|
1886
|
-
|
|
1903
|
+
uniqueValueDensity: options?.rules?.uniqueValueDensity ?? DEFAULTS.uniqueValueDensity,
|
|
1887
1904
|
metaUniquenessMinJaccard: options?.rules?.metaUniquenessMinJaccard ?? DEFAULTS.metaUniquenessMinJaccard,
|
|
1888
1905
|
linkDepthMaxClicks: options?.rules?.linkDepthMaxClicks ?? DEFAULTS.linkDepthMaxClicks,
|
|
1889
1906
|
templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages,
|
|
1907
|
+
citationCoverageMinClaims: options?.rules?.citationCoverageMinClaims ?? DEFAULTS.citationCoverageMinClaims,
|
|
1908
|
+
citationCoverageMinAuthoritative: options?.rules?.citationCoverageMinAuthoritative ?? DEFAULTS.citationCoverageMinAuthoritative,
|
|
1909
|
+
citationAllowlist: options?.rules?.citationAllowlist,
|
|
1890
1910
|
answerFirstMaxWords: options?.rules?.answerFirstMaxWords ?? DEFAULTS.answerFirstMaxWords,
|
|
1891
1911
|
citableFactsMin: options?.rules?.citableFactsMin ?? DEFAULTS.citableFactsMin,
|
|
1892
1912
|
citableFactsTarget: options?.rules?.citableFactsTarget ?? DEFAULTS.citableFactsTarget,
|
|
@@ -2346,7 +2366,10 @@ export async function auditSource(source, options) {
|
|
|
2346
2366
|
const auditMode = options?.mode ?? "full";
|
|
2347
2367
|
// Site-wide rules (run once, outside group loop)
|
|
2348
2368
|
if (sitemapUrlSet && sitemapUrlSet.size > 0 && auditMode !== "diff") {
|
|
2349
|
-
const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet
|
|
2369
|
+
const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet, {
|
|
2370
|
+
sampled: isSampledAudit,
|
|
2371
|
+
normalizeUrlOptions,
|
|
2372
|
+
});
|
|
2350
2373
|
pushAll(allFindings, sitemapFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
2351
2374
|
if (robotsTxtContent) {
|
|
2352
2375
|
const robotsFindings = robotsComplianceRule(parsedPages, sitemapUrlSet, robotsTxtContent);
|
|
@@ -2372,6 +2395,7 @@ export async function auditSource(source, options) {
|
|
|
2372
2395
|
pushAll(allFindings, dataIdenticalFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
2373
2396
|
}
|
|
2374
2397
|
}
|
|
2398
|
+
const derivedEntityPatterns = options?.autoEntityMask === false ? [] : deriveEntityPatterns(parsedPagesAll);
|
|
2375
2399
|
for (const [groupName, groupPages] of classified) {
|
|
2376
2400
|
if (groupPages.length === 0)
|
|
2377
2401
|
continue;
|
|
@@ -2380,7 +2404,7 @@ export async function auditSource(source, options) {
|
|
|
2380
2404
|
continue;
|
|
2381
2405
|
const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
|
|
2382
2406
|
const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
|
|
2383
|
-
const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full",
|
|
2407
|
+
const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, [...DEFAULT_ENTITY_PATTERNS, ...derivedEntityPatterns], groupConfig?.overrides, options?.mode ?? "full",
|
|
2384
2408
|
// 2026-05-06 calibration fix: pinnedUrls mode fetches a hand-picked subset
|
|
2385
2409
|
// of the full site — the link graph across those pages is structurally
|
|
2386
2410
|
// incomplete, just like a random-sampled crawl. Pass `true` so
|
|
@@ -2486,12 +2510,31 @@ export async function auditSource(source, options) {
|
|
|
2486
2510
|
const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification, parsedPages.length);
|
|
2487
2511
|
const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
|
|
2488
2512
|
const issues = bucketIssues(enriched.findings);
|
|
2513
|
+
// Resolve a domain-authority score to moderate the verdict. Explicit option
|
|
2514
|
+
// wins; otherwise a provider (custom, or default OPR composite). null/absent
|
|
2515
|
+
// → no moderation (fail-safe).
|
|
2516
|
+
let resolvedAuthorityScore = options?.authorityScore;
|
|
2517
|
+
let resolvedAuthorityDomain;
|
|
2518
|
+
if (resolvedAuthorityScore === undefined) {
|
|
2519
|
+
const provider = options?.authorityProvider ??
|
|
2520
|
+
new CompositeAuthorityProvider([new OpenPageRankProvider(options?.openPageRankApiKey ?? "")]);
|
|
2521
|
+
try {
|
|
2522
|
+
const host = new URL(source.startsWith("http") ? source : `https://${source}`).hostname;
|
|
2523
|
+
resolvedAuthorityDomain = registrableDomain(host);
|
|
2524
|
+
const a = await provider.authorityFor(resolvedAuthorityDomain);
|
|
2525
|
+
if (a !== null)
|
|
2526
|
+
resolvedAuthorityScore = a;
|
|
2527
|
+
}
|
|
2528
|
+
catch {
|
|
2529
|
+
/* source is a local dir / unparseable → no authority */
|
|
2530
|
+
}
|
|
2531
|
+
}
|
|
2489
2532
|
// v0.6.0 — spec §15.1: site verdict comes from siteVerdictFromTemplates when
|
|
2490
2533
|
// ≥1 template has ≥5% coverage. Falls back to the legacy risk-ladder verdict
|
|
2491
2534
|
// when no template meets the threshold (single-template sites, `unclear`/
|
|
2492
2535
|
// `small-marketing` classifications, or the long-tail-only case).
|
|
2493
2536
|
// The `risk` score is intentionally unchanged — §15.1 governs verdict only.
|
|
2494
|
-
const legacyVerdict = shiftVerdictForAuthority(verdictForRisk(risk),
|
|
2537
|
+
const legacyVerdict = shiftVerdictForAuthority(verdictForRisk(risk), resolvedAuthorityScore);
|
|
2495
2538
|
const templateVerdict = siteVerdictFromTemplates(siteTemplates);
|
|
2496
2539
|
const verdict = templateVerdict !== null ? templateVerdict : legacyVerdict;
|
|
2497
2540
|
const headline = buildHeadline(bucketCounts);
|
|
@@ -2530,6 +2573,9 @@ export async function auditSource(source, options) {
|
|
|
2530
2573
|
auditedUrls: parsedPages.length > 0
|
|
2531
2574
|
? [...parsedPages.map((p) => p.url)].sort()
|
|
2532
2575
|
: undefined,
|
|
2576
|
+
...(resolvedAuthorityScore !== undefined
|
|
2577
|
+
? { authority: { score: resolvedAuthorityScore, domain: resolvedAuthorityDomain ?? "" } }
|
|
2578
|
+
: {}),
|
|
2533
2579
|
};
|
|
2534
2580
|
// Partial-report flag: the backpressure watchdog aborted mid-crawl and we
|
|
2535
2581
|
// salvaged whatever pages had been fetched. Consumers MUST treat coverage as
|