@pseolint/core 0.6.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +3 -3
  2. package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
  3. package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
  4. package/dist/algorithms/authority/commoncrawl.js +17 -0
  5. package/dist/algorithms/authority/commoncrawl.js.map +1 -0
  6. package/dist/algorithms/authority/openpagerank.d.ts +19 -0
  7. package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
  8. package/dist/algorithms/authority/openpagerank.js +42 -0
  9. package/dist/algorithms/authority/openpagerank.js.map +1 -0
  10. package/dist/algorithms/authority/provider.d.ts +16 -0
  11. package/dist/algorithms/authority/provider.d.ts.map +1 -0
  12. package/dist/algorithms/authority/provider.js +24 -0
  13. package/dist/algorithms/authority/provider.js.map +1 -0
  14. package/dist/algorithms/auto-entity-mask.d.ts +19 -0
  15. package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
  16. package/dist/algorithms/auto-entity-mask.js +102 -0
  17. package/dist/algorithms/auto-entity-mask.js.map +1 -0
  18. package/dist/algorithms/example-regions.d.ts +22 -0
  19. package/dist/algorithms/example-regions.d.ts.map +1 -0
  20. package/dist/algorithms/example-regions.js +32 -0
  21. package/dist/algorithms/example-regions.js.map +1 -0
  22. package/dist/algorithms/fact-extraction.d.ts +46 -0
  23. package/dist/algorithms/fact-extraction.d.ts.map +1 -0
  24. package/dist/algorithms/fact-extraction.js +223 -0
  25. package/dist/algorithms/fact-extraction.js.map +1 -0
  26. package/dist/auditor.d.ts.map +1 -1
  27. package/dist/auditor.js +55 -9
  28. package/dist/auditor.js.map +1 -1
  29. package/dist/enrich-findings.d.ts.map +1 -1
  30. package/dist/enrich-findings.js +9 -8
  31. package/dist/enrich-findings.js.map +1 -1
  32. package/dist/index.d.ts +11 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +9 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/origin-preflight.d.ts +89 -0
  37. package/dist/origin-preflight.d.ts.map +1 -0
  38. package/dist/origin-preflight.js +93 -0
  39. package/dist/origin-preflight.js.map +1 -0
  40. package/dist/rule-references.d.ts.map +1 -1
  41. package/dist/rule-references.js +1 -0
  42. package/dist/rule-references.js.map +1 -1
  43. package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
  44. package/dist/rules/aeo/citable-facts.js +4 -33
  45. package/dist/rules/aeo/citable-facts.js.map +1 -1
  46. package/dist/rules/aeo/crawler-access.d.ts +14 -0
  47. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  48. package/dist/rules/aeo/crawler-access.js +96 -15
  49. package/dist/rules/aeo/crawler-access.js.map +1 -1
  50. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  51. package/dist/rules/aeo/summary-bait.js +4 -3
  52. package/dist/rules/aeo/summary-bait.js.map +1 -1
  53. package/dist/rules/content/citation-coverage.d.ts +11 -0
  54. package/dist/rules/content/citation-coverage.d.ts.map +1 -0
  55. package/dist/rules/content/citation-coverage.js +43 -0
  56. package/dist/rules/content/citation-coverage.js.map +1 -0
  57. package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
  58. package/dist/rules/content/common-phrase-reuse.js +7 -2
  59. package/dist/rules/content/common-phrase-reuse.js.map +1 -1
  60. package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
  61. package/dist/rules/content/regurgitated-content.js +11 -2
  62. package/dist/rules/content/regurgitated-content.js.map +1 -1
  63. package/dist/rules/content/translation-no-op.d.ts.map +1 -1
  64. package/dist/rules/content/translation-no-op.js +5 -1
  65. package/dist/rules/content/translation-no-op.js.map +1 -1
  66. package/dist/rules/content/unique-value.d.ts +15 -1
  67. package/dist/rules/content/unique-value.d.ts.map +1 -1
  68. package/dist/rules/content/unique-value.js +46 -39
  69. package/dist/rules/content/unique-value.js.map +1 -1
  70. package/dist/rules/content/value-add.d.ts.map +1 -1
  71. package/dist/rules/content/value-add.js +3 -1
  72. package/dist/rules/content/value-add.js.map +1 -1
  73. package/dist/rules/links/cluster-connectivity.d.ts +7 -1
  74. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
  75. package/dist/rules/links/cluster-connectivity.js +8 -2
  76. package/dist/rules/links/cluster-connectivity.js.map +1 -1
  77. package/dist/rules/links/orphan-pages.d.ts +8 -1
  78. package/dist/rules/links/orphan-pages.d.ts.map +1 -1
  79. package/dist/rules/links/orphan-pages.js +10 -1
  80. package/dist/rules/links/orphan-pages.js.map +1 -1
  81. package/dist/rules/schema/consistency.d.ts.map +1 -1
  82. package/dist/rules/schema/consistency.js +33 -21
  83. package/dist/rules/schema/consistency.js.map +1 -1
  84. package/dist/rules/scope.d.ts.map +1 -1
  85. package/dist/rules/scope.js +1 -0
  86. package/dist/rules/scope.js.map +1 -1
  87. package/dist/rules/spam/entity-swap.d.ts.map +1 -1
  88. package/dist/rules/spam/entity-swap.js +51 -9
  89. package/dist/rules/spam/entity-swap.js.map +1 -1
  90. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  91. package/dist/rules/spam/thin-content.js +5 -1
  92. package/dist/rules/spam/thin-content.js.map +1 -1
  93. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
  94. package/dist/rules/tech/canonical-consistency.js +144 -28
  95. package/dist/rules/tech/canonical-consistency.js.map +1 -1
  96. package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
  97. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
  98. package/dist/rules/tech/sitemap-completeness.js +21 -5
  99. package/dist/rules/tech/sitemap-completeness.js.map +1 -1
  100. package/dist/rules/tech/soft-404.d.ts +11 -0
  101. package/dist/rules/tech/soft-404.d.ts.map +1 -1
  102. package/dist/rules/tech/soft-404.js +47 -5
  103. package/dist/rules/tech/soft-404.js.map +1 -1
  104. package/dist/site-classifier.d.ts.map +1 -1
  105. package/dist/site-classifier.js +1 -0
  106. package/dist/site-classifier.js.map +1 -1
  107. package/dist/template-detection.d.ts +1 -0
  108. package/dist/template-detection.d.ts.map +1 -1
  109. package/dist/template-detection.js +1 -1
  110. package/dist/template-detection.js.map +1 -1
  111. package/dist/types.d.ts +22 -1
  112. package/dist/types.d.ts.map +1 -1
  113. package/package.json +17 -1
@@ -0,0 +1,223 @@
1
+ import { load } from "cheerio";
2
+ import { maskEntities } from "./entity-mask.js";
3
+ // --- Numeric "citable" facts: the frozen subset aeo/citable-facts counts. ---
4
+ // These six patterns are lifted verbatim from rules/aeo/citable-facts.ts and
5
+ // MUST stay byte-identical to preserve the calibration corpus.
6
+ const CITABLE_FACT_PATTERNS = [
7
+ { name: "dollar", regex: /\$[\d,]+(\.\d{2})?/g },
8
+ { name: "percent", regex: /\b\d+(\.\d+)?\s*%/g },
9
+ {
10
+ name: "timeframe",
11
+ regex: /\b\d+(?:-\d+)?\s*(business\s+days?|days?|weeks?|months?|years?|hours?|minutes?)\b/gi,
12
+ },
13
+ {
14
+ name: "date",
15
+ regex: /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}(?:,\s*\d{4})?\b/gi,
16
+ },
17
+ { name: "isoDate", regex: /\b\d{4}-\d{2}-\d{2}\b/g },
18
+ { name: "form", regex: /\bForm\s+[A-Z0-9][A-Z0-9-]*\b/g },
19
+ ];
20
+ export function extractCitableFacts(text) {
21
+ const out = new Set();
22
+ for (const { regex } of CITABLE_FACT_PATTERNS) {
23
+ const matches = text.match(regex);
24
+ if (!matches)
25
+ continue;
26
+ for (const m of matches)
27
+ out.add(m.trim().toLowerCase());
28
+ }
29
+ return Array.from(out);
30
+ }
31
+ // --- Measurements: NEW numeric kinds, deliberately separate from citableFacts. ---
32
+ const MEASUREMENT_UNITS = "kg|g|lb|lbs|oz|mi|km|cm|mm|ft|in|MB|GB|TB|KB|ms|fps|mph|kWh";
33
+ const MEASUREMENT_PATTERNS = [
34
+ { kind: "ratio", regex: /\b\d+(?:\.\d+)?\s*(?:out of|in)\s*\d+\b/gi },
35
+ { kind: "ratio", regex: /\b\d+\s*:\s*\d+\b/g },
36
+ { kind: "measurement", regex: new RegExp(`\\b\\d+(?:\\.\\d+)?\\s*(?:${MEASUREMENT_UNITS})\\b`, "g") },
37
+ ];
38
+ export function extractMeasurements(maskedText) {
39
+ const seen = new Set();
40
+ const out = [];
41
+ for (const { kind, regex } of MEASUREMENT_PATTERNS) {
42
+ const matches = maskedText.match(regex);
43
+ if (!matches)
44
+ continue;
45
+ for (const m of matches) {
46
+ const value = m.replace(/\s+/g, " ").trim().toLowerCase();
47
+ if (seen.has(value))
48
+ continue;
49
+ seen.add(value);
50
+ out.push({ value, kind });
51
+ }
52
+ }
53
+ return out;
54
+ }
55
+ const MULTI_WORD_PROPER_NOUN = /\b[A-Z][a-z]+(?:\s+(?:of\s+|de\s+|and\s+|the\s+)?[A-Z][a-z]+)+\b/g;
56
+ const ACRONYM = /\b(?:ISO|GDPR|HIPAA|FDA|SEC|FTC|EPA|W3C|IETF|RFC|NIST|OSHA|IRS|EU|UN|WHO|CCPA|PCI)\b/g;
57
+ const CUE_WORD = /\b(?:Inc|LLC|Ltd|Corp|GmbH|Act|Regulation|Directive|Agency|Department|Bureau|Commission|Authority|University|Institute|Association|Standard|Protocol)\b/;
58
+ const JSON_LD_ENTITY_TYPES = new Set([
59
+ "Organization", "GovernmentOrganization", "Corporation", "NGO",
60
+ "Person", "Product", "Brand",
61
+ ]);
62
+ function jsonLdEntities(nodes) {
63
+ const out = [];
64
+ const visit = (node) => {
65
+ if (Array.isArray(node)) {
66
+ node.forEach(visit);
67
+ return;
68
+ }
69
+ if (typeof node !== "object" || node === null)
70
+ return;
71
+ const obj = node;
72
+ const type = obj["@type"];
73
+ const name = obj["name"];
74
+ if (typeof name === "string" && typeof type === "string" && JSON_LD_ENTITY_TYPES.has(type)) {
75
+ out.push({ value: name.trim().toLowerCase(), source: "json-ld", type: "organization" });
76
+ }
77
+ for (const v of Object.values(obj))
78
+ visit(v);
79
+ };
80
+ nodes.forEach(visit);
81
+ return out;
82
+ }
83
+ export function extractNamedEntities(maskedText, jsonLd = []) {
84
+ const seen = new Set();
85
+ const out = [];
86
+ const push = (value, source) => {
87
+ const v = value.replace(/\s+/g, " ").trim().toLowerCase();
88
+ if (v.length < 2 || seen.has(v))
89
+ return;
90
+ seen.add(v);
91
+ out.push({ value: v, source });
92
+ };
93
+ for (const m of jsonLdEntities(jsonLd))
94
+ push(m.value, "json-ld");
95
+ for (const m of maskedText.match(ACRONYM) ?? [])
96
+ push(m, "cue-word");
97
+ for (const m of maskedText.match(MULTI_WORD_PROPER_NOUN) ?? []) {
98
+ push(m, CUE_WORD.test(m) ? "cue-word" : "proper-noun");
99
+ }
100
+ return out;
101
+ }
102
+ export const DEFAULT_CITATION_ALLOWLIST = [
103
+ "wikipedia.org", "w3.org", "iso.org", "ietf.org", "rfc-editor.org",
104
+ "doi.org", "nih.gov", "ncbi.nlm.nih.gov", "who.int", "schema.org",
105
+ "oecd.org", "worldbank.org", "europa.eu",
106
+ // Google's own published documentation is the primary authoritative source for
107
+ // claims about Google's ranking and spam systems (Search Essentials, spam
108
+ // policies, helpful-content guidance) and for Core Web Vitals (web.dev).
109
+ // Scoped to the docs subdomain — a bare google.com link (Maps, search results)
110
+ // is deliberately NOT credited as authoritative.
111
+ "developers.google.com", "web.dev",
112
+ ];
113
+ const MULTI_PART_SUFFIXES = new Set([
114
+ "co.uk", "ac.uk", "gov.uk", "org.uk", "com.au", "gov.au", "edu.au",
115
+ "co.jp", "co.nz", "co.za", "com.br",
116
+ ]);
117
+ function hostOf(url) {
118
+ try {
119
+ return new URL(url).hostname.toLowerCase();
120
+ }
121
+ catch {
122
+ return null;
123
+ }
124
+ }
125
+ export function registrableDomain(host) {
126
+ const labels = host.replace(/^www\./, "").split(".");
127
+ if (labels.length <= 2)
128
+ return labels.join(".");
129
+ const lastTwo = labels.slice(-2).join(".");
130
+ if (MULTI_PART_SUFFIXES.has(lastTwo))
131
+ return labels.slice(-3).join(".");
132
+ return lastTwo;
133
+ }
134
+ function isAuthoritativeTld(host) {
135
+ return /\.(?:gov|edu|mil|int)$/.test(host) || /\.(?:gov|edu|ac)\.[a-z]{2}$/.test(host);
136
+ }
137
+ export function classifyCitations(resolvedHrefs, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
138
+ const pageHost = hostOf(pageUrl);
139
+ const pageDomain = pageHost ? registrableDomain(pageHost) : null;
140
+ const seen = new Set();
141
+ const out = [];
142
+ for (const href of resolvedHrefs) {
143
+ const host = hostOf(href);
144
+ if (!host)
145
+ continue;
146
+ const domain = registrableDomain(host);
147
+ if (pageDomain && domain === pageDomain)
148
+ continue; // internal link
149
+ if (seen.has(href))
150
+ continue;
151
+ seen.add(href);
152
+ if (isAuthoritativeTld(host)) {
153
+ out.push({ href, domain, authority: "authoritative", reason: "tld" });
154
+ }
155
+ else if (allowlist.some((d) => host === d || host.endsWith(`.${d}`))) {
156
+ out.push({ href, domain, authority: "authoritative", reason: "allowlist" });
157
+ }
158
+ else {
159
+ out.push({ href, domain, authority: "general" });
160
+ }
161
+ }
162
+ return out;
163
+ }
164
+ export function hasAuthoritativeCitation(resolvedHrefs, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
165
+ return classifyCitations(resolvedHrefs, pageUrl, allowlist).some((c) => c.authority === "authoritative");
166
+ }
167
+ const SENTENCE_SPLIT = /(?<=[.!?])\s+(?=[A-Z0-9"'(])/;
168
+ function resolveHrefs(hrefs, base) {
169
+ const out = [];
170
+ for (const h of hrefs) {
171
+ try {
172
+ out.push(new URL(h, base).href);
173
+ }
174
+ catch { /* skip unparseable */ }
175
+ }
176
+ return out;
177
+ }
178
+ /**
179
+ * Deterministic approximation of "a verifiable claim": a block (<p>/<li>) that
180
+ * contains a statistic AND an outbound citation. Approximated at block level,
181
+ * not exact sentence level — documented limitation. Detects co-occurrence, not
182
+ * semantic truth. Consume at `speculative` confidence.
183
+ */
184
+ export function extractGroundedClaims(html, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
185
+ const $ = load(html);
186
+ $("nav, header, footer, aside, script, style, noscript").remove();
187
+ const claims = [];
188
+ const scope = $("article").length > 0 ? $("article") : $("main").length > 0 ? $("main") : $("body");
189
+ scope.find("p, li").each((_i, el) => {
190
+ const $el = $(el);
191
+ const rawLinks = $el.find("a[href]").map((_j, a) => String($(a).attr("href") ?? "")).get();
192
+ const citations = classifyCitations(resolveHrefs(rawLinks, pageUrl), pageUrl, allowlist);
193
+ if (citations.length === 0)
194
+ return;
195
+ const text = $el.text().replace(/\s+/g, " ").trim();
196
+ for (const sentence of text.split(SENTENCE_SPLIT)) {
197
+ const facts = [
198
+ ...extractCitableFacts(sentence),
199
+ ...extractMeasurements(sentence).map((m) => m.value),
200
+ ];
201
+ if (facts.length === 0)
202
+ continue;
203
+ claims.push({
204
+ sentence: sentence.trim().slice(0, 240),
205
+ facts,
206
+ citations: citations.map((c) => c.href),
207
+ });
208
+ break; // one grounded claim per block is enough; avoids over-counting
209
+ }
210
+ });
211
+ return claims;
212
+ }
213
+ export function extractPageFacts(page, entityPatterns, allowlist = DEFAULT_CITATION_ALLOWLIST) {
214
+ const masked = maskEntities(page.contentText, entityPatterns);
215
+ return {
216
+ citableFacts: extractCitableFacts(masked),
217
+ measurements: extractMeasurements(masked),
218
+ namedEntities: extractNamedEntities(masked, page.jsonLd),
219
+ citations: classifyCitations(page.resolvedHrefs, page.url, allowlist),
220
+ groundedClaims: extractGroundedClaims(page.html, page.url, allowlist),
221
+ };
222
+ }
223
+ //# sourceMappingURL=fact-extraction.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fact-extraction.js","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAwChD,+EAA+E;AAC/E,6EAA6E;AAC7E,+DAA+D;AAC/D,MAAM,qBAAqB,GAA2C;IACpE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,qBAAqB,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,oFAAoF;AACpF,MAAM,iBAAiB,GACrB,6DAA6D,CAAC;AAChE,MAAM,oBAAoB,GAA6C;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,2CAA2C,EAAE;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAC9C,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,MAAM,CAAC,6BAA6B,iBAAiB,MAAM,EAAE,GAAG,CAAC,EAAE;CACtG,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,UAAkB;IACpD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACnD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC1D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC9B,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,sBAAsB,GAAG,mEAAmE,CAAC;AACnG,MAAM,OAAO,GAAG,uFAAuF,CAAC;AACxG,MAAM,QAAQ,GAAG,yJAAyJ,CAAC;AAE3K,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,cAAc,EAAE,wBAAwB,EAAE,aAAa,EAAE,KAAK;IAC9D,QAAQ,EAAE,SAAS,EAAE,OAAO;CAC7B,CAAC,CAAC;AAEH,SAAS,cAAc,CAAC,KAAgB;IACtC,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,CAAC,IAAa,EAAQ,EAAE;QACpC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAAC,OAAO;QAAC,CAAC;QACzD,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI;YAAE,OAAO;QACtD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3F,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1F,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC;IACF,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,SAAoB,EAAE;IAC7E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,MAA6B,EAAQ,EAAE;QAClE,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO;QACxC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACZ,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACjC,CAAC,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,cAAc,CAAC,MAAM,CAAC;QAAE,IAAI,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,sBAAsB,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/D,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAsB;IAC3D,eAAe,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,gBAAgB;IAClE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,SAAS,EAAE,YAAY;IACjE,UAAU,EAAE,eAAe,EAAE,WAAW;IACxC,+EAA+E;IAC/E,0EAA0E;IAC1E,yEAAyE;IACzE,+EAA+E;IAC/E,iDAAiD;IACjD,uBAAuB,EAAE,SAAS;CACnC,CAAC;AAEF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IAClE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;CACpC,CAAC,CAAC;AAEH,SAAS,MAAM,CAAC,GAAW;IACzB,IAAI,CAAC;QAAC,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,IAAI,CAAC;IAAC,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,IAAI,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,wBAAwB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzF,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IACjC,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,UAAU,IAAI,MAAM,KAAK,UAAU;YAAE,SAAS,CAAC,gBAAgB;QACnE,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACf,IAAI,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACxE,CAAC;aAAM,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACvE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC;QAC9E,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,OAAO,iBAAiB,CAAC,aAAa,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAC;AAC3G,CAAC;AAED,MAAM,cAAc,GAAG,8BAA8B,CAAC;AAEtD,SAAS,YAAY,CAAC,KAAe,EAAE,IAAY;IACjD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC;YAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CACnC,IAAY,EACZ,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC,CAAC,qDAAqD,CAAC,CAAC,MAAM,EAAE,CAAC;IAClE,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACpG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAC3F,MAAM,SAAS,GAAG,iBAAiB,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;QACzF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACpD,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG;gBACZ,GAAG,mBAAmB,CAAC,QAAQ,CAAC;gBAChC,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;aACrD,CAAC;YACF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YACjC,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBACvC,KAAK;gBACL,SAAS,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;aACxC,CAAC,CAAC;YACH,MAAM,CAAC,+DAA+D;QACxE,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAmF,EACnF,cAAmC,EACnC,YAA+B,0BAA0B;IAEzD,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC9D,OAAO;QACL,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,aAAa,EAAE,oBAAoB,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;QACxD,SAAS,EAAE,iBAAiB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;QACrE,cAAc,EAAE,qBAAqB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;KACtE,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAmEA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAGZ,WAAW,EAUX,UAAU,EAIX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAA8D,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AA+D1I,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAEvE;AA6xBD;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAoYD,wBAAgB,2BAA2B,CAAC,GAAG,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAgBjG;AA+pBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA2wC/F"}
1
+ {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAoEA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAGZ,WAAW,EAUX,UAAU,EAIX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAA8D,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAqE1I,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAEvE;AA2yBD;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAoYD,wBAAgB,2BAA2B,CAAC,GAAG,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAgBjG;AA+pBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA4yC/F"}
package/dist/auditor.js CHANGED
@@ -5,6 +5,7 @@ import { parseHtmlPage } from "./parser.js";
5
5
  import { pageSkipReason } from "./page-filter.js";
6
6
  import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
7
7
  import { eeatSignalsRule } from "./rules/content/eeat-signals.js";
8
+ import { citationCoverageRule } from "./rules/content/citation-coverage.js";
8
9
  import { metaUniquenessRule } from "./rules/content/meta-uniqueness.js";
9
10
  import { missingAuthorRule } from "./rules/content/missing-author.js";
10
11
  import { uniqueValueRule } from "./rules/content/unique-value.js";
@@ -72,6 +73,10 @@ import { CORE_RULESET_VERSION } from "./ruleset-version.js";
72
73
  import { planScrapeStrategy, DEFAULT_AGE_FLOOR_DAYS } from "./scrape-strategy.js";
73
74
  import { detectTemplates, buildUrlToTemplateMap, shouldActivateTemplateScoring } from "./template-detection.js";
74
75
  import { scoreTemplates, siteVerdictFromTemplates } from "./per-template-scoring.js";
76
+ import { deriveEntityPatterns } from "./algorithms/auto-entity-mask.js";
77
+ import { CompositeAuthorityProvider } from "./algorithms/authority/provider.js";
78
+ import { OpenPageRankProvider } from "./algorithms/authority/openpagerank.js";
79
+ import { registrableDomain } from "./algorithms/fact-extraction.js";
75
80
  const DEFAULTS = {
76
81
  nearDuplicateThreshold: 0.85,
77
82
  entitySwapThreshold: 0.95,
@@ -80,10 +85,12 @@ const DEFAULTS = {
80
85
  publicationVelocityMaxPerDayCorpusFraction: 0.10,
81
86
  boilerplateMaxRatio: 0.7,
82
87
  templateDiversityMinUniqueRatio: 0.35,
83
- uniqueValueMinWords: 100,
88
+ uniqueValueDensity: { passBelow: 0.20, errorBelow: 0.12 },
84
89
  metaUniquenessMinJaccard: 0.9,
85
90
  linkDepthMaxClicks: 3,
86
91
  templateCoverageMinPages: 5,
92
+ citationCoverageMinClaims: 4,
93
+ citationCoverageMinAuthoritative: 1,
87
94
  answerFirstMaxWords: 100,
88
95
  citableFactsMin: 3,
89
96
  citableFactsTarget: 8,
@@ -209,7 +216,7 @@ const SCORING_PROFILES = {
209
216
  // first-principles analysis predicts will false-positive on catalog-
210
217
  // shaped sites (Zapier integrations, G2 categories, Wise currency pairs,
211
218
  // etc.). A reputable-pSEO calibration corpus + runner has been added
212
- // (scripts/calibration-reputable-pseo.ts); these overrides will be
219
+ // (scripts/calibration-corpus.ts); these overrides will be
213
220
  // tightened or loosened based on actual fire-rates measured against
214
221
  // sites that demonstrably win in production. See
215
222
  // docs/superpowers/specs/2026-05-03-calibration-against-reputable-pseo.md.
@@ -414,6 +421,9 @@ const RULE_IMPACTS = {
414
421
  "content/title-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 25 }, // 2026-05-03 round 11: title is high-impact but the original 50-cap was disproportionate to other content rules and tipped Typeform into critical on a 6-finding cluster. Keep the rule at native error severity (duplicate titles ARE real bugs); just don't let one rule dominate the integrity bucket.
415
422
  "content/heading-structure": { baseImpact: 5, perInstance: 1, maxImpact: 20 },
416
423
  "content/image-alt-text": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
424
+ // Citation coverage is low-confidence (block-level grounded-claim heuristic);
425
+ // keep its impact modest so it nudges rather than dominates the score.
426
+ "content/citation-coverage": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
417
427
  "content/translation-no-op": { baseImpact: 30, perInstance: 10, maxImpact: 60 },
418
428
  // v1 warning-severity heuristic; lower than translation-no-op since it's speculative
419
429
  "content/regurgitated-content": { baseImpact: 15, perInstance: 5, maxImpact: 35 },
@@ -642,7 +652,7 @@ sampled = false) {
642
652
  }
643
653
  // Content rules
644
654
  if (isEnabled("content/unique-value") && modeOk("content/unique-value")) {
645
- pushAll(findings, tag(uniqueValueRule(pages, resolvedRules.uniqueValueMinWords)));
655
+ pushAll(findings, tag(uniqueValueRule(pages, resolvedRules.uniqueValueDensity)));
646
656
  }
647
657
  if (isEnabled("content/meta-uniqueness") && modeOk("content/meta-uniqueness")) {
648
658
  pushAll(findings, tag(metaUniquenessRule(pages, entityPatterns, resolvedRules.metaUniquenessMinJaccard)));
@@ -653,6 +663,13 @@ sampled = false) {
653
663
  if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
654
664
  pushAll(findings, tag(eeatSignalsRule(pages)));
655
665
  }
666
+ if (isEnabled("content/citation-coverage") && modeOk("content/citation-coverage")) {
667
+ pushAll(findings, tag(citationCoverageRule(pages, entityPatterns, {
668
+ minClaims: resolvedRules.citationCoverageMinClaims,
669
+ minAuthoritative: resolvedRules.citationCoverageMinAuthoritative,
670
+ allowlist: resolvedRules.citationAllowlist,
671
+ })));
672
+ }
656
673
  // 2026-05-03 v0.5.2 blind-spot fixes — title uniqueness + heading
657
674
  // structure + image alt-text were tier-1 gaps in the blind-spot audit.
658
675
  if (isEnabled("content/title-uniqueness") && modeOk("content/title-uniqueness")) {
@@ -678,7 +695,7 @@ sampled = false) {
678
695
  }
679
696
  // Link rules — use the global link graph
680
697
  if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
681
- pushAll(findings, tag(orphanPagesRule(pages, inbound, rootUrl)));
698
+ pushAll(findings, tag(orphanPagesRule(pages, inbound, rootUrl, sampled)));
682
699
  }
683
700
  if (isEnabled("links/dead-ends") && modeOk("links/dead-ends")) {
684
701
  pushAll(findings, tag(deadEndsRule(pages, knownUrls, rootUrl)));
@@ -689,7 +706,7 @@ sampled = false) {
689
706
  }
690
707
  }
691
708
  if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
692
- pushAll(findings, tag(clusterConnectivityRule(pages, knownUrls)));
709
+ pushAll(findings, tag(clusterConnectivityRule(pages, knownUrls, sampled)));
693
710
  }
694
711
  if (isEnabled("links/host-section-divergence") && modeOk("links/host-section-divergence")) {
695
712
  pushAll(findings, tag(hostSectionDivergenceRule(pages, adjacency)));
@@ -1883,10 +1900,13 @@ export async function auditSource(source, options) {
1883
1900
  ?? DEFAULTS.publicationVelocityMaxPerDayCorpusFraction,
1884
1901
  boilerplateMaxRatio: options?.rules?.boilerplateMaxRatio ?? DEFAULTS.boilerplateMaxRatio,
1885
1902
  templateDiversityMinUniqueRatio: options?.rules?.templateDiversityMinUniqueRatio ?? DEFAULTS.templateDiversityMinUniqueRatio,
1886
- uniqueValueMinWords: options?.rules?.uniqueValueMinWords ?? DEFAULTS.uniqueValueMinWords,
1903
+ uniqueValueDensity: options?.rules?.uniqueValueDensity ?? DEFAULTS.uniqueValueDensity,
1887
1904
  metaUniquenessMinJaccard: options?.rules?.metaUniquenessMinJaccard ?? DEFAULTS.metaUniquenessMinJaccard,
1888
1905
  linkDepthMaxClicks: options?.rules?.linkDepthMaxClicks ?? DEFAULTS.linkDepthMaxClicks,
1889
1906
  templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages,
1907
+ citationCoverageMinClaims: options?.rules?.citationCoverageMinClaims ?? DEFAULTS.citationCoverageMinClaims,
1908
+ citationCoverageMinAuthoritative: options?.rules?.citationCoverageMinAuthoritative ?? DEFAULTS.citationCoverageMinAuthoritative,
1909
+ citationAllowlist: options?.rules?.citationAllowlist,
1890
1910
  answerFirstMaxWords: options?.rules?.answerFirstMaxWords ?? DEFAULTS.answerFirstMaxWords,
1891
1911
  citableFactsMin: options?.rules?.citableFactsMin ?? DEFAULTS.citableFactsMin,
1892
1912
  citableFactsTarget: options?.rules?.citableFactsTarget ?? DEFAULTS.citableFactsTarget,
@@ -2346,7 +2366,10 @@ export async function auditSource(source, options) {
2346
2366
  const auditMode = options?.mode ?? "full";
2347
2367
  // Site-wide rules (run once, outside group loop)
2348
2368
  if (sitemapUrlSet && sitemapUrlSet.size > 0 && auditMode !== "diff") {
2349
- const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet);
2369
+ const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet, {
2370
+ sampled: isSampledAudit,
2371
+ normalizeUrlOptions,
2372
+ });
2350
2373
  pushAll(allFindings, sitemapFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
2351
2374
  if (robotsTxtContent) {
2352
2375
  const robotsFindings = robotsComplianceRule(parsedPages, sitemapUrlSet, robotsTxtContent);
@@ -2372,6 +2395,7 @@ export async function auditSource(source, options) {
2372
2395
  pushAll(allFindings, dataIdenticalFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
2373
2396
  }
2374
2397
  }
2398
+ const derivedEntityPatterns = options?.autoEntityMask === false ? [] : deriveEntityPatterns(parsedPagesAll);
2375
2399
  for (const [groupName, groupPages] of classified) {
2376
2400
  if (groupPages.length === 0)
2377
2401
  continue;
@@ -2380,7 +2404,7 @@ export async function auditSource(source, options) {
2380
2404
  continue;
2381
2405
  const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
2382
2406
  const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
2383
- const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full",
2407
+ const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, [...DEFAULT_ENTITY_PATTERNS, ...derivedEntityPatterns], groupConfig?.overrides, options?.mode ?? "full",
2384
2408
  // 2026-05-06 calibration fix: pinnedUrls mode fetches a hand-picked subset
2385
2409
  // of the full site — the link graph across those pages is structurally
2386
2410
  // incomplete, just like a random-sampled crawl. Pass `true` so
@@ -2486,12 +2510,31 @@ export async function auditSource(source, options) {
2486
2510
  const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification, parsedPages.length);
2487
2511
  const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
2488
2512
  const issues = bucketIssues(enriched.findings);
2513
+ // Resolve a domain-authority score to moderate the verdict. Explicit option
2514
+ // wins; otherwise a provider (custom, or default OPR composite). null/absent
2515
+ // → no moderation (fail-safe).
2516
+ let resolvedAuthorityScore = options?.authorityScore;
2517
+ let resolvedAuthorityDomain;
2518
+ if (resolvedAuthorityScore === undefined) {
2519
+ const provider = options?.authorityProvider ??
2520
+ new CompositeAuthorityProvider([new OpenPageRankProvider(options?.openPageRankApiKey ?? "")]);
2521
+ try {
2522
+ const host = new URL(source.startsWith("http") ? source : `https://${source}`).hostname;
2523
+ resolvedAuthorityDomain = registrableDomain(host);
2524
+ const a = await provider.authorityFor(resolvedAuthorityDomain);
2525
+ if (a !== null)
2526
+ resolvedAuthorityScore = a;
2527
+ }
2528
+ catch {
2529
+ /* source is a local dir / unparseable → no authority */
2530
+ }
2531
+ }
2489
2532
  // v0.6.0 — spec §15.1: site verdict comes from siteVerdictFromTemplates when
2490
2533
  // ≥1 template has ≥5% coverage. Falls back to the legacy risk-ladder verdict
2491
2534
  // when no template meets the threshold (single-template sites, `unclear`/
2492
2535
  // `small-marketing` classifications, or the long-tail-only case).
2493
2536
  // The `risk` score is intentionally unchanged — §15.1 governs verdict only.
2494
- const legacyVerdict = shiftVerdictForAuthority(verdictForRisk(risk), options?.authorityScore);
2537
+ const legacyVerdict = shiftVerdictForAuthority(verdictForRisk(risk), resolvedAuthorityScore);
2495
2538
  const templateVerdict = siteVerdictFromTemplates(siteTemplates);
2496
2539
  const verdict = templateVerdict !== null ? templateVerdict : legacyVerdict;
2497
2540
  const headline = buildHeadline(bucketCounts);
@@ -2530,6 +2573,9 @@ export async function auditSource(source, options) {
2530
2573
  auditedUrls: parsedPages.length > 0
2531
2574
  ? [...parsedPages.map((p) => p.url)].sort()
2532
2575
  : undefined,
2576
+ ...(resolvedAuthorityScore !== undefined
2577
+ ? { authority: { score: resolvedAuthorityScore, domain: resolvedAuthorityDomain ?? "" } }
2578
+ : {}),
2533
2579
  };
2534
2580
  // Partial-report flag: the backpressure watchdog aborted mid-crawl and we
2535
2581
  // salvaged whatever pages had been fetched. Consumers MUST treat coverage as