@pseolint/core 0.6.6 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/algorithms/fact-extraction.d.ts +46 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -0
- package/dist/algorithms/fact-extraction.js +217 -0
- package/dist/algorithms/fact-extraction.js.map +1 -0
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +16 -0
- package/dist/auditor.js.map +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/origin-preflight.d.ts +89 -0
- package/dist/origin-preflight.d.ts.map +1 -0
- package/dist/origin-preflight.js +93 -0
- package/dist/origin-preflight.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +1 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
- package/dist/rules/aeo/citable-facts.js +4 -33
- package/dist/rules/aeo/citable-facts.js.map +1 -1
- package/dist/rules/content/citation-coverage.d.ts +11 -0
- package/dist/rules/content/citation-coverage.d.ts.map +1 -0
- package/dist/rules/content/citation-coverage.js +43 -0
- package/dist/rules/content/citation-coverage.js.map +1 -0
- package/dist/rules/content/value-add.d.ts.map +1 -1
- package/dist/rules/content/value-add.js +3 -1
- package/dist/rules/content/value-add.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +1 -0
- package/dist/rules/scope.js.map +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +1 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/types.d.ts +6 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +93 -93
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# @pseolint/core
|
|
2
2
|
|
|
3
|
-
> Programmatic SEO audit engine —
|
|
3
|
+
> Programmatic SEO audit engine — 44 rules, surfaced per-template, on every monitored release.
|
|
4
4
|
|
|
5
|
-
The core engine behind [pseolint](https://www.npmjs.com/package/pseolint) v0.
|
|
5
|
+
The core engine behind [pseolint](https://www.npmjs.com/package/pseolint) v0.7.0. Use this package to embed pSEO auditing into your own tools, CI pipelines, or SaaS products.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
@@ -34,7 +34,7 @@ for (const t of result.templates) {
|
|
|
34
34
|
|
|
35
35
|
## What It Checks
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
44 rules grouped into 4 scoring super-categories (v0.4): **Integrity** (spam + content + cannibal, weight 0.50), **Discoverability** (links + tech, 0.20), **Citation** (aeo + schema, 0.25), **Data** (0.05). Source-tree namespaces remain `spam/*`, `aeo/*`, etc. for stable rule IDs.
|
|
38
38
|
|
|
39
39
|
- **Spam / SpamBrain risk** (8) — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern (cluster-collapsed since v0.5.2)
|
|
40
40
|
- **Technical SEO** (9) — canonical consistency, canonical/noindex and robots/noindex conflicts, sitemap completeness, robots compliance, redirect chains, soft 404s, hreflang reciprocity, robots-sitemap presence, **og-completeness** (v0.5.2)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { EntityMaskPattern, ParsedPage } from "../types.js";
|
|
2
|
+
/** Category tag attached to an extracted numeric fact. */
export type FactKind = "money" | "percent" | "timeframe" | "date" | "isoDate" | "form" | "ratio" | "measurement";
/** One extracted numeric fact together with the kind of pattern that matched it. */
export interface FactSpan {
    /** Matched text of the fact. */
    value: string;
    /** Which kind of fact this is. */
    kind: FactKind;
}
/** A named entity found in page text or in JSON-LD structured data. */
export interface NamedEntity {
    /** Entity name. */
    value: string;
    /** How the entity was detected. */
    source: "proper-noun" | "cue-word" | "json-ld";
    /** Optional coarse classification of the entity. */
    type?: "organization" | "person" | "product" | "law" | "standard" | "place" | "other";
}
/** An outbound link together with its authority classification. */
export interface Citation {
    /** The link target. */
    href: string;
    /** Domain the link points at. */
    domain: string;
    /** Whether the target is considered an authoritative source. */
    authority: "authoritative" | "general";
    /** Optional: which check ("tld" or "allowlist") produced the classification. */
    reason?: "tld" | "allowlist";
}
/** A sentence that carries at least one fact, paired with the citations found alongside it. */
export interface GroundedClaim {
    /** Text of the claim. */
    sentence: string;
    /** Facts found in the sentence. */
    facts: string[];
    /** Hrefs of the citations associated with the claim. */
    citations: string[];
}
/** Everything the fact extractors produce for a single page. */
export interface PageFacts {
    /** EXACTLY today's extractRawFacts() output (run on entity-masked text). Frozen. */
    citableFacts: string[];
    /** Ratio and unit-of-measure facts, reported separately from `citableFacts`. */
    measurements: FactSpan[];
    /** Named entities detected for the page. */
    namedEntities: NamedEntity[];
    /** Classified outbound links of the page. */
    citations: Citation[];
    /** Block-level fact + citation co-occurrences. */
    groundedClaims: GroundedClaim[];
}
/** Extracts the citable numeric facts found in `text`. */
export declare function extractCitableFacts(text: string): string[];
/** Extracts ratio and unit-of-measure facts from entity-masked text. */
export declare function extractMeasurements(maskedText: string): FactSpan[];
/** Extracts named entities from entity-masked text and, optionally, JSON-LD nodes. */
export declare function extractNamedEntities(maskedText: string, jsonLd?: unknown[]): NamedEntity[];
/** Domains used as the allowlist when a caller does not supply one. */
export declare const DEFAULT_CITATION_ALLOWLIST: readonly string[];
/** Reduces a hostname to its registrable domain. */
export declare function registrableDomain(host: string): string;
/** Classifies the given resolved hrefs relative to the page at `pageUrl`. */
export declare function classifyCitations(resolvedHrefs: readonly string[], pageUrl: string, allowlist?: readonly string[]): Citation[];
/** Reports whether at least one of the given hrefs classifies as authoritative. */
export declare function hasAuthoritativeCitation(resolvedHrefs: readonly string[], pageUrl: string, allowlist?: readonly string[]): boolean;
/**
 * Deterministic approximation of "a verifiable claim": a block (<p>/<li>) that
 * contains a statistic AND an outbound citation. Approximated at block level,
 * not exact sentence level — documented limitation. Detects co-occurrence, not
 * semantic truth. Consume at `speculative` confidence.
 */
export declare function extractGroundedClaims(html: string, pageUrl: string, allowlist?: readonly string[]): GroundedClaim[];
/** Runs every extractor for one parsed page and bundles the results. */
export declare function extractPageFacts(page: Pick<ParsedPage, "url" | "contentText" | "html" | "resolvedHrefs" | "jsonLd">, entityPatterns: EntityMaskPattern[], allowlist?: readonly string[]): PageFacts;
|
|
46
|
+
//# sourceMappingURL=fact-extraction.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact-extraction.d.ts","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,MAAM,QAAQ,GAChB,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,GAC/D,OAAO,GAAG,aAAa,CAAC;AAE5B,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,QAAQ,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;IAC/C,IAAI,CAAC,EAAE,cAAc,GAAG,QAAQ,GAAG,SAAS,GAAG,KAAK,GAAG,UAAU,GAAG,OAAO,GAAG,OAAO,CAAC;CACvF;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,eAAe,GAAG,SAAS,CAAC;IACvC,MAAM,CAAC,EAAE,KAAK,GAAG,WAAW,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,oFAAoF;IACpF,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,YAAY,EAAE,QAAQ,EAAE,CAAC;IACzB,aAAa,EAAE,WAAW,EAAE,CAAC;IAC7B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,cAAc,EAAE,aAAa,EAAE,CAAC;CACjC;AAqBD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAQ1D;AAWD,wBAAgB,mBAAmB,CAAC,UAAU,EAAE,MAAM,GAAG,QAAQ,EAAE,CAclE;AA4BD,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,EAAO,GAAG,WAAW,EAAE,CAe9F;AAED,eAAO,MAAM,0BAA0B,EAAE,SAAS,MAAM,EAIvD,CAAC;AAWF,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMtD;AAMD,wBAAgB,iBAAiB,CAC/B,aAAa,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,QAAQ,EAAE,CAqBZ;AAED,wBAAgB,wBAAwB,CACtC,aAAa,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,OAAO,CAET;AAYD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,aAAa,EAAE,CA0BjB;AAED,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,GAAG,aAAa,GAAG,MAAM,GAAG,eAAe,GAAG,QAAQ,CAAC,EACnF,cAAc,EAAE,iBAAiB,EAAE,EACnC,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,SAAS,CASX"}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
import { maskEntities } from "./entity-mask.js";
|
|
3
|
+
// --- Numeric "citable" facts: the frozen subset aeo/citable-facts counts. ---
|
|
4
|
+
// These six patterns are lifted verbatim from rules/aeo/citable-facts.ts and
|
|
5
|
+
// MUST stay byte-identical to preserve the calibration corpus.
|
|
6
|
+
const CITABLE_FACT_PATTERNS = [
|
|
7
|
+
{ name: "dollar", regex: /\$[\d,]+(\.\d{2})?/g },
|
|
8
|
+
{ name: "percent", regex: /\b\d+(\.\d+)?\s*%/g },
|
|
9
|
+
{
|
|
10
|
+
name: "timeframe",
|
|
11
|
+
regex: /\b\d+(?:-\d+)?\s*(business\s+days?|days?|weeks?|months?|years?|hours?|minutes?)\b/gi,
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
name: "date",
|
|
15
|
+
regex: /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}(?:,\s*\d{4})?\b/gi,
|
|
16
|
+
},
|
|
17
|
+
{ name: "isoDate", regex: /\b\d{4}-\d{2}-\d{2}\b/g },
|
|
18
|
+
{ name: "form", regex: /\bForm\s+[A-Z0-9][A-Z0-9-]*\b/g },
|
|
19
|
+
];
|
|
20
|
+
/**
 * Collects every match of the citable-fact patterns in `text`.
 *
 * Matches are trimmed, lowercased and de-duplicated; the result keeps
 * first-seen order (pattern order first, then position within the text).
 *
 * @param text Text to scan.
 * @returns Unique normalized fact strings.
 */
export function extractCitableFacts(text) {
    const unique = new Set();
    CITABLE_FACT_PATTERNS.forEach((pattern) => {
        // String#match with a global regex yields all full matches, or null when there are none.
        const hits = text.match(pattern.regex) ?? [];
        hits.forEach((hit) => unique.add(hit.trim().toLowerCase()));
    });
    return [...unique];
}
|
|
31
|
+
// --- Measurements: NEW numeric kinds, deliberately separate from citableFacts. ---
// Unit alternation spliced into the "measurement" regex below. That regex only
// carries the "g" flag, so units are matched case-sensitively as spelled here.
const MEASUREMENT_UNITS = "kg|g|lb|lbs|oz|mi|km|cm|mm|ft|in|MB|GB|TB|KB|ms|fps|mph|kWh";
// Patterns are applied in this order by extractMeasurements:
//   1. "N out of M" / "N in M" ratios (case-insensitive),
//   2. "N:M" ratios (any digits, colon, digits),
//   3. a number followed by one of MEASUREMENT_UNITS.
const MEASUREMENT_PATTERNS = [
    { kind: "ratio", regex: /\b\d+(?:\.\d+)?\s*(?:out of|in)\s*\d+\b/gi },
    { kind: "ratio", regex: /\b\d+\s*:\s*\d+\b/g },
    { kind: "measurement", regex: new RegExp(`\\b\\d+(?:\\.\\d+)?\\s*(?:${MEASUREMENT_UNITS})\\b`, "g") },
];
|
|
38
|
+
/**
 * Finds ratio and unit-of-measure facts in already entity-masked text.
 *
 * Each match is whitespace-collapsed, trimmed and lowercased. A normalized
 * value is reported once only, under the kind of the first pattern that
 * produced it; output order is first-seen order.
 *
 * @param maskedText Entity-masked text to scan.
 * @returns Unique `{ value, kind }` spans.
 */
export function extractMeasurements(maskedText) {
    // Map keeps insertion order, so it doubles as the "seen" set and the output list.
    const spansByValue = new Map();
    for (const pattern of MEASUREMENT_PATTERNS) {
        for (const raw of maskedText.match(pattern.regex) ?? []) {
            const value = raw.replace(/\s+/g, " ").trim().toLowerCase();
            if (!spansByValue.has(value)) {
                spansByValue.set(value, { value, kind: pattern.kind });
            }
        }
    }
    return Array.from(spansByValue.values());
}
|
|
55
|
+
const MULTI_WORD_PROPER_NOUN = /\b[A-Z][a-z]+(?:\s+(?:of\s+|de\s+|and\s+|the\s+)?[A-Z][a-z]+)+\b/g;
|
|
56
|
+
const ACRONYM = /\b(?:ISO|GDPR|HIPAA|FDA|SEC|FTC|EPA|W3C|IETF|RFC|NIST|OSHA|IRS|EU|UN|WHO|CCPA|PCI)\b/g;
|
|
57
|
+
const CUE_WORD = /\b(?:Inc|LLC|Ltd|Corp|GmbH|Act|Regulation|Directive|Agency|Department|Bureau|Commission|Authority|University|Institute|Association|Standard|Protocol)\b/;
|
|
58
|
+
const JSON_LD_ENTITY_TYPES = new Set([
|
|
59
|
+
"Organization", "GovernmentOrganization", "Corporation", "NGO",
|
|
60
|
+
"Person", "Product", "Brand",
|
|
61
|
+
]);
|
|
62
|
+
function jsonLdEntities(nodes) {
|
|
63
|
+
const out = [];
|
|
64
|
+
const visit = (node) => {
|
|
65
|
+
if (Array.isArray(node)) {
|
|
66
|
+
node.forEach(visit);
|
|
67
|
+
return;
|
|
68
|
+
}
|
|
69
|
+
if (typeof node !== "object" || node === null)
|
|
70
|
+
return;
|
|
71
|
+
const obj = node;
|
|
72
|
+
const type = obj["@type"];
|
|
73
|
+
const name = obj["name"];
|
|
74
|
+
if (typeof name === "string" && typeof type === "string" && JSON_LD_ENTITY_TYPES.has(type)) {
|
|
75
|
+
out.push({ value: name.trim().toLowerCase(), source: "json-ld", type: "organization" });
|
|
76
|
+
}
|
|
77
|
+
for (const v of Object.values(obj))
|
|
78
|
+
visit(v);
|
|
79
|
+
};
|
|
80
|
+
nodes.forEach(visit);
|
|
81
|
+
return out;
|
|
82
|
+
}
|
|
83
|
+
export function extractNamedEntities(maskedText, jsonLd = []) {
|
|
84
|
+
const seen = new Set();
|
|
85
|
+
const out = [];
|
|
86
|
+
const push = (value, source) => {
|
|
87
|
+
const v = value.replace(/\s+/g, " ").trim().toLowerCase();
|
|
88
|
+
if (v.length < 2 || seen.has(v))
|
|
89
|
+
return;
|
|
90
|
+
seen.add(v);
|
|
91
|
+
out.push({ value: v, source });
|
|
92
|
+
};
|
|
93
|
+
for (const m of jsonLdEntities(jsonLd))
|
|
94
|
+
push(m.value, "json-ld");
|
|
95
|
+
for (const m of maskedText.match(ACRONYM) ?? [])
|
|
96
|
+
push(m, "cue-word");
|
|
97
|
+
for (const m of maskedText.match(MULTI_WORD_PROPER_NOUN) ?? []) {
|
|
98
|
+
push(m, CUE_WORD.test(m) ? "cue-word" : "proper-noun");
|
|
99
|
+
}
|
|
100
|
+
return out;
|
|
101
|
+
}
|
|
102
|
+
// Default allowlist used by the citation helpers in this file when the caller
// passes none. classifyCitations matches a host against an entry either
// exactly or as a dot-separated suffix of the host.
export const DEFAULT_CITATION_ALLOWLIST = [
    "wikipedia.org", "w3.org", "iso.org", "ietf.org", "rfc-editor.org",
    "doi.org", "nih.gov", "ncbi.nlm.nih.gov", "who.int", "schema.org",
    "oecd.org", "worldbank.org", "europa.eu",
];
|
|
107
|
+
// Two-label public suffixes under which the registrable domain has three labels.
// Intentionally a small built-in list, not the full Public Suffix List.
const MULTI_PART_SUFFIXES = new Set([
    "co.uk", "ac.uk", "gov.uk", "org.uk", "com.au", "gov.au", "edu.au",
    "co.jp", "co.nz", "co.za", "com.br",
]);
/**
 * Returns the lowercased hostname of `url`, or null when `url` cannot be parsed.
 *
 * A trailing root dot ("example.com.") is removed: the URL parser keeps it,
 * and it would otherwise defeat suffix-based comparisons made on the result.
 * URLs without a host (e.g. "mailto:") yield the empty string.
 */
function hostOf(url) {
    try {
        return new URL(url).hostname.toLowerCase().replace(/\.$/, "");
    }
    catch {
        return null;
    }
}
/**
 * Reduces a hostname to its registrable domain.
 *
 * The host is lowercased and stripped of a trailing root dot and a leading
 * "www." before splitting, so "WWW.Example.COM" and "example.com." both map
 * to "example.com". Hosts with one or two labels are returned as-is (after
 * normalization); otherwise the last two labels are returned, or the last
 * three when the last two form a known multi-part suffix such as "co.uk".
 *
 * @param host Hostname (no scheme, port or path).
 * @returns The registrable domain.
 */
export function registrableDomain(host) {
    const normalized = host.toLowerCase().replace(/\.$/, "").replace(/^www\./, "");
    const labels = normalized.split(".");
    if (labels.length <= 2)
        return labels.join(".");
    const lastTwo = labels.slice(-2).join(".");
    if (MULTI_PART_SUFFIXES.has(lastTwo))
        return labels.slice(-3).join(".");
    return lastTwo;
}
|
|
128
|
+
/**
 * Tells whether `host` ends in a suffix this module treats as authoritative:
 * a bare .gov/.edu/.mil/.int TLD, or .gov/.edu/.ac followed by a two-letter
 * country code (e.g. "gov.uk", "ac.jp").
 *
 * @param host Lowercase hostname.
 * @returns true when the host matches one of the two suffix shapes.
 */
function isAuthoritativeTld(host) {
    const genericSuffix = /\.(?:gov|edu|mil|int)$/;
    if (genericSuffix.test(host)) {
        return true;
    }
    const countrySuffix = /\.(?:gov|edu|ac)\.[a-z]{2}$/;
    return countrySuffix.test(host);
}
|
|
131
|
+
/**
 * Classifies a page's outbound links as authoritative or general citations.
 *
 * Links are skipped when their URL has no parseable host, when they share the
 * page's registrable domain (internal links), or when the exact href was
 * already seen. A remaining link is "authoritative" if its host has an
 * authoritative TLD (reason "tld") or matches an allowlist entry exactly or
 * as a dot-suffix (reason "allowlist"); otherwise it is "general".
 *
 * @param resolvedHrefs Absolute hrefs found on the page.
 * @param pageUrl URL of the page the links came from.
 * @param allowlist Domains to treat as authoritative.
 * @returns One citation per distinct external href, in input order.
 */
export function classifyCitations(resolvedHrefs, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    const ownHost = hostOf(pageUrl);
    const ownDomain = ownHost ? registrableDomain(ownHost) : null;
    const onAllowlist = (host) => allowlist.some((d) => host === d || host.endsWith(`.${d}`));
    const visited = new Set();
    const results = [];
    for (const href of resolvedHrefs) {
        const host = hostOf(href);
        if (!host)
            continue;
        const domain = registrableDomain(host);
        const isInternal = Boolean(ownDomain) && domain === ownDomain;
        if (isInternal || visited.has(href))
            continue;
        visited.add(href);
        // TLD check takes precedence over the allowlist, so e.g. a .gov host reports "tld".
        let reason;
        if (isAuthoritativeTld(host)) {
            reason = "tld";
        }
        else if (onAllowlist(host)) {
            reason = "allowlist";
        }
        results.push(reason
            ? { href, domain, authority: "authoritative", reason }
            : { href, domain, authority: "general" });
    }
    return results;
}
|
|
158
|
+
/**
 * Reports whether any of the given links classifies as an authoritative citation.
 *
 * @param resolvedHrefs Absolute hrefs found on the page.
 * @param pageUrl URL of the page the links came from.
 * @param allowlist Domains to treat as authoritative.
 * @returns true if at least one external link is authoritative.
 */
export function hasAuthoritativeCitation(resolvedHrefs, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    for (const citation of classifyCitations(resolvedHrefs, pageUrl, allowlist)) {
        if (citation.authority === "authoritative") {
            return true;
        }
    }
    return false;
}
|
|
161
|
+
// Splits on whitespace that follows ., ! or ? and precedes an uppercase letter,
// digit, quote or opening parenthesis. The punctuation stays with the sentence
// before the split because it is only matched by a lookbehind.
const SENTENCE_SPLIT = /(?<=[.!?])\s+(?=[A-Z0-9"'(])/;
|
|
162
|
+
/**
 * Resolves raw href attribute values against a base URL.
 *
 * @param hrefs Raw href strings (absolute or relative).
 * @param base URL to resolve relative hrefs against.
 * @returns Absolute URLs in input order; hrefs that fail to parse are dropped.
 */
function resolveHrefs(hrefs, base) {
    return hrefs.flatMap((raw) => {
        try {
            return [new URL(raw, base).href];
        }
        catch {
            // Unparseable href (or base): contribute nothing for this entry.
            return [];
        }
    });
}
|
|
172
|
+
/**
 * Deterministic approximation of "a verifiable claim": a block (<p>/<li>) that
 * contains a statistic AND an outbound citation. Approximated at block level,
 * not exact sentence level — documented limitation. Detects co-occurrence, not
 * semantic truth. Consume at `speculative` confidence.
 *
 * Page chrome (nav, header, footer, aside, script, style, noscript) is removed
 * first; blocks are then taken from <article> if present, else <main>, else
 * <body>. Each block with at least one external link contributes at most one
 * claim: its first sentence that contains a citable fact or measurement,
 * truncated to 240 characters.
 *
 * @param html Raw page HTML.
 * @param pageUrl URL of the page, used to resolve and classify links.
 * @param allowlist Domains to treat as authoritative.
 * @returns Grounded claims in document order.
 */
export function extractGroundedClaims(html, pageUrl, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    const $ = load(html);
    $("nav, header, footer, aside, script, style, noscript").remove();
    let root = $("body");
    if ($("article").length > 0) {
        root = $("article");
    }
    else if ($("main").length > 0) {
        root = $("main");
    }
    const grounded = [];
    root.find("p, li").each((_index, block) => {
        const $block = $(block);
        const hrefs = $block
            .find("a[href]")
            .map((_j, anchor) => String($(anchor).attr("href") ?? ""))
            .get();
        const outbound = classifyCitations(resolveHrefs(hrefs, pageUrl), pageUrl, allowlist);
        if (outbound.length === 0)
            return;
        const sentences = $block.text().replace(/\s+/g, " ").trim().split(SENTENCE_SPLIT);
        for (const sentence of sentences) {
            const facts = extractCitableFacts(sentence).concat(extractMeasurements(sentence).map((m) => m.value));
            if (facts.length > 0) {
                grounded.push({
                    sentence: sentence.trim().slice(0, 240),
                    facts,
                    citations: outbound.map((c) => c.href),
                });
                // one grounded claim per block is enough; avoids over-counting
                return;
            }
        }
    });
    return grounded;
}
|
|
207
|
+
/**
 * Runs every extractor in this module for one parsed page.
 *
 * Text-based extractors (citable facts, measurements, named entities) run on
 * the page's content text after entity masking; citations come from the
 * page's resolved hrefs and grounded claims from its raw HTML.
 *
 * @param page Parsed page fields: url, contentText, html, resolvedHrefs, jsonLd.
 * @param entityPatterns Patterns handed to maskEntities before text extraction.
 * @param allowlist Domains to treat as authoritative citations.
 * @returns The combined extraction result for the page.
 */
export function extractPageFacts(page, entityPatterns, allowlist = DEFAULT_CITATION_ALLOWLIST) {
    const { url, contentText, html, resolvedHrefs, jsonLd } = page;
    const maskedText = maskEntities(contentText, entityPatterns);
    const citableFacts = extractCitableFacts(maskedText);
    const measurements = extractMeasurements(maskedText);
    const namedEntities = extractNamedEntities(maskedText, jsonLd);
    const citations = classifyCitations(resolvedHrefs, url, allowlist);
    const groundedClaims = extractGroundedClaims(html, url, allowlist);
    return { citableFacts, measurements, namedEntities, citations, groundedClaims };
}
|
|
217
|
+
//# sourceMappingURL=fact-extraction.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact-extraction.js","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAwChD,+EAA+E;AAC/E,6EAA6E;AAC7E,+DAA+D;AAC/D,MAAM,qBAAqB,GAA2C;IACpE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,qBAAqB,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,oFAAoF;AACpF,MAAM,iBAAiB,GACrB,6DAA6D,CAAC;AAChE,MAAM,oBAAoB,GAA6C;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,2CAA2C,EAAE;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAC9C,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,MAAM,CAAC,6BAA6B,iBAAiB,MAAM,EAAE,GAAG,CAAC,EAAE;CACtG,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,UAAkB;IACpD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACnD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC1D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC9B,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,sBAAsB,GAAG,mEAAmE,CAAC;AACnG,MAAM,OAAO,GAAG,uFAAuF,CAAC;AACxG,MAAM,QAAQ,GAAG,yJAAyJ,CAAC;AAE3K,MAAM,oBAAoB,GAAG,IAAI
,GAAG,CAAC;IACnC,cAAc,EAAE,wBAAwB,EAAE,aAAa,EAAE,KAAK;IAC9D,QAAQ,EAAE,SAAS,EAAE,OAAO;CAC7B,CAAC,CAAC;AAEH,SAAS,cAAc,CAAC,KAAgB;IACtC,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,CAAC,IAAa,EAAQ,EAAE;QACpC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAAC,OAAO;QAAC,CAAC;QACzD,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI;YAAE,OAAO;QACtD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3F,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1F,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC;IACF,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,SAAoB,EAAE;IAC7E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,MAA6B,EAAQ,EAAE;QAClE,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO;QACxC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACZ,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACjC,CAAC,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,cAAc,CAAC,MAAM,CAAC;QAAE,IAAI,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,sBAAsB,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/D,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAsB;IAC3D,eAAe,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,gBAAgB;IAClE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,SAAS,EAAE,YAAY;IACjE,UAAU,EAAE,eAAe,EAAE,WAAW;CACzC,CAAC;AA
EF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IAClE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;CACpC,CAAC,CAAC;AAEH,SAAS,MAAM,CAAC,GAAW;IACzB,IAAI,CAAC;QAAC,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,IAAI,CAAC;IAAC,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,IAAI,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,wBAAwB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzF,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IACjC,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,UAAU,IAAI,MAAM,KAAK,UAAU;YAAE,SAAS,CAAC,gBAAgB;QACnE,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACf,IAAI,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACxE,CAAC;aAAM,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACvE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC;QAC9E,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IACD,OAAO,
GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,OAAO,iBAAiB,CAAC,aAAa,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAC;AAC3G,CAAC;AAED,MAAM,cAAc,GAAG,8BAA8B,CAAC;AAEtD,SAAS,YAAY,CAAC,KAAe,EAAE,IAAY;IACjD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC;YAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CACnC,IAAY,EACZ,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC,CAAC,qDAAqD,CAAC,CAAC,MAAM,EAAE,CAAC;IAClE,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACpG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAC3F,MAAM,SAAS,GAAG,iBAAiB,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;QACzF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACpD,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG;gBACZ,GAAG,mBAAmB,CAAC,QAAQ,CAAC;gBAChC,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;aACrD,CAAC;YACF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YACjC,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBACvC,KAAK;gBACL,SAAS,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;aACxC,CA
AC,CAAC;YACH,MAAM,CAAC,+DAA+D;QACxE,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAmF,EACnF,cAAmC,EACnC,YAA+B,0BAA0B;IAEzD,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC9D,OAAO;QACL,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,aAAa,EAAE,oBAAoB,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;QACxD,SAAS,EAAE,iBAAiB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;QACrE,cAAc,EAAE,qBAAqB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;KACtE,CAAC;AACJ,CAAC"}
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAoEA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAGZ,WAAW,EAUX,UAAU,EAIX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAA8D,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAiE1I,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAEvE;AA2yBD;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAoYD,wBAAgB,2BAA2B,CAAC,GAAG,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAgBjG;AA+pBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAgxC/F"}
|
package/dist/auditor.js
CHANGED
|
@@ -5,6 +5,7 @@ import { parseHtmlPage } from "./parser.js";
|
|
|
5
5
|
import { pageSkipReason } from "./page-filter.js";
|
|
6
6
|
import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
|
|
7
7
|
import { eeatSignalsRule } from "./rules/content/eeat-signals.js";
|
|
8
|
+
import { citationCoverageRule } from "./rules/content/citation-coverage.js";
|
|
8
9
|
import { metaUniquenessRule } from "./rules/content/meta-uniqueness.js";
|
|
9
10
|
import { missingAuthorRule } from "./rules/content/missing-author.js";
|
|
10
11
|
import { uniqueValueRule } from "./rules/content/unique-value.js";
|
|
@@ -84,6 +85,8 @@ const DEFAULTS = {
|
|
|
84
85
|
metaUniquenessMinJaccard: 0.9,
|
|
85
86
|
linkDepthMaxClicks: 3,
|
|
86
87
|
templateCoverageMinPages: 5,
|
|
88
|
+
citationCoverageMinClaims: 4,
|
|
89
|
+
citationCoverageMinAuthoritative: 1,
|
|
87
90
|
answerFirstMaxWords: 100,
|
|
88
91
|
citableFactsMin: 3,
|
|
89
92
|
citableFactsTarget: 8,
|
|
@@ -414,6 +417,9 @@ const RULE_IMPACTS = {
|
|
|
414
417
|
"content/title-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 25 }, // 2026-05-03 round 11: title is high-impact but the original 50-cap was disproportionate to other content rules and tipped Typeform into critical on a 6-finding cluster. Keep the rule at native error severity (duplicate titles ARE real bugs); just don't let one rule dominate the integrity bucket.
|
|
415
418
|
"content/heading-structure": { baseImpact: 5, perInstance: 1, maxImpact: 20 },
|
|
416
419
|
"content/image-alt-text": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
|
|
420
|
+
// Citation coverage is low-confidence (block-level grounded-claim heuristic);
|
|
421
|
+
// keep its impact modest so it nudges rather than dominates the score.
|
|
422
|
+
"content/citation-coverage": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
|
|
417
423
|
"content/translation-no-op": { baseImpact: 30, perInstance: 10, maxImpact: 60 },
|
|
418
424
|
// v1 warning-severity heuristic; lower than translation-no-op since it's speculative
|
|
419
425
|
"content/regurgitated-content": { baseImpact: 15, perInstance: 5, maxImpact: 35 },
|
|
@@ -653,6 +659,13 @@ sampled = false) {
|
|
|
653
659
|
if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
|
|
654
660
|
pushAll(findings, tag(eeatSignalsRule(pages)));
|
|
655
661
|
}
|
|
662
|
+
if (isEnabled("content/citation-coverage") && modeOk("content/citation-coverage")) {
|
|
663
|
+
pushAll(findings, tag(citationCoverageRule(pages, entityPatterns, {
|
|
664
|
+
minClaims: resolvedRules.citationCoverageMinClaims,
|
|
665
|
+
minAuthoritative: resolvedRules.citationCoverageMinAuthoritative,
|
|
666
|
+
allowlist: resolvedRules.citationAllowlist,
|
|
667
|
+
})));
|
|
668
|
+
}
|
|
656
669
|
// 2026-05-03 v0.5.2 blind-spot fixes — title uniqueness + heading
|
|
657
670
|
// structure + image alt-text were tier-1 gaps in the blind-spot audit.
|
|
658
671
|
if (isEnabled("content/title-uniqueness") && modeOk("content/title-uniqueness")) {
|
|
@@ -1887,6 +1900,9 @@ export async function auditSource(source, options) {
|
|
|
1887
1900
|
metaUniquenessMinJaccard: options?.rules?.metaUniquenessMinJaccard ?? DEFAULTS.metaUniquenessMinJaccard,
|
|
1888
1901
|
linkDepthMaxClicks: options?.rules?.linkDepthMaxClicks ?? DEFAULTS.linkDepthMaxClicks,
|
|
1889
1902
|
templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages,
|
|
1903
|
+
citationCoverageMinClaims: options?.rules?.citationCoverageMinClaims ?? DEFAULTS.citationCoverageMinClaims,
|
|
1904
|
+
citationCoverageMinAuthoritative: options?.rules?.citationCoverageMinAuthoritative ?? DEFAULTS.citationCoverageMinAuthoritative,
|
|
1905
|
+
citationAllowlist: options?.rules?.citationAllowlist,
|
|
1890
1906
|
answerFirstMaxWords: options?.rules?.answerFirstMaxWords ?? DEFAULTS.answerFirstMaxWords,
|
|
1891
1907
|
citableFactsMin: options?.rules?.citableFactsMin ?? DEFAULTS.citableFactsMin,
|
|
1892
1908
|
citableFactsTarget: options?.rules?.citableFactsTarget ?? DEFAULTS.citableFactsTarget,
|