@pseolint/core 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
- package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
- package/dist/algorithms/authority/commoncrawl.js +17 -0
- package/dist/algorithms/authority/commoncrawl.js.map +1 -0
- package/dist/algorithms/authority/openpagerank.d.ts +19 -0
- package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
- package/dist/algorithms/authority/openpagerank.js +42 -0
- package/dist/algorithms/authority/openpagerank.js.map +1 -0
- package/dist/algorithms/authority/provider.d.ts +16 -0
- package/dist/algorithms/authority/provider.d.ts.map +1 -0
- package/dist/algorithms/authority/provider.js +24 -0
- package/dist/algorithms/authority/provider.js.map +1 -0
- package/dist/algorithms/auto-entity-mask.d.ts +19 -0
- package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
- package/dist/algorithms/auto-entity-mask.js +102 -0
- package/dist/algorithms/auto-entity-mask.js.map +1 -0
- package/dist/algorithms/example-regions.d.ts +22 -0
- package/dist/algorithms/example-regions.d.ts.map +1 -0
- package/dist/algorithms/example-regions.js +32 -0
- package/dist/algorithms/example-regions.js.map +1 -0
- package/dist/algorithms/fact-extraction.d.ts.map +1 -1
- package/dist/algorithms/fact-extraction.js +6 -0
- package/dist/algorithms/fact-extraction.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +39 -9
- package/dist/auditor.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +9 -8
- package/dist/enrich-findings.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts +14 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +96 -15
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +4 -3
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
- package/dist/rules/content/common-phrase-reuse.js +7 -2
- package/dist/rules/content/common-phrase-reuse.js.map +1 -1
- package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
- package/dist/rules/content/regurgitated-content.js +11 -2
- package/dist/rules/content/regurgitated-content.js.map +1 -1
- package/dist/rules/content/translation-no-op.d.ts.map +1 -1
- package/dist/rules/content/translation-no-op.js +5 -1
- package/dist/rules/content/translation-no-op.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts +15 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +46 -39
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/links/cluster-connectivity.d.ts +7 -1
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
- package/dist/rules/links/cluster-connectivity.js +8 -2
- package/dist/rules/links/cluster-connectivity.js.map +1 -1
- package/dist/rules/links/orphan-pages.d.ts +8 -1
- package/dist/rules/links/orphan-pages.d.ts.map +1 -1
- package/dist/rules/links/orphan-pages.js +10 -1
- package/dist/rules/links/orphan-pages.js.map +1 -1
- package/dist/rules/schema/consistency.d.ts.map +1 -1
- package/dist/rules/schema/consistency.js +33 -21
- package/dist/rules/schema/consistency.js.map +1 -1
- package/dist/rules/spam/entity-swap.d.ts.map +1 -1
- package/dist/rules/spam/entity-swap.js +51 -9
- package/dist/rules/spam/entity-swap.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +5 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
- package/dist/rules/tech/canonical-consistency.js +144 -28
- package/dist/rules/tech/canonical-consistency.js.map +1 -1
- package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
- package/dist/rules/tech/sitemap-completeness.js +21 -5
- package/dist/rules/tech/sitemap-completeness.js.map +1 -1
- package/dist/rules/tech/soft-404.d.ts +11 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -1
- package/dist/rules/tech/soft-404.js +47 -5
- package/dist/rules/tech/soft-404.js.map +1 -1
- package/dist/template-detection.d.ts +1 -0
- package/dist/template-detection.d.ts.map +1 -1
- package/dist/template-detection.js +1 -1
- package/dist/template-detection.js.map +1 -1
- package/dist/types.d.ts +16 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +109 -93
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { AuthorityProvider } from "./provider.js";
|
|
2
|
+
/**
|
|
3
|
+
* Authority from a pre-processed Common Crawl host-webgraph table
|
|
4
|
+
* (registrable domain -> harmonic-centrality rank normalized to 0–100).
|
|
5
|
+
* Owned/permissive data (CC license; attribution courtesy). The table is built
|
|
6
|
+
* offline (gated); this provider is a pure lookup. Empty table -> null.
|
|
7
|
+
*/
|
|
8
|
+
export declare class CommonCrawlProvider implements AuthorityProvider {
|
|
9
|
+
/** Lookup table handed to the constructor; consulted on every authorityFor call. */
private readonly table;
|
|
10
|
+
/** @param table Map from domain to score; the provider only reads from it. */
constructor(table: ReadonlyMap<string, number>);
|
|
11
|
+
/** Resolves to the table's value for `domain` when it is a finite number, otherwise null. */
authorityFor(domain: string): Promise<number | null>;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=commoncrawl.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"commoncrawl.d.ts","sourceRoot":"","sources":["../../../src/algorithms/authority/commoncrawl.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAEvD;;;;;GAKG;AACH,qBAAa,mBAAoB,YAAW,iBAAiB;IAC/C,OAAO,CAAC,QAAQ,CAAC,KAAK;gBAAL,KAAK,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC;IAEzD,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;CAI3D"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Authority from a pre-processed Common Crawl host-webgraph table
|
|
3
|
+
* (registrable domain -> harmonic-centrality rank normalized to 0–100).
|
|
4
|
+
* Owned/permissive data (CC license; attribution courtesy). The table is built
|
|
5
|
+
* offline (gated); this provider is a pure lookup. Empty table -> null.
|
|
6
|
+
*/
|
|
7
|
+
export class CommonCrawlProvider {
|
|
8
|
+
table;
|
|
9
|
+
constructor(table) {
|
|
10
|
+
this.table = table;
|
|
11
|
+
}
|
|
12
|
+
async authorityFor(domain) {
|
|
13
|
+
const v = this.table.get(domain);
|
|
14
|
+
return typeof v === "number" && Number.isFinite(v) ? v : null;
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=commoncrawl.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"commoncrawl.js","sourceRoot":"","sources":["../../../src/algorithms/authority/commoncrawl.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,MAAM,OAAO,mBAAmB;IACD;IAA7B,YAA6B,KAAkC;QAAlC,UAAK,GAAL,KAAK,CAA6B;IAAG,CAAC;IAEnE,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACjC,OAAO,OAAO,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChE,CAAC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { AuthorityProvider } from "./provider.js";
|
|
2
|
+
type FetchFn = (url: string, init?: {
|
|
3
|
+
headers?: Record<string, string>;
|
|
4
|
+
}) => Promise<Response>;
|
|
5
|
+
/**
|
|
6
|
+
* Open PageRank authority source. Returns 0–100 (page_rank_decimal × 10).
|
|
7
|
+
* Requires a free API key; with no key it returns null (no calls). Any network
|
|
8
|
+
* or per-domain error → null. Attribution ("Open PageRank by DomCop") is the
|
|
9
|
+
* caller's responsibility when displaying.
|
|
10
|
+
*/
|
|
11
|
+
export declare class OpenPageRankProvider implements AuthorityProvider {
|
|
12
|
+
/** API key sent with each request; an empty key means no request is made. */
private readonly apiKey;
|
|
13
|
+
/** fetch-compatible function used for the HTTP call. */
private readonly fetchFn;
|
|
14
|
+
/** Timeout budget in milliseconds supplied at construction. */
private readonly timeoutMs;
|
|
15
|
+
/**
 * @param apiKey    Open PageRank API key.
 * @param fetchFn   Optional fetch replacement (e.g. for tests); the implementation defaults to the global fetch.
 * @param timeoutMs Optional timeout in milliseconds; the implementation defaults to 8000.
 */
constructor(apiKey: string, fetchFn?: FetchFn, timeoutMs?: number);
|
|
16
|
+
/** Resolves to a 0–100 score for `domain`, or null when no key is set or the lookup fails. */
authorityFor(domain: string): Promise<number | null>;
|
|
17
|
+
}
|
|
18
|
+
export {};
|
|
19
|
+
//# sourceMappingURL=openpagerank.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openpagerank.d.ts","sourceRoot":"","sources":["../../../src/algorithms/authority/openpagerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAEvD,KAAK,OAAO,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;AAQ/F;;;;;GAKG;AACH,qBAAa,oBAAqB,YAAW,iBAAiB;IAE1D,OAAO,CAAC,QAAQ,CAAC,MAAM;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO;IACxB,OAAO,CAAC,QAAQ,CAAC,SAAS;gBAFT,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,OAAgD,EACzD,SAAS,SAAO;IAG7B,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;CAoB3D"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Open PageRank authority source. Returns 0–100 (page_rank_decimal × 10).
|
|
3
|
+
* Requires a free API key; with no key it returns null (no calls). Any network
|
|
4
|
+
* or per-domain error → null. Attribution ("Open PageRank by DomCop") is the
|
|
5
|
+
* caller's responsibility when displaying.
|
|
6
|
+
*/
|
|
7
|
+
export class OpenPageRankProvider {
|
|
8
|
+
apiKey;
|
|
9
|
+
fetchFn;
|
|
10
|
+
timeoutMs;
|
|
11
|
+
constructor(apiKey, fetchFn = globalThis.fetch, timeoutMs = 8000) {
|
|
12
|
+
this.apiKey = apiKey;
|
|
13
|
+
this.fetchFn = fetchFn;
|
|
14
|
+
this.timeoutMs = timeoutMs;
|
|
15
|
+
}
|
|
16
|
+
async authorityFor(domain) {
|
|
17
|
+
if (!this.apiKey)
|
|
18
|
+
return null;
|
|
19
|
+
const url = `https://openpagerank.com/api/v1.0/getPageRank?domains[]=${encodeURIComponent(domain)}`;
|
|
20
|
+
let res;
|
|
21
|
+
try {
|
|
22
|
+
res = await this.fetchFn(url, { headers: { "API-OPR": this.apiKey } });
|
|
23
|
+
}
|
|
24
|
+
catch {
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
27
|
+
if (!res.ok)
|
|
28
|
+
return null;
|
|
29
|
+
let body;
|
|
30
|
+
try {
|
|
31
|
+
body = (await res.json());
|
|
32
|
+
}
|
|
33
|
+
catch {
|
|
34
|
+
return null;
|
|
35
|
+
}
|
|
36
|
+
const entry = body.response?.find((e) => e.domain === domain) ?? body.response?.[0];
|
|
37
|
+
if (!entry || entry.status_code !== 200 || typeof entry.page_rank_decimal !== "number")
|
|
38
|
+
return null;
|
|
39
|
+
return Math.round(entry.page_rank_decimal * 10);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
//# sourceMappingURL=openpagerank.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openpagerank.js","sourceRoot":"","sources":["../../../src/algorithms/authority/openpagerank.ts"],"names":[],"mappings":"AAUA;;;;;GAKG;AACH,MAAM,OAAO,oBAAoB;IAEZ;IACA;IACA;IAHnB,YACmB,MAAc,EACd,UAAmB,UAAU,CAAC,KAA2B,EACzD,YAAY,IAAI;QAFhB,WAAM,GAAN,MAAM,CAAQ;QACd,YAAO,GAAP,OAAO,CAAkD;QACzD,cAAS,GAAT,SAAS,CAAO;IAChC,CAAC;IAEJ,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QAC9B,MAAM,GAAG,GAAG,2DAA2D,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC;QACpG,IAAI,GAAa,CAAC;QAClB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC;QACzB,IAAI,IAA+B,CAAC;QACpC,IAAI,CAAC;YACH,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAA8B,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC;QACpF,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,WAAW,KAAK,GAAG,IAAI,OAAO,KAAK,CAAC,iBAAiB,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QACpG,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,iBAAiB,GAAG,EAAE,CAAC,CAAC;IAClD,CAAC;CACF"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** A source of domain authority on a 0–100 scale (higher = more authoritative). */
|
|
2
|
+
export interface AuthorityProvider {
|
|
3
|
+
/** Authority for a registrable domain; null when unknown/unavailable. */
|
|
4
|
+
authorityFor(domain: string): Promise<number | null>;
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Combines several providers. Returns the MAX non-null score (any source
|
|
8
|
+
* vouching for authority is sufficient evidence). All-null → null → callers
|
|
9
|
+
* apply no moderation (fail-safe). A source that throws is treated as null.
|
|
10
|
+
*/
|
|
11
|
+
export declare class CompositeAuthorityProvider implements AuthorityProvider {
|
|
12
|
+
/** Providers queried on every authorityFor call. */
private readonly sources;
|
|
13
|
+
/** @param sources Providers to combine; an empty list always yields null. */
constructor(sources: ReadonlyArray<AuthorityProvider>);
|
|
14
|
+
/** Resolves to the highest finite score reported by any source, or null when none reports one. */
authorityFor(domain: string): Promise<number | null>;
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider.d.ts","sourceRoot":"","sources":["../../../src/algorithms/authority/provider.ts"],"names":[],"mappings":"AAAA,mFAAmF;AACnF,MAAM,WAAW,iBAAiB;IAChC,yEAAyE;IACzE,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CACtD;AAED;;;;GAIG;AACH,qBAAa,0BAA2B,YAAW,iBAAiB;IACtD,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAAP,OAAO,EAAE,aAAa,CAAC,iBAAiB,CAAC;IAEhE,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;CAa3D"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Combines several providers. Returns the MAX non-null score (any source
|
|
3
|
+
* vouching for authority is sufficient evidence). All-null → null → callers
|
|
4
|
+
* apply no moderation (fail-safe). A source that throws is treated as null.
|
|
5
|
+
*/
|
|
6
|
+
export class CompositeAuthorityProvider {
|
|
7
|
+
sources;
|
|
8
|
+
constructor(sources) {
|
|
9
|
+
this.sources = sources;
|
|
10
|
+
}
|
|
11
|
+
async authorityFor(domain) {
|
|
12
|
+
const results = await Promise.all(this.sources.map(async (s) => {
|
|
13
|
+
try {
|
|
14
|
+
return await s.authorityFor(domain);
|
|
15
|
+
}
|
|
16
|
+
catch {
|
|
17
|
+
return null;
|
|
18
|
+
}
|
|
19
|
+
}));
|
|
20
|
+
const vals = results.filter((v) => v !== null && Number.isFinite(v));
|
|
21
|
+
return vals.length ? Math.max(...vals) : null;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=provider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider.js","sourceRoot":"","sources":["../../../src/algorithms/authority/provider.ts"],"names":[],"mappings":"AAMA;;;;GAIG;AACH,MAAM,OAAO,0BAA0B;IACR;IAA7B,YAA6B,OAAyC;QAAzC,YAAO,GAAP,OAAO,CAAkC;IAAG,CAAC;IAE1E,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;YAC3B,IAAI,CAAC;gBACH,OAAO,MAAM,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QACF,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAClF,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChD,CAAC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { EntityMaskPattern, ParsedPage } from "../types.js";
|
|
2
|
+
export interface DeriveOptions {
|
|
3
|
+
/** Only derive from URL-template clusters with at least this many siblings. Implementation default: 3. */
|
|
4
|
+
minClusterSize?: number;
|
|
5
|
+
/** Ignore tokens shorter than this. Implementation default: 3. */
|
|
6
|
+
minTokenLength?: number;
|
|
7
|
+
/** Placeholder substituted for masked entities. Implementation default: "[ENTITY]". */
|
|
8
|
+
placeholder?: string;
|
|
9
|
+
/** Enable URL-slug token derivation. Implementation default: true. */
|
|
10
|
+
urlSlug?: boolean;
|
|
11
|
+
/** Enable capitalized-content-token derivation. Implementation default: true. */
|
|
12
|
+
contentDiff?: boolean;
|
|
13
|
+
/** Hard cap on total derived tokens (over-masking guard). Implementation default: 500. */
|
|
14
|
+
maxTokens?: number;
|
|
15
|
+
}
|
|
16
|
+
type MaskPage = Pick<ParsedPage, "url" | "contentText">;
|
|
17
|
+
/**
 * Builds entity-mask patterns from a set of pages. Per the compiled
 * implementation: pages are grouped by normalized URL template; within each
 * group that has at least `minClusterSize` members, tokens present in some
 * but not all members (URL-slug tokens and/or capitalized content words) are
 * collected, sorted, capped at `maxTokens`, and emitted as case-insensitive,
 * word-boundary alternations in chunks of 200 tokens.
 *
 * @param pages Pages to analyse; only `url` and `contentText` are read.
 * @param opts  Optional tuning knobs (see DeriveOptions).
 * @returns One pattern per token chunk, or an empty array when no token is derived.
 */
export declare function deriveEntityPatterns(pages: ReadonlyArray<MaskPage>, opts?: DeriveOptions): EntityMaskPattern[];
|
|
18
|
+
export {};
|
|
19
|
+
//# sourceMappingURL=auto-entity-mask.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"auto-entity-mask.d.ts","sourceRoot":"","sources":["../../src/algorithms/auto-entity-mask.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGjE,MAAM,WAAW,aAAa;IAC5B,+EAA+E;IAC/E,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uCAAuC;IACvC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mDAAmD;IACnD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mDAAmD;IACnD,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,KAAK,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,KAAK,GAAG,aAAa,CAAC,CAAC;AA4DxD,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,aAAa,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,EAAE,aAAa,GAAG,iBAAiB,EAAE,CA0C9G"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { normalizePathToTemplate } from "../template-detection.js";
|
|
2
|
+
/**
 * Small stopword list: common function words and generic marketing/navigation
 * terms that may differ between pages but must never be treated as entities.
 */
const STOPWORDS = new Set([
    // articles, conjunctions, prepositions, pronouns
    "the", "and", "for", "with", "from",
    "this", "that", "your", "our", "are",
    "you", "all",
    // generic qualifiers and question words
    "new", "best", "top", "how", "what",
    "why", "who", "about",
    // navigation / call-to-action vocabulary
    "page", "home", "more", "get", "buy",
    "free", "online", "now",
]);
|
|
8
|
+
/**
 * Returns the path portion of `url`. When `url` is not an absolute URL the
 * parser rejects it, and the path is approximated by cutting off everything
 * from the first "?" and then from the first "#".
 */
function pathOf(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        const [beforeQuery] = url.split("?");
        const [beforeFragment] = beforeQuery.split("#");
        return beforeFragment;
    }
    return parsed.pathname;
}
|
|
16
|
+
/** Splits a path into its non-empty segments, ignoring leading/trailing and doubled slashes. */
function rawSegments(path) {
    const trimmed = path.replace(/^\/+|\/+$/g, "");
    const segments = [];
    for (const piece of trimmed.split("/")) {
        if (piece) {
            segments.push(piece);
        }
    }
    return segments;
}
|
|
19
|
+
/**
 * Lower-cased tokens taken from the path segments that the URL template marks
 * as `:slug`. Segments the template marks differently (e.g. numeric ones) are
 * not name-entities and contribute nothing. Each slug segment is split on
 * "-" and "_".
 */
function urlSlugTokens(path) {
    const templateParts = normalizePathToTemplate(path).replace(/^\//, "").split("/");
    const segments = rawSegments(path);
    const tokens = [];
    for (let idx = 0; idx < templateParts.length; idx++) {
        if (templateParts[idx] !== ":slug") {
            continue;
        }
        const segment = segments[idx];
        if (!segment) {
            continue;
        }
        for (const piece of segment.split(/[-_]/)) {
            tokens.push(piece.toLowerCase());
        }
    }
    return tokens;
}
|
|
32
|
+
/** Capitalized ASCII words of three or more letters. */
const CONTENT_ENTITY_RE = /\b[A-Z][a-zA-Z]{2,}\b/g;
/** Returns every capitalized word found in `text`, lower-cased, in order of appearance. */
function contentEntityTokens(text) {
    const hits = text.match(CONTENT_ENTITY_RE);
    if (hits === null) {
        return [];
    }
    return Array.from(hits, (word) => word.toLowerCase());
}
|
|
36
|
+
/**
 * Selects the tokens that differ between cluster members: a token qualifies
 * when it occurs in at least one member but not in every member. Vocabulary
 * shared by all members (the template itself) is dropped. Tokens shorter than
 * `minLen` and stopwords are ignored, and repeats inside one member count once.
 */
function varyingTokens(perMember, minLen) {
    const memberCount = perMember.length;
    const memberHits = new Map();
    for (const toks of perMember) {
        const seenHere = new Set();
        for (const tok of toks) {
            const eligible = tok.length >= minLen && !STOPWORDS.has(tok);
            if (!eligible || seenHere.has(tok)) {
                continue;
            }
            seenHere.add(tok);
            memberHits.set(tok, (memberHits.get(tok) ?? 0) + 1);
        }
    }
    const varying = new Set();
    memberHits.forEach((hits, tok) => {
        // Every recorded token has hits >= 1; only the "not in all members" bound filters.
        if (hits >= 1 && hits < memberCount) {
            varying.add(tok);
        }
    });
    return varying;
}
|
|
54
|
+
/** Backslash-escapes every regular-expression metacharacter in `s` so it matches literally. */
function escapeRegex(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`);
}
|
|
57
|
+
/**
 * Derives entity-mask patterns from a set of pages.
 *
 * Pages are grouped by normalized URL template. For every group with at least
 * `minClusterSize` members, tokens that appear in some but not all members
 * are collected from URL slugs (`urlSlug`) and/or capitalized content words
 * (`contentDiff`). The sorted token list is capped at `maxTokens` and emitted
 * as case-insensitive word-boundary alternations, 200 tokens per pattern.
 *
 * @param pages Pages to analyse; only `url` and `contentText` are read.
 * @param opts  Optional overrides: minClusterSize (3), minTokenLength (3),
 *              placeholder ("[ENTITY]"), urlSlug (true), contentDiff (true),
 *              maxTokens (500).
 * @returns Array of `{ placeholder, pattern }`; empty when nothing is derived.
 */
export function deriveEntityPatterns(pages, opts) {
    const minCluster = opts?.minClusterSize ?? 3;
    const minLen = opts?.minTokenLength ?? 3;
    const placeholder = opts?.placeholder ?? "[ENTITY]";
    const useUrl = opts?.urlSlug ?? true;
    const useContent = opts?.contentDiff ?? true;
    const maxTokens = opts?.maxTokens ?? 500;
    // Cluster pages by normalized URL template.
    const clusters = new Map();
    for (const p of pages) {
        const tmpl = normalizePathToTemplate(pathOf(p.url));
        const bucket = clusters.get(tmpl);
        if (bucket)
            bucket.push(p);
        else
            clusters.set(tmpl, [p]);
    }
    const entities = new Set();
    for (const members of clusters.values()) {
        if (members.length < minCluster)
            continue;
        if (useUrl) {
            for (const t of varyingTokens(members.map((m) => urlSlugTokens(pathOf(m.url))), minLen))
                entities.add(t);
        }
        if (useContent) {
            for (const t of varyingTokens(members.map((m) => contentEntityTokens(m.contentText ?? "")), minLen))
                entities.add(t);
        }
    }
    // An empty token (possible from slugs like "a--b" when minTokenLength <= 0)
    // would become an empty alternative and make the pattern match at every
    // word boundary, so it is dropped before the cap is applied.
    entities.delete("");
    // Clamp the cap at zero: a negative value would otherwise make slice()
    // trim from the end of the list instead of limiting its length.
    const tokens = [...entities].sort().slice(0, Math.max(0, maxTokens));
    if (tokens.length === 0)
        return [];
    const CHUNK = 200;
    const patterns = [];
    for (let i = 0; i < tokens.length; i += CHUNK) {
        // Each token is metacharacter-escaped (escapeRegex) and joined into a bounded,
        // backtracking-free alternation `\b(?:a|b|c)\b` — no nested quantifiers, so this
        // dynamic RegExp is ReDoS-safe by construction.
        const alt = tokens.slice(i, i + CHUNK).map(escapeRegex).join("|");
        // nosemgrep: javascript.lang.security.audit.detect-non-literal-regexp.detect-non-literal-regexp
        patterns.push({ placeholder, pattern: new RegExp(`\\b(?:${alt})\\b`, "gi") });
    }
    return patterns;
}
|
|
102
|
+
//# sourceMappingURL=auto-entity-mask.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"auto-entity-mask.js","sourceRoot":"","sources":["../../src/algorithms/auto-entity-mask.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAmBnE,yEAAyE;AACzE,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK;IACzE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO;IACxE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK;CAC9D,CAAC,CAAC;AAEH,SAAS,MAAM,CAAC,GAAW;IACzB,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACnE,CAAC;AAED,4FAA4F;AAC5F,SAAS,aAAa,CAAC,IAAY;IACjC,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7E,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACxB,IAAI,CAAC,KAAK,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5B,KAAK,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;gBAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;QACtE,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,iBAAiB,GAAG,wBAAwB,CAAC;AACnD,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AAC3E,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,SAAqB,EAAE,MAAc;IAC1D,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAC9B,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/E,CAAC;IACF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,KAAK,MAAM,CAAC,IAAI,UAAU;QAAE,KAAK,MAAM,CAAC,IAA
I,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3F,MAAM,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAC5B,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,QAAQ;QAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC/D,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,WAAW,CAAC,CAAS;IAC5B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAA8B,EAAE,IAAoB;IACvF,MAAM,UAAU,GAAG,IAAI,EAAE,cAAc,IAAI,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,IAAI,EAAE,cAAc,IAAI,CAAC,CAAC;IACzC,MAAM,WAAW,GAAG,IAAI,EAAE,WAAW,IAAI,UAAU,CAAC;IACpD,MAAM,MAAM,GAAG,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,EAAE,WAAW,IAAI,IAAI,CAAC;IAC7C,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,GAAG,CAAC;IAEzC,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,uBAAuB,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,MAAM;YAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YACtB,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,OAAO,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;QACxC,IAAI,OAAO,CAAC,MAAM,GAAG,UAAU;YAAE,SAAS;QAC1C,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC;gBAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC3G,CAAC;QACD,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC;gBAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACvH,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IACxD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,MAAM,KAAK,GAAG,GAAG,CAAC;IAClB,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EA
AE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;QAC9C,+EAA+E;QAC/E,iFAAiF;QACjF,gDAAgD;QAChD,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,gGAAgG;QAChG,QAAQ,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,OAAO,EAAE,IAAI,MAAM,CAAC,SAAS,GAAG,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC;IAChF,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { ParsedPage } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Selector for "quoted example / code / sample" regions — markup that documents
|
|
4
|
+
* a pattern rather than expressing the page's own editorial voice.
|
|
5
|
+
*
|
|
6
|
+
* Content-quality heuristics that judge the page's OWN prose (cliché density,
|
|
7
|
+
* regurgitated-content signals) exclude these regions so a docs / explainer
|
|
8
|
+
* page that *quotes* a bad pattern to teach it isn't penalised for describing
|
|
9
|
+
* it. A real spam page puts clichés in flowing prose, not inside `<code>` or a
|
|
10
|
+
* `<blockquote>` example box — so the exclusion narrows false positives without
|
|
11
|
+
* opening a meaningful evasion path for a low-confidence proxy.
|
|
12
|
+
*/
|
|
13
|
+
export declare const EXAMPLE_REGION_SELECTOR = "pre, code, blockquote, figure, samp, kbd, [data-example]";
|
|
14
|
+
/**
|
|
15
|
+
* Page body text with site chrome AND quoted-example regions removed.
|
|
16
|
+
*
|
|
17
|
+
* Falls back to the pre-parsed `contentText` when no html is available (e.g.
|
|
18
|
+
* synthetic unit-test pages). The fallback keeps example text in place — it's a
|
|
19
|
+
* best-effort path; the html path is what production audits exercise.
|
|
20
|
+
*/
|
|
21
|
+
export declare function proseTextExcludingExamples(page: ParsedPage): string;
|
|
22
|
+
//# sourceMappingURL=example-regions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"example-regions.d.ts","sourceRoot":"","sources":["../../src/algorithms/example-regions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAE9C;;;;;;;;;;GAUG;AACH,eAAO,MAAM,uBAAuB,6DACwB,CAAC;AAI7D;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CASnE"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
/**
 * CSS selector for regions that quote an example, code, or sample — markup
 * that illustrates a pattern instead of speaking in the page's own voice.
 *
 * Heuristics that grade the page's OWN prose (cliché density,
 * regurgitated-content signals) leave these regions out, so a docs or
 * explainer page that *quotes* a bad pattern in order to teach it is not
 * penalised for describing it. Real spam puts its clichés in running prose,
 * not inside `<code>` or a `<blockquote>` example box, so the exclusion trims
 * false positives without opening a meaningful evasion path for a
 * low-confidence proxy.
 */
export const EXAMPLE_REGION_SELECTOR = "pre, code, blockquote, figure, samp, kbd, [data-example]";
/** Site chrome and non-content elements that are removed together with the example regions. */
const CHROME_SELECTOR = "header, footer, nav, script, style, noscript";
/**
 * Returns the page's body text with site chrome AND quoted-example regions
 * stripped, whitespace collapsed to single spaces and trimmed.
 *
 * When the page carries no (non-blank) html — e.g. synthetic unit-test pages —
 * the pre-parsed `contentText` is returned unchanged, example text included.
 * That fallback is best-effort; production audits go through the html path.
 */
export function proseTextExcludingExamples(page) {
    const markup = page.html;
    if (!markup || !markup.trim()) {
        return page.contentText ?? "";
    }
    const $ = load(markup);
    $(`${CHROME_SELECTOR}, ${EXAMPLE_REGION_SELECTOR}`).remove();
    const body = $("body");
    // Prefer <body>; fall back to the document root when there is none.
    const container = body.length ? body : $.root();
    return container.text().replace(/\s+/g, " ").trim();
}
|
|
32
|
+
//# sourceMappingURL=example-regions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"example-regions.js","sourceRoot":"","sources":["../../src/algorithms/example-regions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAG/B;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAClC,0DAA0D,CAAC;AAE7D,MAAM,eAAe,GAAG,8CAA8C,CAAC;AAEvE;;;;;;GAMG;AACH,MAAM,UAAU,0BAA0B,CAAC,IAAgB;IACzD,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC,CAAC,GAAG,eAAe,KAAK,uBAAuB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;QAC7D,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACzD,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;AAChC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fact-extraction.d.ts","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,MAAM,QAAQ,GAChB,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,GAC/D,OAAO,GAAG,aAAa,CAAC;AAE5B,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,QAAQ,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;IAC/C,IAAI,CAAC,EAAE,cAAc,GAAG,QAAQ,GAAG,SAAS,GAAG,KAAK,GAAG,UAAU,GAAG,OAAO,GAAG,OAAO,CAAC;CACvF;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,eAAe,GAAG,SAAS,CAAC;IACvC,MAAM,CAAC,EAAE,KAAK,GAAG,WAAW,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,oFAAoF;IACpF,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,YAAY,EAAE,QAAQ,EAAE,CAAC;IACzB,aAAa,EAAE,WAAW,EAAE,CAAC;IAC7B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,cAAc,EAAE,aAAa,EAAE,CAAC;CACjC;AAqBD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAQ1D;AAWD,wBAAgB,mBAAmB,CAAC,UAAU,EAAE,MAAM,GAAG,QAAQ,EAAE,CAclE;AA4BD,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,EAAO,GAAG,WAAW,EAAE,CAe9F;AAED,eAAO,MAAM,0BAA0B,EAAE,SAAS,MAAM,
|
|
1
|
+
{"version":3,"file":"fact-extraction.d.ts","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,MAAM,QAAQ,GAChB,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,GAC/D,OAAO,GAAG,aAAa,CAAC;AAE5B,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,QAAQ,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;IAC/C,IAAI,CAAC,EAAE,cAAc,GAAG,QAAQ,GAAG,SAAS,GAAG,KAAK,GAAG,UAAU,GAAG,OAAO,GAAG,OAAO,CAAC;CACvF;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,eAAe,GAAG,SAAS,CAAC;IACvC,MAAM,CAAC,EAAE,KAAK,GAAG,WAAW,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,oFAAoF;IACpF,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,YAAY,EAAE,QAAQ,EAAE,CAAC;IACzB,aAAa,EAAE,WAAW,EAAE,CAAC;IAC7B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,cAAc,EAAE,aAAa,EAAE,CAAC;CACjC;AAqBD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAQ1D;AAWD,wBAAgB,mBAAmB,CAAC,UAAU,EAAE,MAAM,GAAG,QAAQ,EAAE,CAclE;AA4BD,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,EAAO,GAAG,WAAW,EAAE,CAe9F;AAED,eAAO,MAAM,0BAA0B,EAAE,SAAS,MAAM,EAUvD,CAAC;AAWF,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMtD;AAMD,wBAAgB,iBAAiB,CAC/B,aAAa,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,QAAQ,EAAE,CAqBZ;AAED,wBAAgB,wBAAwB,CACtC,aAAa,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,OAAO,CAET;AAYD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,aAAa,EAAE,CA0BjB;AAED,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,GAAG,aAAa,GAAG,MAAM,GAAG,eAAe,GAAG,QAAQ,CAAC,EACnF,cAAc,EAAE,iBAAiB,EAAE,EACnC,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,SAAS,CASX"}
|
|
@@ -103,6 +103,12 @@ export const DEFAULT_CITATION_ALLOWLIST = [
|
|
|
103
103
|
"wikipedia.org", "w3.org", "iso.org", "ietf.org", "rfc-editor.org",
|
|
104
104
|
"doi.org", "nih.gov", "ncbi.nlm.nih.gov", "who.int", "schema.org",
|
|
105
105
|
"oecd.org", "worldbank.org", "europa.eu",
|
|
106
|
+
// Google's own published documentation is the primary authoritative source for
|
|
107
|
+
// claims about Google's ranking and spam systems (Search Essentials, spam
|
|
108
|
+
// policies, helpful-content guidance) and for Core Web Vitals (web.dev).
|
|
109
|
+
// Scoped to the docs subdomain — a bare google.com link (Maps, search results)
|
|
110
|
+
// is deliberately NOT credited as authoritative.
|
|
111
|
+
"developers.google.com", "web.dev",
|
|
106
112
|
];
|
|
107
113
|
const MULTI_PART_SUFFIXES = new Set([
|
|
108
114
|
"co.uk", "ac.uk", "gov.uk", "org.uk", "com.au", "gov.au", "edu.au",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fact-extraction.js","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAwChD,+EAA+E;AAC/E,6EAA6E;AAC7E,+DAA+D;AAC/D,MAAM,qBAAqB,GAA2C;IACpE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,qBAAqB,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,oFAAoF;AACpF,MAAM,iBAAiB,GACrB,6DAA6D,CAAC;AAChE,MAAM,oBAAoB,GAA6C;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,2CAA2C,EAAE;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAC9C,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,MAAM,CAAC,6BAA6B,iBAAiB,MAAM,EAAE,GAAG,CAAC,EAAE;CACtG,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,UAAkB;IACpD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACnD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC1D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC9B,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,sBAAsB,GAAG,mEAAmE,CAAC;AACnG,MAAM,OAAO,GAAG,uFAAuF,CAAC;AACxG,MAAM,QAAQ,GAAG,yJAAyJ,CAAC;AAE3K,MAAM,oBAAoB,GAAG,IAAI
,GAAG,CAAC;IACnC,cAAc,EAAE,wBAAwB,EAAE,aAAa,EAAE,KAAK;IAC9D,QAAQ,EAAE,SAAS,EAAE,OAAO;CAC7B,CAAC,CAAC;AAEH,SAAS,cAAc,CAAC,KAAgB;IACtC,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,CAAC,IAAa,EAAQ,EAAE;QACpC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAAC,OAAO;QAAC,CAAC;QACzD,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI;YAAE,OAAO;QACtD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3F,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1F,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC;IACF,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,SAAoB,EAAE;IAC7E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,MAA6B,EAAQ,EAAE;QAClE,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO;QACxC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACZ,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACjC,CAAC,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,cAAc,CAAC,MAAM,CAAC;QAAE,IAAI,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,sBAAsB,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/D,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAsB;IAC3D,eAAe,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,gBAAgB;IAClE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,SAAS,EAAE,YAAY;IACjE,UAAU,EAAE,eAAe,EAAE,WAAW;
|
|
1
|
+
{"version":3,"file":"fact-extraction.js","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAwChD,+EAA+E;AAC/E,6EAA6E;AAC7E,+DAA+D;AAC/D,MAAM,qBAAqB,GAA2C;IACpE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,qBAAqB,EAAE;IAChD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAChD;QACE,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,qFAAqF;KAC7F;IACD;QACE,IAAI,EAAE,MAAM;QACZ,KAAK,EACH,uHAAuH;KAC1H;IACD,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,wBAAwB,EAAE;IACpD,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,gCAAgC,EAAE;CAC1D,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,qBAAqB,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,oFAAoF;AACpF,MAAM,iBAAiB,GACrB,6DAA6D,CAAC;AAChE,MAAM,oBAAoB,GAA6C;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,2CAA2C,EAAE;IACrE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE;IAC9C,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,MAAM,CAAC,6BAA6B,iBAAiB,MAAM,EAAE,GAAG,CAAC,EAAE;CACtG,CAAC;AAEF,MAAM,UAAU,mBAAmB,CAAC,UAAkB;IACpD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,oBAAoB,EAAE,CAAC;QACnD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC1D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC9B,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,sBAAsB,GAAG,mEAAmE,CAAC;AACnG,MAAM,OAAO,GAAG,uFAAuF,CAAC;AACxG,MAAM,QAAQ,GAAG,yJAAyJ,CAAC;AAE3K,MAAM,oBAAoB,GAAG,IAAI
,GAAG,CAAC;IACnC,cAAc,EAAE,wBAAwB,EAAE,aAAa,EAAE,KAAK;IAC9D,QAAQ,EAAE,SAAS,EAAE,OAAO;CAC7B,CAAC,CAAC;AAEH,SAAS,cAAc,CAAC,KAAgB;IACtC,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,CAAC,IAAa,EAAQ,EAAE;QACpC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAAC,OAAO;QAAC,CAAC;QACzD,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI;YAAE,OAAO;QACtD,MAAM,GAAG,GAAG,IAA+B,CAAC;QAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3F,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC;QAC1F,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC;IACF,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,UAAkB,EAAE,SAAoB,EAAE;IAC7E,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAkB,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,MAA6B,EAAQ,EAAE;QAClE,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO;QACxC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACZ,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACjC,CAAC,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,cAAc,CAAC,MAAM,CAAC;QAAE,IAAI,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACrE,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,sBAAsB,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/D,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAsB;IAC3D,eAAe,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,gBAAgB;IAClE,SAAS,EAAE,SAAS,EAAE,kBAAkB,EAAE,SAAS,EAAE,YAAY;IACjE,UAAU,EAAE,eAAe,EAAE,WAAW;IACxC,+EAA+E;
IAC/E,0EAA0E;IAC1E,yEAAyE;IACzE,+EAA+E;IAC/E,iDAAiD;IACjD,uBAAuB,EAAE,SAAS;CACnC,CAAC;AAEF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IAClE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;CACpC,CAAC,CAAC;AAEH,SAAS,MAAM,CAAC,GAAW;IACzB,IAAI,CAAC;QAAC,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,IAAI,CAAC;IAAC,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,IAAI,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,wBAAwB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzF,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IACjC,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,UAAU,IAAI,MAAM,KAAK,UAAU;YAAE,SAAS,CAAC,gBAAgB;QACnE,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,SAAS;QAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACf,IAAI,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACxE,CAAC;aAAM,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACvE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC;QAC9E,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAA
C,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,aAAgC,EAChC,OAAe,EACf,YAA+B,0BAA0B;IAEzD,OAAO,iBAAiB,CAAC,aAAa,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,eAAe,CAAC,CAAC;AAC3G,CAAC;AAED,MAAM,cAAc,GAAG,8BAA8B,CAAC;AAEtD,SAAS,YAAY,CAAC,KAAe,EAAE,IAAY;IACjD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC;YAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CACnC,IAAY,EACZ,OAAe,EACf,YAA+B,0BAA0B;IAEzD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC,CAAC,qDAAqD,CAAC,CAAC,MAAM,EAAE,CAAC;IAClE,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACpG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAC3F,MAAM,SAAS,GAAG,iBAAiB,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;QACzF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACpD,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG;gBACZ,GAAG,mBAAmB,CAAC,QAAQ,CAAC;gBAChC,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;aACrD,CAAC;YACF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YACjC,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBACvC,KAAK;g
BACL,SAAS,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;aACxC,CAAC,CAAC;YACH,MAAM,CAAC,+DAA+D;QACxE,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAmF,EACnF,cAAmC,EACnC,YAA+B,0BAA0B;IAEzD,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAC9D,OAAO;QACL,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC;QACzC,aAAa,EAAE,oBAAoB,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;QACxD,SAAS,EAAE,iBAAiB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;QACrE,cAAc,EAAE,qBAAqB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC;KACtE,CAAC;AACJ,CAAC"}
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAoEA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAGZ,WAAW,EAUX,UAAU,EAIX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAA8D,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAoEA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAGZ,WAAW,EAUX,UAAU,EAIX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAA8D,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAqE1I,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAEvE;AA2yBD;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAoYD,wBAAgB,2BAA2B,CAAC,GAAG,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAgBjG;AA+pBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA4yC/F"}
|
package/dist/auditor.js
CHANGED
|
@@ -73,6 +73,10 @@ import { CORE_RULESET_VERSION } from "./ruleset-version.js";
|
|
|
73
73
|
import { planScrapeStrategy, DEFAULT_AGE_FLOOR_DAYS } from "./scrape-strategy.js";
|
|
74
74
|
import { detectTemplates, buildUrlToTemplateMap, shouldActivateTemplateScoring } from "./template-detection.js";
|
|
75
75
|
import { scoreTemplates, siteVerdictFromTemplates } from "./per-template-scoring.js";
|
|
76
|
+
import { deriveEntityPatterns } from "./algorithms/auto-entity-mask.js";
|
|
77
|
+
import { CompositeAuthorityProvider } from "./algorithms/authority/provider.js";
|
|
78
|
+
import { OpenPageRankProvider } from "./algorithms/authority/openpagerank.js";
|
|
79
|
+
import { registrableDomain } from "./algorithms/fact-extraction.js";
|
|
76
80
|
const DEFAULTS = {
|
|
77
81
|
nearDuplicateThreshold: 0.85,
|
|
78
82
|
entitySwapThreshold: 0.95,
|
|
@@ -81,7 +85,7 @@ const DEFAULTS = {
|
|
|
81
85
|
publicationVelocityMaxPerDayCorpusFraction: 0.10,
|
|
82
86
|
boilerplateMaxRatio: 0.7,
|
|
83
87
|
templateDiversityMinUniqueRatio: 0.35,
|
|
84
|
-
|
|
88
|
+
uniqueValueDensity: { passBelow: 0.20, errorBelow: 0.12 },
|
|
85
89
|
metaUniquenessMinJaccard: 0.9,
|
|
86
90
|
linkDepthMaxClicks: 3,
|
|
87
91
|
templateCoverageMinPages: 5,
|
|
@@ -212,7 +216,7 @@ const SCORING_PROFILES = {
|
|
|
212
216
|
// first-principles analysis predicts will false-positive on catalog-
|
|
213
217
|
// shaped sites (Zapier integrations, G2 categories, Wise currency pairs,
|
|
214
218
|
// etc.). A reputable-pSEO calibration corpus + runner has been added
|
|
215
|
-
// (scripts/calibration-
|
|
219
|
+
// (scripts/calibration-corpus.ts); these overrides will be
|
|
216
220
|
// tightened or loosened based on actual fire-rates measured against
|
|
217
221
|
// sites that demonstrably win in production. See
|
|
218
222
|
// docs/superpowers/specs/2026-05-03-calibration-against-reputable-pseo.md.
|
|
@@ -648,7 +652,7 @@ sampled = false) {
|
|
|
648
652
|
}
|
|
649
653
|
// Content rules
|
|
650
654
|
if (isEnabled("content/unique-value") && modeOk("content/unique-value")) {
|
|
651
|
-
pushAll(findings, tag(uniqueValueRule(pages, resolvedRules.
|
|
655
|
+
pushAll(findings, tag(uniqueValueRule(pages, resolvedRules.uniqueValueDensity)));
|
|
652
656
|
}
|
|
653
657
|
if (isEnabled("content/meta-uniqueness") && modeOk("content/meta-uniqueness")) {
|
|
654
658
|
pushAll(findings, tag(metaUniquenessRule(pages, entityPatterns, resolvedRules.metaUniquenessMinJaccard)));
|
|
@@ -691,7 +695,7 @@ sampled = false) {
|
|
|
691
695
|
}
|
|
692
696
|
// Link rules — use the global link graph
|
|
693
697
|
if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
|
|
694
|
-
pushAll(findings, tag(orphanPagesRule(pages, inbound, rootUrl)));
|
|
698
|
+
pushAll(findings, tag(orphanPagesRule(pages, inbound, rootUrl, sampled)));
|
|
695
699
|
}
|
|
696
700
|
if (isEnabled("links/dead-ends") && modeOk("links/dead-ends")) {
|
|
697
701
|
pushAll(findings, tag(deadEndsRule(pages, knownUrls, rootUrl)));
|
|
@@ -702,7 +706,7 @@ sampled = false) {
|
|
|
702
706
|
}
|
|
703
707
|
}
|
|
704
708
|
if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
|
|
705
|
-
pushAll(findings, tag(clusterConnectivityRule(pages, knownUrls)));
|
|
709
|
+
pushAll(findings, tag(clusterConnectivityRule(pages, knownUrls, sampled)));
|
|
706
710
|
}
|
|
707
711
|
if (isEnabled("links/host-section-divergence") && modeOk("links/host-section-divergence")) {
|
|
708
712
|
pushAll(findings, tag(hostSectionDivergenceRule(pages, adjacency)));
|
|
@@ -1896,7 +1900,7 @@ export async function auditSource(source, options) {
|
|
|
1896
1900
|
?? DEFAULTS.publicationVelocityMaxPerDayCorpusFraction,
|
|
1897
1901
|
boilerplateMaxRatio: options?.rules?.boilerplateMaxRatio ?? DEFAULTS.boilerplateMaxRatio,
|
|
1898
1902
|
templateDiversityMinUniqueRatio: options?.rules?.templateDiversityMinUniqueRatio ?? DEFAULTS.templateDiversityMinUniqueRatio,
|
|
1899
|
-
|
|
1903
|
+
uniqueValueDensity: options?.rules?.uniqueValueDensity ?? DEFAULTS.uniqueValueDensity,
|
|
1900
1904
|
metaUniquenessMinJaccard: options?.rules?.metaUniquenessMinJaccard ?? DEFAULTS.metaUniquenessMinJaccard,
|
|
1901
1905
|
linkDepthMaxClicks: options?.rules?.linkDepthMaxClicks ?? DEFAULTS.linkDepthMaxClicks,
|
|
1902
1906
|
templateCoverageMinPages: options?.rules?.templateCoverageMinPages ?? DEFAULTS.templateCoverageMinPages,
|
|
@@ -2362,7 +2366,10 @@ export async function auditSource(source, options) {
|
|
|
2362
2366
|
const auditMode = options?.mode ?? "full";
|
|
2363
2367
|
// Site-wide rules (run once, outside group loop)
|
|
2364
2368
|
if (sitemapUrlSet && sitemapUrlSet.size > 0 && auditMode !== "diff") {
|
|
2365
|
-
const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet
|
|
2369
|
+
const sitemapFindings = sitemapCompletenessRule(parsedPages, sitemapUrlSet, {
|
|
2370
|
+
sampled: isSampledAudit,
|
|
2371
|
+
normalizeUrlOptions,
|
|
2372
|
+
});
|
|
2366
2373
|
pushAll(allFindings, sitemapFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
2367
2374
|
if (robotsTxtContent) {
|
|
2368
2375
|
const robotsFindings = robotsComplianceRule(parsedPages, sitemapUrlSet, robotsTxtContent);
|
|
@@ -2388,6 +2395,7 @@ export async function auditSource(source, options) {
|
|
|
2388
2395
|
pushAll(allFindings, dataIdenticalFindings.map((f) => ({ ...f, ref: f.ref ?? RULE_REFERENCES[f.ruleId] })));
|
|
2389
2396
|
}
|
|
2390
2397
|
}
|
|
2398
|
+
const derivedEntityPatterns = options?.autoEntityMask === false ? [] : deriveEntityPatterns(parsedPagesAll);
|
|
2391
2399
|
for (const [groupName, groupPages] of classified) {
|
|
2392
2400
|
if (groupPages.length === 0)
|
|
2393
2401
|
continue;
|
|
@@ -2396,7 +2404,7 @@ export async function auditSource(source, options) {
|
|
|
2396
2404
|
continue;
|
|
2397
2405
|
const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
|
|
2398
2406
|
const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
|
|
2399
|
-
const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full",
|
|
2407
|
+
const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, [...DEFAULT_ENTITY_PATTERNS, ...derivedEntityPatterns], groupConfig?.overrides, options?.mode ?? "full",
|
|
2400
2408
|
// 2026-05-06 calibration fix: pinnedUrls mode fetches a hand-picked subset
|
|
2401
2409
|
// of the full site — the link graph across those pages is structurally
|
|
2402
2410
|
// incomplete, just like a random-sampled crawl. Pass `true` so
|
|
@@ -2502,12 +2510,31 @@ export async function auditSource(source, options) {
|
|
|
2502
2510
|
const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification, parsedPages.length);
|
|
2503
2511
|
const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
|
|
2504
2512
|
const issues = bucketIssues(enriched.findings);
|
|
2513
|
+
// Resolve a domain-authority score to moderate the verdict. Explicit option
|
|
2514
|
+
// wins; otherwise a provider (custom, or default OPR composite). null/absent
|
|
2515
|
+
// → no moderation (fail-safe).
|
|
2516
|
+
let resolvedAuthorityScore = options?.authorityScore;
|
|
2517
|
+
let resolvedAuthorityDomain;
|
|
2518
|
+
if (resolvedAuthorityScore === undefined) {
|
|
2519
|
+
const provider = options?.authorityProvider ??
|
|
2520
|
+
new CompositeAuthorityProvider([new OpenPageRankProvider(options?.openPageRankApiKey ?? "")]);
|
|
2521
|
+
try {
|
|
2522
|
+
const host = new URL(source.startsWith("http") ? source : `https://${source}`).hostname;
|
|
2523
|
+
resolvedAuthorityDomain = registrableDomain(host);
|
|
2524
|
+
const a = await provider.authorityFor(resolvedAuthorityDomain);
|
|
2525
|
+
if (a !== null)
|
|
2526
|
+
resolvedAuthorityScore = a;
|
|
2527
|
+
}
|
|
2528
|
+
catch {
|
|
2529
|
+
/* source is a local dir / unparseable → no authority */
|
|
2530
|
+
}
|
|
2531
|
+
}
|
|
2505
2532
|
// v0.6.0 — spec §15.1: site verdict comes from siteVerdictFromTemplates when
|
|
2506
2533
|
// ≥1 template has ≥5% coverage. Falls back to the legacy risk-ladder verdict
|
|
2507
2534
|
// when no template meets the threshold (single-template sites, `unclear`/
|
|
2508
2535
|
// `small-marketing` classifications, or the long-tail-only case).
|
|
2509
2536
|
// The `risk` score is intentionally unchanged — §15.1 governs verdict only.
|
|
2510
|
-
const legacyVerdict = shiftVerdictForAuthority(verdictForRisk(risk),
|
|
2537
|
+
const legacyVerdict = shiftVerdictForAuthority(verdictForRisk(risk), resolvedAuthorityScore);
|
|
2511
2538
|
const templateVerdict = siteVerdictFromTemplates(siteTemplates);
|
|
2512
2539
|
const verdict = templateVerdict !== null ? templateVerdict : legacyVerdict;
|
|
2513
2540
|
const headline = buildHeadline(bucketCounts);
|
|
@@ -2546,6 +2573,9 @@ export async function auditSource(source, options) {
|
|
|
2546
2573
|
auditedUrls: parsedPages.length > 0
|
|
2547
2574
|
? [...parsedPages.map((p) => p.url)].sort()
|
|
2548
2575
|
: undefined,
|
|
2576
|
+
...(resolvedAuthorityScore !== undefined
|
|
2577
|
+
? { authority: { score: resolvedAuthorityScore, domain: resolvedAuthorityDomain ?? "" } }
|
|
2578
|
+
: {}),
|
|
2549
2579
|
};
|
|
2550
2580
|
// Partial-report flag: the backpressure watchdog aborted mid-crawl and we
|
|
2551
2581
|
// salvaged whatever pages had been fetched. Consumers MUST treat coverage as
|