@pseolint/core 0.6.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +3 -3
  2. package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
  3. package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
  4. package/dist/algorithms/authority/commoncrawl.js +17 -0
  5. package/dist/algorithms/authority/commoncrawl.js.map +1 -0
  6. package/dist/algorithms/authority/openpagerank.d.ts +19 -0
  7. package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
  8. package/dist/algorithms/authority/openpagerank.js +42 -0
  9. package/dist/algorithms/authority/openpagerank.js.map +1 -0
  10. package/dist/algorithms/authority/provider.d.ts +16 -0
  11. package/dist/algorithms/authority/provider.d.ts.map +1 -0
  12. package/dist/algorithms/authority/provider.js +24 -0
  13. package/dist/algorithms/authority/provider.js.map +1 -0
  14. package/dist/algorithms/auto-entity-mask.d.ts +19 -0
  15. package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
  16. package/dist/algorithms/auto-entity-mask.js +102 -0
  17. package/dist/algorithms/auto-entity-mask.js.map +1 -0
  18. package/dist/algorithms/example-regions.d.ts +22 -0
  19. package/dist/algorithms/example-regions.d.ts.map +1 -0
  20. package/dist/algorithms/example-regions.js +32 -0
  21. package/dist/algorithms/example-regions.js.map +1 -0
  22. package/dist/algorithms/fact-extraction.d.ts +46 -0
  23. package/dist/algorithms/fact-extraction.d.ts.map +1 -0
  24. package/dist/algorithms/fact-extraction.js +223 -0
  25. package/dist/algorithms/fact-extraction.js.map +1 -0
  26. package/dist/auditor.d.ts.map +1 -1
  27. package/dist/auditor.js +55 -9
  28. package/dist/auditor.js.map +1 -1
  29. package/dist/enrich-findings.d.ts.map +1 -1
  30. package/dist/enrich-findings.js +9 -8
  31. package/dist/enrich-findings.js.map +1 -1
  32. package/dist/index.d.ts +11 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +9 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/origin-preflight.d.ts +89 -0
  37. package/dist/origin-preflight.d.ts.map +1 -0
  38. package/dist/origin-preflight.js +93 -0
  39. package/dist/origin-preflight.js.map +1 -0
  40. package/dist/rule-references.d.ts.map +1 -1
  41. package/dist/rule-references.js +1 -0
  42. package/dist/rule-references.js.map +1 -1
  43. package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
  44. package/dist/rules/aeo/citable-facts.js +4 -33
  45. package/dist/rules/aeo/citable-facts.js.map +1 -1
  46. package/dist/rules/aeo/crawler-access.d.ts +14 -0
  47. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  48. package/dist/rules/aeo/crawler-access.js +96 -15
  49. package/dist/rules/aeo/crawler-access.js.map +1 -1
  50. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  51. package/dist/rules/aeo/summary-bait.js +4 -3
  52. package/dist/rules/aeo/summary-bait.js.map +1 -1
  53. package/dist/rules/content/citation-coverage.d.ts +11 -0
  54. package/dist/rules/content/citation-coverage.d.ts.map +1 -0
  55. package/dist/rules/content/citation-coverage.js +43 -0
  56. package/dist/rules/content/citation-coverage.js.map +1 -0
  57. package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
  58. package/dist/rules/content/common-phrase-reuse.js +7 -2
  59. package/dist/rules/content/common-phrase-reuse.js.map +1 -1
  60. package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
  61. package/dist/rules/content/regurgitated-content.js +11 -2
  62. package/dist/rules/content/regurgitated-content.js.map +1 -1
  63. package/dist/rules/content/translation-no-op.d.ts.map +1 -1
  64. package/dist/rules/content/translation-no-op.js +5 -1
  65. package/dist/rules/content/translation-no-op.js.map +1 -1
  66. package/dist/rules/content/unique-value.d.ts +15 -1
  67. package/dist/rules/content/unique-value.d.ts.map +1 -1
  68. package/dist/rules/content/unique-value.js +46 -39
  69. package/dist/rules/content/unique-value.js.map +1 -1
  70. package/dist/rules/content/value-add.d.ts.map +1 -1
  71. package/dist/rules/content/value-add.js +3 -1
  72. package/dist/rules/content/value-add.js.map +1 -1
  73. package/dist/rules/links/cluster-connectivity.d.ts +7 -1
  74. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
  75. package/dist/rules/links/cluster-connectivity.js +8 -2
  76. package/dist/rules/links/cluster-connectivity.js.map +1 -1
  77. package/dist/rules/links/orphan-pages.d.ts +8 -1
  78. package/dist/rules/links/orphan-pages.d.ts.map +1 -1
  79. package/dist/rules/links/orphan-pages.js +10 -1
  80. package/dist/rules/links/orphan-pages.js.map +1 -1
  81. package/dist/rules/schema/consistency.d.ts.map +1 -1
  82. package/dist/rules/schema/consistency.js +33 -21
  83. package/dist/rules/schema/consistency.js.map +1 -1
  84. package/dist/rules/scope.d.ts.map +1 -1
  85. package/dist/rules/scope.js +1 -0
  86. package/dist/rules/scope.js.map +1 -1
  87. package/dist/rules/spam/entity-swap.d.ts.map +1 -1
  88. package/dist/rules/spam/entity-swap.js +51 -9
  89. package/dist/rules/spam/entity-swap.js.map +1 -1
  90. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  91. package/dist/rules/spam/thin-content.js +5 -1
  92. package/dist/rules/spam/thin-content.js.map +1 -1
  93. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
  94. package/dist/rules/tech/canonical-consistency.js +144 -28
  95. package/dist/rules/tech/canonical-consistency.js.map +1 -1
  96. package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
  97. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
  98. package/dist/rules/tech/sitemap-completeness.js +21 -5
  99. package/dist/rules/tech/sitemap-completeness.js.map +1 -1
  100. package/dist/rules/tech/soft-404.d.ts +11 -0
  101. package/dist/rules/tech/soft-404.d.ts.map +1 -1
  102. package/dist/rules/tech/soft-404.js +47 -5
  103. package/dist/rules/tech/soft-404.js.map +1 -1
  104. package/dist/site-classifier.d.ts.map +1 -1
  105. package/dist/site-classifier.js +1 -0
  106. package/dist/site-classifier.js.map +1 -1
  107. package/dist/template-detection.d.ts +1 -0
  108. package/dist/template-detection.d.ts.map +1 -1
  109. package/dist/template-detection.js +1 -1
  110. package/dist/template-detection.js.map +1 -1
  111. package/dist/types.d.ts +22 -1
  112. package/dist/types.d.ts.map +1 -1
  113. package/package.json +17 -1
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # @pseolint/core
2
2
 
3
- > Programmatic SEO audit engine — 45 rules, surfaced per-template, on every monitored release.
3
+ > Programmatic SEO audit engine — 44 rules, surfaced per-template, on every monitored release.
4
4
 
5
- The core engine behind [pseolint](https://www.npmjs.com/package/pseolint) v0.6.2. Use this package to embed pSEO auditing into your own tools, CI pipelines, or SaaS products.
5
+ The core engine behind [pseolint](https://www.npmjs.com/package/pseolint) v0.7.0. Use this package to embed pSEO auditing into your own tools, CI pipelines, or SaaS products.
6
6
 
7
7
  ## Install
8
8
 
@@ -34,7 +34,7 @@ for (const t of result.templates) {
34
34
 
35
35
  ## What It Checks
36
36
 
37
- 45 rules grouped into 4 scoring super-categories (v0.4): **Integrity** (spam + content + cannibal, weight 0.50), **Discoverability** (links + tech, 0.20), **Citation** (aeo + schema, 0.25), **Data** (0.05). Source-tree namespaces remain `spam/*`, `aeo/*`, etc. for stable rule IDs.
37
+ 44 rules grouped into 4 scoring super-categories (v0.4): **Integrity** (spam + content + cannibal, weight 0.50), **Discoverability** (links + tech, 0.20), **Citation** (aeo + schema, 0.25), **Data** (0.05). Source-tree namespaces remain `spam/*`, `aeo/*`, etc. for stable rule IDs.
38
38
 
39
39
  - **Spam / SpamBrain risk** (8) — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern (cluster-collapsed since v0.5.2)
40
40
  - **Technical SEO** (9) — canonical consistency, canonical/noindex and robots/noindex conflicts, sitemap completeness, robots compliance, redirect chains, soft 404s, hreflang reciprocity, robots-sitemap presence, **og-completeness** (v0.5.2)
@@ -0,0 +1,13 @@
1
+ import type { AuthorityProvider } from "./provider.js";
2
+ /**
3
+ * Authority from a pre-processed Common Crawl host-webgraph table
4
+ * (registrable domain -> harmonic-centrality rank normalized to 0–100).
5
+ * Owned/permissive data (CC license; attribution courtesy). The table is built
6
+ * offline (gated); this provider is a pure lookup. Empty table -> null.
7
+ */
8
+ export declare class CommonCrawlProvider implements AuthorityProvider {
9
+ private readonly table;
10
+ constructor(table: ReadonlyMap<string, number>);
11
+ authorityFor(domain: string): Promise<number | null>;
12
+ }
13
+ //# sourceMappingURL=commoncrawl.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"commoncrawl.d.ts","sourceRoot":"","sources":["../../../src/algorithms/authority/commoncrawl.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAEvD;;;;;GAKG;AACH,qBAAa,mBAAoB,YAAW,iBAAiB;IAC/C,OAAO,CAAC,QAAQ,CAAC,KAAK;gBAAL,KAAK,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC;IAEzD,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;CAI3D"}
@@ -0,0 +1,17 @@
1
/**
 * Authority from a pre-processed Common Crawl host-webgraph table
 * (registrable domain -> harmonic-centrality rank normalized to 0–100).
 * Owned/permissive data (CC license; attribution courtesy). The table is built
 * offline (gated); this provider is a pure lookup. Empty table -> null.
 *
 * Lookup order: the domain exactly as given, then a canonical form (trimmed,
 * lower-cased, trailing root dot removed). Host names are case-insensitive, so
 * "Example.COM" should resolve to the same row as "example.com".
 * NOTE(review): the fallback assumes table keys are stored lower-case — confirm
 * against the offline table builder.
 */
export class CommonCrawlProvider {
    table;
    /**
     * @param table Map from registrable domain to a 0–100 authority score.
     */
    constructor(table) {
        this.table = table;
    }
    /**
     * @param domain Registrable domain to look up.
     * @returns The finite numeric score stored for the domain, or null when the
     *   domain is absent or its stored value is not a finite number.
     */
    async authorityFor(domain) {
        const isScore = (v) => typeof v === "number" && Number.isFinite(v);
        // Exact hit first, so existing callers and tables behave as before.
        const exact = this.table.get(domain);
        if (isScore(exact))
            return exact;
        if (typeof domain !== "string")
            return null;
        const canonical = domain.trim().toLowerCase().replace(/\.$/, "");
        if (canonical === domain)
            return null;
        const fallback = this.table.get(canonical);
        return isScore(fallback) ? fallback : null;
    }
}
17
+ //# sourceMappingURL=commoncrawl.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"commoncrawl.js","sourceRoot":"","sources":["../../../src/algorithms/authority/commoncrawl.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,MAAM,OAAO,mBAAmB;IACD;IAA7B,YAA6B,KAAkC;QAAlC,UAAK,GAAL,KAAK,CAA6B;IAAG,CAAC;IAEnE,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACjC,OAAO,OAAO,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChE,CAAC;CACF"}
@@ -0,0 +1,19 @@
1
+ import type { AuthorityProvider } from "./provider.js";
2
+ type FetchFn = (url: string, init?: {
3
+ headers?: Record<string, string>;
4
+ }) => Promise<Response>;
5
+ /**
6
+ * Open PageRank authority source. Returns 0–100 (page_rank_decimal × 10).
7
+ * Requires a free API key; with no key it returns null (no calls). Any network
8
+ * or per-domain error → null. Attribution ("Open PageRank by DomCop") is the
9
+ * caller's responsibility when displaying.
10
+ */
11
+ export declare class OpenPageRankProvider implements AuthorityProvider {
12
+ private readonly apiKey;
13
+ private readonly fetchFn;
14
+ private readonly timeoutMs;
15
+ constructor(apiKey: string, fetchFn?: FetchFn, timeoutMs?: number);
16
+ authorityFor(domain: string): Promise<number | null>;
17
+ }
18
+ export {};
19
+ //# sourceMappingURL=openpagerank.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openpagerank.d.ts","sourceRoot":"","sources":["../../../src/algorithms/authority/openpagerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAEvD,KAAK,OAAO,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;AAQ/F;;;;;GAKG;AACH,qBAAa,oBAAqB,YAAW,iBAAiB;IAE1D,OAAO,CAAC,QAAQ,CAAC,MAAM;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO;IACxB,OAAO,CAAC,QAAQ,CAAC,SAAS;gBAFT,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,OAAgD,EACzD,SAAS,SAAO;IAG7B,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;CAoB3D"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Open PageRank authority source. Returns 0–100 (page_rank_decimal × 10).
3
+ * Requires a free API key; with no key it returns null (no calls). Any network
4
+ * or per-domain error → null. Attribution ("Open PageRank by DomCop") is the
5
+ * caller's responsibility when displaying.
6
+ */
7
+ export class OpenPageRankProvider {
8
+ apiKey;
9
+ fetchFn;
10
+ timeoutMs;
11
+ constructor(apiKey, fetchFn = globalThis.fetch, timeoutMs = 8000) {
12
+ this.apiKey = apiKey;
13
+ this.fetchFn = fetchFn;
14
+ this.timeoutMs = timeoutMs;
15
+ }
16
+ async authorityFor(domain) {
17
+ if (!this.apiKey)
18
+ return null;
19
+ const url = `https://openpagerank.com/api/v1.0/getPageRank?domains[]=${encodeURIComponent(domain)}`;
20
+ let res;
21
+ try {
22
+ res = await this.fetchFn(url, { headers: { "API-OPR": this.apiKey } });
23
+ }
24
+ catch {
25
+ return null;
26
+ }
27
+ if (!res.ok)
28
+ return null;
29
+ let body;
30
+ try {
31
+ body = (await res.json());
32
+ }
33
+ catch {
34
+ return null;
35
+ }
36
+ const entry = body.response?.find((e) => e.domain === domain) ?? body.response?.[0];
37
+ if (!entry || entry.status_code !== 200 || typeof entry.page_rank_decimal !== "number")
38
+ return null;
39
+ return Math.round(entry.page_rank_decimal * 10);
40
+ }
41
+ }
42
+ //# sourceMappingURL=openpagerank.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openpagerank.js","sourceRoot":"","sources":["../../../src/algorithms/authority/openpagerank.ts"],"names":[],"mappings":"AAUA;;;;;GAKG;AACH,MAAM,OAAO,oBAAoB;IAEZ;IACA;IACA;IAHnB,YACmB,MAAc,EACd,UAAmB,UAAU,CAAC,KAA2B,EACzD,YAAY,IAAI;QAFhB,WAAM,GAAN,MAAM,CAAQ;QACd,YAAO,GAAP,OAAO,CAAkD;QACzD,cAAS,GAAT,SAAS,CAAO;IAChC,CAAC;IAEJ,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QAC9B,MAAM,GAAG,GAAG,2DAA2D,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC;QACpG,IAAI,GAAa,CAAC;QAClB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC;QACzB,IAAI,IAA+B,CAAC;QACpC,IAAI,CAAC;YACH,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAA8B,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC;QACpF,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,WAAW,KAAK,GAAG,IAAI,OAAO,KAAK,CAAC,iBAAiB,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC;QACpG,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,iBAAiB,GAAG,EAAE,CAAC,CAAC;IAClD,CAAC;CACF"}
@@ -0,0 +1,16 @@
1
+ /** A source of domain authority on a 0–100 scale (higher = more authoritative). */
2
+ export interface AuthorityProvider {
3
+ /** Authority for a registrable domain; null when unknown/unavailable. */
4
+ authorityFor(domain: string): Promise<number | null>;
5
+ }
6
+ /**
7
+ * Combines several providers. Returns the MAX non-null score (any source
8
+ * vouching for authority is sufficient evidence). All-null → null → callers
9
+ * apply no moderation (fail-safe). A source that throws is treated as null.
10
+ */
11
+ export declare class CompositeAuthorityProvider implements AuthorityProvider {
12
+ private readonly sources;
13
+ constructor(sources: ReadonlyArray<AuthorityProvider>);
14
+ authorityFor(domain: string): Promise<number | null>;
15
+ }
16
+ //# sourceMappingURL=provider.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provider.d.ts","sourceRoot":"","sources":["../../../src/algorithms/authority/provider.ts"],"names":[],"mappings":"AAAA,mFAAmF;AACnF,MAAM,WAAW,iBAAiB;IAChC,yEAAyE;IACzE,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CACtD;AAED;;;;GAIG;AACH,qBAAa,0BAA2B,YAAW,iBAAiB;IACtD,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAAP,OAAO,EAAE,aAAa,CAAC,iBAAiB,CAAC;IAEhE,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;CAa3D"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Combines several providers. Returns the MAX non-null score (any source
3
+ * vouching for authority is sufficient evidence). All-null → null → callers
4
+ * apply no moderation (fail-safe). A source that throws is treated as null.
5
+ */
6
+ export class CompositeAuthorityProvider {
7
+ sources;
8
+ constructor(sources) {
9
+ this.sources = sources;
10
+ }
11
+ async authorityFor(domain) {
12
+ const results = await Promise.all(this.sources.map(async (s) => {
13
+ try {
14
+ return await s.authorityFor(domain);
15
+ }
16
+ catch {
17
+ return null;
18
+ }
19
+ }));
20
+ const vals = results.filter((v) => v !== null && Number.isFinite(v));
21
+ return vals.length ? Math.max(...vals) : null;
22
+ }
23
+ }
24
+ //# sourceMappingURL=provider.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provider.js","sourceRoot":"","sources":["../../../src/algorithms/authority/provider.ts"],"names":[],"mappings":"AAMA;;;;GAIG;AACH,MAAM,OAAO,0BAA0B;IACR;IAA7B,YAA6B,OAAyC;QAAzC,YAAO,GAAP,OAAO,CAAkC;IAAG,CAAC;IAE1E,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;YAC3B,IAAI,CAAC;gBACH,OAAO,MAAM,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QACF,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAClF,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChD,CAAC;CACF"}
@@ -0,0 +1,19 @@
1
+ import type { EntityMaskPattern, ParsedPage } from "../types.js";
2
+ export interface DeriveOptions {
3
+ /** Only derive from URL-template clusters with at least this many siblings. */
4
+ minClusterSize?: number;
5
+ /** Ignore tokens shorter than this. */
6
+ minTokenLength?: number;
7
+ /** Placeholder substituted for masked entities. */
8
+ placeholder?: string;
9
+ /** Enable URL-slug token derivation. */
10
+ urlSlug?: boolean;
11
+ /** Enable capitalized-content-token derivation. */
12
+ contentDiff?: boolean;
13
+ /** Hard cap on total derived tokens (over-masking guard). */
14
+ maxTokens?: number;
15
+ }
16
+ type MaskPage = Pick<ParsedPage, "url" | "contentText">;
17
+ export declare function deriveEntityPatterns(pages: ReadonlyArray<MaskPage>, opts?: DeriveOptions): EntityMaskPattern[];
18
+ export {};
19
+ //# sourceMappingURL=auto-entity-mask.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auto-entity-mask.d.ts","sourceRoot":"","sources":["../../src/algorithms/auto-entity-mask.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGjE,MAAM,WAAW,aAAa;IAC5B,+EAA+E;IAC/E,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uCAAuC;IACvC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mDAAmD;IACnD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mDAAmD;IACnD,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,KAAK,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,KAAK,GAAG,aAAa,CAAC,CAAC;AA4DxD,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,aAAa,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,EAAE,aAAa,GAAG,iBAAiB,EAAE,CA0C9G"}
@@ -0,0 +1,102 @@
1
+ import { normalizePathToTemplate } from "../template-detection.js";
2
+ /** Tiny stopword set so varying function-words never become entities. */
3
+ const STOPWORDS = new Set([
4
+ "the", "and", "for", "with", "from", "this", "that", "your", "our", "are",
5
+ "you", "all", "new", "best", "top", "how", "what", "why", "who", "about",
6
+ "page", "home", "more", "get", "buy", "free", "online", "now",
7
+ ]);
8
/**
 * Path component of a URL. Absolute URLs go through the URL parser; anything
 * the parser rejects is cut at the first `?` or `#` and returned as-is.
 */
function pathOf(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Not parseable as an absolute URL: strip query/fragment by hand.
        return url.split(/[?#]/)[0];
    }
    return parsed.pathname;
}
16
/** Non-empty `/`-separated segments of a path, in order. */
function rawSegments(path) {
    const segments = [];
    for (const piece of path.split("/")) {
        if (piece)
            segments.push(piece);
    }
    return segments;
}
19
/**
 * Lower-cased word tokens taken from the path segments whose template slot is
 * `:slug`; every other slot is ignored. Each such segment is split on `-` and
 * `_`.
 * NOTE(review): template slots and raw segments are paired by index — assumes
 * normalizePathToTemplate keeps segment positions aligned with the raw path.
 */
function urlSlugTokens(path) {
    const templateParts = normalizePathToTemplate(path).replace(/^\//, "").split("/");
    const segments = rawSegments(path);
    const tokens = [];
    for (let idx = 0; idx < templateParts.length; idx++) {
        const segment = segments[idx];
        if (templateParts[idx] !== ":slug" || !segment)
            continue;
        for (const word of segment.split(/[-_]/)) {
            tokens.push(word.toLowerCase());
        }
    }
    return tokens;
}
32
/** A capitalised ASCII word of three or more letters. */
const CONTENT_ENTITY_RE = /\b[A-Z][a-zA-Z]{2,}\b/g;
/** Every capitalised word found in `text`, lower-cased, in document order. */
function contentEntityTokens(text) {
    const found = [];
    for (const hit of text.matchAll(CONTENT_ENTITY_RE)) {
        found.push(hit[0].toLowerCase());
    }
    return found;
}
36
/**
 * Tokens that occur in some cluster members but not in every one.
 *
 * Each member's token list is first reduced to a set, dropping tokens shorter
 * than `minLen` and anything in STOPWORDS. A token is returned when the number
 * of members containing it is below the member count — vocabulary shared by
 * all members is therefore left out.
 */
function varyingTokens(perMember, minLen) {
    const total = perMember.length;
    const memberCount = new Map();
    for (const toks of perMember) {
        const unique = new Set();
        for (const tok of toks) {
            if (tok.length >= minLen && !STOPWORDS.has(tok))
                unique.add(tok);
        }
        // Count each token once per member, however often it repeats there.
        for (const tok of unique) {
            memberCount.set(tok, (memberCount.get(tok) ?? 0) + 1);
        }
    }
    const varying = new Set();
    for (const [tok, seen] of memberCount) {
        if (seen < total)
            varying.add(tok);
    }
    return varying;
}
54
/** Backslash-escapes every regular-expression metacharacter in `s`. */
function escapeRegex(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`);
}
57
/**
 * Derives entity-mask patterns from pages that share a URL template.
 *
 * Pages are bucketed by `normalizePathToTemplate(pathOf(url))`. For every
 * bucket with at least `minClusterSize` members, tokens that vary between
 * members are collected from URL slugs (`urlSlug`) and/or capitalised content
 * words (`contentDiff`). The collected tokens are de-duplicated, sorted, capped
 * at `maxTokens`, and emitted as case-insensitive, word-bounded alternations of
 * at most 200 tokens each.
 *
 * @param pages Pages providing `url` and `contentText`.
 * @param opts  Optional tuning; defaults: minClusterSize 3, minTokenLength 3,
 *   placeholder "[ENTITY]", urlSlug true, contentDiff true, maxTokens 500.
 * @returns One `{ placeholder, pattern }` per chunk; empty when nothing varies.
 */
export function deriveEntityPatterns(pages, opts) {
    const minCluster = opts?.minClusterSize ?? 3;
    const minLen = opts?.minTokenLength ?? 3;
    const placeholder = opts?.placeholder ?? "[ENTITY]";
    const useUrl = opts?.urlSlug ?? true;
    const useContent = opts?.contentDiff ?? true;
    // A negative cap would make slice(0, -k) drop only the LAST k tokens instead
    // of limiting the total, so clamp it at zero.
    const maxTokens = Math.max(0, opts?.maxTokens ?? 500);
    // Cluster pages by normalized URL template.
    const clusters = new Map();
    for (const p of pages) {
        const tmpl = normalizePathToTemplate(pathOf(p.url));
        const bucket = clusters.get(tmpl);
        if (bucket)
            bucket.push(p);
        else
            clusters.set(tmpl, [p]);
    }
    const entities = new Set();
    for (const members of clusters.values()) {
        if (members.length < minCluster)
            continue;
        if (useUrl) {
            for (const t of varyingTokens(members.map((m) => urlSlugTokens(pathOf(m.url))), minLen))
                entities.add(t);
        }
        if (useContent) {
            for (const t of varyingTokens(members.map((m) => contentEntityTokens(m.contentText ?? "")), minLen))
                entities.add(t);
        }
    }
    // Drop empty tokens (reachable when minTokenLength <= 0, e.g. from a slug
    // with consecutive separators): an empty alternative makes `\b(?:|x)\b`
    // match the empty string at every word boundary.
    const tokens = [...entities].filter((t) => t.length > 0).sort().slice(0, maxTokens);
    if (tokens.length === 0)
        return [];
    const CHUNK = 200;
    const patterns = [];
    for (let i = 0; i < tokens.length; i += CHUNK) {
        // Each token is metacharacter-escaped (escapeRegex) and joined into a bounded,
        // backtracking-free alternation `\b(?:a|b|c)\b` — no nested quantifiers, so this
        // dynamic RegExp is ReDoS-safe by construction.
        const alt = tokens.slice(i, i + CHUNK).map(escapeRegex).join("|");
        // nosemgrep: javascript.lang.security.audit.detect-non-literal-regexp.detect-non-literal-regexp
        patterns.push({ placeholder, pattern: new RegExp(`\\b(?:${alt})\\b`, "gi") });
    }
    return patterns;
}
102
+ //# sourceMappingURL=auto-entity-mask.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auto-entity-mask.js","sourceRoot":"","sources":["../../src/algorithms/auto-entity-mask.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAmBnE,yEAAyE;AACzE,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK;IACzE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO;IACxE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK;CAC9D,CAAC,CAAC;AAEH,SAAS,MAAM,CAAC,GAAW;IACzB,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACnE,CAAC;AAED,4FAA4F;AAC5F,SAAS,aAAa,CAAC,IAAY;IACjC,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7E,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACxB,IAAI,CAAC,KAAK,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5B,KAAK,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;gBAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;QACtE,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,iBAAiB,GAAG,wBAAwB,CAAC;AACnD,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AAC3E,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,SAAqB,EAAE,MAAc;IAC1D,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAC9B,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/E,CAAC;IACF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,KAAK,MAAM,CAAC,IAAI,UAAU;QAAE,KAAK,MAAM,CAAC,I
AAI,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3F,MAAM,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAC5B,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,QAAQ;QAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC/D,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,WAAW,CAAC,CAAS;IAC5B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAA8B,EAAE,IAAoB;IACvF,MAAM,UAAU,GAAG,IAAI,EAAE,cAAc,IAAI,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,IAAI,EAAE,cAAc,IAAI,CAAC,CAAC;IACzC,MAAM,WAAW,GAAG,IAAI,EAAE,WAAW,IAAI,UAAU,CAAC;IACpD,MAAM,MAAM,GAAG,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,EAAE,WAAW,IAAI,IAAI,CAAC;IAC7C,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,GAAG,CAAC;IAEzC,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,uBAAuB,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,MAAM;YAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YACtB,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,OAAO,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;QACxC,IAAI,OAAO,CAAC,MAAM,GAAG,UAAU;YAAE,SAAS;QAC1C,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC;gBAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC3G,CAAC;QACD,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,MAAM,CAAC,IAAI,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC;gBAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACvH,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IACxD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,MAAM,KAAK,GAAG,GAAG,CAAC;IAClB,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,
EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;QAC9C,+EAA+E;QAC/E,iFAAiF;QACjF,gDAAgD;QAChD,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,gGAAgG;QAChG,QAAQ,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,OAAO,EAAE,IAAI,MAAM,CAAC,SAAS,GAAG,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC;IAChF,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,22 @@
1
+ import type { ParsedPage } from "../types.js";
2
+ /**
3
+ * Selector for "quoted example / code / sample" regions — markup that documents
4
+ * a pattern rather than expressing the page's own editorial voice.
5
+ *
6
+ * Content-quality heuristics that judge the page's OWN prose (cliché density,
7
+ * regurgitated-content signals) exclude these regions so a docs / explainer
8
+ * page that *quotes* a bad pattern to teach it isn't penalised for describing
9
+ * it. A real spam page puts clichés in flowing prose, not inside `<code>` or a
10
+ * `<blockquote>` example box — so the exclusion narrows false positives without
11
+ * opening a meaningful evasion path for a low-confidence proxy.
12
+ */
13
+ export declare const EXAMPLE_REGION_SELECTOR = "pre, code, blockquote, figure, samp, kbd, [data-example]";
14
+ /**
15
+ * Page body text with site chrome AND quoted-example regions removed.
16
+ *
17
+ * Falls back to the pre-parsed `contentText` when no html is available (e.g.
18
+ * synthetic unit-test pages). The fallback keeps example text in place — it's a
19
+ * best-effort path; the html path is what production audits exercise.
20
+ */
21
+ export declare function proseTextExcludingExamples(page: ParsedPage): string;
22
+ //# sourceMappingURL=example-regions.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"example-regions.d.ts","sourceRoot":"","sources":["../../src/algorithms/example-regions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAE9C;;;;;;;;;;GAUG;AACH,eAAO,MAAM,uBAAuB,6DACwB,CAAC;AAI7D;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CASnE"}
@@ -0,0 +1,32 @@
1
+ import { load } from "cheerio";
2
+ /**
3
+ * Selector for "quoted example / code / sample" regions — markup that documents
4
+ * a pattern rather than expressing the page's own editorial voice.
5
+ *
6
+ * Content-quality heuristics that judge the page's OWN prose (cliché density,
7
+ * regurgitated-content signals) exclude these regions so a docs / explainer
8
+ * page that *quotes* a bad pattern to teach it isn't penalised for describing
9
+ * it. A real spam page puts clichés in flowing prose, not inside `<code>` or a
10
+ * `<blockquote>` example box — so the exclusion narrows false positives without
11
+ * opening a meaningful evasion path for a low-confidence proxy.
12
+ */
13
+ export const EXAMPLE_REGION_SELECTOR = "pre, code, blockquote, figure, samp, kbd, [data-example]";
14
+ const CHROME_SELECTOR = "header, footer, nav, script, style, noscript";
15
/**
 * The page's own prose: body text after site chrome and quoted-example regions
 * have been stripped, with whitespace runs collapsed to single spaces.
 *
 * When the page carries no (non-blank) html, the pre-parsed `contentText` is
 * returned unchanged — example text is NOT removed on that path — or "" when
 * that is missing too.
 */
export function proseTextExcludingExamples(page) {
    const html = page.html;
    if (!html || !html.trim()) {
        return page.contentText ?? "";
    }
    const $ = load(html);
    $(`${CHROME_SELECTOR}, ${EXAMPLE_REGION_SELECTOR}`).remove();
    const body = $("body");
    // Read from the document root when no <body> element was matched.
    const source = body.length > 0 ? body : $.root();
    return source.text().replace(/\s+/g, " ").trim();
}
32
+ //# sourceMappingURL=example-regions.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"example-regions.js","sourceRoot":"","sources":["../../src/algorithms/example-regions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAG/B;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAClC,0DAA0D,CAAC;AAE7D,MAAM,eAAe,GAAG,8CAA8C,CAAC;AAEvE;;;;;;GAMG;AACH,MAAM,UAAU,0BAA0B,CAAC,IAAgB;IACzD,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC,CAAC,GAAG,eAAe,KAAK,uBAAuB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;QAC7D,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACzD,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;AAChC,CAAC"}
@@ -0,0 +1,46 @@
1
+ import type { EntityMaskPattern, ParsedPage } from "../types.js";
2
+ export type FactKind = "money" | "percent" | "timeframe" | "date" | "isoDate" | "form" | "ratio" | "measurement"; // closed set of tags carried by FactSpan.kind
3
+ export interface FactSpan { // element type of PageFacts.measurements and of extractMeasurements()'s result
4
+ value: string; // NOTE(review): presumably the matched text — confirm in fact-extraction.js
5
+ kind: FactKind; // one of the FactKind tags
6
+ }
7
+ export interface NamedEntity { // element type of PageFacts.namedEntities and of extractNamedEntities()'s result
8
+ value: string;
9
+ source: "proper-noun" | "cue-word" | "json-ld"; // origin tag; NOTE(review): "json-ld" presumably corresponds to the optional jsonLd input of extractNamedEntities — confirm
10
+ type?: "organization" | "person" | "product" | "law" | "standard" | "place" | "other"; // optional category; may be absent
11
+ }
12
+ export interface Citation { // element type of PageFacts.citations and of classifyCitations()'s result
13
+ href: string;
14
+ domain: string; // NOTE(review): presumably registrableDomain() of the href's host — confirm
15
+ authority: "authoritative" | "general";
16
+ reason?: "tld" | "allowlist"; // optional; NOTE(review): presumably why a citation was rated "authoritative" (TLD match vs allowlist hit) — confirm
17
+ }
18
+ export interface GroundedClaim { // element type of PageFacts.groundedClaims and of extractGroundedClaims()'s result
19
+ sentence: string; // per extractGroundedClaims' doc this is approximated at block (<p>/<li>) level, not exact sentence level
20
+ facts: string[];
21
+ citations: string[]; // plain strings here, not Citation objects
22
+ }
23
+ export interface PageFacts { // return shape of extractPageFacts()
24
+ /** EXACTLY today's extractRawFacts() output (run on entity-masked text). Frozen. */
25
+ citableFacts: string[];
26
+ measurements: FactSpan[]; // same element type extractMeasurements() returns
27
+ namedEntities: NamedEntity[]; // same element type extractNamedEntities() returns
28
+ citations: Citation[]; // same element type classifyCitations() returns
29
+ groundedClaims: GroundedClaim[]; // same element type extractGroundedClaims() returns
30
+ }
31
+ export declare function extractCitableFacts(text: string): string[]; // NOTE(review): presumably feeds PageFacts.citableFacts — confirm in fact-extraction.js
32
+ export declare function extractMeasurements(maskedText: string): FactSpan[]; // parameter name suggests entity-masked input — TODO confirm
33
+ export declare function extractNamedEntities(maskedText: string, jsonLd?: unknown[]): NamedEntity[]; // jsonLd is optional and untyped (unknown[])
34
+ export declare const DEFAULT_CITATION_ALLOWLIST: readonly string[]; // NOTE(review): presumably the default for the optional `allowlist` parameters in this file — confirm
35
+ export declare function registrableDomain(host: string): string; // takes a host string (per the parameter name), not a full URL
36
+ export declare function classifyCitations(resolvedHrefs: readonly string[], pageUrl: string, allowlist?: readonly string[]): Citation[]; // allowlist may be omitted; inputs are readonly and not mutated through this signature
37
+ export declare function hasAuthoritativeCitation(resolvedHrefs: readonly string[], pageUrl: string, allowlist?: readonly string[]): boolean; // same inputs as classifyCitations, boolean result
38
+ /**
39
+ * Deterministic approximation of "a verifiable claim": a block (<p>/<li>) that
40
+ * contains a statistic AND an outbound citation. Approximated at block level,
41
+ * not exact sentence level — documented limitation. Detects co-occurrence, not
42
+ * semantic truth. Consume at `speculative` confidence.
43
+ */
44
+ export declare function extractGroundedClaims(html: string, pageUrl: string, allowlist?: readonly string[]): GroundedClaim[]; // takes raw html rather than pre-extracted text; allowlist may be omitted
45
+ export declare function extractPageFacts(page: Pick<ParsedPage, "url" | "contentText" | "html" | "resolvedHrefs" | "jsonLd">, entityPatterns: EntityMaskPattern[], allowlist?: readonly string[]): PageFacts; // returns a PageFacts; only the five picked ParsedPage fields are required, so partial page objects are accepted
46
+ //# sourceMappingURL=fact-extraction.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fact-extraction.d.ts","sourceRoot":"","sources":["../../src/algorithms/fact-extraction.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,MAAM,QAAQ,GAChB,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,GAC/D,OAAO,GAAG,aAAa,CAAC;AAE5B,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,QAAQ,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;IAC/C,IAAI,CAAC,EAAE,cAAc,GAAG,QAAQ,GAAG,SAAS,GAAG,KAAK,GAAG,UAAU,GAAG,OAAO,GAAG,OAAO,CAAC;CACvF;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,eAAe,GAAG,SAAS,CAAC;IACvC,MAAM,CAAC,EAAE,KAAK,GAAG,WAAW,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,oFAAoF;IACpF,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,YAAY,EAAE,QAAQ,EAAE,CAAC;IACzB,aAAa,EAAE,WAAW,EAAE,CAAC;IAC7B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,cAAc,EAAE,aAAa,EAAE,CAAC;CACjC;AAqBD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAQ1D;AAWD,wBAAgB,mBAAmB,CAAC,UAAU,EAAE,MAAM,GAAG,QAAQ,EAAE,CAclE;AA4BD,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,EAAO,GAAG,WAAW,EAAE,CAe9F;AAED,eAAO,MAAM,0BAA0B,EAAE,SAAS,MAAM,EAUvD,CAAC;AAWF,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMtD;AAMD,wBAAgB,iBAAiB,CAC/B,aAAa,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,QAAQ,EAAE,CAqBZ;AAED,wBAAgB,wBAAwB,CACtC,aAAa,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,OAAO,CAET;AAYD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,aAAa,EAAE,CA0BjB;AAED,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,GAAG,aAAa,GAAG,MAAM,GAAG,eAAe,GAAG,QAAQ,CAAC,EACnF,cAAc,EAAE,iBAAiB,EAAE,EACnC,SAAS,GAAE,SAAS,MAAM,EAA+B,GACxD,SAAS,CASX"}