@pseolint/core 0.5.7 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/auditor.d.ts.map +1 -1
  2. package/dist/auditor.js +86 -7
  3. package/dist/auditor.js.map +1 -1
  4. package/dist/per-template-scoring.d.ts +30 -0
  5. package/dist/per-template-scoring.d.ts.map +1 -0
  6. package/dist/per-template-scoring.js +322 -0
  7. package/dist/per-template-scoring.js.map +1 -0
  8. package/dist/rules/content/regurgitated-content.d.ts +3 -0
  9. package/dist/rules/content/regurgitated-content.d.ts.map +1 -0
  10. package/dist/rules/content/regurgitated-content.js +112 -0
  11. package/dist/rules/content/regurgitated-content.js.map +1 -0
  12. package/dist/rules/content/translation-no-op.d.ts +11 -0
  13. package/dist/rules/content/translation-no-op.d.ts.map +1 -0
  14. package/dist/rules/content/translation-no-op.js +101 -0
  15. package/dist/rules/content/translation-no-op.js.map +1 -0
  16. package/dist/rules/content/value-add.d.ts +10 -0
  17. package/dist/rules/content/value-add.d.ts.map +1 -0
  18. package/dist/rules/content/value-add.js +117 -0
  19. package/dist/rules/content/value-add.js.map +1 -0
  20. package/dist/scrape-strategy.d.ts +19 -0
  21. package/dist/scrape-strategy.d.ts.map +1 -1
  22. package/dist/scrape-strategy.js +151 -1
  23. package/dist/scrape-strategy.js.map +1 -1
  24. package/dist/site-classifier.d.ts +49 -0
  25. package/dist/site-classifier.d.ts.map +1 -1
  26. package/dist/site-classifier.js +68 -0
  27. package/dist/site-classifier.js.map +1 -1
  28. package/dist/template-detection.d.ts +51 -0
  29. package/dist/template-detection.d.ts.map +1 -0
  30. package/dist/template-detection.js +139 -0
  31. package/dist/template-detection.js.map +1 -0
  32. package/dist/types.d.ts +60 -0
  33. package/dist/types.d.ts.map +1 -1
  34. package/package.json +1 -1
@@ -0,0 +1 @@
1
+ {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AA0GvE;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAmBtF"}
@@ -0,0 +1,117 @@
1
const RULE_ID = "content/value-add";

/**
 * Trust markers searched for in the raw page HTML. A match on any one of
 * them satisfies the "editorial markers" E-E-A-T category.
 */
const EEAT_HTML_PATTERNS = [
    /last\s+updated/i,
    /last\s+modified/i,
    /reviewed\s+by/i,
    /\bsources:/i,
    /\breferences:/i,
];

/**
 * Severities that earn half credit when a signal rule fires on a page.
 * Every other severity (e.g. "error", "critical") earns no credit.
 */
const PARTIAL_CREDIT_SEVERITIES = new Set(["info", "warning"]);

/**
 * Count how many of the four E-E-A-T categories a page satisfies:
 * an "/about" link, any author signal, a published date, and an
 * editorial marker in the HTML.
 *
 * @param page Parsed page; reads `resolvedHrefs`, `authorSignals`,
 *   `publishedDate` and `html`.
 * @returns Integer in the range 0-4.
 */
function countEeatCategories(page) {
    const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals ?? {};
    const categories = [
        (page.resolvedHrefs ?? []).some((href) => /\/about\b/i.test(href)),
        // Truthiness (not `!== ""`) so a missing meta author is not mistaken
        // for a present one.
        Boolean(metaAuthor || schemaAuthor || bylineElement || relAuthorLink),
        Boolean(page.publishedDate),
        EEAT_HTML_PATTERNS.some((pattern) => pattern.test(page.html ?? "")),
    ];
    return categories.filter(Boolean).length;
}

/**
 * Score one severity-graded signal for a page: 1.0 when the rule did not
 * fire, 0.5 when it fired at a partial-credit severity, 0.0 otherwise.
 * When the rule fired more than once the worst finding wins, so the result
 * does not depend on the order of the findings.
 */
function signalScore(pageFindings, ruleId) {
    let score = 1.0;
    for (const finding of pageFindings) {
        if (finding.ruleId !== ruleId)
            continue;
        score = Math.min(score, PARTIAL_CREDIT_SEVERITIES.has(finding.severity) ? 0.5 : 0.0);
    }
    return score;
}

/**
 * Build the lookups used for scoring in a single pass over the findings:
 * findings grouped by `pageUrl`, and the set of URLs named by any
 * translation-no-op finding (as `pageUrl` or inside `relatedUrls`).
 */
function indexFindings(findings) {
    const byPage = new Map();
    const translationNoOpUrls = new Set();
    for (const finding of findings) {
        const bucket = byPage.get(finding.pageUrl);
        if (bucket)
            bucket.push(finding);
        else
            byPage.set(finding.pageUrl, [finding]);
        if (finding.ruleId === "content/translation-no-op") {
            translationNoOpUrls.add(finding.pageUrl);
            for (const related of finding.relatedUrls ?? [])
                translationNoOpUrls.add(related);
        }
    }
    return { byPage, translationNoOpUrls };
}

/**
 * Derive the five per-page signal scores (each 0.0, 0.5 or 1.0).
 * Property order matters: `buildMessage` lists weak signals in this order.
 */
function computeSignals(page, index) {
    const pageFindings = index.byPage.get(page.url) ?? [];
    // Originality: 1.0 if regurgitated-content doesn't fire, 0.0 if it does.
    const originality = pageFindings.some((f) => f.ruleId === "content/regurgitated-content") ? 0.0 : 1.0;
    // Freshness and citable facts share one severity scale, so a milder
    // finding can never score below a harsher one.
    const freshness = signalScore(pageFindings, "aeo/freshness-signals");
    const facts = signalScore(pageFindings, "aeo/citable-facts");
    // E-E-A-T: all four categories = full credit, two or three = half.
    const eeatCount = countEeatCategories(page);
    const eeat = eeatCount >= 4 ? 1.0 : eeatCount >= 2 ? 0.5 : 0.0;
    // Translation: 1.0 unless a translation-no-op finding lists this page.
    const translation = index.translationNoOpUrls.has(page.url) ? 0.0 : 1.0;
    return { originality, freshness, facts, eeat, translation };
}

/** Unweighted mean of the five signal scores. */
function meanScore(signals) {
    const values = [signals.originality, signals.freshness, signals.facts, signals.eeat, signals.translation];
    return values.reduce((a, b) => a + b, 0) / values.length;
}

/** Map a failing composite score to a severity: below 0.3 is critical. */
function severityForScore(score) {
    return score < 0.3 ? "critical" : "error";
}

/** Render the finding message: composite score, per-signal breakdown, weak signals. */
function buildMessage(page, score, signals) {
    const pct = (v) => `${(v * 100).toFixed(0)}%`;
    const worstSignals = Object.entries(signals)
        .filter(([, val]) => val < 0.5)
        .map(([key]) => key);
    const worstLabel = worstSignals.length > 0 ? worstSignals.join(", ") : "multiple signals";
    return (`${page.url}: value-add score ${pct(score)} — composite of ` +
        `[originality: ${pct(signals.originality)}, freshness: ${pct(signals.freshness)}, ` +
        `facts: ${pct(signals.facts)}, E-E-A-T: ${pct(signals.eeat)}, translation: ${pct(signals.translation)}]. ` +
        `The page lacks ${worstLabel}; pages without proprietary value-add are demoted by SpamBrain.`);
}

/**
 * content/value-add — second-pass composite rule.
 *
 * Reads from existing findings instead of parsing pages directly.
 * Aggregates 5 per-page signal scores into a single 0-1 quality score.
 * Fires ONE critical/error finding per page when score < 0.5.
 *
 * @param pages Parsed pages to score.
 * @param findings Findings produced by the first-pass rules.
 * @returns One finding per page whose composite score is below 0.5.
 */
export function valueAddRule(pages, findings) {
    // Index once so scoring is O(pages + findings) rather than rescanning
    // every finding for every page.
    const index = indexFindings(findings);
    const results = [];
    for (const page of pages) {
        const signals = computeSignals(page, index);
        const score = meanScore(signals);
        if (score >= 0.5)
            continue;
        results.push({
            ruleId: RULE_ID,
            severity: severityForScore(score),
            confidence: "medium",
            message: buildMessage(page, score, signals),
            fix: "Add proprietary content (original analysis, primary-source data, expert commentary, original imagery) to lift the value-add score above 0.5. Score is a composite — improve any underweight signal.",
            pageUrl: page.url,
        });
    }
    return results;
}
117
+ //# sourceMappingURL=value-add.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAEpC,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,mBAAmB,CAAC,IAAgB;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IACrE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACpF,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACnC,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IAClE,OAAO,KAAK,CAAC;AACf,CAAC;AAUD,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC
;SAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;IAED,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;AAC9D,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;IAC1G,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,UAAU,CAAC;IACnC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,KAAK;QAC1G,kBAAkB,UAAU,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QA
E3B,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,gBAAgB,CAAC,KAAK,CAAC;YACjC,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -37,6 +37,25 @@ export interface ScrapeStrategyInputs {
37
37
  * `RunState`.
38
38
  */
39
39
  forceRefetchUrls?: ReadonlyArray<string>;
40
+ /**
41
+ * v0.5.4 — optional budget cap for candidate URL selection. When set,
42
+ * `planScrapeStrategy` will apply template-stratified sampling to narrow
43
+ * `candidateUrls` down to this budget before running the per-URL decision
44
+ * matrix. Watched URLs (from `forceRefetchUrls`) bypass this budget and
45
+ * are always included regardless of the cap.
46
+ *
47
+ * Stratification activates only when:
48
+ * 1. `candidateUrls.length > sampleSize * 1.5` (no point stratifying when
49
+ * we can take everything)
50
+ * 2. `clusterUrlTemplates` produces ≥2 clusters AND the largest cluster
51
+ * covers ≤80% of the candidate pool (if one template dominates 90%+,
52
+ * there is nothing to balance — falls back to the sequential prefix slice).
53
+ *
54
+ * When stratification doesn't activate, a simple sequential prefix slice is
55
+ * used (preserving the caller's ordering). The decision matrix runs only
56
+ * on the selected URLs.
57
+ */
58
+ sampleSize?: number;
40
59
  }
41
60
  export declare function planScrapeStrategy(inputs: ScrapeStrategyInputs): ScrapePlan;
42
61
  //# sourceMappingURL=scrape-strategy.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"scrape-strategy.d.ts","sourceRoot":"","sources":["../src/scrape-strategy.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,6EAA6E;AAC7E,eAAO,MAAM,sBAAsB,IAAI,CAAC;AAaxC,MAAM,MAAM,aAAa,GACrB,KAAK,GACL,KAAK,GACL,SAAS,GACT,SAAS,GACT,SAAS,GACT,KAAK,GACL,WAAW,GACX,SAAS,CAAC;AAEd,MAAM,MAAM,UAAU,GAAG,WAAW,CAAC;AAErC,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IACpC,IAAI,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,oBAAoB;IACnC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IACjC,UAAU,EAAE,QAAQ,GAAG,IAAI,CAAC;IAC5B,mBAAmB,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjD,cAAc,CAAC,EAAE,WAAW,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAC/C,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,GAAG,EAAE,IAAI,CAAC;IACV;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;CAC1C;AAyBD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,oBAAoB,GAAG,UAAU,CAgF3E"}
1
+ {"version":3,"file":"scrape-strategy.d.ts","sourceRoot":"","sources":["../src/scrape-strategy.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAG3C,6EAA6E;AAC7E,eAAO,MAAM,sBAAsB,IAAI,CAAC;AAaxC,MAAM,MAAM,aAAa,GACrB,KAAK,GACL,KAAK,GACL,SAAS,GACT,SAAS,GACT,SAAS,GACT,KAAK,GACL,WAAW,GACX,SAAS,CAAC;AAEd,MAAM,MAAM,UAAU,GAAG,WAAW,CAAC;AAErC,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IACpC,IAAI,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,oBAAoB;IACnC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IACjC,UAAU,EAAE,QAAQ,GAAG,IAAI,CAAC;IAC5B,mBAAmB,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjD,cAAc,CAAC,EAAE,WAAW,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAC/C,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,GAAG,EAAE,IAAI,CAAC;IACV;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IACzC;;;;;;;;;;;;;;;;;OAiBG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AA8LD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,oBAAoB,GAAG,UAAU,CAyF3E"}
@@ -1,3 +1,4 @@
1
+ import { clusterUrlTemplates } from "./site-classifier.js";
1
2
  /** v0.5: shared default age-floor for monitoring. Single source of truth. */
2
3
  export const DEFAULT_AGE_FLOOR_DAYS = 7;
3
4
  /**
@@ -11,6 +12,148 @@ export const DEFAULT_AGE_FLOOR_DAYS = 7;
11
12
  */
12
13
  const RECHECK_SEVERITIES = new Set(["error", "critical", "warning", "warn"]);
13
14
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
15
/** Threshold: stratification only activates when candidates exceed budget by this factor. */
const STRATIFY_TRIGGER_FACTOR = 1.5;
/** Threshold: largest cluster must cover ≤ this ratio for stratification to be meaningful. */
const STRATIFY_MAX_TOP_RATIO = 0.8;
/** Long-tail allocation: min(LONG_TAIL_FIXED, LONG_TAIL_FRACTION × budget). */
const LONG_TAIL_FIXED = 20;
const LONG_TAIL_FRACTION = 0.1;

/**
 * Allocate `budget` slots across template clusters proportionally, with a
 * floor of 1 per cluster while slots last. Long-tail (unmatched) URLs get a
 * small fixed allocation, plus whatever the clusters cannot use.
 *
 * Guarantees:
 *  - never returns more than `budget` URLs; when there are more clusters
 *    than slots, clusters at the END of `clusteredGroups` get nothing, so
 *    callers should order groups largest first;
 *  - returns exactly `budget` URLs whenever the inputs hold that many:
 *    rounding shortfalls are handed back to clusters with spare URLs, and
 *    any remainder goes to the long tail.
 *
 * @param clusteredGroups `{ template, urls }` groups, largest first.
 * @param urlsWithoutCluster Long-tail URLs (may be empty).
 * @param budget Maximum number of URLs to return.
 * @returns Selected URLs: cluster picks in group order, then long-tail picks.
 */
function stratifiedUrlSample(clusteredGroups, urlsWithoutCluster, budget) {
    const slots = Math.max(0, Math.floor(budget)) || 0;
    if (slots === 0)
        return [];
    const groups = clusteredGroups.filter((g) => g.urls.length > 0);
    const totalClustered = groups.reduce((sum, g) => sum + g.urls.length, 0);
    // Evaluates to 0 when there is no long tail.
    const longTailBudget = Math.min(LONG_TAIL_FIXED, Math.round(slots * LONG_TAIL_FRACTION), urlsWithoutCluster.length);
    const clusterBudget = slots - longTailBudget;
    // First pass: proportional allocation with a floor of 1, capped at what
    // the cluster actually holds.
    const allocations = groups.map((g) => Math.min(g.urls.length, Math.max(1, Math.round(clusterBudget * (g.urls.length / totalClustered)))));
    let allocated = allocations.reduce((sum, a) => sum + a, 0);
    // Second pass: trim excess from the end, keeping the floor of 1.
    for (let i = allocations.length - 1; i >= 0 && allocated > clusterBudget; i--) {
        const trimBy = Math.min(allocations[i] - 1, allocated - clusterBudget);
        if (trimBy > 0) {
            allocations[i] -= trimBy;
            allocated -= trimBy;
        }
    }
    // Still over budget: there are more clusters than slots, so the floor
    // cannot hold for everyone. Drop trailing clusters entirely.
    for (let i = allocations.length - 1; i >= 0 && allocated > clusterBudget; i--) {
        allocated -= allocations[i];
        allocations[i] = 0;
    }
    // Third pass: hand slots lost to rounding back, one per cluster per
    // round, to clusters that still have unselected URLs.
    let progressed = true;
    while (allocated < clusterBudget && progressed) {
        progressed = false;
        for (let i = 0; i < allocations.length && allocated < clusterBudget; i++) {
            if (allocations[i] < groups[i].urls.length) {
                allocations[i] += 1;
                allocated += 1;
                progressed = true;
            }
        }
    }
    const result = [];
    groups.forEach((g, i) => {
        for (let j = 0; j < allocations[i]; j++)
            result.push(g.urls[j]);
    });
    // Long-tail: take from the front of the unmatched list. This is at least
    // `longTailBudget`, plus any slots the clusters could not fill.
    const longTailTake = Math.min(urlsWithoutCluster.length, slots - allocated);
    for (let i = 0; i < longTailTake; i++)
        result.push(urlsWithoutCluster[i]);
    return result;
}

/**
 * Narrow `candidateUrls` to `sampleSize` using template-stratified sampling,
 * falling back to a sequential prefix slice when stratification conditions
 * aren't met. `watchedSet` URLs are excluded from consideration here — the
 * caller handles them separately.
 *
 * "Long-tail" definition: every URL whose template is not significant, i.e.
 * the template covers ≤1% of the pool, has only a single URL, could not be
 * derived, or is not recognised by the local normalizer. These are collected
 * into a separate bucket that gets a small fixed allocation rather than a
 * proportional one. No URL is dropped from consideration.
 *
 * @param candidateUrls Candidate URLs in caller order.
 * @param sampleSize Budget; callers pass a positive number.
 * @param watchedSet URLs that bypass the budget, or null/undefined.
 * @returns At most `sampleSize` URLs (the whole pool when it already fits).
 */
function applyStratifiedSample(candidateUrls, sampleSize, watchedSet) {
    // Filter out watched URLs — they bypass the budget entirely.
    const pool = watchedSet
        ? candidateUrls.filter((u) => !watchedSet.has(u))
        : Array.from(candidateUrls);
    if (pool.length <= sampleSize)
        return pool;
    const prefixSlice = () => pool.slice(0, sampleSize);
    if (pool.length <= sampleSize * STRATIFY_TRIGGER_FACTOR)
        return prefixSlice();
    // Check stratification eligibility. A template is "significant" when it
    // covers >1% of the pool AND has >1 URL.
    const significantClusters = clusterUrlTemplates(pool).filter((c) => c.count > 1 && c.ratio > 0.01);
    if (significantClusters.length < 2)
        return prefixSlice();
    // Compute the maximum explicitly rather than trusting element 0, so the
    // check does not depend on the classifier's output order.
    const topRatio = significantClusters.reduce((max, c) => Math.max(max, c.ratio), 0);
    if (topRatio > STRATIFY_MAX_TOP_RATIO)
        return prefixSlice();
    // Group URLs by template in a single pass; everything that does not land
    // in a significant template goes to the long tail.
    const groupsByTemplate = new Map(significantClusters.map((c) => [c.template, []]));
    const longTailUrls = [];
    for (const url of pool) {
        const bucket = groupsByTemplate.get(deriveTemplate(url));
        (bucket ?? longTailUrls).push(url);
    }
    // Largest first (stable for ties) so any trimming hits the smallest groups.
    const orderedGroups = Array.from(groupsByTemplate, ([template, urls]) => ({ template, urls }))
        .filter((g) => g.urls.length > 0)
        .sort((a, b) => b.urls.length - a.urls.length);
    // If the local normalizer disagrees with the classifier there is nothing
    // meaningful to stratify; keep the documented prefix-slice behaviour.
    if (orderedGroups.length < 2)
        return prefixSlice();
    return stratifiedUrlSample(orderedGroups, longTailUrls, sampleSize);
}

/**
 * Derive the template for a URL string (mirrors site-classifier's urlToTemplate).
 * Absolute URLs use their parsed pathname; anything `new URL` rejects is
 * treated as a bare path after dropping its query string and fragment.
 *
 * @returns The template, or null for an empty or non-string input.
 */
function deriveTemplate(url) {
    try {
        return normalizePathTemplate(new URL(url).pathname);
    }
    catch {
        if (typeof url !== "string" || url.length === 0)
            return null;
        const path = url.split("?")[0].split("#")[0];
        return normalizePathTemplate(path.startsWith("/") ? path : `/${path}`);
    }
}

/**
 * Simplified path normalizer matching site-classifier's normalizePathToTemplate logic.
 * Drops one trailing slash, then rewrites each segment: all digits → ":n";
 * lowercase hyphenated words, or a lowercase word of 12+ letters → ":slug".
 * Other segments are kept verbatim.
 */
function normalizePathTemplate(pathname) {
    let p = pathname || "/";
    if (p.length > 1 && p.endsWith("/"))
        p = p.slice(0, -1);
    if (!p.startsWith("/"))
        p = "/" + p;
    const segments = p.split("/").slice(1);
    if (segments.length === 0 || (segments.length === 1 && segments[0] === ""))
        return "/";
    const out = segments.map((seg) => {
        if (seg === "")
            return "";
        if (/^\d+$/.test(seg))
            return ":n";
        if (/^[a-z0-9]+(?:-[a-z0-9]+)+$/.test(seg))
            return ":slug";
        if (seg.length >= 12 && /^[a-z]+$/.test(seg))
            return ":slug";
        return seg;
    });
    return "/" + out.join("/");
}
14
157
  function gscExceedsThreshold(delta, thresholds) {
15
158
  return Math.abs(delta.impressionsDelta) >= thresholds.impressionsPct
16
159
  || Math.abs(delta.clicksDelta) >= thresholds.clicksAbsolute;
@@ -41,6 +184,13 @@ export function planScrapeStrategy(inputs) {
41
184
  const watchedSet = inputs.forceRefetchUrls && inputs.forceRefetchUrls.length > 0
42
185
  ? new Set(inputs.forceRefetchUrls)
43
186
  : null;
187
+ // v0.5.4: template-stratified candidate selection. Narrows candidateUrls to
188
+ // sampleSize before the decision matrix, proportionally covering all
189
+ // template clusters. Watched URLs are excluded from the budget calculation
190
+ // and added unconditionally in the eval-order loop below.
191
+ const effectiveCandidates = inputs.sampleSize && inputs.sampleSize > 0
192
+ ? applyStratifiedSample(inputs.candidateUrls, inputs.sampleSize, watchedSet)
193
+ : inputs.candidateUrls;
44
194
  const visited = new Set();
45
195
  const evalOrder = [];
46
196
  if (watchedSet) {
@@ -51,7 +201,7 @@ export function planScrapeStrategy(inputs) {
51
201
  }
52
202
  }
53
203
  }
54
- for (const url of inputs.candidateUrls) {
204
+ for (const url of effectiveCandidates) {
55
205
  if (!visited.has(url)) {
56
206
  visited.add(url);
57
207
  evalOrder.push(url);
@@ -1 +1 @@
1
- {"version":3,"file":"scrape-strategy.js","sourceRoot":"","sources":["../src/scrape-strategy.ts"],"names":[],"mappings":"AAEA,6EAA6E;AAC7E,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC;AAExC;;;;;;;;GAQG;AACH,MAAM,kBAAkB,GAAwB,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;AAqDlG,MAAM,UAAU,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAEvC,SAAS,mBAAmB,CAAC,KAAe,EAAE,UAAyB;IACrE,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,UAAU,CAAC,cAAc;WAC7D,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,UAAU,CAAC,cAAc,CAAC;AAClE,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,2BAA2B,CAAC,KAA+B;IAClE,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACxE,CAAC;IACD,0EAA0E;IAC1E,OAAO,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,MAA4B;IAC7D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAyB,CAAC;IACjD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAC;IAE3C,0EAA0E;IAC1E,2EAA2E;IAC3E,6EAA6E;IAC7E,2EAA2E;IAC3E,4DAA4D;IAC5D,MAAM,UAAU,GAAG,MAAM,CAAC,gBAAgB,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;QAC9E,CAAC,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,gBAAgB,CAAC;QAClC,CAAC,CAAC,IAAI,CAAC;IAET,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,IAAI,UAAU,EAAE,CAAC;QACf,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACjB,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QACvC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjB,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,IAAI,UAAU,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,GAAG,CAAC,G
AAG,EAAE,KAAK,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,MAAM,CAAC,YAAY,GAAG,UAAU,EAAE,CAAC;YACvE,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,IAAI,KAAK,CAAC,cAAc,KAAK,MAAM,CAAC,qBAAqB,EAAE,CAAC;YAC1D,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,IAAI,2BAA2B,CAAC,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACpD,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YACjE,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,cAAc,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;QACjD,IAAI,QAAQ,IAAI,MAAM,CAAC,aAAa,IAAI,mBAAmB,CAAC,QAAQ,EAAE,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;YAC5F,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;YAC9B,SAAS;QACX,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC3B,CAAC"}
1
+ {"version":3,"file":"scrape-strategy.js","sourceRoot":"","sources":["../src/scrape-strategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAE3D,6EAA6E;AAC7E,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC;AAExC;;;;;;;;GAQG;AACH,MAAM,kBAAkB,GAAwB,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;AAwElG,MAAM,UAAU,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;AAEvC,6FAA6F;AAC7F,MAAM,uBAAuB,GAAG,GAAG,CAAC;AAEpC,8FAA8F;AAC9F,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAEnC,+EAA+E;AAC/E,MAAM,eAAe,GAAG,EAAE,CAAC;AAC3B,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B;;;;;;;;;GASG;AACH,SAAS,mBAAmB,CAC1B,eAA4D,EAC5D,kBAA4B,EAC5B,MAAc;IAEd,MAAM,cAAc,GAAG,kBAAkB,CAAC,MAAM,GAAG,CAAC;QAClD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,kBAAkB,CAAC,EAAE,kBAAkB,CAAC,MAAM,CAAC;QAC/F,CAAC,CAAC,CAAC,CAAC;IACN,MAAM,aAAa,GAAG,MAAM,GAAG,cAAc,CAAC;IAE9C,MAAM,cAAc,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAE9E,uDAAuD;IACvD,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAC5C,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,CAC1E,CAAC;IAEF,+DAA+D;IAC/D,IAAI,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACvD,KAAK,IAAI,CAAC,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,SAAS,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9E,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,OAAO,IAAI,CAAC;YAAE,SAAS;QAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,GAAG,aAAa,CAAC,CAAC;QAC5D,WAAW,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;QACzB,SAAS,IAAI,MAAM,CAAC;IACtB,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,IAAI,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,
EAAE,CAAC,EAAE;YAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,oDAAoD;IACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,EAAE,CAAC,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;IAE5E,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,qBAAqB,CAC5B,aAAgC,EAChC,UAAkB,EAClB,UAAsC;IAEtC,6DAA6D;IAC7D,MAAM,IAAI,GAAG,UAAU;QACrB,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAE9B,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU;QAAE,OAAO,IAAI,CAAC;IAC3C,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,GAAG,uBAAuB,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACnC,CAAC;IAED,oCAAoC;IACpC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAgB,CAAC,CAAC;IAEvD,yEAAyE;IACzE,6EAA6E;IAC7E,gCAAgC;IAChC,MAAM,mBAAmB,GAAG,QAAQ,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,GAAG,IAAI,CACrC,CAAC;IACF,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAC/B,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAC/E,CAAC;IAEF,MAAM,iBAAiB,GAAG,mBAAmB,CAAC,MAAM,IAAI,CAAC,CAAC;IAC1D,MAAM,UAAU,GAAG,CAAC,iBAAiB,IAAI,mBAAmB,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,sBAAsB,CAAC;IAEhG,IAAI,CAAC,iBAAiB,IAAI,CAAC,UAAU,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACnC,CAAC;IAED,2CAA2C;IAC3C,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACnF,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAoB,CAAC;IACrD,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,QAAQ,KAAK,IAAI,IAAI,sBAAsB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9D,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACjD,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACd,gBAAgB,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,sEAAsE;YACtE,mEAAmE;YACnE,8EAA8E;YAC9E,IAAI,QAAQ,KAAK,I
AAI,IAAI,iBAAiB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACzD,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACpD,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,IAAI,EAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE;KAC7C,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAErC,OAAO,mBAAmB,CAAC,aAAa,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC;AACtE,CAAC;AAED,sFAAsF;AACtF,SAAS,cAAc,CAAC,GAAW;IACjC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,OAAO,qBAAqB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,qBAAqB,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QACzE,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,2FAA2F;AAC3F,SAAS,qBAAqB,CAAC,QAAgB;IAC7C,IAAI,CAAC,GAAG,QAAQ,IAAI,GAAG,CAAC;IACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACxD,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;IACpC,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACvC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC;QAAE,OAAO,GAAG,CAAC;IACvF,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC/B,IAAI,GAAG,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAC1B,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,IAAI,4BAA4B,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,OAAO,CAAC;QAC3D,IAAI,GAAG,CAAC,MAAM,IAAI,EAAE,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,OAAO,CAAC;QAC7D,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;IACH,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAe,EAAE,UAAyB;IACrE,OAAO,IAAI,CAAC,GAAG,CAAC,KAA
K,CAAC,gBAAgB,CAAC,IAAI,UAAU,CAAC,cAAc;WAC7D,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,UAAU,CAAC,cAAc,CAAC;AAClE,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,2BAA2B,CAAC,KAA+B;IAClE,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACxE,CAAC;IACD,0EAA0E;IAC1E,OAAO,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,MAA4B;IAC7D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAyB,CAAC;IACjD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAsB,CAAC;IAE3C,0EAA0E;IAC1E,2EAA2E;IAC3E,6EAA6E;IAC7E,2EAA2E;IAC3E,4DAA4D;IAC5D,MAAM,UAAU,GAAG,MAAM,CAAC,gBAAgB,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;QAC9E,CAAC,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,gBAAgB,CAAC;QAClC,CAAC,CAAC,IAAI,CAAC;IAET,4EAA4E;IAC5E,qEAAqE;IACrE,2EAA2E;IAC3E,0DAA0D;IAC1D,MAAM,mBAAmB,GACvB,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;QACxC,CAAC,CAAC,qBAAqB,CAAC,MAAM,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC;QAC5E,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC;IAE3B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,IAAI,UAAU,EAAE,CAAC;QACf,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACjB,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;QACtC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjB,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,IAAI,UAAU,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QAE3C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,
GAAG,MAAM,CAAC,YAAY,GAAG,UAAU,EAAE,CAAC;YACvE,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,IAAI,KAAK,CAAC,cAAc,KAAK,MAAM,CAAC,qBAAqB,EAAE,CAAC;YAC1D,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,IAAI,2BAA2B,CAAC,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACpD,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YACjE,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,cAAc,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;QACjD,IAAI,QAAQ,IAAI,MAAM,CAAC,aAAa,IAAI,mBAAmB,CAAC,QAAQ,EAAE,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;YAC5F,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;YAC9B,SAAS;QACX,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC3B,CAAC"}
@@ -31,6 +31,18 @@ export type ClassificationSignal = {
31
31
  } | {
32
32
  kind: "framework-detected";
33
33
  value: "nextjs" | "vite" | "astro" | "unknown";
34
+ }
35
+ /**
36
+ * v0.5.3 — emitted when `applyDegenerationGuard` downgrades a
37
+ * `small-marketing` or `blog` classification to `unclear` because the
38
+ * corpus is degenerate (mostly thin / mostly identical titles). Surfacing
39
+ * this in `signals` lets the UI explain why severity demotions didn't
40
+ * apply.
41
+ */
42
+ | {
43
+ kind: "degeneration-guard-tripped";
44
+ reason: "median-thin" | "title-duplicate-heavy";
45
+ value: number;
34
46
  };
35
47
  export interface SiteClassification {
36
48
  type: SiteType;
@@ -80,4 +92,41 @@ export interface ClassifySiteInput {
80
92
  * signal to distinguish a 5000-page directory from a 25-page sample of one.
81
93
  */
82
94
  export declare function classifySite(input: ClassifySiteInput): SiteClassification;
95
+ /**
96
+ * v0.5.3 — corpus-quality guard against "small-marketing" / "blog"
97
+ * classification masking degenerate sites. The `small-marketing` profile
98
+ * demotes `spam/thin-content`, `aeo/citable-facts`, `aeo/freshness-signals`,
99
+ * `spam/doorway-pattern` etc. to `info` to avoid false-positives on legit
100
+ * 6-page marketing sites (linear.app etc). But a 6-page site with 0 unique
101
+ * content per page (e.g. an un-translated language switcher pretending to be
102
+ * a directory) trips the same shape and inherits the demotions, escaping
103
+ * with grade B.
104
+ *
105
+ * This guard runs AFTER classification, with parsed-page stats. If the
106
+ * corpus is degenerate (median word count < 50, OR ≥50% of pages share an
107
+ * identical title on a corpus of ≥4 pages), the classification is downgraded to
108
+ * `unclear` so the demotion table doesn't apply — the natural rule severities then fire.
109
+ *
110
+ * Only `small-marketing` and `blog` are guarded. The other types either
111
+ * already run all rules (`unclear`, `programmatic-directory`, `ecommerce`,
112
+ * `docs`) or aren't reached by the small-corpus path.
113
+ */
114
+ export declare function applyDegenerationGuard(classification: SiteClassification, corpusStats: {
115
+ medianWordCount: number;
116
+ identicalTitleRatio: number;
117
+ pageCount: number;
118
+ }): SiteClassification;
119
+ /**
120
+ * Compute the corpus stats `applyDegenerationGuard` consumes. Pulled out so
121
+ * tests can pass a fixture stat block directly without constructing
122
+ * `ParsedPage` instances.
123
+ */
124
+ export declare function corpusStatsFromPages(pages: ReadonlyArray<{
125
+ title: string;
126
+ contentText: string;
127
+ }>): {
128
+ medianWordCount: number;
129
+ identicalTitleRatio: number;
130
+ pageCount: number;
131
+ };
83
132
  //# sourceMappingURL=site-classifier.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"site-classifier.d.ts","sourceRoot":"","sources":["../src/site-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,MAAM,QAAQ,GAChB,wBAAwB,GACxB,iBAAiB,GACjB,MAAM,GACN,WAAW,GACX,MAAM,GACN,SAAS,CAAC;AAEd,MAAM,MAAM,oBAAoB,GAC5B;IAAE,IAAI,EAAE,mBAAmB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAC5C;IACE,IAAI,EAAE,8BAA8B,CAAC;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf,GACD;IAAE,IAAI,EAAE,oBAAoB,CAAC;IAAC,KAAK,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAA;CAAE,CAAC;AAEnF,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,QAAQ,CAAC;IACf,wEAAwE;IACxE,UAAU,EAAE,MAAM,CAAC;IACnB,2EAA2E;IAC3E,OAAO,EAAE,oBAAoB,EAAE,CAAC;IAChC;;;;;;OAMG;IACH,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,oEAAoE;AACpE,eAAO,MAAM,kBAAkB,EAAE,SAAS,MAAM,EAM/C,CAAC;AAEF;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAiChE;AA8BD,kFAAkF;AAClF,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAiB7G;AAED,MAAM,WAAW,iBAAiB;IAChC,yDAAyD;IACzD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,kEAAkE;IAClE,SAAS,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC;CACrD;AA8KD;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,iBAAiB,GAAG,kBAAkB,CA2HzE"}
1
+ {"version":3,"file":"site-classifier.d.ts","sourceRoot":"","sources":["../src/site-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,MAAM,QAAQ,GAChB,wBAAwB,GACxB,iBAAiB,GACjB,MAAM,GACN,WAAW,GACX,MAAM,GACN,SAAS,CAAC;AAEd,MAAM,MAAM,oBAAoB,GAC5B;IAAE,IAAI,EAAE,mBAAmB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAC5C;IACE,IAAI,EAAE,8BAA8B,CAAC;IACrC,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf,GACD;IAAE,IAAI,EAAE,oBAAoB,CAAC;IAAC,KAAK,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAA;CAAE;AAChF;;;;;;GAMG;GACD;IAAE,IAAI,EAAE,4BAA4B,CAAC;IAAC,MAAM,EAAE,aAAa,GAAG,uBAAuB,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC;AAE3G,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,QAAQ,CAAC;IACf,wEAAwE;IACxE,UAAU,EAAE,MAAM,CAAC;IACnB,2EAA2E;IAC3E,OAAO,EAAE,oBAAoB,EAAE,CAAC;IAChC;;;;;;OAMG;IACH,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,oEAAoE;AACpE,eAAO,MAAM,kBAAkB,EAAE,SAAS,MAAM,EAM/C,CAAC;AAEF;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAiChE;AA8BD,kFAAkF;AAClF,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAiB7G;AAED,MAAM,WAAW,iBAAiB;IAChC,yDAAyD;IACzD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,kEAAkE;IAClE,SAAS,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC;CACrD;AA8KD;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,iBAAiB,GAAG,kBAAkB,CA2HzE;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,sBAAsB,CACpC,cAAc,EAAE,kBAAkB,EAClC,WAAW,EAAE;IAAE,eAAe,EAAE,MAAM,CAAC;IAAC,mBAAmB,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACvF,kBAAkB,CA0BpB;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,aAAa,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG;IAClG,eAAe,EAAE,MAAM,CAAC;IACxB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;CACnB,CAoBA"}
@@ -419,4 +419,72 @@ export function classifySite(input) {
419
419
  const suppressedRules = type === "small-marketing" || type === "blog" ? [...PSEO_ONLY_RULE_IDS] : [];
420
420
  return { type, confidence, signals, suppressedRules };
421
421
  }
422
+ /**
423
+ * v0.5.3 — corpus-quality guard against "small-marketing" / "blog"
424
+ * classification masking degenerate sites. The `small-marketing` profile
425
+ * demotes `spam/thin-content`, `aeo/citable-facts`, `aeo/freshness-signals`,
426
+ * `spam/doorway-pattern` etc. to `info` to avoid false-positives on legit
427
+ * 6-page marketing sites (linear.app etc). But a 6-page site with 0 unique
428
+ * content per page (e.g. an un-translated language switcher pretending to be
429
+ * a directory) trips the same shape and inherits the demotions, escaping
430
+ * with grade B.
431
+ *
432
+ * This guard runs AFTER classification, with parsed-page stats. If the
433
+ * corpus is degenerate (median word count < 50, OR ≥50% of pages share an
434
+ * identical title on a corpus of ≥4 pages), the classification is downgraded to
435
+ * `unclear` so the demotion table doesn't apply — the natural rule severities then fire.
436
+ *
437
+ * Only `small-marketing` and `blog` are guarded. The other types either
438
+ * already run all rules (`unclear`, `programmatic-directory`, `ecommerce`,
439
+ * `docs`) or aren't reached by the small-corpus path. The input object is returned unchanged when the guard does not trip (including when `pageCount` is 0); when it trips, a new object is returned with type `unclear`, confidence 0, no suppressed rules, and the original signals plus one `degeneration-guard-tripped` signal. If both conditions hold, `median-thin` is the reported reason and the median word count is the reported value.
440
+ */
441
+ export function applyDegenerationGuard(classification, corpusStats) {
442
+ if (classification.type !== "small-marketing" && classification.type !== "blog") {
443
+ return classification;
444
+ }
445
+ if (corpusStats.pageCount === 0)
446
+ return classification;
447
+ const isThin = corpusStats.medianWordCount < 50;
448
+ // Require ≥4 pages so a 2-page marketing site with two same-titled drafts
449
+ // doesn't false-positive. Tunable; bestfirenze.com has 6 pages, all with
450
+ // the same homepage content. The thin check above has no such page-count floor.
451
+ const isTitleDuplicateHeavy = corpusStats.identicalTitleRatio >= 0.5 && corpusStats.pageCount >= 4;
452
+ if (!isThin && !isTitleDuplicateHeavy)
453
+ return classification;
454
+ const reason = isThin
455
+ ? "median-thin"
456
+ : "title-duplicate-heavy";
457
+ const value = isThin ? corpusStats.medianWordCount : corpusStats.identicalTitleRatio;
458
+ return {
459
+ type: "unclear",
460
+ confidence: 0,
461
+ signals: [...classification.signals, { kind: "degeneration-guard-tripped", reason, value }],
462
+ suppressedRules: [],
463
+ };
464
+ }
465
+ /**
466
+ * Compute the corpus stats `applyDegenerationGuard` consumes. Pulled out so
467
+ * tests can pass a fixture stat block directly without constructing
468
+ * `ParsedPage` instances. `medianWordCount` is the middle element of the ascending per-page counts of whitespace-separated words in `contentText` (the upper of the two middle values when the page count is even). `identicalTitleRatio` is the size of the largest group of equal titles (lower-cased, trimmed, empty titles dropped) divided by the total page count. An empty `pages` array yields zeros for all three fields.
469
+ */
470
+ export function corpusStatsFromPages(pages) {
471
+ if (pages.length === 0) {
472
+ return { medianWordCount: 0, identicalTitleRatio: 0, pageCount: 0 };
473
+ }
474
+ const wordCounts = pages
475
+ .map((p) => p.contentText.split(/\s+/).filter(Boolean).length)
476
+ .sort((a, b) => a - b);
477
+ const medianWordCount = wordCounts[Math.floor(wordCounts.length / 2)];
478
+ const titles = pages.map((p) => p.title.toLowerCase().trim()).filter(Boolean);
479
+ let identicalTitleRatio = 0;
480
+ if (titles.length > 0) {
481
+ // Size of the largest cluster of identical titles divided by the total page count; untitled pages are not clustered but still count in the denominator.
482
+ const counts = new Map();
483
+ for (const t of titles)
484
+ counts.set(t, (counts.get(t) ?? 0) + 1);
485
+ const maxClusterSize = Math.max(...counts.values());
486
+ identicalTitleRatio = maxClusterSize / pages.length;
487
+ }
488
+ return { medianWordCount, identicalTitleRatio, pageCount: pages.length };
489
+ }
422
490
  //# sourceMappingURL=site-classifier.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"site-classifier.js","sourceRoot":"","sources":["../src/site-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAoCH,oEAAoE;AACpE,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,wBAAwB;IACxB,yBAAyB;IACzB,kBAAkB;IAClB,sBAAsB;IACtB,+BAA+B;CAChC,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,4DAA4D;IAC5D,IAAI,CAAC,GAAG,QAAQ,IAAI,GAAG,CAAC;IACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACxD,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB;IAC7D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC;QAC3E,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,GAAG,GAAa,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACzC,IAAI,GAAG,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAC1B,wBAAwB;QACxB,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,gEAAgE;QAChE,6DAA6D;QAC7D,kEAAkE;QAClE,iEAAiE;QACjE,oCAAoC;QACpC,IAAI,4BAA4B,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC3C,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,mEAAmE;QACnE,kEAAkE;QAClE,oCAAoC;QACpC,IAAI,GAAG,CAAC,MAAM,IAAI,EAAE,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7C,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,qEAAqE;AACrE,SAAS,SAAS,CAAC,GAAW;IAC5B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC;IACtC,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;QAClD,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,qEAAqE;AACrE,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC
;QACvB,OAAO,uBAAuB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,uBAAuB;QACvB,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,mBAAmB,CAAC,IAAc;IAChD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK,IAAI;YAAE,SAAS;QACzB,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxC,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACvE,QAAQ;QACR,KAAK;QACL,KAAK,EAAE,KAAK,GAAG,KAAK;KACrB,CAAC,CAAC,CAAC;IACJ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IAClF,OAAO,OAAO,CAAC;AACjB,CAAC;AASD;;;;;GAKG;AACH,MAAM,kBAAkB,GAAsB;IAC5C,OAAO;IACP,MAAM;IACN,gBAAgB;IAChB,MAAM;IACN,WAAW;IACX,gBAAgB;IAChB,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,YAAY;IACZ,SAAS;IACT,WAAW;CACZ,CAAC;AAEF;;;GAGG;AACH,MAAM,uBAAuB,GAAsB;IACjD,WAAW;IACX,UAAU;IACV,cAAc;IACd,aAAa;IACb,OAAO;IACP,QAAQ;IACR,OAAO;IACP,WAAW;IACX,WAAW;IACX,aAAa;IACb,IAAI,EAAE,+BAA+B;CACtC,CAAC;AAEF,SAAS,mBAAmB,CAAC,QAAgB,EAAE,QAA2B;IACxE,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;IAC9C,OAAO,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAClC,CAAC;AAED,wEAAwE;AACxE,SAAS,eAAe,CAAC,IAAc,EAAE,QAA2B;IAClE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,KAAK,GAAG,CAAC,CA
AC;IACd,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,KAAK,IAAI;YAAE,SAAS;QAC5B,KAAK,IAAI,CAAC,CAAC;QACX,IAAI,mBAAmB,CAAC,IAAI,EAAE,QAAQ,CAAC;YAAE,IAAI,IAAI,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,IAAI,GAAG,KAAK,CAAC;AACtB,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CACtB,IAAc,EACd,OAA+B;IAE/B,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAClC,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;IACxD,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,IAAI,CAAC;IAC7B,IAAI,UAAU,GAAG,GAAG,CAAC;IACrB,IAAI,KAAK,IAAI,IAAI;QAAE,UAAU,GAAG,IAAI,CAAC;IACrC,IAAI,KAAK,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC;IACpC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC,CAAC;IACvE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;AACpE,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,oBAAoB,CAC3B,IAAc,EACd,OAA+B;IAE/B,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAClC,MAAM,aAAa,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;IAC3E,IAAI,aAAa,IAAI,GAAG,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,aAAa,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QACvD,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACzE,CAAC;IACD,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,EAAE,uBAAuB,CAAC,CAAC;IAClE,IAAI,UAAU,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QAC5C,MAAM,UAAU,GAAG,UAAU,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QACnD,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACzE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,0BAA0B,CAAC;AAExD,SAAS,eAAe,CAAC,QAAgB;IACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,OAAO,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC;AAED,uEAAuE;AACvE,SAAS,cAAc,CAAC,IAAc;IACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IA
AI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,KAAK,IAAI;YAAE,SAAS;QAC5B,KAAK,IAAI,CAAC,CAAC;QACX,IAAI,eAAe,CAAC,IAAI,CAAC;YAAE,IAAI,IAAI,CAAC,CAAC;IACvC,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,IAAI,GAAG,KAAK,CAAC;AACtB,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,6BAA6B,CACpC,IAAc,EACd,OAA+B;IAE/B,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,IAAI,CAAC;IAC7B,wEAAwE;IACxE,sEAAsE;IACtE,qEAAqE;IACrE,sBAAsB;IACtB,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,KAAK,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC;IACpC,IAAI,KAAK,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC;IACpC,OAAO;QACL,IAAI,EAAE,iBAAiB;QACvB,UAAU;QACV,OAAO;QACP,eAAe,EAAE,CAAC,GAAG,kBAAkB,CAAC;KACzC,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,KAAwB;IACnD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACzD,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,SAAS,CAAC;IAE/C,MAAM,OAAO,GAA2B,EAAE,CAAC;IAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,mBAAmB,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAEhE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5E,IAAI,GAAG,EAAE,CAAC;QACR,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,8BAA8B;YACpC,WAAW,EAAE,GAAG,CAAC,QAAQ;YACzB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,oBAAoB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IAE/D,8CAA8C;IAC9C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IAC1E,CAAC;IAED,wEAAwE;IACxE,uEAAuE;IACvE,gEAAgE;IAChE,0EAA0E;IAC1E,yEAAyE;IACzE,oEAAoE;IACpE,6EAA6E;IAC7E,yEAAyE;IACzE,0EAA0E;IAC1E,iDAAiD;IACjD,0EAA0E;IAC1E,MAAM,SAAS,GAAG,oBAAoB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACtD
,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC;IAEhC,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5C,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAEtB,MAAM,SAAS,GAAG,6BAA6B,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC;IAEhC,sBAAsB;IACtB,IAAI,IAAI,GAAa,SAAS,CAAC;IAC/B,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,wEAAwE;IACxE,uEAAuE;IACvE,0EAA0E;IAC1E,gDAAgD;IAChD,2EAA2E;IAC3E,sEAAsE;IACtE,sCAAsC;IACtC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,GAAG,wBAAwB,CAAC;YAChC,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,oEAAoE;YACpE,qDAAqD;YACrD,IAAI,GAAG,WAAW,CAAC;YACnB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;QAClD,IAAI,GAAG,wBAAwB,CAAC;QAChC,UAAU,GAAG,IAAI,CAAC;IACpB,CAAC;SAAM,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QAC5B,mEAAmE;QACnE,EAAE;QACF,qEAAqE;QACrE,mEAAmE;QACnE,qEAAqE;QACrE,qEAAqE;QACrE,sEAAsE;QACtE,MAAM,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CACvC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,aAAa,CAC1F,CAAC;QACF,MAAM,gBAAgB,GACpB,GAAG,KAAK,SAAS;YACjB,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;YACnD,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC;QACnB,IAAI,YAAY,IAAI,YAAY,CAAC,KAAK,IAAI,GAAG,EAAE,CAAC;YAC9C,IAAI,GAAG,MAAM,CAAC;YACd,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7B,gEAAgE;YAChE,IAAI,GAAG,iBAAiB,CAAC;YACzB,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,+DAA+D;YAC/D,qEAAqE;YACrE,qEAAqE;YACrE,eAAe;YACf,IAAI,GAAG,SAAS,CAAC;YACjB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,uDAAuD;QACvD,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,GAAG,wBAAwB,CAAC;YAChC,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,kEAAkE;YAClE,mEAAmE;YACnE,IAAI,GAAG,SAAS,CAAC;YACjB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,uEAAuE;IACvE,gEAAgE;IAChE,IAAI,SAAS,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,CAAC,IAAI,KAAK,iBAAiB,IAAI,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;QAClG,UAAU
,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,eAAe,GACnB,IAAI,KAAK,iBAAiB,IAAI,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAE/E,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC;AACxD,CAAC"}
1
+ {"version":3,"file":"site-classifier.js","sourceRoot":"","sources":["../src/site-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AA4CH,oEAAoE;AACpE,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,wBAAwB;IACxB,yBAAyB;IACzB,kBAAkB;IAClB,sBAAsB;IACtB,+BAA+B;CAChC,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,4DAA4D;IAC5D,IAAI,CAAC,GAAG,QAAQ,IAAI,GAAG,CAAC;IACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACxD,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB;IAC7D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC;QAC3E,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,GAAG,GAAa,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACzC,IAAI,GAAG,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAC1B,wBAAwB;QACxB,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,gEAAgE;QAChE,6DAA6D;QAC7D,kEAAkE;QAClE,iEAAiE;QACjE,oCAAoC;QACpC,IAAI,4BAA4B,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC3C,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,mEAAmE;QACnE,kEAAkE;QAClE,oCAAoC;QACpC,IAAI,GAAG,CAAC,MAAM,IAAI,EAAE,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7C,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,qEAAqE;AACrE,SAAS,SAAS,CAAC,GAAW;IAC5B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC;IACtC,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;QAClD,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,qEAAqE;AACrE,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC
;QACvB,OAAO,uBAAuB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,uBAAuB;QACvB,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,mBAAmB,CAAC,IAAc;IAChD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK,IAAI;YAAE,SAAS;QACzB,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxC,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACvE,QAAQ;QACR,KAAK;QACL,KAAK,EAAE,KAAK,GAAG,KAAK;KACrB,CAAC,CAAC,CAAC;IACJ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IAClF,OAAO,OAAO,CAAC;AACjB,CAAC;AASD;;;;;GAKG;AACH,MAAM,kBAAkB,GAAsB;IAC5C,OAAO;IACP,MAAM;IACN,gBAAgB;IAChB,MAAM;IACN,WAAW;IACX,gBAAgB;IAChB,YAAY;IACZ,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,YAAY;IACZ,SAAS;IACT,WAAW;CACZ,CAAC;AAEF;;;GAGG;AACH,MAAM,uBAAuB,GAAsB;IACjD,WAAW;IACX,UAAU;IACV,cAAc;IACd,aAAa;IACb,OAAO;IACP,QAAQ;IACR,OAAO;IACP,WAAW;IACX,WAAW;IACX,aAAa;IACb,IAAI,EAAE,+BAA+B;CACtC,CAAC;AAEF,SAAS,mBAAmB,CAAC,QAAgB,EAAE,QAA2B;IACxE,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;IAC9C,OAAO,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAClC,CAAC;AAED,wEAAwE;AACxE,SAAS,eAAe,CAAC,IAAc,EAAE,QAA2B;IAClE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,KAAK,GAAG,CAAC,CA
AC;IACd,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,KAAK,IAAI;YAAE,SAAS;QAC5B,KAAK,IAAI,CAAC,CAAC;QACX,IAAI,mBAAmB,CAAC,IAAI,EAAE,QAAQ,CAAC;YAAE,IAAI,IAAI,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,IAAI,GAAG,KAAK,CAAC;AACtB,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,eAAe,CACtB,IAAc,EACd,OAA+B;IAE/B,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAClC,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;IACxD,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,IAAI,CAAC;IAC7B,IAAI,UAAU,GAAG,GAAG,CAAC;IACrB,IAAI,KAAK,IAAI,IAAI;QAAE,UAAU,GAAG,IAAI,CAAC;IACrC,IAAI,KAAK,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC;IACpC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC,CAAC;IACvE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;AACpE,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,oBAAoB,CAC3B,IAAc,EACd,OAA+B;IAE/B,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAClC,MAAM,aAAa,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;IAC3E,IAAI,aAAa,IAAI,GAAG,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,aAAa,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QACvD,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACzE,CAAC;IACD,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,EAAE,uBAAuB,CAAC,CAAC;IAClE,IAAI,UAAU,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QAC5C,MAAM,UAAU,GAAG,UAAU,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;QACnD,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IACzE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,0BAA0B,CAAC;AAExD,SAAS,eAAe,CAAC,QAAgB;IACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,OAAO,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC;AAED,uEAAuE;AACvE,SAAS,cAAc,CAAC,IAAc;IACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IA
AI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,KAAK,IAAI;YAAE,SAAS;QAC5B,KAAK,IAAI,CAAC,CAAC;QACX,IAAI,eAAe,CAAC,IAAI,CAAC;YAAE,IAAI,IAAI,CAAC,CAAC;IACvC,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,IAAI,GAAG,KAAK,CAAC;AACtB,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAS,6BAA6B,CACpC,IAAc,EACd,OAA+B;IAE/B,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,IAAI,CAAC;IAC7B,wEAAwE;IACxE,sEAAsE;IACtE,qEAAqE;IACrE,sBAAsB;IACtB,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,KAAK,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC;IACpC,IAAI,KAAK,IAAI,GAAG;QAAE,UAAU,GAAG,IAAI,CAAC;IACpC,OAAO;QACL,IAAI,EAAE,iBAAiB;QACvB,UAAU;QACV,OAAO;QACP,eAAe,EAAE,CAAC,GAAG,kBAAkB,CAAC;KACzC,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,KAAwB;IACnD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACzD,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,SAAS,CAAC;IAE/C,MAAM,OAAO,GAA2B,EAAE,CAAC;IAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,mBAAmB,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAEhE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5E,IAAI,GAAG,EAAE,CAAC;QACR,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,8BAA8B;YACpC,WAAW,EAAE,GAAG,CAAC,QAAQ;YACzB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,oBAAoB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IAE/D,8CAA8C;IAC9C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IAC1E,CAAC;IAED,wEAAwE;IACxE,uEAAuE;IACvE,gEAAgE;IAChE,0EAA0E;IAC1E,yEAAyE;IACzE,oEAAoE;IACpE,6EAA6E;IAC7E,yEAAyE;IACzE,0EAA0E;IAC1E,iDAAiD;IACjD,0EAA0E;IAC1E,MAAM,SAAS,GAAG,oBAAoB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACtD
,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC;IAEhC,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5C,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAEtB,MAAM,SAAS,GAAG,6BAA6B,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC;IAEhC,sBAAsB;IACtB,IAAI,IAAI,GAAa,SAAS,CAAC;IAC/B,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,wEAAwE;IACxE,uEAAuE;IACvE,0EAA0E;IAC1E,gDAAgD;IAChD,2EAA2E;IAC3E,sEAAsE;IACtE,sCAAsC;IACtC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,GAAG,wBAAwB,CAAC;YAChC,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,oEAAoE;YACpE,qDAAqD;YACrD,IAAI,GAAG,WAAW,CAAC;YACnB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;QAClD,IAAI,GAAG,wBAAwB,CAAC;QAChC,UAAU,GAAG,IAAI,CAAC;IACpB,CAAC;SAAM,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QAC5B,mEAAmE;QACnE,EAAE;QACF,qEAAqE;QACrE,mEAAmE;QACnE,qEAAqE;QACrE,qEAAqE;QACrE,sEAAsE;QACtE,MAAM,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CACvC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,aAAa,CAC1F,CAAC;QACF,MAAM,gBAAgB,GACpB,GAAG,KAAK,SAAS;YACjB,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;YACnD,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC;QACnB,IAAI,YAAY,IAAI,YAAY,CAAC,KAAK,IAAI,GAAG,EAAE,CAAC;YAC9C,IAAI,GAAG,MAAM,CAAC;YACd,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7B,gEAAgE;YAChE,IAAI,GAAG,iBAAiB,CAAC;YACzB,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,+DAA+D;YAC/D,qEAAqE;YACrE,qEAAqE;YACrE,eAAe;YACf,IAAI,GAAG,SAAS,CAAC;YACjB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,uDAAuD;QACvD,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,GAAG,wBAAwB,CAAC;YAChC,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,kEAAkE;YAClE,mEAAmE;YACnE,IAAI,GAAG,SAAS,CAAC;YACjB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,uEAAuE;IACvE,gEAAgE;IAChE,IAAI,SAAS,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,CAAC,IAAI,KAAK,iBAAiB,IAAI,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;QAClG,UAAU
,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,eAAe,GACnB,IAAI,KAAK,iBAAiB,IAAI,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAE/E,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC;AACxD,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,sBAAsB,CACpC,cAAkC,EAClC,WAAwF;IAExF,IAAI,cAAc,CAAC,IAAI,KAAK,iBAAiB,IAAI,cAAc,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAChF,OAAO,cAAc,CAAC;IACxB,CAAC;IACD,IAAI,WAAW,CAAC,SAAS,KAAK,CAAC;QAAE,OAAO,cAAc,CAAC;IAEvD,MAAM,MAAM,GAAG,WAAW,CAAC,eAAe,GAAG,EAAE,CAAC;IAChD,0EAA0E;IAC1E,yEAAyE;IACzE,6BAA6B;IAC7B,MAAM,qBAAqB,GACzB,WAAW,CAAC,mBAAmB,IAAI,GAAG,IAAI,WAAW,CAAC,SAAS,IAAI,CAAC,CAAC;IAEvE,IAAI,CAAC,MAAM,IAAI,CAAC,qBAAqB;QAAE,OAAO,cAAc,CAAC;IAE7D,MAAM,MAAM,GAA4C,MAAM;QAC5D,CAAC,CAAC,aAAa;QACf,CAAC,CAAC,uBAAuB,CAAC;IAC5B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,eAAe,CAAC,CAAC,CAAC,WAAW,CAAC,mBAAmB,CAAC;IAErF,OAAO;QACL,IAAI,EAAE,SAAS;QACf,UAAU,EAAE,CAAC;QACb,OAAO,EAAE,CAAC,GAAG,cAAc,CAAC,OAAO,EAAE,EAAE,IAAI,EAAE,4BAA4B,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;QAC3F,eAAe,EAAE,EAAE;KACpB,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,oBAAoB,CAAC,KAA4D;IAK/F,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,eAAe,EAAE,CAAC,EAAE,mBAAmB,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;IACtE,CAAC;IACD,MAAM,UAAU,GAAG,KAAK;SACrB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;SAC7D,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACzB,MAAM,eAAe,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAEtE,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC9E,IAAI,mBAAmB,GAAG,CAAC,CAAC;IAC5B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,uEAAuE;QACvE,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,
CAAC,CAAC;QAChE,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QACpD,mBAAmB,GAAG,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC;IACtD,CAAC;IAED,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;AAC3E,CAAC"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * v0.6 Template Detection — Phase 1 of the audit-as-template pipeline.
3
+ *
4
+ * Clusters discovered URLs by path template, filters to qualifying clusters
5
+ * (ratio ≥ 1% AND count ≥ 5), and returns TemplateCandidate[] with each
6
+ * cluster's signature, URL list, and coverage metadata.
7
+ *
8
+ * Activation gating: callers must check that ≥ 2 candidates survive before
9
+ * switching from the legacy single-template path.
10
+ *
11
+ * See spec §6.1 and §15.3.
12
+ */
13
+ /** Synthetic signature for the long-tail bucket. */
14
+ export declare const LONGTAIL_SIGNATURE = "_longtail";
15
+ /** A qualifying URL-template cluster. */
16
+ export interface TemplateCandidate {
17
+ /** Normalized path pattern, e.g. "/listing/:slug" */
18
+ signature: string;
19
+ /** All URLs in the discovered set belonging to this cluster. */
20
+ urls: string[];
21
+ /** Count of URLs in this cluster. */
22
+ count: number;
23
+ /** Ratio of URLs in this cluster vs total discovered. */
24
+ ratio: number;
25
+ }
26
+ /**
27
+ * Detect qualifying URL templates from a list of discovered URLs.
28
+ *
29
+ * Returns TemplateCandidate[] for clusters meeting both the ratio ≥ 1% AND count ≥ 5
30
+ * thresholds. URLs not matching any qualifying cluster are grouped into a
31
+ * synthetic "_longtail" candidate (when at least one URL falls through).
32
+ *
33
+ * A URL is attributed to the FIRST qualifying cluster whose signature matches
34
+ * (clusters are sorted by descending count, so the most specific/largest
35
+ * cluster wins in the case of ambiguity — though URL-to-template mapping is
36
+ * many-to-one: each URL → exactly one normalized template).
37
+ */
38
+ export declare function detectTemplates(urls: string[]): TemplateCandidate[];
39
+ /**
40
+ * Given a list of TemplateCandidate[] (as returned by detectTemplates),
41
+ * build a lookup map from URL → template signature for fast per-finding tagging.
42
+ *
43
+ * Long-tail URLs map to LONGTAIL_SIGNATURE.
44
+ */
45
+ export declare function buildUrlToTemplateMap(candidates: TemplateCandidate[]): Map<string, string>;
46
+ /**
47
+ * Check whether the v0.6 template path should activate.
48
+ * Requires ≥ 2 QUALIFYING (non-longtail) templates per §15.3.
49
+ */
50
+ export declare function shouldActivateTemplateScoring(candidates: TemplateCandidate[]): boolean;
51
+ //# sourceMappingURL=template-detection.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"template-detection.d.ts","sourceRoot":"","sources":["../src/template-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAUH,oDAAoD;AACpD,eAAO,MAAM,kBAAkB,cAAc,CAAC;AAE9C,yCAAyC;AACzC,MAAM,WAAW,iBAAiB;IAChC,qDAAqD;IACrD,SAAS,EAAE,MAAM,CAAC;IAClB,gEAAgE;IAChE,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,qCAAqC;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,yDAAyD;IACzD,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,iBAAiB,EAAE,CAmDnE;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,UAAU,EAAE,iBAAiB,EAAE,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAQ1F;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAAC,UAAU,EAAE,iBAAiB,EAAE,GAAG,OAAO,CAGtF"}