langtell 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -76,6 +76,27 @@ const detect = compile({ candidates: [uk, ru, en], engines: [chromeAiEngine] });
76
76
  const result = await detect({ text }); // Promise<Classification>
77
77
  ```
78
78
 
79
+ Need more than "what language + how sure"? The default `Classification` collapses
80
+ the candidate-relative ladder into one `confidence` float. When you need the raw
81
+ structure — _which_ rung decided (distinctive letters → function words → frequent
82
+ words → optional trigram backstop) and the **margin** (the winner's lead
83
+ over the runner-up; an integer count on rungs 1–2, a franc score gap on the optional rung 3) — reach for the opt-in `langtell/classify` door. It stays
84
+ zero-dependency and franc-free; scoring is relative to the roster you pass in.
85
+
86
+ ```ts
87
+ import { classifyBySnippet } from "langtell/classify";
88
+ import { uk, ru } from "langtell/profiles";
89
+
90
+ classifyBySnippet("Слава Україні", [uk, ru]);
91
+ // → { language: "uk", margin: 2, rung: 1, discriminating: true } (a distinctive letter)
92
+ classifyBySnippet("Кофе и чай", [uk, ru]);
93
+ // → { language: "ru", margin: 1, rung: "2a", … } (a function-word marker)
94
+ ```
95
+
96
+ This powers per-rung safety gates ("act only when a _weak_ rung clears a high
97
+ margin") and diagnostics — uses that a single confidence number can't serve. The
98
+ high-level `compile`/`detect`/`fuse` output is unchanged; this is purely additive.
99
+
79
100
  ## API at a glance
80
101
 
81
102
  | Export | Role |
@@ -88,6 +109,7 @@ const result = await detect({ text }); // Promise<Classification>
88
109
  | `normalizeBCP47(tag)` | Normalize a BCP-47 tag/alias to a canonical code (`uk-UA`/`ua` → `uk`). |
89
110
  | `fuse(evidence, opts?)` | Weighted blend + "context never overrides clear script" guard. |
90
111
  | `langtell/profiles` | Ready-made `LanguageProfile` data (uk/ru/be/bg/en). Opt-in (carries word data). |
112
+ | `langtell/classify` | Opt-in structured snippet verdict (`{ language, margin, rung, discriminating }`). Zero-dep. |
91
113
  | `langtell/franc` | Opt-in franc engine (pulls trigram tables). Sync. |
92
114
  | `langtell/chrome-ai` | Opt-in on-device Chrome AI engine (browser). Async. |
93
115
 
@@ -1,4 +1,4 @@
1
- import { A as AsyncSource } from './types-D4Ux-xA6.js';
1
+ import { A as AsyncSource } from './types-BIXrkuAr.js';
2
2
 
3
3
  /**
4
4
  * `langtell/chrome-ai` — the opt-in on-device engine wrapping the browser's
@@ -19,13 +19,14 @@ var MIN_MARGIN = 0.12;
19
19
  function fuse(evidence, options = {}) {
20
20
  const weights = options.weights ?? {};
21
21
  const normalized = normalizeEvidence(evidence, options.candidates);
22
+ const scoring = options.nonDiscriminatingScript === "unknown" ? normalized.filter((item) => !isNeutralized(item, normalized)) : normalized;
22
23
  const scores = /* @__PURE__ */ new Map();
23
- for (const item of normalized) {
24
+ for (const item of scoring) {
24
25
  if (item.language === "unknown") continue;
25
26
  const weight = weights[item.source] ?? weights[item.kind] ?? DEFAULT_KIND_WEIGHT[item.kind] ?? 0.5;
26
27
  scores.set(item.language, (scores.get(item.language) ?? 0) + clamp01(item.confidence) * weight);
27
28
  }
28
- const pinned = confidentScriptLanguage(normalized);
29
+ const pinned = confidentScriptLanguage(scoring);
29
30
  const { best, bestScore, secondScore } = argmax(scores, pinned);
30
31
  if (best === null || bestScore < MIN_WINNING_SCORE || bestScore - secondScore < MIN_MARGIN) {
31
32
  if (pinned !== null && scores.has(pinned)) {
@@ -52,6 +53,12 @@ function normalizeEvidence(evidence, _candidates) {
52
53
  return { ...item, language: normalized };
53
54
  });
54
55
  }
56
+ function isNeutralized(item, all) {
57
+ if (item.discriminating !== false || !SCRIPT_KINDS.has(item.kind)) return false;
58
+ return !all.some(
59
+ (other) => other.language === item.language && other.language !== "unknown" && !SCRIPT_KINDS.has(other.kind)
60
+ );
61
+ }
55
62
  function confidentScriptLanguage(evidence) {
56
63
  let best = null;
57
64
  let bestConfidence = 0;
@@ -98,5 +105,5 @@ function clamp01(value) {
98
105
  }
99
106
 
100
107
  export { fuse };
101
- //# sourceMappingURL=chunk-TYSRYQN7.js.map
102
- //# sourceMappingURL=chunk-TYSRYQN7.js.map
108
+ //# sourceMappingURL=chunk-7G3MEXWK.js.map
109
+ //# sourceMappingURL=chunk-7G3MEXWK.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/fuse.ts"],"names":[],"mappings":";;;AA0BA,IAAM,mBAAA,GAA8C;AAAA,EAClD,cAAA,EAAgB,CAAA;AAAA,EAChB,iBAAA,EAAmB,CAAA;AAAA,EACnB,WAAA,EAAa,CAAA;AAAA,EACb,cAAA,EAAgB,GAAA;AAAA,EAChB,KAAA,EAAO,GAAA;AAAA,EACP,uBAAA,EAAyB,GAAA;AAAA,EACzB,uBAAA,EAAyB,IAAA;AAAA,EACzB,gBAAA,EAAkB,IAAA;AAAA,EAClB,WAAA,EAAa;AACf,CAAA;AAKA,IAAM,+BAAe,IAAI,GAAA,CAAY,CAAC,cAAA,EAAgB,OAAA,EAAS,WAAW,CAAC,CAAA;AAI3E,IAAM,uBAAA,GAA0B,GAAA;AAEhC,IAAM,iBAAA,GAAoB,IAAA;AAC1B,IAAM,UAAA,GAAa,IAAA;AAcZ,SAAS,IAAA,CACd,QAAA,EACA,OAAA,GAAuB,EAAC,EACR;AAChB,EAAA,MAAM,OAAA,GAAU,OAAA,CAAQ,OAAA,IAAW,EAAC;AACpC,EAAA,MAAM,UAAA,GAAa,iBAAA,CAAkB,QAAA,EAAU,OAAA,CAAQ,UAAU,CAAA;AAKjE,EAAA,MAAM,OAAA,GACJ,OAAA,CAAQ,uBAAA,KAA4B,SAAA,GAChC,UAAA,CAAW,MAAA,CAAO,CAAC,IAAA,KAAS,CAAC,aAAA,CAAc,IAAA,EAAM,UAAU,CAAC,CAAA,GAC5D,UAAA;AAEN,EAAA,MAAM,MAAA,uBAAa,GAAA,EAAoB;AACvC,EAAA,KAAA,MAAW,QAAQ,OAAA,EAAS;AAC1B,IAAA,IAAI,IAAA,CAAK,aAAa,SAAA,EAAW;AACjC,IAAA,MAAM,MAAA,GACJ,OAAA,CAAQ,IAAA,CAAK,MAAM,CAAA,IAAK,OAAA,CAAQ,IAAA,CAAK,IAAI,CAAA,IAAK,mBAAA,CAAoB,IAAA,CAAK,IAAI,CAAA,IAAK,GAAA;AAClF,IAAA,MAAA,CAAO,GAAA,CAAI,IAAA,CAAK,QAAA,EAAA,CAAW,MAAA,CAAO,GAAA,CAAI,IAAA,CAAK,QAAQ,CAAA,IAAK,CAAA,IAAK,OAAA,CAAQ,IAAA,CAAK,UAAU,IAAI,MAAM,CAAA;AAAA,EAChG;AAGA,EAAA,MAAM,MAAA,GAAS,wBAAwB,OAAO,CAAA;AAE9C,EAAA,MAAM,EAAE,IAAA,EAAM,SAAA,EAAW,aAAY,GAAI,MAAA,CAAO,QAAQ,MAAM,CAAA;AAE9D,EAAA,IAAI,SAAS,IAAA,IAAQ,SAAA,GAAY,iBAAA,IAAqB,SAAA,GAAY,cAAc,UAAA,EAAY;AAG1F,IAAA,IAAI,MAAA,KAAW,IAAA,IAAQ,MAAA,CAAO,GAAA,CAAI,MAAM,CAAA,EAAG;AACzC,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,GAAA,CAAI,MAAM,CAAA,IAAK,CAAA;AACpC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,UAAA,EAAY,OAAA,CAAQ,KAAA,IAAS,KAAA,GAAQ,IAAA,CAAK,CAAA;AAAA,QAC1C,QAAA,EAAU,CAAC,GAAG,UAAU;AAAA,OAC1B;AAAA,IACF;AACA,IAAA,OAAO,EAAE,QAAA,EAAU,SAAA,EAAW,UAAA,EAAY,OAAA,CAAQ,SAAS,CAAA,EAAG,QAAA,EAAU,CAAC,GAAG,UAAU,CAAA,EAAE;AAAA,EAC1F;AAEA,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,IAAA;AAAA,IACV,UAAA,EAAY,OAAA,CAAQ,SAAA,IAAa,SAAA,GAAY,cAAc,IAAA,CAAK,CAAA;AAAA,IAChE,QAAA,EAAU,CAAC,GAAG,UAAU;AAAA,GAC1B;AAC
F;AAYA,SAAS,iBAAA,CACP,UACA,WAAA,EACoB;AACpB,EAAA,OAAO,QAAA,CAAS,GAAA,CAAI,CAAC,IAAA,KAAS;AAC5B,IAAA,IAAI,IAAA,CAAK,QAAA,KAAa,SAAA,EAAW,OAAO,IAAA;AACxC,IAAA,MAAM,UAAA,GAAa,cAAA,CAAe,IAAA,CAAK,QAAQ,KAAK,IAAA,CAAK,QAAA;AACzD,IAAA,IAAI,UAAA,KAAe,IAAA,CAAK,QAAA,EAAU,OAAO,IAAA;AACzC,IAAA,OAAO,EAAE,GAAG,IAAA,EAAM,QAAA,EAAU,UAAA,EAAW;AAAA,EACzC,CAAC,CAAA;AACH;AAUA,SAAS,aAAA,CAAc,MAAwB,GAAA,EAA2C;AACxF,EAAA,IAAI,IAAA,CAAK,mBAAmB,KAAA,IAAS,CAAC,aAAa,GAAA,CAAI,IAAA,CAAK,IAAI,CAAA,EAAG,OAAO,KAAA;AAC1E,EAAA,OAAO,CAAC,GAAA,CAAI,IAAA;AAAA,IACV,CAAC,KAAA,KACC,KAAA,CAAM,QAAA,KAAa,IAAA,CAAK,QAAA,IACxB,KAAA,CAAM,QAAA,KAAa,SAAA,IACnB,CAAC,YAAA,CAAa,GAAA,CAAI,MAAM,IAAI;AAAA,GAChC;AACF;AAKA,SAAS,wBAAwB,QAAA,EAAsD;AACrF,EAAA,IAAI,IAAA,GAAsB,IAAA;AAC1B,EAAA,IAAI,cAAA,GAAiB,CAAA;AACrB,EAAA,KAAA,MAAW,QAAQ,QAAA,EAAU;AAC3B,IAAA,IAAI,IAAA,CAAK,aAAa,SAAA,IAAa,CAAC,aAAa,GAAA,CAAI,IAAA,CAAK,IAAI,CAAA,EAAG;AACjE,IAAA,MAAM,CAAA,GAAI,OAAA,CAAQ,IAAA,CAAK,UAAU,CAAA;AACjC,IAAA,IAAI,IAAI,uBAAA,EAAyB;AACjC,IAAA,IAAI,IAAI,cAAA,EAAgB;AACtB,MAAA,cAAA,GAAiB,CAAA;AACjB,MAAA,IAAA,GAAO,IAAA,CAAK,QAAA;AAAA,IACd,CAAA,MAAA,IAAW,CAAA,KAAM,cAAA,IAAkB,IAAA,CAAK,aAAa,IAAA,EAAM;AAEzD,MAAA,IAAA,GAAO,IAAA;AAAA,IACT;AAAA,EACF;AACA,EAAA,OAAO,IAAA;AACT;AAQA,SAAS,MAAA,CACP,QACA,MAAA,EACiE;AACjE,EAAA,IAAI,IAAA,GAAsB,IAAA;AAC1B,EAAA,IAAI,SAAA,GAAY,CAAA;AAChB,EAAA,IAAI,WAAA,GAAc,CAAA;AAClB,EAAA,MAAM,cAAc,MAAA,KAAW,IAAA,GAAQ,OAAO,GAAA,CAAI,MAAM,KAAK,CAAA,GAAK,CAAA;AAElE,EAAA,KAAA,MAAW,CAAC,QAAA,EAAU,GAAG,CAAA,IAAK,MAAA,EAAQ;AAEpC,IAAA,MAAM,KAAA,GAAQ,WAAW,IAAA,IAAQ,QAAA,KAAa,SAAS,IAAA,CAAK,GAAA,CAAI,GAAA,EAAK,WAAW,CAAA,GAAI,GAAA;AACpF,IAAA,IAAI,QAAQ,SAAA,EAAW;AACrB,MAAA,WAAA,GAAc,SAAA;AACd,MAAA,SAAA,GAAY,KAAA;AACZ,MAAA,IAAA,GAAO,QAAA;AAAA,IACT,CAAA,MAAA,IAAW,QAAQ,WAAA,EAAa;AAC9B,MAAA,WAAA,GAAc,KAAA;AAAA,IAChB;AAAA,EACF;AAEA,EAAA,IAAI,WAAW,IAAA,IAAQ,IAAA,KAAS,UAAU,SAAA,KAAc,WAAA,IAAe,cAAc,CAAA,EAAG;AACtF,IAAA,WAAA,GAAc,SAAA;AACd,IAAA,IAAA,GAAO,MAAA;AACP,IAAA,SAAA,GAAY,WAAA;AAAA,EACd;AACA,EAAA,OAAO,EAAE,IAAA,EAAM,SAAA,EAAW,WAAA,EAA
Y;AACxC;AAEA,SAAS,QAAQ,KAAA,EAAuB;AACtC,EAAA,IAAI,CAAC,MAAA,CAAO,QAAA,CAAS,KAAK,GAAG,OAAO,CAAA;AACpC,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,OAAO,KAAA;AACT","file":"chunk-7G3MEXWK.js","sourcesContent":["import type {\n Classification,\n LanguageEvidence,\n LanguageProfile,\n NonDiscriminatingScript,\n Weights,\n} from \"./types.js\";\nimport { normalizeBCP47 } from \"./internal/bcp47.js\";\n\nexport interface FuseOptions {\n weights?: Weights;\n /** The candidate roster. When present, incoming evidence tags are normalized\n * into it (`uk-UA` → `uk`, `ua` → `uk`) so context signals (page/header\n * locale) land on the same code the text rungs use. */\n candidates?: readonly LanguageProfile[];\n /** How to resolve a *non-discriminating* script read (one flagged\n * `discriminating: false` — its winning script owned by ≤1 roster candidate).\n * Default `\"candidate\"` keeps current behavior; `\"unknown\"` drops such a read\n * unless non-script evidence corroborates the same language. See\n * {@link NonDiscriminatingScript}. */\n nonDiscriminatingScript?: NonDiscriminatingScript;\n}\n\n/** Default per-kind weights. Clear lexical signal (script, explicit locale)\n * outweighs contextual signal (page tags, headers). Callers override per\n * `source` id or `kind` via {@link FuseOptions.weights}. */\nconst DEFAULT_KIND_WEIGHT: Record<string, number> = {\n \"title-script\": 1,\n \"explicit-locale\": 1,\n \"chrome-ai\": 1,\n \"source-prior\": 0.7,\n franc: 0.7,\n \"http-content-language\": 0.6,\n \"meta-content-language\": 0.55,\n \"meta-og-locale\": 0.55,\n \"html-lang\": 0.5,\n};\n\n/** Evidence kinds that constitute *clear script evidence* — a verdict the text\n * classifier or an on-device model reached by actually reading the string. The\n * guard below forbids weaker page/header *context* from flipping these. 
*/\nconst SCRIPT_KINDS = new Set<string>([\"title-script\", \"franc\", \"chrome-ai\"]);\n\n/** A script verdict this confident is treated as settled — context may add to it\n * but must not flip the winner to a different language. */\nconst SCRIPT_CONFIDENCE_FLOOR = 0.6;\n\nconst MIN_WINNING_SCORE = 0.35;\nconst MIN_MARGIN = 0.12;\n\n/**\n * Combine evidence into a single weighted verdict with an audit trail.\n *\n * Three steps:\n * 1. Normalize each item's language tag into the candidate roster (BCP-47:\n * `uk-UA`/`ua` → `uk`) so text, page, and header signals agree on a code.\n * 2. Weighted argmax over languages (caller weights override per `source`/`kind`).\n * 3. Apply the guard **context must never override clear script evidence**: when\n * the text classifier (or an on-device model) confidently read one language,\n * weaker page/header context for a *different* language cannot win — a\n * Ukrainian page chrome does not make a Latin/English title Ukrainian.\n */\nexport function fuse(\n evidence: readonly LanguageEvidence[],\n options: FuseOptions = {},\n): Classification {\n const weights = options.weights ?? {};\n const normalized = normalizeEvidence(evidence, options.candidates);\n\n // Under `\"unknown\"`, a non-discriminating script read scores nothing on its own\n // — it's dropped from the tally and the pin below — but stays in the trail. The\n // full `normalized` set is still returned as evidence.\n const scoring =\n options.nonDiscriminatingScript === \"unknown\"\n ? normalized.filter((item) => !isNeutralized(item, normalized))\n : normalized;\n\n const scores = new Map<string, number>();\n for (const item of scoring) {\n if (item.language === \"unknown\") continue;\n const weight =\n weights[item.source] ?? weights[item.kind] ?? DEFAULT_KIND_WEIGHT[item.kind] ?? 0.5;\n scores.set(item.language, (scores.get(item.language) ?? 
0) + clamp01(item.confidence) * weight);\n }\n\n // The context-vs-script guard: a confident script read pins the winner.\n const pinned = confidentScriptLanguage(scoring);\n\n const { best, bestScore, secondScore } = argmax(scores, pinned);\n\n if (best === null || bestScore < MIN_WINNING_SCORE || bestScore - secondScore < MIN_MARGIN) {\n // A pinned script language still wins even on a thin margin — clear script\n // evidence is never demoted to \"unknown\" by competing context.\n if (pinned !== null && scores.has(pinned)) {\n const score = scores.get(pinned) ?? 0;\n return {\n language: pinned,\n confidence: clamp01(score / (score + 0.15)),\n evidence: [...normalized],\n };\n }\n return { language: \"unknown\", confidence: clamp01(bestScore), evidence: [...normalized] };\n }\n\n return {\n language: best,\n confidence: clamp01(bestScore / (bestScore + secondScore + 0.15)),\n evidence: [...normalized],\n };\n}\n\n/** Normalize each item's tag into the roster's code space (BCP-47-aware). Items\n * already `\"unknown\"` pass through untouched. Tags are BCP-47-normalized\n * (`en-US` → `en`, `ua` → `uk`) so text, page, and header signals land on the\n * same code. The normalized code is kept even when it falls outside the roster —\n * argmax simply won't favor an out-of-roster context tag, but it stays in the\n * audit trail.\n *\n * The roster is accepted (and reserved) so a future revision can fold roster\n * aliasing in without a signature change; today BCP-47 normalization alone\n * reconciles the codes the producers emit. */\nfunction normalizeEvidence(\n evidence: readonly LanguageEvidence[],\n _candidates: readonly LanguageProfile[] | undefined,\n): LanguageEvidence[] {\n return evidence.map((item) => {\n if (item.language === \"unknown\") return item;\n const normalized = normalizeBCP47(item.language) ?? 
item.language;\n if (normalized === item.language) return item;\n return { ...item, language: normalized };\n });\n}\n\n/**\n * Whether a non-discriminating script read should score nothing (mode\n * `\"unknown\"`). True when `item` is a script kind flagged `discriminating:\n * false` (its winning script is owned by ≤1 roster candidate) AND no *non-script*\n * evidence corroborates its language. Corroboration must come from context kinds\n * (page tags, headers): two lone-candidate script reads agreeing is still two\n * defaults, not real evidence — so script kinds never corroborate one another.\n */\nfunction isNeutralized(item: LanguageEvidence, all: readonly LanguageEvidence[]): boolean {\n if (item.discriminating !== false || !SCRIPT_KINDS.has(item.kind)) return false;\n return !all.some(\n (other) =>\n other.language === item.language &&\n other.language !== \"unknown\" &&\n !SCRIPT_KINDS.has(other.kind),\n );\n}\n\n/** The language of a *clear script* read confident enough to pin the verdict, or\n * `null` when none qualifies. When two script reads disagree, the higher-\n * confidence one pins (a tie leaves nothing pinned — argmax decides normally). */\nfunction confidentScriptLanguage(evidence: readonly LanguageEvidence[]): string | null {\n let best: string | null = null;\n let bestConfidence = 0;\n for (const item of evidence) {\n if (item.language === \"unknown\" || !SCRIPT_KINDS.has(item.kind)) continue;\n const c = clamp01(item.confidence);\n if (c < SCRIPT_CONFIDENCE_FLOOR) continue;\n if (c > bestConfidence) {\n bestConfidence = c;\n best = item.language;\n } else if (c === bestConfidence && item.language !== best) {\n // Two equally-confident script reads for different languages — ambiguous.\n best = null;\n }\n }\n return best;\n}\n\n/**\n * Weighted argmax. When `pinned` is set (a confident script language), any\n * *other* language's score may only come from context kinds; that score is\n * capped so it can never exceed the pinned language. 
This enforces the guard\n * without discarding the context from the audit trail.\n */\nfunction argmax(\n scores: Map<string, number>,\n pinned: string | null,\n): { best: string | null; bestScore: number; secondScore: number } {\n let best: string | null = null;\n let bestScore = 0;\n let secondScore = 0;\n const pinnedScore = pinned !== null ? (scores.get(pinned) ?? 0) : 0;\n\n for (const [language, raw] of scores) {\n // Guard: a non-pinned language cannot out-score the pinned one.\n const score = pinned !== null && language !== pinned ? Math.min(raw, pinnedScore) : raw;\n if (score > bestScore) {\n secondScore = bestScore;\n bestScore = score;\n best = language;\n } else if (score > secondScore) {\n secondScore = score;\n }\n }\n // On a pinned tie (pinned capped equal to a context language), prefer pinned.\n if (pinned !== null && best !== pinned && bestScore === pinnedScore && pinnedScore > 0) {\n secondScore = bestScore;\n best = pinned;\n bestScore = pinnedScore;\n }\n return { best, bestScore, secondScore };\n}\n\nfunction clamp01(value: number): number {\n if (!Number.isFinite(value)) return 0;\n if (value < 0) return 0;\n if (value > 1) return 1;\n return value;\n}\n"]}
@@ -1,5 +1,11 @@
1
1
  // src/internal/classify.ts
2
- var UNKNOWN = { language: "unknown", margin: 0, rung: null };
2
+ var FRANC_RUNG = 3;
3
+ var UNKNOWN = {
4
+ language: "unknown",
5
+ margin: 0,
6
+ rung: null,
7
+ discriminating: false
8
+ };
3
9
  var CYRILLIC_RE = /\p{Script=Cyrillic}/u;
4
10
  var LATIN_RE = /\p{Script=Latin}/u;
5
11
  var NOISE_PATTERNS = [
@@ -111,13 +117,15 @@ function classifyBySnippet(text, candidates, rung3) {
111
117
  const cleaned = stripNoise(text);
112
118
  const scoped = scopeCandidates(cleaned, candidates);
113
119
  if (scoped.length === 0) return UNKNOWN;
120
+ const discriminating = scoped.length >= 2;
114
121
  const byLetter = letterRung(cleaned, scoped);
115
- if (byLetter) return byLetter;
122
+ if (byLetter) return { ...byLetter, discriminating };
116
123
  const tokens = tokenize(cleaned);
117
124
  if (tokens.length === 0) return UNKNOWN;
118
- return wordRung(tokens, scoped, "function", "2a") ?? wordRung(tokens, scoped, "frequent", "2b") ?? rung3?.(cleaned, scoped) ?? UNKNOWN;
125
+ const byWord = wordRung(tokens, scoped, "function", "2a") ?? wordRung(tokens, scoped, "frequent", "2b") ?? rung3?.(cleaned, scoped);
126
+ return byWord ? { ...byWord, discriminating } : UNKNOWN;
119
127
  }
120
128
 
121
- export { classifyBySnippet, scopeCandidates };
122
- //# sourceMappingURL=chunk-RFR5I7P7.js.map
123
- //# sourceMappingURL=chunk-RFR5I7P7.js.map
129
+ export { FRANC_RUNG, classifyBySnippet, scopeCandidates };
130
+ //# sourceMappingURL=chunk-NCGZPEDA.js.map
131
+ //# sourceMappingURL=chunk-NCGZPEDA.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/internal/classify.ts"],"names":[],"mappings":";AA0BO,IAAM,UAAA,GAAa;AA0B1B,IAAM,OAAA,GAA0B;AAAA,EAC9B,QAAA,EAAU,SAAA;AAAA,EACV,MAAA,EAAQ,CAAA;AAAA,EACR,IAAA,EAAM,IAAA;AAAA,EACN,cAAA,EAAgB;AAClB,CAAA;AAYA,IAAM,WAAA,GAAc,sBAAA;AACpB,IAAM,QAAA,GAAW,mBAAA;AAkBjB,IAAM,cAAA,GAAoC;AAAA,EACxC,oBAAA;AAAA;AAAA,EACA,cAAA;AAAA;AAAA,EACA,2CAAA;AAAA;AAAA,EACA;AAAA;AACF,CAAA;AAIO,SAAS,WAAW,IAAA,EAAsB;AAC/C,EAAA,IAAI,GAAA,GAAM,IAAA;AACV,EAAA,KAAA,MAAW,MAAM,cAAA,EAAgB,GAAA,GAAM,GAAA,CAAI,OAAA,CAAQ,IAAI,GAAG,CAAA;AAC1D,EAAA,OAAO,GAAA;AACT;AAKA,SAAS,eAAe,IAAA,EAA2C;AACjE,EAAA,IAAI,GAAA,GAAM,CAAA;AACV,EAAA,IAAI,GAAA,GAAM,CAAA;AACV,EAAA,KAAA,MAAW,EAAA,IAAM,UAAA,CAAW,IAAI,CAAA,EAAG;AACjC,IAAA,IAAI,WAAA,CAAY,IAAA,CAAK,EAAE,CAAA,EAAG,GAAA,IAAO,CAAA;AAAA,SAAA,IACxB,QAAA,CAAS,IAAA,CAAK,EAAE,CAAA,EAAG,GAAA,IAAO,CAAA;AAAA,EACrC;AACA,EAAA,IAAI,GAAA,KAAQ,CAAA,IAAK,GAAA,KAAQ,CAAA,EAAG,OAAO,IAAA;AACnC,EAAA,OAAO,GAAA,IAAO,MAAM,UAAA,GAAa,OAAA;AACnC;AAGA,SAAS,cAAc,OAAA,EAAuD;AAC5E,EAAA,KAAA,MAAW,EAAA,IAAM,QAAQ,QAAA,EAAU;AACjC,IAAA,IAAI,WAAA,CAAY,IAAA,CAAK,EAAE,CAAA,EAAG,OAAO,UAAA;AACjC,IAAA,IAAI,QAAA,CAAS,IAAA,CAAK,EAAE,CAAA,EAAG,OAAO,OAAA;AAAA,EAChC;AACA,EAAA,OAAO,IAAA;AACT;AAIO,SAAS,eAAA,CACd,MACA,UAAA,EACmB;AACnB,EAAA,MAAM,MAAA,GAAS,eAAe,IAAI,CAAA;AAClC,EAAA,IAAI,MAAA,KAAW,IAAA,EAAM,OAAO,EAAC;AAI7B,EAAA,MAAM,IAAA,uBAAW,GAAA,EAAY;AAC7B,EAAA,MAAM,SAA4B,EAAC;AACnC,EAAA,KAAA,MAAW,KAAK,UAAA,EAAY;AAC1B,IAAA,IAAI,aAAA,CAAc,CAAC,CAAA,KAAM,MAAA,IAAU,KAAK,GAAA,CAAI,CAAA,CAAE,IAAI,CAAA,EAAG;AACrD,IAAA,IAAA,CAAK,GAAA,CAAI,EAAE,IAAI,CAAA;AACf,IAAA,MAAA,CAAO,KAAK,CAAC,CAAA;AAAA,EACf;AACA,EAAA,OAAO,MAAA;AACT;AA8BA,SAAS,SAAS,IAAA,EAAwB;AACxC,EAAA,OAAO,KAAK,WAAA,EAAY,CAAE,KAAA,CAAM,UAAU,KAAK,EAAC;AAClD;AAOA,SAAS,KAAA,CAAM,OAAyB,UAAA,EAAwD;AAC9F,EAAA,MAAM,MAAA,GAAS,IAAI,GAAA,CAAoB,UAAA,CAAW,GAAA,CAAI,CAAC,CAAA,KAAM,CAAC,CAAA,CAAE,IAAA,EAAM,CAAC,CAAC,CAAC,CAAA;AACzE,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,IAAA,IAAI,KAAA,GAAuB,IAAA;AAC3B,IAAA,IAAI,MAAA,GAAS,CAAA;AACb,IAAA,KAAA,MAAW,KAAK,
UAAA,EAAY;AAC1B,MAAA,IAAI,CAAA,CAAE,GAAA,CAAI,GAAA,CAAI,IAAI,CAAA,EAAG;AACnB,QAAA,MAAA,IAAU,CAAA;AACV,QAAA,IAAI,SAAS,CAAA,EAAG;AACd,UAAA,KAAA,GAAQ,IAAA;AACR,UAAA;AAAA,QACF;AACA,QAAA,KAAA,GAAQ,CAAA,CAAE,IAAA;AAAA,MACZ;AAAA,IACF;AACA,IAAA,IAAI,KAAA,KAAU,IAAA,EAAM,MAAA,CAAO,GAAA,CAAI,KAAA,EAAA,CAAQ,OAAO,GAAA,CAAI,KAAK,CAAA,IAAK,CAAA,IAAK,CAAC,CAAA;AAAA,EACpE;AACA,EAAA,OAAO,MAAA;AACT;AAGA,SAAS,OAAO,MAAA,EAAsE;AACpF,EAAA,IAAI,GAAA,GAAM,EAAA;AACV,EAAA,IAAI,MAAA,GAAS,EAAA;AACb,EAAA,IAAI,IAAA,GAAsB,IAAA;AAC1B,EAAA,KAAA,MAAW,CAAC,CAAA,EAAG,KAAK,CAAA,IAAK,MAAA,EAAQ;AAC/B,IAAA,IAAI,QAAQ,GAAA,EAAK;AACf,MAAA,MAAA,GAAS,GAAA;AACT,MAAA,GAAA,GAAM,KAAA;AACN,MAAA,IAAA,GAAO,CAAA;AAAA,IACT,CAAA,MAAA,IAAW,QAAQ,MAAA,EAAQ;AACzB,MAAA,MAAA,GAAS,KAAA;AAAA,IACX;AAAA,EACF;AACA,EAAA,IAAI,IAAA,KAAS,IAAA,IAAQ,GAAA,GAAM,CAAA,EAAG,OAAO,IAAA;AACrC,EAAA,MAAM,MAAA,GAAS,GAAA,GAAM,IAAA,CAAK,GAAA,CAAI,QAAQ,CAAC,CAAA;AACvC,EAAA,OAAO,MAAA,IAAU,CAAA,GAAI,EAAE,IAAA,EAAM,QAAO,GAAI,IAAA;AAC1C;AAEA,SAAS,aAAA,CACP,YACA,IAAA,EACc;AACd,EAAA,OAAO,UAAA,CAAW,GAAA,CAAI,CAAC,CAAA,MAAO,EAAE,IAAA,EAAM,CAAA,CAAE,IAAA,EAAM,GAAA,EAAK,IAAI,GAAA,CAAI,IAAA,CAAK,CAAC,CAAC,GAAE,CAAE,CAAA;AACxE;AAIA,SAAS,UAAA,CAAW,MAAc,MAAA,EAAwD;AACxF,EAAA,MAAM,CAAA,GAAI,MAAA;AAAA,IACR,KAAA;AAAA,MACE,KAAK,WAAA,EAAY;AAAA,MACjB,aAAA,CAAc,QAAQ,CAAC,CAAA,KAAM,EAAE,QAAA,IAAY,CAAA,CAAE,SAAS,EAAA,CAAG;AAAA;AAC3D,GACF;AACA,EAAA,OAAO,CAAA,GAAI,EAAE,QAAA,EAAU,CAAA,CAAE,IAAA,EAAM,QAAQ,CAAA,CAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAE,GAAI,IAAA;AAC/D;AAGA,SAAS,QAAA,CACP,MAAA,EACA,MAAA,EACA,IAAA,EACA,IAAA,EACoB;AACpB,EAAA,MAAM,CAAA,GAAI,MAAA;AAAA,IACR,KAAA;AAAA,MACE,MAAA;AAAA,MACA,aAAA,CAAc,QAAQ,CAAC,CAAA,KAAM,EAAE,KAAA,GAAQ,IAAI,CAAA,IAAK,EAAE;AAAA;AACpD,GACF;AACA,EAAA,OAAO,CAAA,GAAI,EAAE,QAAA,EAAU,CAAA,CAAE,MAAM,MAAA,EAAQ,CAAA,CAAE,MAAA,EAAQ,IAAA,EAAK,GAAI,IAAA;AAC5D;AAOO,SAAS,iBAAA,CACd,IAAA,EACA,UAAA,EACA,KAAA,EACgB;AAChB,EAAA,IAAI,CAAC,IAAA,IAAQ,UAAA,CAAW,MAAA,KAAW,GAAG,OAAO,OAAA;AAI7C,EAAA,MAAM,OAAA,GAAU,WAAW,IAAI,CAAA;AAG/B,EAAA,MAAM,MAAA,GAAS,eAAA,CAAgB,OAAA,EA
AS,UAAU,CAAA;AAClD,EAAA,IAAI,MAAA,CAAO,MAAA,KAAW,CAAA,EAAG,OAAO,OAAA;AAKhC,EAAA,MAAM,cAAA,GAAiB,OAAO,MAAA,IAAU,CAAA;AAExC,EAAA,MAAM,QAAA,GAAW,UAAA,CAAW,OAAA,EAAS,MAAM,CAAA;AAC3C,EAAA,IAAI,QAAA,EAAU,OAAO,EAAE,GAAG,UAAU,cAAA,EAAe;AAEnD,EAAA,MAAM,MAAA,GAAS,SAAS,OAAO,CAAA;AAC/B,EAAA,IAAI,MAAA,CAAO,MAAA,KAAW,CAAA,EAAG,OAAO,OAAA;AAEhC,EAAA,MAAM,MAAA,GACJ,QAAA,CAAS,MAAA,EAAQ,MAAA,EAAQ,YAAY,IAAI,CAAA,IACzC,QAAA,CAAS,MAAA,EAAQ,QAAQ,UAAA,EAAY,IAAI,CAAA,IACzC,KAAA,GAAQ,SAAS,MAAM,CAAA;AACzB,EAAA,OAAO,MAAA,GAAS,EAAE,GAAG,MAAA,EAAQ,gBAAe,GAAI,OAAA;AAClD","file":"chunk-NCGZPEDA.js","sourcesContent":["/**\n * Per-snippet language classification by candidate-set-relative set-difference.\n *\n * A ladder of rungs; the first rung whose leader clears a lead (margin) of ≥1\n * wins; otherwise `\"unknown\"`:\n *\n * 1 alphabet — characters distinctive within the candidate set\n * 2a function words — curated grammatical markers (highest precision)\n * 2b frequent words — corpus content words\n * 3 franc — optional trigram backstop for the distinctive-free\n * residual, injected as a resolver (this module stays\n * franc-free and importable without franc's tables)\n *\n * \"Distinctive\" is ALWAYS relative to the candidate set: a signal counts for a\n * candidate iff it appears in that candidate's profile and in NO other\n * candidate's. So `і` decides {uk, ru} (only uk has it) but is inert in\n * {uk, be} (both have it), and the word `и` decides {uk, ru} even though the\n * *letter* `и` is shared. Nothing is precomputed — uniqueness is the runtime\n * output, never stored.\n *\n * Adapted to langtell's {@link LanguageProfile} shape: the `words` and `iso6393`\n * fields are optional here, so a bare `{ code, alphabet }` profile still\n * classifies on rung 1.\n */\nimport type { LanguageProfile } from \"../types.js\";\n\nexport const FRANC_RUNG = 3;\n\n/** Which rung decided a verdict; `null` when unknown. 
*/\nexport type Rung = 1 | \"2a\" | \"2b\" | typeof FRANC_RUNG | null;\n\nexport interface SnippetVerdict {\n /** Winning language code, or the sentinel `\"unknown\"`. */\n language: string;\n /** Lead of the winner over the runner-up, in the rung's own unit (distinctive\n * char/word count for rungs 1–2; franc score-gap for rung 3). 0 when unknown. */\n margin: number;\n /** Which rung decided; `null` when unknown. */\n rung: Rung;\n /** Whether ≥2 same-script candidates were in scope when the verdict was\n * reached. `true` ⇒ the distinctive-letter/word machinery actually chose\n * between candidates; `false` ⇒ the winner was the lone candidate in its\n * script, selected by script alone (no evidence it is *distinctively* that\n * language). `false` for `\"unknown\"`. */\n discriminating: boolean;\n}\n\n/** A rung's verdict before {@link classifyBySnippet} stamps on the scope-derived\n * `discriminating` flag (which a single rung can't know — it depends on how many\n * same-script candidates were scoped). */\nexport type RungVerdict = Pick<SnippetVerdict, \"language\" | \"margin\" | \"rung\">;\n\nconst UNKNOWN: SnippetVerdict = {\n language: \"unknown\",\n margin: 0,\n rung: null,\n discriminating: false,\n};\n\n/** Resolver for rung 3 (the optional trigram backstop), injected into\n * {@link classifyBySnippet} by callers that have franc available. Kept as an\n * injected seam — not a direct import — so this module stays franc-free and\n * importable without pulling franc's tables. Returns a rung-3 verdict or\n * `null` (abstain). */\nexport type Rung3Resolver = (\n text: string,\n scoped: readonly LanguageProfile[],\n) => RungVerdict | null;\n\nconst CYRILLIC_RE = /\\p{Script=Cyrillic}/u;\nconst LATIN_RE = /\\p{Script=Latin}/u;\n\n/** Below this length, trigrams are too noisy to justify a rung-3 verdict. 
*/\nexport const RUNG3_MIN_LENGTH = 24;\n\n/**\n * Trailing/inline Latin \"noise\" tokens — URLs, @handles, #hashtags — that a\n * Cyrillic title commonly carries (a headline followed by a link or a social\n * handle). These are almost always Latin even on Cyrillic-language content, so\n * left in they can flip {@link dominantScript} to Latin and let genuinely\n * Cyrillic content scope to the wrong roster. Stripped before the script vote\n * AND before the rung tallies so the URL's letters never contribute either.\n *\n * Kept as separate simple patterns (applied in order — schemes/www before bare\n * domains) rather than one big alternation, so each stays readable. ASCII-only\n * `[a-z0-9-]` in the domain pattern means a Cyrillic word is never mistaken for\n * a domain.\n */\nconst NOISE_PATTERNS: readonly RegExp[] = [\n /\\bhttps?:\\/\\/\\S+/gi, // full URLs\n /\\bwww\\.\\S+/gi, // www.… without a scheme\n /\\b[a-z0-9-]+(?:\\.[a-z0-9-]+)+(?:\\/\\S*)?/gi, // bare domains (example.com/path)\n /[@#][\\p{L}\\p{N}_]+/gu, // @handles and #hashtags\n];\n\n/** Drop URLs / @handles / #hashtags so trailing Latin noise can't outvote the\n * prose's script or pollute the per-rung tallies. */\nexport function stripNoise(text: string): string {\n let out = text;\n for (const re of NOISE_PATTERNS) out = out.replace(re, \" \");\n return out;\n}\n\n/** The script most of `text` is written in, or `null` if it carries no letters.\n * Noise (URLs/handles/hashtags) is stripped first so a single trailing link\n * can't flip a multi-word Cyrillic title's vote to Latin. */\nfunction dominantScript(text: string): \"cyrillic\" | \"latin\" | null {\n let cyr = 0;\n let lat = 0;\n for (const ch of stripNoise(text)) {\n if (CYRILLIC_RE.test(ch)) cyr += 1;\n else if (LATIN_RE.test(ch)) lat += 1;\n }\n if (cyr === 0 && lat === 0) return null;\n return cyr >= lat ? \"cyrillic\" : \"latin\";\n}\n\n/** The script of a profile's alphabet. 
*/\nfunction profileScript(profile: LanguageProfile): \"cyrillic\" | \"latin\" | null {\n for (const ch of profile.alphabet) {\n if (CYRILLIC_RE.test(ch)) return \"cyrillic\";\n if (LATIN_RE.test(ch)) return \"latin\";\n }\n return null;\n}\n\n/** Candidates whose script matches the text's dominant script (others can't tip\n * the verdict). Empty when the text carries no letters. */\nexport function scopeCandidates(\n text: string,\n candidates: readonly LanguageProfile[],\n): LanguageProfile[] {\n const script = dominantScript(text);\n if (script === null) return [];\n // Keep one profile per code. A language listed twice would otherwise make its\n // own distinctive chars/words read as \"owned by ≥2 candidates\" in `tally`,\n // cancelling them out and collapsing the verdict to \"unknown\".\n const seen = new Set<string>();\n const scoped: LanguageProfile[] = [];\n for (const c of candidates) {\n if (profileScript(c) !== script || seen.has(c.code)) continue;\n seen.add(c.code);\n scoped.push(c);\n }\n return scoped;\n}\n\n/**\n * Per-language set of characters globally unique within `profiles` — present in\n * exactly one profile's alphabet. Relative to the given profile set: the unique\n * set shrinks as languages are added (a second Latin language un-uniques a–z).\n */\nexport function distinctiveChars(profiles: readonly LanguageProfile[]): Map<string, Set<string>> {\n const owners = new Map<string, string[]>();\n for (const p of profiles) {\n for (const ch of new Set(p.alphabet)) {\n const list = owners.get(ch);\n if (list) list.push(p.code);\n else owners.set(ch, [p.code]);\n }\n }\n const result = new Map<string, Set<string>>(profiles.map((p) => [p.code, new Set()]));\n for (const [ch, codes] of owners) {\n const [only] = codes;\n if (codes.length === 1 && only !== undefined) result.get(only)?.add(ch);\n }\n return result;\n}\n\ninterface Membership {\n code: string;\n set: ReadonlySet<string>;\n}\n\n/** Lowercased Unicode letter-run tokens. 
Keeps single-char tokens (`і`, `и`). */\nfunction tokenize(text: string): string[] {\n return text.toLowerCase().match(/\\p{L}+/gu) ?? [];\n}\n\n/**\n * Tally how many items (characters or word tokens) are distinctive to each\n * candidate — present in exactly one candidate's set. Items owned by zero or by\n * ≥2 candidates contribute nothing.\n */\nfunction tally(items: Iterable<string>, membership: readonly Membership[]): Map<string, number> {\n const scores = new Map<string, number>(membership.map((m) => [m.code, 0]));\n for (const item of items) {\n let owner: string | null = null;\n let owners = 0;\n for (const m of membership) {\n if (m.set.has(item)) {\n owners += 1;\n if (owners > 1) {\n owner = null;\n break;\n }\n owner = m.code;\n }\n }\n if (owner !== null) scores.set(owner, (scores.get(owner) ?? 0) + 1);\n }\n return scores;\n}\n\n/** The leading candidate and its lead over the runner-up, or `null` if <1. */\nfunction leader(scores: Map<string, number>): { code: string; margin: number } | null {\n let max = -1;\n let second = -1;\n let code: string | null = null;\n for (const [c, score] of scores) {\n if (score > max) {\n second = max;\n max = score;\n code = c;\n } else if (score > second) {\n second = score;\n }\n }\n if (code === null || max < 1) return null;\n const margin = max - Math.max(second, 0);\n return margin >= 1 ? { code, margin } : null;\n}\n\nfunction membershipFor(\n candidates: readonly LanguageProfile[],\n pick: (p: LanguageProfile) => Iterable<string>,\n): Membership[] {\n return candidates.map((c) => ({ code: c.code, set: new Set(pick(c)) }));\n}\n\n/** Rung 1 — characters (alphabet + orthographic {@link LanguageProfile.marks})\n * distinctive within the scoped candidate set. */\nfunction letterRung(text: string, scoped: readonly LanguageProfile[]): RungVerdict | null {\n const r = leader(\n tally(\n text.toLowerCase(),\n membershipFor(scoped, (p) => p.alphabet + (p.marks ?? \"\")),\n ),\n );\n return r ? 
{ language: r.code, margin: r.margin, rung: 1 } : null;\n}\n\n/** Rung 2 — distinctive words from the given tier (2a function, 2b frequent). */\nfunction wordRung(\n tokens: readonly string[],\n scoped: readonly LanguageProfile[],\n tier: \"function\" | \"frequent\",\n rung: \"2a\" | \"2b\",\n): RungVerdict | null {\n const r = leader(\n tally(\n tokens,\n membershipFor(scoped, (p) => p.words?.[tier] ?? []),\n ),\n );\n return r ? { language: r.code, margin: r.margin, rung } : null;\n}\n\n/**\n * Classify `text` among `candidates`. Synchronous and allocation-light. Returns\n * `\"unknown\"` on empty evidence, on a tie inside the candidate set, or when\n * nothing is distinctive.\n */\nexport function classifyBySnippet(\n text: string,\n candidates: readonly LanguageProfile[],\n rung3?: Rung3Resolver,\n): SnippetVerdict {\n if (!text || candidates.length === 0) return UNKNOWN;\n\n // Drop URLs / @handles / #hashtags once, up front: trailing Latin noise must\n // not flip the dominant-script vote nor pollute the per-rung tallies.\n const cleaned = stripNoise(text);\n\n // Restrict to candidates in the text's dominant script.\n const scoped = scopeCandidates(cleaned, candidates);\n if (scoped.length === 0) return UNKNOWN;\n\n // ≥2 same-script candidates means the distinctive machinery actually had a\n // choice to make; a lone scoped candidate wins by script alone. Stamped onto\n // whichever rung decides — a single rung can't see the scope size.\n const discriminating = scoped.length >= 2;\n\n const byLetter = letterRung(cleaned, scoped);\n if (byLetter) return { ...byLetter, discriminating };\n\n const tokens = tokenize(cleaned);\n if (tokens.length === 0) return UNKNOWN;\n\n const byWord =\n wordRung(tokens, scoped, \"function\", \"2a\") ??\n wordRung(tokens, scoped, \"frequent\", \"2b\") ??\n rung3?.(cleaned, scoped);\n return byWord ? { ...byWord, discriminating } : UNKNOWN;\n}\n"]}
@@ -1,4 +1,4 @@
1
- import { classifyBySnippet } from './chunk-RFR5I7P7.js';
1
+ import { classifyBySnippet } from './chunk-NCGZPEDA.js';
2
2
 
3
3
  // src/text.ts
4
4
  function evidenceFromText(text, candidates, rung3) {
@@ -6,15 +6,15 @@ function evidenceFromText(text, candidates, rung3) {
6
6
  if (candidates === void 0 || candidates.length === 0) return [];
7
7
  const verdict = classifyBySnippet(text, candidates, rung3);
8
8
  if (verdict.language === "unknown") return [];
9
- return [
10
- {
11
- kind: "title-script",
12
- language: verdict.language,
13
- confidence: marginToConfidence(verdict.margin, verdict.rung),
14
- source: "title-script",
15
- value: text.trim().slice(0, 80)
16
- }
17
- ];
9
+ const item = {
10
+ kind: "title-script",
11
+ language: verdict.language,
12
+ confidence: marginToConfidence(verdict.margin, verdict.rung),
13
+ source: "title-script",
14
+ value: text.trim().slice(0, 80)
15
+ };
16
+ if (!verdict.discriminating) item.discriminating = false;
17
+ return [item];
18
18
  }
19
19
  function marginToConfidence(margin, rung) {
20
20
  if (rung === 3) {
@@ -31,5 +31,5 @@ function clamp01(value) {
31
31
  }
32
32
 
33
33
  export { evidenceFromText };
34
- //# sourceMappingURL=chunk-3SO2WI75.js.map
35
- //# sourceMappingURL=chunk-3SO2WI75.js.map
34
+ //# sourceMappingURL=chunk-PT7R2BRQ.js.map
35
+ //# sourceMappingURL=chunk-PT7R2BRQ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/text.ts"],"names":[],"mappings":";;;AAmBO,SAAS,gBAAA,CACd,IAAA,EACA,UAAA,EACA,KAAA,EACoB;AACpB,EAAA,IAAI,IAAA,KAAS,UAAa,IAAA,CAAK,IAAA,GAAO,MAAA,KAAW,CAAA,SAAU,EAAC;AAC5D,EAAA,IAAI,eAAe,MAAA,IAAa,UAAA,CAAW,MAAA,KAAW,CAAA,SAAU,EAAC;AAEjE,EAAA,MAAM,OAAA,GAAU,iBAAA,CAAkB,IAAA,EAAM,UAAA,EAAY,KAAK,CAAA;AACzD,EAAA,IAAI,OAAA,CAAQ,QAAA,KAAa,SAAA,EAAW,OAAO,EAAC;AAE5C,EAAA,MAAM,IAAA,GAAyB;AAAA,IAC7B,IAAA,EAAM,cAAA;AAAA,IACN,UAAU,OAAA,CAAQ,QAAA;AAAA,IAClB,UAAA,EAAY,kBAAA,CAAmB,OAAA,CAAQ,MAAA,EAAQ,QAAQ,IAAI,CAAA;AAAA,IAC3D,MAAA,EAAQ,cAAA;AAAA,IACR,OAAO,IAAA,CAAK,IAAA,EAAK,CAAE,KAAA,CAAM,GAAG,EAAE;AAAA,GAChC;AAIA,EAAA,IAAI,CAAC,OAAA,CAAQ,cAAA,EAAgB,IAAA,CAAK,cAAA,GAAiB,KAAA;AACnD,EAAA,OAAO,CAAC,IAAI,CAAA;AACd;AAWA,SAAS,kBAAA,CAAmB,QAAgB,IAAA,EAAoB;AAC9D,EAAA,IAAI,SAAS,CAAA,EAAG;AAEd,IAAA,OAAO,OAAA,CAAQ,GAAA,GAAM,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,GAAA,CAAI,MAAA,EAAQ,CAAC,CAAA,EAAG,CAAC,CAAA,GAAI,IAAI,CAAA;AAAA,EAC9D;AACA,EAAA,MAAM,IAAA,GAAO,IAAA,CAAK,GAAA,CAAI,MAAA,EAAQ,CAAC,CAAA;AAC/B,EAAA,OAAO,OAAA,CAAQ,MAAO,IAAA,CAAK,GAAA,CAAI,MAAM,CAAC,CAAA,GAAI,IAAK,IAAI,CAAA;AACrD;AAEA,SAAS,QAAQ,KAAA,EAAuB;AACtC,EAAA,IAAI,CAAC,MAAA,CAAO,QAAA,CAAS,KAAK,GAAG,OAAO,CAAA;AACpC,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,OAAO,KAAA;AACT","file":"chunk-PT7R2BRQ.js","sourcesContent":["import type { LanguageEvidence, LanguageProfile } from \"./types.js\";\nimport { classifyBySnippet, type Rung, type Rung3Resolver } from \"./internal/classify.js\";\n\n/**\n * Producer: candidate-relative script + lexical signals from the title text.\n *\n * Wraps the ported snippet classifier ({@link classifyBySnippet}): noise strip →\n * dominant-script scope → distinctive letters (rung 1) → function words (2a) →\n * frequent words (2b). The `candidates` roster makes scoring roster-relative —\n * `і` decides Ukrainian only when Russian is also a candidate. 
Sync and\n * zero-dependency; the optional franc rung is injected via `rung3`.\n *\n * Emits at most one `kind: \"title-script\"` evidence item. The classifier's\n * integer `margin` (the winner's lead over the runner-up) maps to a 0..1\n * `confidence`: a verdict at all means the dominant script and the deciding rung\n * agreed, so the floor is high; a wider lead nudges it up. With no candidates\n * (or no usable distinctive signal) it abstains — emitting nothing rather than a\n * coarse \"unknown\", since the roster decides relevance.\n */\nexport function evidenceFromText(\n text: string | undefined,\n candidates?: readonly LanguageProfile[],\n rung3?: Rung3Resolver,\n): LanguageEvidence[] {\n if (text === undefined || text.trim().length === 0) return [];\n if (candidates === undefined || candidates.length === 0) return [];\n\n const verdict = classifyBySnippet(text, candidates, rung3);\n if (verdict.language === \"unknown\") return [];\n\n const item: LanguageEvidence = {\n kind: \"title-script\",\n language: verdict.language,\n confidence: marginToConfidence(verdict.margin, verdict.rung),\n source: \"title-script\",\n value: text.trim().slice(0, 80),\n };\n // Surface only the meaningful negative: the script was owned by ≤1 candidate,\n // so it didn't choose between candidates. The discriminating case stays narrow\n // (flag omitted). `fuse({ nonDiscriminatingScript: \"unknown\" })` reads this.\n if (!verdict.discriminating) item.discriminating = false;\n return [item];\n}\n\n/**\n * Map the classifier's per-rung lead to a 0..1 confidence.\n *\n * Rungs 1–2 carry an integer count of distinctive items (≥1). A verdict already\n * means script + rung agreed, so the floor is high (0.6) and each extra\n * distinctive item adds up to a 0.35 bonus, saturating by a lead of 4. 
Rung 3\n * (franc) carries franc's own 0..1 score-gap, which is weaker evidence, so it is\n * scaled into a 0.4..0.75 band.\n */\nfunction marginToConfidence(margin: number, rung: Rung): number {\n if (rung === 3) {\n // franc score-gap is already 0..1; weaker than the distinctive rungs.\n return clamp01(0.4 + Math.min(Math.max(margin, 0), 1) * 0.35);\n }\n const lead = Math.max(margin, 1);\n return clamp01(0.6 + (Math.min(lead, 4) / 4) * 0.35);\n}\n\nfunction clamp01(value: number): number {\n if (!Number.isFinite(value)) return 0;\n if (value < 0) return 0;\n if (value > 1) return 1;\n return value;\n}\n"]}
@@ -0,0 +1,63 @@
1
+ import { L as LanguageProfile } from './types-BIXrkuAr.js';
2
+
3
+ /**
4
+ * Per-snippet language classification by candidate-set-relative set-difference.
5
+ *
6
+ * A ladder of rungs; the first rung whose leader clears a lead (margin) of ≥1
7
+ * wins; otherwise `"unknown"`:
8
+ *
9
+ * 1 alphabet — characters distinctive within the candidate set
10
+ * 2a function words — curated grammatical markers (highest precision)
11
+ * 2b frequent words — corpus content words
12
+ * 3 franc — optional trigram backstop for the distinctive-free
13
+ * residual, injected as a resolver (this module stays
14
+ * franc-free and importable without franc's tables)
15
+ *
16
+ * "Distinctive" is ALWAYS relative to the candidate set: a signal counts for a
17
+ * candidate iff it appears in that candidate's profile and in NO other
18
+ * candidate's. So `і` decides {uk, ru} (only uk has it) but is inert in
19
+ * {uk, be} (both have it), and the word `и` decides {uk, ru} even though the
20
+ * *letter* `и` is shared. Nothing is precomputed — uniqueness is the runtime
21
+ * output, never stored.
22
+ *
23
+ * Adapted to langtell's {@link LanguageProfile} shape: the `words` and `iso6393`
24
+ * fields are optional here, so a bare `{ code, alphabet }` profile still
25
+ * classifies on rung 1.
26
+ */
27
+
28
+ declare const FRANC_RUNG = 3;
29
+ /** Which rung decided a verdict; `null` when unknown. */
30
+ type Rung = 1 | "2a" | "2b" | typeof FRANC_RUNG | null;
31
+ interface SnippetVerdict {
32
+ /** Winning language code, or the sentinel `"unknown"`. */
33
+ language: string;
34
+ /** Lead of the winner over the runner-up, in the rung's own unit (distinctive
35
+ * char/word count for rungs 1–2; franc score-gap for rung 3). 0 when unknown. */
36
+ margin: number;
37
+ /** Which rung decided; `null` when unknown. */
38
+ rung: Rung;
39
+ /** Whether ≥2 same-script candidates were in scope when the verdict was
40
+ * reached. `true` ⇒ the distinctive-letter/word machinery actually chose
41
+ * between candidates; `false` ⇒ the winner was the lone candidate in its
42
+ * script, selected by script alone (no evidence it is *distinctively* that
43
+ * language). `false` for `"unknown"`. */
44
+ discriminating: boolean;
45
+ }
46
+ /** A rung's verdict before {@link classifyBySnippet} stamps on the scope-derived
47
+ * `discriminating` flag (which a single rung can't know — it depends on how many
48
+ * same-script candidates were scoped). */
49
+ type RungVerdict = Pick<SnippetVerdict, "language" | "margin" | "rung">;
50
+ /** Resolver for rung 3 (the optional trigram backstop), injected into
51
+ * {@link classifyBySnippet} by callers that have franc available. Kept as an
52
+ * injected seam — not a direct import — so this module stays franc-free and
53
+ * importable without pulling franc's tables. Returns a rung-3 verdict or
54
+ * `null` (abstain). */
55
+ type Rung3Resolver = (text: string, scoped: readonly LanguageProfile[]) => RungVerdict | null;
56
+ /**
57
+ * Classify `text` among `candidates`. Synchronous and allocation-light. Returns
58
+ * `"unknown"` on empty evidence, on a tie inside the candidate set, or when
59
+ * nothing is distinctive.
60
+ */
61
+ declare function classifyBySnippet(text: string, candidates: readonly LanguageProfile[], rung3?: Rung3Resolver): SnippetVerdict;
62
+
63
+ export { FRANC_RUNG, type Rung, type Rung3Resolver, type RungVerdict, type SnippetVerdict, classifyBySnippet };
@@ -0,0 +1,3 @@
1
+ export { FRANC_RUNG, classifyBySnippet } from './chunk-NCGZPEDA.js';
2
+ //# sourceMappingURL=classify.js.map
3
+ //# sourceMappingURL=classify.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"classify.js"}
package/dist/franc.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { L as LanguageProfile, S as SyncSource, a as LanguageEvidence } from './types-D4Ux-xA6.js';
1
+ import { L as LanguageProfile, S as SyncSource, a as LanguageEvidence } from './types-BIXrkuAr.js';
2
2
 
3
3
  /**
4
4
  * Producer: the franc trigram backstop over `text`, scoped to `candidates`.
package/dist/franc.js CHANGED
@@ -1,4 +1,4 @@
1
- import { scopeCandidates } from './chunk-RFR5I7P7.js';
1
+ import { scopeCandidates } from './chunk-NCGZPEDA.js';
2
2
  import { francAll } from 'franc';
3
3
 
4
4
  var RUNG_MIN_LENGTH = 24;
package/dist/fuse.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { W as Weights, L as LanguageProfile, a as LanguageEvidence, C as Classification } from './types-D4Ux-xA6.js';
1
+ import { W as Weights, L as LanguageProfile, N as NonDiscriminatingScript, a as LanguageEvidence, C as Classification } from './types-BIXrkuAr.js';
2
2
 
3
3
  interface FuseOptions {
4
4
  weights?: Weights;
@@ -6,6 +6,12 @@ interface FuseOptions {
6
6
  * into it (`uk-UA` → `uk`, `ua` → `uk`) so context signals (page/header
7
7
  * locale) land on the same code the text rungs use. */
8
8
  candidates?: readonly LanguageProfile[];
9
+ /** How to resolve a *non-discriminating* script read (one flagged
10
+ * `discriminating: false` — its winning script owned by ≤1 roster candidate).
11
+ * Default `"candidate"` keeps current behavior; `"unknown"` drops such a read
12
+ * unless non-script evidence corroborates the same language. See
13
+ * {@link NonDiscriminatingScript}. */
14
+ nonDiscriminatingScript?: NonDiscriminatingScript;
9
15
  }
10
16
  /**
11
17
  * Combine evidence into a single weighted verdict with an audit trail.
package/dist/fuse.js CHANGED
@@ -1,4 +1,4 @@
1
- export { fuse } from './chunk-TYSRYQN7.js';
1
+ export { fuse } from './chunk-7G3MEXWK.js';
2
2
  import './chunk-OVSPOZ5J.js';
3
3
  //# sourceMappingURL=fuse.js.map
4
4
  //# sourceMappingURL=fuse.js.map
package/dist/headers.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { g as HeaderBag, a as LanguageEvidence } from './types-D4Ux-xA6.js';
1
+ import { H as HeaderBag, a as LanguageEvidence } from './types-BIXrkuAr.js';
2
2
 
3
3
  /** Producer: the HTTP `Content-Language` response header. */
4
4
  declare function evidenceFromHeaders(headers: HeaderBag | undefined): LanguageEvidence[];
package/dist/html.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { a as LanguageEvidence } from './types-D4Ux-xA6.js';
1
+ import { a as LanguageEvidence } from './types-BIXrkuAr.js';
2
2
 
3
3
  /**
4
4
  * Producer: language clues from an HTML string's metadata.
package/dist/index.d.ts CHANGED
@@ -1,9 +1,10 @@
1
- import { E as EvidenceSource, D as DetectorConfig, b as DetectFn } from './types-D4Ux-xA6.js';
2
- export { A as AsyncSource, C as Classification, c as DetectContext, d as DetectInput, e as EarlyExit, f as EvidenceKind, H as HasAsync, g as HeaderBag, h as LanguageCode, a as LanguageEvidence, L as LanguageProfile, i as SourceInput, S as SyncSource, W as Weights } from './types-D4Ux-xA6.js';
1
+ import { E as EvidenceSource, D as DetectorConfig, c as DetectFn } from './types-BIXrkuAr.js';
2
+ export { A as AsyncSource, C as Classification, d as DetectContext, e as DetectInput, f as EarlyExit, g as EvidenceKind, h as HasAsync, H as HeaderBag, b as LanguageCode, a as LanguageEvidence, L as LanguageProfile, N as NonDiscriminatingScript, i as SourceInput, S as SyncSource, W as Weights } from './types-BIXrkuAr.js';
3
3
  export { FuseOptions, fuse } from './fuse.js';
4
4
  export { evidenceFromText } from './text.js';
5
5
  export { evidenceFromHtml } from './html.js';
6
6
  export { evidenceFromHeaders } from './headers.js';
7
+ import './classify.js';
7
8
 
8
9
  /**
9
10
  * Build a configured detector. Does the per-roster setup once and returns a
package/dist/index.js CHANGED
@@ -1,13 +1,13 @@
1
- import { evidenceFromText } from './chunk-3SO2WI75.js';
2
- export { evidenceFromText } from './chunk-3SO2WI75.js';
1
+ import { evidenceFromText } from './chunk-PT7R2BRQ.js';
2
+ export { evidenceFromText } from './chunk-PT7R2BRQ.js';
3
3
  import { evidenceFromHtml } from './chunk-KI4MAI3N.js';
4
4
  export { evidenceFromHtml } from './chunk-KI4MAI3N.js';
5
5
  import { evidenceFromHeaders } from './chunk-3LDE35U2.js';
6
6
  export { evidenceFromHeaders } from './chunk-3LDE35U2.js';
7
- import { fuse } from './chunk-TYSRYQN7.js';
8
- export { fuse } from './chunk-TYSRYQN7.js';
7
+ import { fuse } from './chunk-7G3MEXWK.js';
8
+ export { fuse } from './chunk-7G3MEXWK.js';
9
9
  export { normalizeBCP47, normalizeLanguageCode, primarySubtag } from './chunk-OVSPOZ5J.js';
10
- import './chunk-RFR5I7P7.js';
10
+ import './chunk-NCGZPEDA.js';
11
11
 
12
12
  // src/compile.ts
13
13
  function builtIns(candidates) {
@@ -33,15 +33,18 @@ function applicable(source, input) {
33
33
  function compile(config = {}) {
34
34
  const sources = [...builtIns(config.candidates), ...config.engines ?? []];
35
35
  const hasAsync = sources.some((source) => !source.sync);
36
- const weights = config.weights;
37
- const candidates = config.candidates;
36
+ const fuseOptions = {
37
+ weights: config.weights,
38
+ candidates: config.candidates,
39
+ nonDiscriminatingScript: config.nonDiscriminatingScript
40
+ };
38
41
  if (!hasAsync) {
39
42
  const detect2 = (input) => {
40
43
  const evidence = [];
41
44
  for (const source of sources) {
42
45
  if (source.sync && applicable(source, input)) evidence.push(...source.detect(input));
43
46
  }
44
- return fuse(evidence, { weights, candidates });
47
+ return fuse(evidence, fuseOptions);
45
48
  };
46
49
  return detect2;
47
50
  }
@@ -54,7 +57,7 @@ function compile(config = {}) {
54
57
  else pending.push(Promise.resolve(source.detect(input, ctx)).catch(() => []));
55
58
  }
56
59
  for (const batch of await Promise.all(pending)) evidence.push(...batch);
57
- return fuse(evidence, { weights });
60
+ return fuse(evidence, fuseOptions);
58
61
  };
59
62
  return detect;
60
63
  }
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/compile.ts"],"names":["detect"],"mappings":";;;;;;;;;;;;AAmBA,SAAS,SAAS,UAAA,EAAkE;AAClF,EAAA,OAAO;AAAA,IACL;AAAA,MACE,EAAA,EAAI,MAAA;AAAA,MACJ,IAAA,EAAM,IAAA;AAAA,MACN,MAAA,EAAQ,CAAC,MAAM,CAAA;AAAA,MACf,QAAQ,CAAC,CAAA,KAAM,gBAAA,CAAiB,CAAA,CAAE,MAAM,UAAU;AAAA,KACpD;AAAA,IACA,EAAE,EAAA,EAAI,MAAA,EAAQ,IAAA,EAAM,MAAM,MAAA,EAAQ,CAAC,MAAM,CAAA,EAAG,QAAQ,CAAC,CAAA,KAAM,gBAAA,CAAiB,CAAA,CAAE,IAAI,CAAA,EAAE;AAAA,IACpF;AAAA,MACE,EAAA,EAAI,SAAA;AAAA,MACJ,IAAA,EAAM,IAAA;AAAA,MACN,MAAA,EAAQ,CAAC,SAAS,CAAA;AAAA,MAClB,MAAA,EAAQ,CAAC,CAAA,KAAM,mBAAA,CAAoB,EAAE,OAAO;AAAA;AAC9C,GACF;AACF;AAGA,SAAS,UAAA,CAAW,QAAwB,KAAA,EAA6B;AACvE,EAAA,OAAO,MAAA,CAAO,OAAO,KAAA,CAAM,CAAC,QAAQ,KAAA,CAAM,GAAG,MAAM,MAAS,CAAA;AAC9D;AAQO,SAAS,OAAA,CACd,MAAA,GAA4B,EAAC,EAChB;AACb,EAAA,MAAM,OAAA,GAA4B,CAAC,GAAG,QAAA,CAAS,MAAA,CAAO,UAAU,CAAA,EAAG,GAAI,MAAA,CAAO,OAAA,IAAW,EAAG,CAAA;AAC5F,EAAA,MAAM,WAAW,OAAA,CAAQ,IAAA,CAAK,CAAC,MAAA,KAAW,CAAC,OAAO,IAAI,CAAA;AACtD,EAAA,MAAM,UAAU,MAAA,CAAO,OAAA;AACvB,EAAA,MAAM,aAAa,MAAA,CAAO,UAAA;AAE1B,EAAA,IAAI,CAAC,QAAA,EAAU;AACb,IAAA,MAAMA,OAAAA,GAAS,CAAC,KAAA,KAAuC;AACrD,MAAA,MAAM,WAA+B,EAAC;AACtC,MAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,QAAA,IAAI,MAAA,CAAO,IAAA,IAAQ,UAAA,CAAW,MAAA,EAAQ,KAAK,CAAA,EAAG,QAAA,CAAS,IAAA,CAAK,GAAG,MAAA,CAAO,MAAA,CAAO,KAAK,CAAC,CAAA;AAAA,MACrF;AACA,MAAA,OAAO,IAAA,CAAK,QAAA,EAAU,EAAE,OAAA,EAAS,YAAY,CAAA;AAAA,IAC/C,CAAA;AACA,IAAA,OAAOA,OAAAA;AAAA,EACT;AAEA,EAAA,MAAM,MAAA,GAAS,OAAO,KAAA,EAAoB,GAAA,GAAqB,EAAC,KAA+B;AAC7F,IAAA,MAAM,WAA+B,EAAC;AACtC,IAAA,MAAM,UAAyC,EAAC;AAChD,IAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,MAAA,IAAI,CAAC,UAAA,CAAW,MAAA,EAAQ,KAAK,CAAA,EAAG;AAChC,MAAA,IAAI,MAAA,CAAO,MAAM,QAAA,CAAS,IAAA,CAAK,GAAG,MAAA,CAAO,MAAA,CAAO,KAAK,CAAC,CAAA;AAAA,WACjD,OAAA,CAAQ,IAAA,CAAK,OAAA,CAAQ,OAAA,CAAQ,OAAO,MAAA,CAAO,KAAA,EAAO,GAAG,CAAC,CAAA,CAAE,KAAA,CAAM,MAAM,EAAE,CAAC,CAAA;AAAA,IAC9E;AACA,IAAA,KAAA,MAAW,KAAA,IAAS,MAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG,QAAA,CAAS,IAAA,CAAK,GAAG,KAAK,CAAA;AACtE,IAAA,OAAO,IAAA,CAAK,QAAA,EAAU,EA
AE,OAAA,EAAS,CAAA;AAAA,EACnC,CAAA;AACA,EAAA,OAAO,MAAA;AACT","file":"index.js","sourcesContent":["import { evidenceFromHeaders } from \"./headers.js\";\nimport { evidenceFromHtml } from \"./html.js\";\nimport { evidenceFromText } from \"./text.js\";\nimport { fuse } from \"./fuse.js\";\nimport type {\n Classification,\n DetectContext,\n DetectFn,\n DetectInput,\n DetectorConfig,\n EvidenceSource,\n LanguageEvidence,\n LanguageProfile,\n SyncSource,\n} from \"./types.js\";\n\n/** The always-on, zero-dependency producers. The text producer is bound to the\n * configured candidate roster so its scoring is roster-relative (and so it\n * abstains when no roster was supplied — its signals need candidates). */\nfunction builtIns(candidates: readonly LanguageProfile[] | undefined): SyncSource[] {\n return [\n {\n id: \"text\",\n sync: true,\n inputs: [\"text\"],\n detect: (i) => evidenceFromText(i.text, candidates),\n },\n { id: \"html\", sync: true, inputs: [\"html\"], detect: (i) => evidenceFromHtml(i.html) },\n {\n id: \"headers\",\n sync: true,\n inputs: [\"headers\"],\n detect: (i) => evidenceFromHeaders(i.headers),\n },\n ];\n}\n\n/** Run a source only when every input it declares is present. */\nfunction applicable(source: EvidenceSource, input: DetectInput): boolean {\n return source.inputs.every((key) => input[key] !== undefined);\n}\n\n/**\n * Build a configured detector. Does the per-roster setup once and returns a\n * `detect` function whose sync/async shape is fixed by the registered engines\n * (see {@link DetectFn}). The built-in producers are always registered; opt-in\n * engines (franc, chrome-ai) are added via `config.engines`.\n */\nexport function compile<const E extends readonly EvidenceSource[] = []>(\n config: DetectorConfig<E> = {},\n): DetectFn<E> {\n const sources: EvidenceSource[] = [...builtIns(config.candidates), ...(config.engines ?? 
[])];\n const hasAsync = sources.some((source) => !source.sync);\n const weights = config.weights;\n const candidates = config.candidates;\n\n if (!hasAsync) {\n const detect = (input: DetectInput): Classification => {\n const evidence: LanguageEvidence[] = [];\n for (const source of sources) {\n if (source.sync && applicable(source, input)) evidence.push(...source.detect(input));\n }\n return fuse(evidence, { weights, candidates });\n };\n return detect as DetectFn<E>;\n }\n\n const detect = async (input: DetectInput, ctx: DetectContext = {}): Promise<Classification> => {\n const evidence: LanguageEvidence[] = [];\n const pending: Promise<LanguageEvidence[]>[] = [];\n for (const source of sources) {\n if (!applicable(source, input)) continue;\n if (source.sync) evidence.push(...source.detect(input));\n else pending.push(Promise.resolve(source.detect(input, ctx)).catch(() => []));\n }\n for (const batch of await Promise.all(pending)) evidence.push(...batch);\n return fuse(evidence, { weights });\n };\n return detect as DetectFn<E>;\n}\n"]}
1
+ {"version":3,"sources":["../src/compile.ts"],"names":["detect"],"mappings":";;;;;;;;;;;;AAmBA,SAAS,SAAS,UAAA,EAAkE;AAClF,EAAA,OAAO;AAAA,IACL;AAAA,MACE,EAAA,EAAI,MAAA;AAAA,MACJ,IAAA,EAAM,IAAA;AAAA,MACN,MAAA,EAAQ,CAAC,MAAM,CAAA;AAAA,MACf,QAAQ,CAAC,CAAA,KAAM,gBAAA,CAAiB,CAAA,CAAE,MAAM,UAAU;AAAA,KACpD;AAAA,IACA,EAAE,EAAA,EAAI,MAAA,EAAQ,IAAA,EAAM,MAAM,MAAA,EAAQ,CAAC,MAAM,CAAA,EAAG,QAAQ,CAAC,CAAA,KAAM,gBAAA,CAAiB,CAAA,CAAE,IAAI,CAAA,EAAE;AAAA,IACpF;AAAA,MACE,EAAA,EAAI,SAAA;AAAA,MACJ,IAAA,EAAM,IAAA;AAAA,MACN,MAAA,EAAQ,CAAC,SAAS,CAAA;AAAA,MAClB,MAAA,EAAQ,CAAC,CAAA,KAAM,mBAAA,CAAoB,EAAE,OAAO;AAAA;AAC9C,GACF;AACF;AAGA,SAAS,UAAA,CAAW,QAAwB,KAAA,EAA6B;AACvE,EAAA,OAAO,MAAA,CAAO,OAAO,KAAA,CAAM,CAAC,QAAQ,KAAA,CAAM,GAAG,MAAM,MAAS,CAAA;AAC9D;AAQO,SAAS,OAAA,CACd,MAAA,GAA4B,EAAC,EAChB;AACb,EAAA,MAAM,OAAA,GAA4B,CAAC,GAAG,QAAA,CAAS,MAAA,CAAO,UAAU,CAAA,EAAG,GAAI,MAAA,CAAO,OAAA,IAAW,EAAG,CAAA;AAC5F,EAAA,MAAM,WAAW,OAAA,CAAQ,IAAA,CAAK,CAAC,MAAA,KAAW,CAAC,OAAO,IAAI,CAAA;AACtD,EAAA,MAAM,WAAA,GAA2B;AAAA,IAC/B,SAAS,MAAA,CAAO,OAAA;AAAA,IAChB,YAAY,MAAA,CAAO,UAAA;AAAA,IACnB,yBAAyB,MAAA,CAAO;AAAA,GAClC;AAEA,EAAA,IAAI,CAAC,QAAA,EAAU;AACb,IAAA,MAAMA,OAAAA,GAAS,CAAC,KAAA,KAAuC;AACrD,MAAA,MAAM,WAA+B,EAAC;AACtC,MAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,QAAA,IAAI,MAAA,CAAO,IAAA,IAAQ,UAAA,CAAW,MAAA,EAAQ,KAAK,CAAA,EAAG,QAAA,CAAS,IAAA,CAAK,GAAG,MAAA,CAAO,MAAA,CAAO,KAAK,CAAC,CAAA;AAAA,MACrF;AACA,MAAA,OAAO,IAAA,CAAK,UAAU,WAAW,CAAA;AAAA,IACnC,CAAA;AACA,IAAA,OAAOA,OAAAA;AAAA,EACT;AAEA,EAAA,MAAM,MAAA,GAAS,OAAO,KAAA,EAAoB,GAAA,GAAqB,EAAC,KAA+B;AAC7F,IAAA,MAAM,WAA+B,EAAC;AACtC,IAAA,MAAM,UAAyC,EAAC;AAChD,IAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,MAAA,IAAI,CAAC,UAAA,CAAW,MAAA,EAAQ,KAAK,CAAA,EAAG;AAChC,MAAA,IAAI,MAAA,CAAO,MAAM,QAAA,CAAS,IAAA,CAAK,GAAG,MAAA,CAAO,MAAA,CAAO,KAAK,CAAC,CAAA;AAAA,WACjD,OAAA,CAAQ,IAAA,CAAK,OAAA,CAAQ,OAAA,CAAQ,OAAO,MAAA,CAAO,KAAA,EAAO,GAAG,CAAC,CAAA,CAAE,KAAA,CAAM,MAAM,EAAE,CAAC,CAAA;AAAA,IAC9E;AACA,IAAA,KAAA,MAAW,KAAA,IAAS,MAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG,QAAA,CAAS,IAAA,CAAK,GAAG,KAAK,CAAA;AAC
tE,IAAA,OAAO,IAAA,CAAK,UAAU,WAAW,CAAA;AAAA,EACnC,CAAA;AACA,EAAA,OAAO,MAAA;AACT","file":"index.js","sourcesContent":["import { evidenceFromHeaders } from \"./headers.js\";\nimport { evidenceFromHtml } from \"./html.js\";\nimport { evidenceFromText } from \"./text.js\";\nimport { fuse, type FuseOptions } from \"./fuse.js\";\nimport type {\n Classification,\n DetectContext,\n DetectFn,\n DetectInput,\n DetectorConfig,\n EvidenceSource,\n LanguageEvidence,\n LanguageProfile,\n SyncSource,\n} from \"./types.js\";\n\n/** The always-on, zero-dependency producers. The text producer is bound to the\n * configured candidate roster so its scoring is roster-relative (and so it\n * abstains when no roster was supplied — its signals need candidates). */\nfunction builtIns(candidates: readonly LanguageProfile[] | undefined): SyncSource[] {\n return [\n {\n id: \"text\",\n sync: true,\n inputs: [\"text\"],\n detect: (i) => evidenceFromText(i.text, candidates),\n },\n { id: \"html\", sync: true, inputs: [\"html\"], detect: (i) => evidenceFromHtml(i.html) },\n {\n id: \"headers\",\n sync: true,\n inputs: [\"headers\"],\n detect: (i) => evidenceFromHeaders(i.headers),\n },\n ];\n}\n\n/** Run a source only when every input it declares is present. */\nfunction applicable(source: EvidenceSource, input: DetectInput): boolean {\n return source.inputs.every((key) => input[key] !== undefined);\n}\n\n/**\n * Build a configured detector. Does the per-roster setup once and returns a\n * `detect` function whose sync/async shape is fixed by the registered engines\n * (see {@link DetectFn}). The built-in producers are always registered; opt-in\n * engines (franc, chrome-ai) are added via `config.engines`.\n */\nexport function compile<const E extends readonly EvidenceSource[] = []>(\n config: DetectorConfig<E> = {},\n): DetectFn<E> {\n const sources: EvidenceSource[] = [...builtIns(config.candidates), ...(config.engines ?? 
[])];\n const hasAsync = sources.some((source) => !source.sync);\n const fuseOptions: FuseOptions = {\n weights: config.weights,\n candidates: config.candidates,\n nonDiscriminatingScript: config.nonDiscriminatingScript,\n };\n\n if (!hasAsync) {\n const detect = (input: DetectInput): Classification => {\n const evidence: LanguageEvidence[] = [];\n for (const source of sources) {\n if (source.sync && applicable(source, input)) evidence.push(...source.detect(input));\n }\n return fuse(evidence, fuseOptions);\n };\n return detect as DetectFn<E>;\n }\n\n const detect = async (input: DetectInput, ctx: DetectContext = {}): Promise<Classification> => {\n const evidence: LanguageEvidence[] = [];\n const pending: Promise<LanguageEvidence[]>[] = [];\n for (const source of sources) {\n if (!applicable(source, input)) continue;\n if (source.sync) evidence.push(...source.detect(input));\n else pending.push(Promise.resolve(source.detect(input, ctx)).catch(() => []));\n }\n for (const batch of await Promise.all(pending)) evidence.push(...batch);\n return fuse(evidence, fuseOptions);\n };\n return detect as DetectFn<E>;\n}\n"]}
@@ -1,4 +1,4 @@
1
- import { h as LanguageCode, L as LanguageProfile } from './types-D4Ux-xA6.js';
1
+ import { b as LanguageCode, L as LanguageProfile } from './types-BIXrkuAr.js';
2
2
 
3
3
  /**
4
4
  * `langtell/profiles` — ready-to-use {@link LanguageProfile} data.
@@ -33,7 +33,15 @@ declare const en: LanguageProfile;
33
33
 
34
34
  /** Registry of shipped profiles, keyed by BCP-47 code. */
35
35
  declare const PROFILES: Readonly<Record<LanguageCode, LanguageProfile>>;
36
+ /** BCP-47 codes for which langtell ships a ready-made {@link LanguageProfile}.
37
+ * Handy for narrowing a caller's roster to codes that can actually classify —
38
+ * e.g. `codes.filter(hasProfile)`. Derived from {@link PROFILES}. */
39
+ declare const PROFILED_CODES: readonly LanguageCode[];
40
+ /** Whether langtell ships a ready-made {@link LanguageProfile} for `code`. An
41
+ * own-property check, so inherited names (`"toString"`, `"constructor"`) read
42
+ * as absent. */
43
+ declare function hasProfile(code: LanguageCode): boolean;
36
44
  /** Resolve profiles for the given codes, skipping any without a shipped profile. */
37
45
  declare function getProfiles(codes: readonly LanguageCode[]): LanguageProfile[];
38
46
 
39
- export { PROFILES, be, bg, en, getProfiles, ru, uk };
47
+ export { PROFILED_CODES, PROFILES, be, bg, en, getProfiles, hasProfile, ru, uk };
package/dist/profiles.js CHANGED
@@ -1017,10 +1017,14 @@ var en = {
1017
1017
  }
1018
1018
  };
1019
1019
  var PROFILES = { uk, ru, be, bg, en };
1020
+ var PROFILED_CODES = Object.keys(PROFILES);
1021
+ function hasProfile(code) {
1022
+ return Object.prototype.hasOwnProperty.call(PROFILES, code);
1023
+ }
1020
1024
  function getProfiles(codes) {
1021
1025
  return codes.map((c) => PROFILES[c]).filter((p) => p !== void 0);
1022
1026
  }
1023
1027
 
1024
- export { PROFILES, be, bg, en, getProfiles, ru, uk };
1028
+ export { PROFILED_CODES, PROFILES, be, bg, en, getProfiles, hasProfile, ru, uk };
1025
1029
  //# sourceMappingURL=profiles.js.map
1026
1030
  //# sourceMappingURL=profiles.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/internal/frequent.ts","../src/profiles.ts"],"names":[],"mappings":";AAOO,IAAM,kBAAA,GAAkE;AAAA,EAC7E,EAAA,EAAI;AAAA,IACF,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBA
AA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,kDAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,8DAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,
oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,kDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,wDAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,wDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAA
A;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,wDAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,wDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA;AAAA,GACF;AAAA,EACA,EAAA,EAAI;AAAA,IACF,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAA
A;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,8DAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,wDAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA
,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,8DAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,wDAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,8DAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gFAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,wDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,wDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,I
ACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,8DAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,kDAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,wDAAA;AAAA,IACA;AAAA,GACF;AAAA,EACA,IAAI;AACN,CAAA;;;ACjxBA,IAAM,WAAA,GAAiC;AAAA,EACrC,sCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,sCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,8DAAA;AAAA,EACA,gCAAA;AAAA,EACA;AACF,CAAA;AAKA,IAAM,WAAA,GAAiC;AAAA,EACrC,sCAAA;AAAA,EACA,gCAAA;AAAA,EACA,0BAAA;AAAA,EACA,4CAAA;AAAA,EACA,0BAAA;AAAA,EACA,gCAAA;AAAA,EACA,0BAAA;AAAA,EACA,sCAAA;AAAA,EACA,kDAAA;AAAA,EACA,wDAAA;AAAA,EACA,kDAAA;AAAA,EACA;AACF,CAAA;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA,EAET,QAAA,EAAU,wMAAA;AAAA;AAAA;AAAA,EAGV,KAAA,EAAO,eAAA;AAAA,EACP,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,QAAA;AAAA,MACA,QAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,sCAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAC;AAE3C;AAEA,IAAM,EAA
A,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA,EAET,QAAA,EAAU,wMAAA;AAAA,EACV,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,QAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,0BAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,0BAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,sCAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAC;AAE3C;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA,EAET,QAAA,EAAU,kMAAA;AAAA;AAAA,EAEV,KAAA,EAAO,eAAA;AAAA,EACP,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,QAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA,sCAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,4CAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAA;AAE1C;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA;AAAA;AAAA,EAIT,QAAA,EAAU,sLAAA;AAAA,EACV,KAAA,EAAO;AAAA;AAAA,IAEL,QAAA,EAAU;AAAA,MACR,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA,0BAAA;AAAA,MACA,0BAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,sCAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAA;AAE1C;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA,EACT,QAAA,EAAU,4BAAA;AAAA,EACV,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,KAAA;AAAA,MACA,KAAA;AAAA,MACA,IAAA;AAAA,MACA,KAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,KAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,MAAA;AAAA,MACA,MAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,KAAA;AAAA,MACA,IAAA;AAAA,MACA,MAAA;AAAA,MACA,IAAA;AAAA,MACA,KAAA;AA
AA,MACA,IAAA;AAAA,MACA,KAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,OAAA;AAAA,MACA,KAAA;AAAA,MACA,KAAA;AAAA,MACA,OAAA;AAAA,MACA,GAAA;AAAA,MACA,IAAA;AAAA,MACA,GAAA;AAAA,MACA,IAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAC;AAE3C;AAKO,IAAM,WAA4D,EAAE,EAAA,EAAI,EAAA,EAAI,EAAA,EAAI,IAAI,EAAA;AAGpF,SAAS,YAAY,KAAA,EAAmD;AAC7E,EAAA,OAAO,KAAA,CAAM,GAAA,CAAI,CAAC,CAAA,KAAM,QAAA,CAAS,CAAC,CAAC,CAAA,CAAE,MAAA,CAAO,CAAC,CAAA,KAA4B,CAAA,KAAM,MAAS,CAAA;AAC1F","file":"profiles.js","sourcesContent":["// Generated frequency lists — the top everyday words for each language,\n// taken from an open subtitle-frequency corpus (general vocabulary; no\n// project-specific terms). Kept out of the zero-dependency core: this data\n// lives behind the `langtell/profiles` subpath only.\n//\n// Rung 2b of the classifier (internal/classify.ts) reads these. They sharpen\n// distinctive-free disambiguation (e.g. uk \"робота\" vs ru \"работа\").\nexport const FREQUENT_GENERATED: Readonly<Record<string, readonly string[]>> = {\n uk: [\n \"не\",\n \"що\",\n \"на\",\n \"це\",\n \"ти\",\n \"что\",\n \"так\",\n \"ты\",\n \"это\",\n \"ми\",\n \"все\",\n \"тебе\",\n \"він\",\n \"як\",\n \"за\",\n \"ви\",\n \"до\",\n \"мене\",\n \"он\",\n \"мені\",\n \"ні\",\n \"але\",\n \"мы\",\n \"мне\",\n \"да\",\n \"как\",\n \"просто\",\n \"меня\",\n \"нет\",\n \"тут\",\n \"для\",\n \"його\",\n \"вы\",\n \"но\",\n \"то\",\n \"ну\",\n \"знаю\",\n \"про\",\n \"нас\",\n \"вас\",\n \"вони\",\n \"тебя\",\n \"вона\",\n \"она\",\n \"тобі\",\n \"его\",\n \"там\",\n \"якщо\",\n \"ще\",\n \"та\",\n \"добре\",\n \"щоб\",\n \"вам\",\n \"по\",\n \"нам\",\n \"хочу\",\n \"коли\",\n \"если\",\n \"бы\",\n \"же\",\n \"вже\",\n \"було\",\n \"був\",\n \"они\",\n \"може\",\n \"чи\",\n \"чтобы\",\n \"дуже\",\n \"від\",\n \"хорошо\",\n \"чому\",\n \"буде\",\n \"хто\",\n \"есть\",\n \"всё\",\n \"давай\",\n \"тому\",\n \"де\",\n \"здесь\",\n \"треба\",\n \"от\",\n \"думаю\",\n 
\"когда\",\n \"из\",\n \"того\",\n \"щось\",\n \"гаразд\",\n \"был\",\n \"цього\",\n \"только\",\n \"зараз\",\n \"было\",\n \"может\",\n \"нічого\",\n \"моя\",\n \"тільки\",\n \"те\",\n \"бути\",\n \"один\",\n \"уже\",\n \"можу\",\n \"тепер\",\n \"мій\",\n \"быть\",\n \"ось\",\n \"почему\",\n \"дякую\",\n \"раз\",\n \"вот\",\n \"могу\",\n \"через\",\n \"нужно\",\n \"всі\",\n \"бо\",\n \"кто\",\n \"будет\",\n \"правда\",\n \"без\",\n \"себе\",\n \"більше\",\n \"тоді\",\n \"час\",\n \"день\",\n \"ладно\",\n \"сейчас\",\n \"можна\",\n \"була\",\n \"их\",\n \"боже\",\n \"еще\",\n \"спасибо\",\n \"или\",\n \"будь\",\n \"йому\",\n \"где\",\n \"лише\",\n \"очень\",\n \"была\",\n \"ним\",\n \"привіт\",\n \"них\",\n \"чем\",\n \"навіть\",\n \"ніколи\",\n \"знаешь\",\n \"этого\",\n \"буду\",\n \"би\",\n \"её\",\n \"зі\",\n \"ее\",\n \"мой\",\n \"ничего\",\n \"хочеш\",\n \"нього\",\n \"цей\",\n \"чого\",\n \"життя\",\n \"этом\",\n \"теж\",\n \"люди\",\n \"твоя\",\n \"со\",\n \"потому\",\n \"сказав\",\n \"больше\",\n \"теперь\",\n \"ещё\",\n \"людей\",\n \"або\",\n \"свою\",\n \"привет\",\n \"який\",\n \"сюди\",\n \"таке\",\n \"даже\",\n \"із\",\n \"об\",\n \"тобою\",\n \"були\",\n \"два\",\n \"краще\",\n \"пока\",\n \"ему\",\n \"завжди\",\n \"хочешь\",\n \"должен\",\n \"багато\",\n \"сьогодні\",\n \"ей\",\n \"точно\",\n \"ласка\",\n \"время\",\n \"эй\",\n \"той\",\n \"себя\",\n \"надо\",\n \"сам\",\n \"мною\",\n \"тоже\",\n \"этот\",\n \"прошу\",\n \"маю\",\n \"сказал\",\n \"мама\",\n \"ли\",\n \"зробити\",\n \"сказати\",\n \"сказала\",\n \"назад\",\n \"які\",\n \"можеш\",\n \"тобой\",\n \"трохи\",\n \"потрібно\",\n \"ці\",\n \"тогда\",\n \"порядке\",\n \"ага\",\n \"саме\",\n \"хотів\",\n \"можешь\",\n \"під\",\n \"три\",\n \"никогда\",\n \"можете\",\n \"років\",\n \"собі\",\n \"скажи\",\n \"него\",\n \"пане\",\n \"робити\",\n \"знову\",\n \"твій\",\n \"люблю\",\n \"пожалуйста\",\n \"наш\",\n \"ни\",\n \"сегодня\",\n \"мной\",\n \"потім\",\n \"ніж\",\n \"были\",\n \"гей\",\n 
\"дай\",\n \"перед\",\n \"завтра\",\n \"вами\",\n \"яка\",\n \"одного\",\n \"ваша\",\n \"конечно\",\n \"сделать\",\n \"лучше\",\n \"цьому\",\n \"ваш\",\n \"цю\",\n \"сказать\",\n \"пам\",\n \"ніхто\",\n \"сюда\",\n \"нами\",\n \"можливо\",\n \"після\",\n \"значит\",\n \"прямо\",\n \"скільки\",\n \"одна\",\n \"чудово\",\n \"давайте\",\n \"бачив\",\n \"такий\",\n \"кого\",\n \"хтось\",\n \"сер\",\n \"ця\",\n \"мою\",\n \"усе\",\n \"разом\",\n \"чего\",\n \"куди\",\n \"звичайно\",\n \"справді\",\n \"поки\",\n \"должны\",\n \"всегда\",\n \"делать\",\n \"дело\",\n \"гроші\",\n \"цим\",\n \"міг\",\n \"том\",\n \"хіба\",\n \"ней\",\n \"твой\",\n \"этим\",\n \"повинен\",\n \"друг\",\n \"можно\",\n \"дня\",\n \"досить\",\n \"можемо\",\n \"зробив\",\n \"много\",\n \"хотел\",\n \"над\",\n \"должна\",\n \"после\",\n \"хоче\",\n \"якби\",\n \"могли\",\n \"місце\",\n \"всего\",\n \"воно\",\n \"часу\",\n \"робиш\",\n \"хлопці\",\n \"вибачте\",\n \"додому\",\n \"весь\",\n \"руки\",\n \"ведь\",\n \"ясно\",\n \"возможно\",\n \"вибач\",\n \"такой\",\n \"доктор\",\n \"повинні\",\n \"эти\",\n \"говорить\",\n \"мого\",\n \"нема\",\n \"нб\",\n \"йди\",\n \"думаешь\",\n \"моей\",\n \"немного\",\n \"навіщо\",\n \"господи\",\n \"мабуть\",\n \"жизнь\",\n \"твою\",\n \"зачем\",\n \"чоловік\",\n \"ходімо\",\n \"лет\",\n \"такое\",\n \"случилось\",\n \"звідси\",\n \"слишком\",\n \"мати\",\n \"скажу\",\n \"скоро\",\n \"сталося\",\n \"прости\",\n \"нормально\",\n \"пані\",\n \"всіх\",\n \"також\",\n \"вместе\",\n \"кілька\",\n \"брат\",\n \"чорт\",\n \"сама\",\n \"во\",\n \"потом\",\n \"такого\",\n \"казав\",\n \"розумію\",\n \"батько\",\n \"отже\",\n \"сделал\",\n \"таким\",\n \"могла\",\n \"тим\",\n \"швидше\",\n \"жаль\",\n \"туди\",\n \"речі\",\n \"знати\",\n \"поэтому\",\n \"мог\",\n \"хочете\",\n \"помочь\",\n \"одну\",\n \"всех\",\n \"который\",\n \"им\",\n \"ім\",\n \"думав\",\n \"можем\",\n \"под\",\n \"допомогти\",\n \"людина\",\n \"куда\",\n \"яку\",\n \"правильно\",\n \"давно\",\n 
\"зовсім\",\n \"при\",\n ],\n ru: [\n \"не\",\n \"что\",\n \"ты\",\n \"это\",\n \"на\",\n \"он\",\n \"мы\",\n \"как\",\n \"вы\",\n \"да\",\n \"мне\",\n \"нет\",\n \"меня\",\n \"так\",\n \"но\",\n \"его\",\n \"все\",\n \"она\",\n \"тебя\",\n \"если\",\n \"за\",\n \"бы\",\n \"тебе\",\n \"они\",\n \"чтобы\",\n \"же\",\n \"есть\",\n \"просто\",\n \"из\",\n \"для\",\n \"ну\",\n \"когда\",\n \"хорошо\",\n \"здесь\",\n \"по\",\n \"был\",\n \"знаю\",\n \"то\",\n \"только\",\n \"всё\",\n \"было\",\n \"вас\",\n \"может\",\n \"нас\",\n \"вот\",\n \"от\",\n \"быть\",\n \"кто\",\n \"будет\",\n \"почему\",\n \"вам\",\n \"их\",\n \"очень\",\n \"могу\",\n \"уже\",\n \"спасибо\",\n \"или\",\n \"нам\",\n \"еще\",\n \"там\",\n \"нужно\",\n \"сейчас\",\n \"где\",\n \"чем\",\n \"хочу\",\n \"ничего\",\n \"этого\",\n \"была\",\n \"мой\",\n \"ее\",\n \"ладно\",\n \"знаешь\",\n \"до\",\n \"этом\",\n \"потому\",\n \"теперь\",\n \"думаю\",\n \"больше\",\n \"её\",\n \"со\",\n \"раз\",\n \"ему\",\n \"надо\",\n \"время\",\n \"этот\",\n \"ли\",\n \"ещё\",\n \"пока\",\n \"даже\",\n \"привет\",\n \"сказал\",\n \"себя\",\n \"должен\",\n \"тоже\",\n \"хочешь\",\n \"давай\",\n \"никогда\",\n \"эй\",\n \"того\",\n \"тогда\",\n \"него\",\n \"ни\",\n \"тут\",\n \"были\",\n \"конечно\",\n \"правда\",\n \"об\",\n \"моя\",\n \"пожалуйста\",\n \"тобой\",\n \"сказать\",\n \"сегодня\",\n \"один\",\n \"лучше\",\n \"можешь\",\n \"сюда\",\n \"мной\",\n \"значит\",\n \"сделать\",\n \"всегда\",\n \"дело\",\n \"можно\",\n \"ей\",\n \"должны\",\n \"порядке\",\n \"без\",\n \"день\",\n \"том\",\n \"буду\",\n \"делать\",\n \"хотел\",\n \"чего\",\n \"эти\",\n \"много\",\n \"после\",\n \"этим\",\n \"всего\",\n \"во\",\n \"твой\",\n \"ним\",\n \"лет\",\n \"боже\",\n \"них\",\n \"сэр\",\n \"ведь\",\n \"мистер\",\n \"жизнь\",\n \"потом\",\n \"ней\",\n \"такой\",\n \"который\",\n \"всех\",\n \"через\",\n \"им\",\n \"возможно\",\n \"немного\",\n \"такое\",\n \"слишком\",\n \"себе\",\n \"зачем\",\n \"должна\",\n \"моей\",\n 
\"люди\",\n \"знаете\",\n \"этой\",\n \"думаешь\",\n \"свою\",\n \"точно\",\n \"человек\",\n \"твоя\",\n \"под\",\n \"сказала\",\n \"назад\",\n \"эту\",\n \"можем\",\n \"случилось\",\n \"мама\",\n \"мог\",\n \"вместе\",\n \"отец\",\n \"сделал\",\n \"мои\",\n \"кажется\",\n \"друг\",\n \"куда\",\n \"никто\",\n \"поэтому\",\n \"эта\",\n \"которые\",\n \"два\",\n \"тот\",\n \"сколько\",\n \"понимаю\",\n \"снова\",\n \"жизни\",\n \"нравится\",\n \"людей\",\n \"помочь\",\n \"видел\",\n \"люблю\",\n \"хочет\",\n \"место\",\n \"знать\",\n \"прости\",\n \"отлично\",\n \"похоже\",\n \"парень\",\n \"домой\",\n \"всем\",\n \"деньги\",\n \"иди\",\n \"времени\",\n \"дома\",\n \"именно\",\n \"доктор\",\n \"думал\",\n \"говорил\",\n \"делаешь\",\n \"будем\",\n \"прямо\",\n \"стоит\",\n \"поговорить\",\n \"найти\",\n \"разве\",\n \"слушай\",\n \"своей\",\n \"лишь\",\n \"ага\",\n \"можете\",\n \"простите\",\n \"хотела\",\n \"сам\",\n \"тем\",\n \"будешь\",\n \"прошу\",\n \"три\",\n \"деле\",\n \"хотите\",\n \"говорить\",\n \"давайте\",\n \"совсем\",\n \"знал\",\n \"знает\",\n \"какой\",\n \"моего\",\n \"скажи\",\n \"дом\",\n \"дела\",\n \"вами\",\n \"свои\",\n \"говорит\",\n \"несколько\",\n \"должно\",\n \"про\",\n \"ваш\",\n \"происходит\",\n \"жаль\",\n \"туда\",\n \"действительно\",\n \"папа\",\n \"завтра\",\n \"оно\",\n \"черт\",\n \"одна\",\n \"перед\",\n \"наш\",\n \"уверен\",\n \"отсюда\",\n \"нужна\",\n \"самом\",\n \"тех\",\n \"нужен\",\n \"свой\",\n \"мою\",\n \"кого\",\n \"верно\",\n \"работу\",\n \"каждый\",\n \"твоей\",\n \"будут\",\n \"хватит\",\n \"понял\",\n \"нее\",\n \"уж\",\n \"имя\",\n \"против\",\n \"пор\",\n \"чём\",\n \"раньше\",\n \"говорю\",\n \"более\",\n \"надеюсь\",\n \"итак\",\n \"при\",\n \"ваша\",\n \"вообще\",\n \"пошли\",\n \"мать\",\n \"нельзя\",\n \"наверное\",\n \"нами\",\n \"твои\",\n \"могли\",\n \"дай\",\n \"ради\",\n \"всю\",\n \"ребята\",\n \"ко\",\n \"хотя\",\n \"понимаешь\",\n \"идти\",\n \"этих\",\n \"откуда\",\n \"такая\",\n \"ясно\",\n 
\"другой\",\n \"извините\",\n \"вижу\",\n \"видеть\",\n \"над\",\n \"могут\",\n \"равно\",\n \"мисс\",\n \"скоро\",\n \"будто\",\n \"зовут\",\n \"виду\",\n \"наши\",\n \"думала\",\n \"послушай\",\n \"между\",\n \"своего\",\n \"вопрос\",\n \"этому\",\n \"почти\",\n \"года\",\n \"человека\",\n \"которая\",\n \"подожди\",\n \"руки\",\n \"нормально\",\n \"такие\",\n \"возьми\",\n \"минут\",\n \"извини\",\n \"вещи\",\n \"могла\",\n \"смотри\",\n \"хоть\",\n \"работа\",\n \"пару\",\n \"сын\",\n \"ваше\",\n \"дня\",\n \"пора\",\n \"неё\",\n \"жить\",\n \"видишь\",\n \"достаточно\",\n \"господи\",\n \"быстро\",\n \"твою\",\n \"весь\",\n \"убить\",\n \"ночь\",\n \"говоришь\",\n \"собой\",\n \"скажу\",\n \"готов\",\n \"слышал\",\n \"какая\",\n \"посмотри\",\n \"первый\",\n \"самое\",\n \"видела\",\n \"пусть\",\n \"месте\",\n \"нашли\",\n \"сказали\",\n \"плохо\",\n \"смогу\",\n \"ваши\",\n \"отца\",\n \"детей\",\n \"знаем\",\n \"рад\",\n \"прав\",\n \"никаких\",\n \"имею\",\n \"миссис\",\n \"иногда\",\n \"смерти\",\n \"своих\",\n \"пойду\",\n \"брат\",\n \"вроде\",\n \"рядом\",\n \"мир\",\n \"произошло\",\n \"которую\",\n ],\n en: [],\n};\n","/**\n * `langtell/profiles` — ready-to-use {@link LanguageProfile} data.\n *\n * This is the heavy DATA half of the library: alphabets, curated function-word\n * lists, and corpus-frequent word lists. It is deliberately kept behind its own\n * subpath, OUT of the zero-dependency core, so `import { compile } from\n * \"langtell\"` never drags the word corpora into a bundle that only needs the\n * script/letter rungs. Pass these into `compile({ candidates: [...] 
})`.\n *\n * Each profile is declarative and auditable:\n * - `alphabet` — the language's lowercased alphabet (raw; distinctiveness\n * is computed at runtime per candidate set).\n * - `marks` — orthographic marks that count as rung-1 evidence but\n * are not alphabet letters (the intra-word apostrophe).\n * - `words.function` — curated grammatical markers, hand-verified.\n * - `words.frequent` — common everyday words from a subtitle-frequency corpus.\n * - `iso6393` — ISO 639-3 code for the optional franc engine.\n *\n * Curation rule for `function`: a token may appear in exactly one candidate's\n * list ONLY if that form is genuinely used by only that language among those we\n * support. Shared forms must be in every list that uses them (set-difference\n * then cancels them) or omitted from all. When in doubt, omit: a missing marker\n * only costs recall.\n */\nimport type { LanguageCode, LanguageProfile } from \"./types.js\";\nimport { FREQUENT_GENERATED } from \"./internal/frequent.js\";\n\n/** Belarusian has no subtitle frequency data — hand-curated content words. */\nconst BE_FREQUENT: readonly string[] = [\n \"навіны\",\n \"відэа\",\n \"горад\",\n \"краіна\",\n \"дзень\",\n \"жыццё\",\n \"людзі\",\n \"праца\",\n \"беларуская\",\n \"сёння\",\n \"вядомы\",\n];\n\n/** Bulgarian has no subtitle frequency data here — hand-curated content words,\n * mirroring the BE fallback. Forms whose spelling is genuinely Bulgarian\n * (e.g. `град`/`днес` vs ru `город`/`сегодня`) so set-difference can use them. */\nconst BG_FREQUENT: readonly string[] = [\n \"новини\",\n \"видео\",\n \"град\",\n \"държава\",\n \"днес\",\n \"живот\",\n \"хора\",\n \"работа\",\n \"българия\",\n \"български\",\n \"известен\",\n \"страна\",\n];\n\nconst uk: LanguageProfile = {\n code: \"uk\",\n iso6393: \"ukr\",\n // has і ї є ґ and и; lacks ё ъ ы э\n alphabet: \"абвгґдеєжзиіїйклмнопрстуфхцчшщьюя\",\n // intra-word apostrophe (uk/be use it where ru uses ъ/nothing). 
All three\n // codepoints: U+0027 ' U+2019 ’ U+02BC ʼ.\n marks: \"'’ʼ\",\n words: {\n function: [\n \"і\",\n \"й\",\n \"що\",\n \"як\",\n \"це\",\n \"бо\",\n \"ще\",\n \"дуже\",\n \"де\",\n \"його\",\n \"її\",\n \"але\",\n \"який\",\n \"яка\",\n \"цей\",\n \"ця\",\n \"навіть\",\n \"чи\",\n \"або\",\n \"ні\",\n \"щоб\",\n \"теж\",\n \"також\",\n \"він\",\n \"вона\",\n \"вони\",\n ],\n frequent: FREQUENT_GENERATED[\"uk\"] ?? [],\n },\n};\n\nconst ru: LanguageProfile = {\n code: \"ru\",\n iso6393: \"rus\",\n // has ё ъ ы э and и; lacks і ї є ґ ў\n alphabet: \"абвгдеёжзийклмнопрстуфхцчшщъыьэюя\",\n words: {\n function: [\n \"и\",\n \"что\",\n \"как\",\n \"это\",\n \"бы\",\n \"уже\",\n \"или\",\n \"нет\",\n \"очень\",\n \"его\",\n \"её\",\n \"ее\",\n \"где\",\n \"когда\",\n \"этот\",\n \"эта\",\n \"но\",\n \"какой\",\n \"какая\",\n \"даже\",\n \"ещё\",\n \"чтобы\",\n \"потому\",\n \"тоже\",\n \"он\",\n \"она\",\n \"они\",\n ],\n frequent: FREQUENT_GENERATED[\"ru\"] ?? [],\n },\n};\n\nconst be: LanguageProfile = {\n code: \"be\",\n iso6393: \"bel\",\n // has і ў and ы ё э; lacks и щ ъ ї є ґ\n alphabet: \"абвгдеёжзійклмнопрстуўфхцчшыьэюя\",\n // intra-word apostrophe — same uk/be keep-signal; inert between uk and be.\n marks: \"'’ʼ\",\n words: {\n function: [\n \"і\",\n \"што\",\n \"гэта\",\n \"вельмі\",\n \"дзе\",\n \"ці\",\n \"таксама\",\n \"як\",\n \"але\",\n \"бо\",\n \"каб\",\n \"ён\",\n \"яна\",\n \"яны\",\n \"быў\",\n ],\n frequent: FREQUENT_GENERATED[\"be\"] ?? BE_FREQUENT,\n },\n};\n\nconst bg: LanguageProfile = {\n code: \"bg\",\n iso6393: \"bul\",\n // 30-letter Bulgarian alphabet. Has и й щ ъ ь (ъ is a full vowel); lacks ё ы э\n // і ї є ґ ў. `ъ` is shared with ru — inert between {ru, bg}; the word rungs\n // decide, exactly the mechanism `be` relies on for its shared letters.\n alphabet: \"абвгдежзийклмнопрстуфхцчшщъьюя\",\n words: {\n // Bulgarian-distinctive grammatical markers. 
Shared forms are omitted.\n function: [\n \"ще\",\n \"съм\",\n \"това\",\n \"този\",\n \"тази\",\n \"който\",\n \"която\",\n \"което\",\n \"които\",\n \"защото\",\n \"също\",\n \"тя\",\n \"ние\",\n \"вие\",\n ],\n frequent: FREQUENT_GENERATED[\"bg\"] ?? BG_FREQUENT,\n },\n};\n\nconst en: LanguageProfile = {\n code: \"en\",\n iso6393: \"eng\",\n alphabet: \"abcdefghijklmnopqrstuvwxyz\",\n words: {\n function: [\n \"the\",\n \"and\",\n \"or\",\n \"but\",\n \"of\",\n \"to\",\n \"in\",\n \"on\",\n \"at\",\n \"is\",\n \"are\",\n \"was\",\n \"were\",\n \"this\",\n \"that\",\n \"for\",\n \"with\",\n \"you\",\n \"we\",\n \"they\",\n \"he\",\n \"she\",\n \"it\",\n \"his\",\n \"her\",\n \"what\",\n \"how\",\n \"when\",\n \"where\",\n \"why\",\n \"who\",\n \"which\",\n \"a\",\n \"an\",\n \"i\",\n \"my\",\n \"your\",\n ],\n frequent: FREQUENT_GENERATED[\"en\"] ?? [],\n },\n};\n\nexport { uk, ru, be, bg, en };\n\n/** Registry of shipped profiles, keyed by BCP-47 code. */\nexport const PROFILES: Readonly<Record<LanguageCode, LanguageProfile>> = { uk, ru, be, bg, en };\n\n/** Resolve profiles for the given codes, skipping any without a shipped profile. */\nexport function getProfiles(codes: readonly LanguageCode[]): LanguageProfile[] {\n return codes.map((c) => PROFILES[c]).filter((p): p is LanguageProfile => p !== undefined);\n}\n"]}
1
+ {"version":3,"sources":["../src/internal/frequent.ts","../src/profiles.ts"],"names":[],"mappings":";AAOO,IAAM,kBAAA,GAAkE;AAAA,EAC7E,EAAA,EAAI;AAAA,IACF,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBA
AA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,kDAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,8DAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,
oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,kDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,wDAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,wDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAA
A;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,wDAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,wDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA;AAAA,GACF;AAAA,EACA,EAAA,EAAI;AAAA,IACF,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAA
A;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,8DAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,cAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,wDAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA
,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,8DAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,wDAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,8DAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gFAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,cAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,sCAAA;AAAA,IACA,cAAA;AAAA,IACA,0BAAA;AAAA,IACA,wDAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,oBAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,4CAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,wDAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,I
ACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,8DAAA;AAAA,IACA,4CAAA;AAAA,IACA,sCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,kDAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,kDAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,4CAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,0BAAA;AAAA,IACA,4CAAA;AAAA,IACA,0BAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,sCAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,0BAAA;AAAA,IACA,gCAAA;AAAA,IACA,gCAAA;AAAA,IACA,oBAAA;AAAA,IACA,wDAAA;AAAA,IACA;AAAA,GACF;AAAA,EACA,IAAI;AACN,CAAA;;;ACjxBA,IAAM,WAAA,GAAiC;AAAA,EACrC,sCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,sCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,gCAAA;AAAA,EACA,8DAAA;AAAA,EACA,gCAAA;AAAA,EACA;AACF,CAAA;AAKA,IAAM,WAAA,GAAiC;AAAA,EACrC,sCAAA;AAAA,EACA,gCAAA;AAAA,EACA,0BAAA;AAAA,EACA,4CAAA;AAAA,EACA,0BAAA;AAAA,EACA,gCAAA;AAAA,EACA,0BAAA;AAAA,EACA,sCAAA;AAAA,EACA,kDAAA;AAAA,EACA,wDAAA;AAAA,EACA,kDAAA;AAAA,EACA;AACF,CAAA;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA,EAET,QAAA,EAAU,wMAAA;AAAA;AAAA;AAAA,EAGV,KAAA,EAAO,eAAA;AAAA,EACP,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,QAAA;AAAA,MACA,QAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,sCAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAC;AAE3C;AAEA,IAAM,EAA
A,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA,EAET,QAAA,EAAU,wMAAA;AAAA,EACV,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,QAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,0BAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,0BAAA;AAAA,MACA,oBAAA;AAAA,MACA,gCAAA;AAAA,MACA,sCAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAC;AAE3C;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA,EAET,QAAA,EAAU,kMAAA;AAAA;AAAA,EAEV,KAAA,EAAO,eAAA;AAAA,EACP,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,QAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA,sCAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,4CAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,oBAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAA;AAE1C;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA;AAAA;AAAA;AAAA,EAIT,QAAA,EAAU,sLAAA;AAAA,EACV,KAAA,EAAO;AAAA;AAAA,IAEL,QAAA,EAAU;AAAA,MACR,cAAA;AAAA,MACA,oBAAA;AAAA,MACA,0BAAA;AAAA,MACA,0BAAA;AAAA,MACA,0BAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,gCAAA;AAAA,MACA,sCAAA;AAAA,MACA,0BAAA;AAAA,MACA,cAAA;AAAA,MACA,oBAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAA;AAE1C;AAEA,IAAM,EAAA,GAAsB;AAAA,EAC1B,IAAA,EAAM,IAAA;AAAA,EACN,OAAA,EAAS,KAAA;AAAA,EACT,QAAA,EAAU,4BAAA;AAAA,EACV,KAAA,EAAO;AAAA,IACL,QAAA,EAAU;AAAA,MACR,KAAA;AAAA,MACA,KAAA;AAAA,MACA,IAAA;AAAA,MACA,KAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,IAAA;AAAA,MACA,KAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,MAAA;AAAA,MACA,MAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,KAAA;AAAA,MACA,IAAA;AAAA,MACA,MAAA;AAAA,MACA,IAAA;AAAA,MACA,KAAA;AA
AA,MACA,IAAA;AAAA,MACA,KAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,KAAA;AAAA,MACA,MAAA;AAAA,MACA,OAAA;AAAA,MACA,KAAA;AAAA,MACA,KAAA;AAAA,MACA,OAAA;AAAA,MACA,GAAA;AAAA,MACA,IAAA;AAAA,MACA,GAAA;AAAA,MACA,IAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,QAAA,EAAU,kBAAA,CAAmB,IAAI,CAAA,IAAK;AAAC;AAE3C;AAKO,IAAM,WAA4D,EAAE,EAAA,EAAI,EAAA,EAAI,EAAA,EAAI,IAAI,EAAA;AAKpF,IAAM,cAAA,GAA0C,MAAA,CAAO,IAAA,CAAK,QAAQ;AAKpE,SAAS,WAAW,IAAA,EAA6B;AACtD,EAAA,OAAO,MAAA,CAAO,SAAA,CAAU,cAAA,CAAe,IAAA,CAAK,UAAU,IAAI,CAAA;AAC5D;AAGO,SAAS,YAAY,KAAA,EAAmD;AAC7E,EAAA,OAAO,KAAA,CAAM,GAAA,CAAI,CAAC,CAAA,KAAM,QAAA,CAAS,CAAC,CAAC,CAAA,CAAE,MAAA,CAAO,CAAC,CAAA,KAA4B,CAAA,KAAM,MAAS,CAAA;AAC1F","file":"profiles.js","sourcesContent":["// Generated frequency lists — the top everyday words for each language,\n// taken from an open subtitle-frequency corpus (general vocabulary; no\n// project-specific terms). Kept out of the zero-dependency core: this data\n// lives behind the `langtell/profiles` subpath only.\n//\n// Rung 2b of the classifier (internal/classify.ts) reads these. They sharpen\n// distinctive-free disambiguation (e.g. 
uk \"робота\" vs ru \"работа\").\nexport const FREQUENT_GENERATED: Readonly<Record<string, readonly string[]>> = {\n uk: [\n \"не\",\n \"що\",\n \"на\",\n \"це\",\n \"ти\",\n \"что\",\n \"так\",\n \"ты\",\n \"это\",\n \"ми\",\n \"все\",\n \"тебе\",\n \"він\",\n \"як\",\n \"за\",\n \"ви\",\n \"до\",\n \"мене\",\n \"он\",\n \"мені\",\n \"ні\",\n \"але\",\n \"мы\",\n \"мне\",\n \"да\",\n \"как\",\n \"просто\",\n \"меня\",\n \"нет\",\n \"тут\",\n \"для\",\n \"його\",\n \"вы\",\n \"но\",\n \"то\",\n \"ну\",\n \"знаю\",\n \"про\",\n \"нас\",\n \"вас\",\n \"вони\",\n \"тебя\",\n \"вона\",\n \"она\",\n \"тобі\",\n \"его\",\n \"там\",\n \"якщо\",\n \"ще\",\n \"та\",\n \"добре\",\n \"щоб\",\n \"вам\",\n \"по\",\n \"нам\",\n \"хочу\",\n \"коли\",\n \"если\",\n \"бы\",\n \"же\",\n \"вже\",\n \"було\",\n \"був\",\n \"они\",\n \"може\",\n \"чи\",\n \"чтобы\",\n \"дуже\",\n \"від\",\n \"хорошо\",\n \"чому\",\n \"буде\",\n \"хто\",\n \"есть\",\n \"всё\",\n \"давай\",\n \"тому\",\n \"де\",\n \"здесь\",\n \"треба\",\n \"от\",\n \"думаю\",\n \"когда\",\n \"из\",\n \"того\",\n \"щось\",\n \"гаразд\",\n \"был\",\n \"цього\",\n \"только\",\n \"зараз\",\n \"было\",\n \"может\",\n \"нічого\",\n \"моя\",\n \"тільки\",\n \"те\",\n \"бути\",\n \"один\",\n \"уже\",\n \"можу\",\n \"тепер\",\n \"мій\",\n \"быть\",\n \"ось\",\n \"почему\",\n \"дякую\",\n \"раз\",\n \"вот\",\n \"могу\",\n \"через\",\n \"нужно\",\n \"всі\",\n \"бо\",\n \"кто\",\n \"будет\",\n \"правда\",\n \"без\",\n \"себе\",\n \"більше\",\n \"тоді\",\n \"час\",\n \"день\",\n \"ладно\",\n \"сейчас\",\n \"можна\",\n \"була\",\n \"их\",\n \"боже\",\n \"еще\",\n \"спасибо\",\n \"или\",\n \"будь\",\n \"йому\",\n \"где\",\n \"лише\",\n \"очень\",\n \"была\",\n \"ним\",\n \"привіт\",\n \"них\",\n \"чем\",\n \"навіть\",\n \"ніколи\",\n \"знаешь\",\n \"этого\",\n \"буду\",\n \"би\",\n \"её\",\n \"зі\",\n \"ее\",\n \"мой\",\n \"ничего\",\n \"хочеш\",\n \"нього\",\n \"цей\",\n \"чого\",\n \"життя\",\n \"этом\",\n \"теж\",\n \"люди\",\n 
\"твоя\",\n \"со\",\n \"потому\",\n \"сказав\",\n \"больше\",\n \"теперь\",\n \"ещё\",\n \"людей\",\n \"або\",\n \"свою\",\n \"привет\",\n \"який\",\n \"сюди\",\n \"таке\",\n \"даже\",\n \"із\",\n \"об\",\n \"тобою\",\n \"були\",\n \"два\",\n \"краще\",\n \"пока\",\n \"ему\",\n \"завжди\",\n \"хочешь\",\n \"должен\",\n \"багато\",\n \"сьогодні\",\n \"ей\",\n \"точно\",\n \"ласка\",\n \"время\",\n \"эй\",\n \"той\",\n \"себя\",\n \"надо\",\n \"сам\",\n \"мною\",\n \"тоже\",\n \"этот\",\n \"прошу\",\n \"маю\",\n \"сказал\",\n \"мама\",\n \"ли\",\n \"зробити\",\n \"сказати\",\n \"сказала\",\n \"назад\",\n \"які\",\n \"можеш\",\n \"тобой\",\n \"трохи\",\n \"потрібно\",\n \"ці\",\n \"тогда\",\n \"порядке\",\n \"ага\",\n \"саме\",\n \"хотів\",\n \"можешь\",\n \"під\",\n \"три\",\n \"никогда\",\n \"можете\",\n \"років\",\n \"собі\",\n \"скажи\",\n \"него\",\n \"пане\",\n \"робити\",\n \"знову\",\n \"твій\",\n \"люблю\",\n \"пожалуйста\",\n \"наш\",\n \"ни\",\n \"сегодня\",\n \"мной\",\n \"потім\",\n \"ніж\",\n \"были\",\n \"гей\",\n \"дай\",\n \"перед\",\n \"завтра\",\n \"вами\",\n \"яка\",\n \"одного\",\n \"ваша\",\n \"конечно\",\n \"сделать\",\n \"лучше\",\n \"цьому\",\n \"ваш\",\n \"цю\",\n \"сказать\",\n \"пам\",\n \"ніхто\",\n \"сюда\",\n \"нами\",\n \"можливо\",\n \"після\",\n \"значит\",\n \"прямо\",\n \"скільки\",\n \"одна\",\n \"чудово\",\n \"давайте\",\n \"бачив\",\n \"такий\",\n \"кого\",\n \"хтось\",\n \"сер\",\n \"ця\",\n \"мою\",\n \"усе\",\n \"разом\",\n \"чего\",\n \"куди\",\n \"звичайно\",\n \"справді\",\n \"поки\",\n \"должны\",\n \"всегда\",\n \"делать\",\n \"дело\",\n \"гроші\",\n \"цим\",\n \"міг\",\n \"том\",\n \"хіба\",\n \"ней\",\n \"твой\",\n \"этим\",\n \"повинен\",\n \"друг\",\n \"можно\",\n \"дня\",\n \"досить\",\n \"можемо\",\n \"зробив\",\n \"много\",\n \"хотел\",\n \"над\",\n \"должна\",\n \"после\",\n \"хоче\",\n \"якби\",\n \"могли\",\n \"місце\",\n \"всего\",\n \"воно\",\n \"часу\",\n \"робиш\",\n \"хлопці\",\n \"вибачте\",\n 
\"додому\",\n \"весь\",\n \"руки\",\n \"ведь\",\n \"ясно\",\n \"возможно\",\n \"вибач\",\n \"такой\",\n \"доктор\",\n \"повинні\",\n \"эти\",\n \"говорить\",\n \"мого\",\n \"нема\",\n \"нб\",\n \"йди\",\n \"думаешь\",\n \"моей\",\n \"немного\",\n \"навіщо\",\n \"господи\",\n \"мабуть\",\n \"жизнь\",\n \"твою\",\n \"зачем\",\n \"чоловік\",\n \"ходімо\",\n \"лет\",\n \"такое\",\n \"случилось\",\n \"звідси\",\n \"слишком\",\n \"мати\",\n \"скажу\",\n \"скоро\",\n \"сталося\",\n \"прости\",\n \"нормально\",\n \"пані\",\n \"всіх\",\n \"також\",\n \"вместе\",\n \"кілька\",\n \"брат\",\n \"чорт\",\n \"сама\",\n \"во\",\n \"потом\",\n \"такого\",\n \"казав\",\n \"розумію\",\n \"батько\",\n \"отже\",\n \"сделал\",\n \"таким\",\n \"могла\",\n \"тим\",\n \"швидше\",\n \"жаль\",\n \"туди\",\n \"речі\",\n \"знати\",\n \"поэтому\",\n \"мог\",\n \"хочете\",\n \"помочь\",\n \"одну\",\n \"всех\",\n \"который\",\n \"им\",\n \"ім\",\n \"думав\",\n \"можем\",\n \"под\",\n \"допомогти\",\n \"людина\",\n \"куда\",\n \"яку\",\n \"правильно\",\n \"давно\",\n \"зовсім\",\n \"при\",\n ],\n ru: [\n \"не\",\n \"что\",\n \"ты\",\n \"это\",\n \"на\",\n \"он\",\n \"мы\",\n \"как\",\n \"вы\",\n \"да\",\n \"мне\",\n \"нет\",\n \"меня\",\n \"так\",\n \"но\",\n \"его\",\n \"все\",\n \"она\",\n \"тебя\",\n \"если\",\n \"за\",\n \"бы\",\n \"тебе\",\n \"они\",\n \"чтобы\",\n \"же\",\n \"есть\",\n \"просто\",\n \"из\",\n \"для\",\n \"ну\",\n \"когда\",\n \"хорошо\",\n \"здесь\",\n \"по\",\n \"был\",\n \"знаю\",\n \"то\",\n \"только\",\n \"всё\",\n \"было\",\n \"вас\",\n \"может\",\n \"нас\",\n \"вот\",\n \"от\",\n \"быть\",\n \"кто\",\n \"будет\",\n \"почему\",\n \"вам\",\n \"их\",\n \"очень\",\n \"могу\",\n \"уже\",\n \"спасибо\",\n \"или\",\n \"нам\",\n \"еще\",\n \"там\",\n \"нужно\",\n \"сейчас\",\n \"где\",\n \"чем\",\n \"хочу\",\n \"ничего\",\n \"этого\",\n \"была\",\n \"мой\",\n \"ее\",\n \"ладно\",\n \"знаешь\",\n \"до\",\n \"этом\",\n \"потому\",\n \"теперь\",\n \"думаю\",\n \"больше\",\n 
\"её\",\n \"со\",\n \"раз\",\n \"ему\",\n \"надо\",\n \"время\",\n \"этот\",\n \"ли\",\n \"ещё\",\n \"пока\",\n \"даже\",\n \"привет\",\n \"сказал\",\n \"себя\",\n \"должен\",\n \"тоже\",\n \"хочешь\",\n \"давай\",\n \"никогда\",\n \"эй\",\n \"того\",\n \"тогда\",\n \"него\",\n \"ни\",\n \"тут\",\n \"были\",\n \"конечно\",\n \"правда\",\n \"об\",\n \"моя\",\n \"пожалуйста\",\n \"тобой\",\n \"сказать\",\n \"сегодня\",\n \"один\",\n \"лучше\",\n \"можешь\",\n \"сюда\",\n \"мной\",\n \"значит\",\n \"сделать\",\n \"всегда\",\n \"дело\",\n \"можно\",\n \"ей\",\n \"должны\",\n \"порядке\",\n \"без\",\n \"день\",\n \"том\",\n \"буду\",\n \"делать\",\n \"хотел\",\n \"чего\",\n \"эти\",\n \"много\",\n \"после\",\n \"этим\",\n \"всего\",\n \"во\",\n \"твой\",\n \"ним\",\n \"лет\",\n \"боже\",\n \"них\",\n \"сэр\",\n \"ведь\",\n \"мистер\",\n \"жизнь\",\n \"потом\",\n \"ней\",\n \"такой\",\n \"который\",\n \"всех\",\n \"через\",\n \"им\",\n \"возможно\",\n \"немного\",\n \"такое\",\n \"слишком\",\n \"себе\",\n \"зачем\",\n \"должна\",\n \"моей\",\n \"люди\",\n \"знаете\",\n \"этой\",\n \"думаешь\",\n \"свою\",\n \"точно\",\n \"человек\",\n \"твоя\",\n \"под\",\n \"сказала\",\n \"назад\",\n \"эту\",\n \"можем\",\n \"случилось\",\n \"мама\",\n \"мог\",\n \"вместе\",\n \"отец\",\n \"сделал\",\n \"мои\",\n \"кажется\",\n \"друг\",\n \"куда\",\n \"никто\",\n \"поэтому\",\n \"эта\",\n \"которые\",\n \"два\",\n \"тот\",\n \"сколько\",\n \"понимаю\",\n \"снова\",\n \"жизни\",\n \"нравится\",\n \"людей\",\n \"помочь\",\n \"видел\",\n \"люблю\",\n \"хочет\",\n \"место\",\n \"знать\",\n \"прости\",\n \"отлично\",\n \"похоже\",\n \"парень\",\n \"домой\",\n \"всем\",\n \"деньги\",\n \"иди\",\n \"времени\",\n \"дома\",\n \"именно\",\n \"доктор\",\n \"думал\",\n \"говорил\",\n \"делаешь\",\n \"будем\",\n \"прямо\",\n \"стоит\",\n \"поговорить\",\n \"найти\",\n \"разве\",\n \"слушай\",\n \"своей\",\n \"лишь\",\n \"ага\",\n \"можете\",\n \"простите\",\n \"хотела\",\n \"сам\",\n \"тем\",\n 
\"будешь\",\n \"прошу\",\n \"три\",\n \"деле\",\n \"хотите\",\n \"говорить\",\n \"давайте\",\n \"совсем\",\n \"знал\",\n \"знает\",\n \"какой\",\n \"моего\",\n \"скажи\",\n \"дом\",\n \"дела\",\n \"вами\",\n \"свои\",\n \"говорит\",\n \"несколько\",\n \"должно\",\n \"про\",\n \"ваш\",\n \"происходит\",\n \"жаль\",\n \"туда\",\n \"действительно\",\n \"папа\",\n \"завтра\",\n \"оно\",\n \"черт\",\n \"одна\",\n \"перед\",\n \"наш\",\n \"уверен\",\n \"отсюда\",\n \"нужна\",\n \"самом\",\n \"тех\",\n \"нужен\",\n \"свой\",\n \"мою\",\n \"кого\",\n \"верно\",\n \"работу\",\n \"каждый\",\n \"твоей\",\n \"будут\",\n \"хватит\",\n \"понял\",\n \"нее\",\n \"уж\",\n \"имя\",\n \"против\",\n \"пор\",\n \"чём\",\n \"раньше\",\n \"говорю\",\n \"более\",\n \"надеюсь\",\n \"итак\",\n \"при\",\n \"ваша\",\n \"вообще\",\n \"пошли\",\n \"мать\",\n \"нельзя\",\n \"наверное\",\n \"нами\",\n \"твои\",\n \"могли\",\n \"дай\",\n \"ради\",\n \"всю\",\n \"ребята\",\n \"ко\",\n \"хотя\",\n \"понимаешь\",\n \"идти\",\n \"этих\",\n \"откуда\",\n \"такая\",\n \"ясно\",\n \"другой\",\n \"извините\",\n \"вижу\",\n \"видеть\",\n \"над\",\n \"могут\",\n \"равно\",\n \"мисс\",\n \"скоро\",\n \"будто\",\n \"зовут\",\n \"виду\",\n \"наши\",\n \"думала\",\n \"послушай\",\n \"между\",\n \"своего\",\n \"вопрос\",\n \"этому\",\n \"почти\",\n \"года\",\n \"человека\",\n \"которая\",\n \"подожди\",\n \"руки\",\n \"нормально\",\n \"такие\",\n \"возьми\",\n \"минут\",\n \"извини\",\n \"вещи\",\n \"могла\",\n \"смотри\",\n \"хоть\",\n \"работа\",\n \"пару\",\n \"сын\",\n \"ваше\",\n \"дня\",\n \"пора\",\n \"неё\",\n \"жить\",\n \"видишь\",\n \"достаточно\",\n \"господи\",\n \"быстро\",\n \"твою\",\n \"весь\",\n \"убить\",\n \"ночь\",\n \"говоришь\",\n \"собой\",\n \"скажу\",\n \"готов\",\n \"слышал\",\n \"какая\",\n \"посмотри\",\n \"первый\",\n \"самое\",\n \"видела\",\n \"пусть\",\n \"месте\",\n \"нашли\",\n \"сказали\",\n \"плохо\",\n \"смогу\",\n \"ваши\",\n \"отца\",\n \"детей\",\n \"знаем\",\n \"рад\",\n 
\"прав\",\n \"никаких\",\n \"имею\",\n \"миссис\",\n \"иногда\",\n \"смерти\",\n \"своих\",\n \"пойду\",\n \"брат\",\n \"вроде\",\n \"рядом\",\n \"мир\",\n \"произошло\",\n \"которую\",\n ],\n en: [],\n};\n","/**\n * `langtell/profiles` — ready-to-use {@link LanguageProfile} data.\n *\n * This is the heavy DATA half of the library: alphabets, curated function-word\n * lists, and corpus-frequent word lists. It is deliberately kept behind its own\n * subpath, OUT of the zero-dependency core, so `import { compile } from\n * \"langtell\"` never drags the word corpora into a bundle that only needs the\n * script/letter rungs. Pass these into `compile({ candidates: [...] })`.\n *\n * Each profile is declarative and auditable:\n * - `alphabet` — the language's lowercased alphabet (raw; distinctiveness\n * is computed at runtime per candidate set).\n * - `marks` — orthographic marks that count as rung-1 evidence but\n * are not alphabet letters (the intra-word apostrophe).\n * - `words.function` — curated grammatical markers, hand-verified.\n * - `words.frequent` — common everyday words from a subtitle-frequency corpus.\n * - `iso6393` — ISO 639-3 code for the optional franc engine.\n *\n * Curation rule for `function`: a token may appear in exactly one candidate's\n * list ONLY if that form is genuinely used by only that language among those we\n * support. Shared forms must be in every list that uses them (set-difference\n * then cancels them) or omitted from all. When in doubt, omit: a missing marker\n * only costs recall.\n */\nimport type { LanguageCode, LanguageProfile } from \"./types.js\";\nimport { FREQUENT_GENERATED } from \"./internal/frequent.js\";\n\n/** Belarusian has no subtitle frequency data — hand-curated content words. 
*/\nconst BE_FREQUENT: readonly string[] = [\n \"навіны\",\n \"відэа\",\n \"горад\",\n \"краіна\",\n \"дзень\",\n \"жыццё\",\n \"людзі\",\n \"праца\",\n \"беларуская\",\n \"сёння\",\n \"вядомы\",\n];\n\n/** Bulgarian has no subtitle frequency data here — hand-curated content words,\n * mirroring the BE fallback. Forms whose spelling is genuinely Bulgarian\n * (e.g. `град`/`днес` vs ru `город`/`сегодня`) so set-difference can use them. */\nconst BG_FREQUENT: readonly string[] = [\n \"новини\",\n \"видео\",\n \"град\",\n \"държава\",\n \"днес\",\n \"живот\",\n \"хора\",\n \"работа\",\n \"българия\",\n \"български\",\n \"известен\",\n \"страна\",\n];\n\nconst uk: LanguageProfile = {\n code: \"uk\",\n iso6393: \"ukr\",\n // has і ї є ґ and и; lacks ё ъ ы э\n alphabet: \"абвгґдеєжзиіїйклмнопрстуфхцчшщьюя\",\n // intra-word apostrophe (uk/be use it where ru uses ъ/nothing). All three\n // codepoints: U+0027 ' U+2019 ’ U+02BC ʼ.\n marks: \"'’ʼ\",\n words: {\n function: [\n \"і\",\n \"й\",\n \"що\",\n \"як\",\n \"це\",\n \"бо\",\n \"ще\",\n \"дуже\",\n \"де\",\n \"його\",\n \"її\",\n \"але\",\n \"який\",\n \"яка\",\n \"цей\",\n \"ця\",\n \"навіть\",\n \"чи\",\n \"або\",\n \"ні\",\n \"щоб\",\n \"теж\",\n \"також\",\n \"він\",\n \"вона\",\n \"вони\",\n ],\n frequent: FREQUENT_GENERATED[\"uk\"] ?? [],\n },\n};\n\nconst ru: LanguageProfile = {\n code: \"ru\",\n iso6393: \"rus\",\n // has ё ъ ы э and и; lacks і ї є ґ ў\n alphabet: \"абвгдеёжзийклмнопрстуфхцчшщъыьэюя\",\n words: {\n function: [\n \"и\",\n \"что\",\n \"как\",\n \"это\",\n \"бы\",\n \"уже\",\n \"или\",\n \"нет\",\n \"очень\",\n \"его\",\n \"её\",\n \"ее\",\n \"где\",\n \"когда\",\n \"этот\",\n \"эта\",\n \"но\",\n \"какой\",\n \"какая\",\n \"даже\",\n \"ещё\",\n \"чтобы\",\n \"потому\",\n \"тоже\",\n \"он\",\n \"она\",\n \"они\",\n ],\n frequent: FREQUENT_GENERATED[\"ru\"] ?? 
[],\n },\n};\n\nconst be: LanguageProfile = {\n code: \"be\",\n iso6393: \"bel\",\n // has і ў and ы ё э; lacks и щ ъ ї є ґ\n alphabet: \"абвгдеёжзійклмнопрстуўфхцчшыьэюя\",\n // intra-word apostrophe — same uk/be keep-signal; inert between uk and be.\n marks: \"'’ʼ\",\n words: {\n function: [\n \"і\",\n \"што\",\n \"гэта\",\n \"вельмі\",\n \"дзе\",\n \"ці\",\n \"таксама\",\n \"як\",\n \"але\",\n \"бо\",\n \"каб\",\n \"ён\",\n \"яна\",\n \"яны\",\n \"быў\",\n ],\n frequent: FREQUENT_GENERATED[\"be\"] ?? BE_FREQUENT,\n },\n};\n\nconst bg: LanguageProfile = {\n code: \"bg\",\n iso6393: \"bul\",\n // 30-letter Bulgarian alphabet. Has и й щ ъ ь (ъ is a full vowel); lacks ё ы э\n // і ї є ґ ў. `ъ` is shared with ru — inert between {ru, bg}; the word rungs\n // decide, exactly the mechanism `be` relies on for its shared letters.\n alphabet: \"абвгдежзийклмнопрстуфхцчшщъьюя\",\n words: {\n // Bulgarian-distinctive grammatical markers. Shared forms are omitted.\n function: [\n \"ще\",\n \"съм\",\n \"това\",\n \"този\",\n \"тази\",\n \"който\",\n \"която\",\n \"което\",\n \"които\",\n \"защото\",\n \"също\",\n \"тя\",\n \"ние\",\n \"вие\",\n ],\n frequent: FREQUENT_GENERATED[\"bg\"] ?? BG_FREQUENT,\n },\n};\n\nconst en: LanguageProfile = {\n code: \"en\",\n iso6393: \"eng\",\n alphabet: \"abcdefghijklmnopqrstuvwxyz\",\n words: {\n function: [\n \"the\",\n \"and\",\n \"or\",\n \"but\",\n \"of\",\n \"to\",\n \"in\",\n \"on\",\n \"at\",\n \"is\",\n \"are\",\n \"was\",\n \"were\",\n \"this\",\n \"that\",\n \"for\",\n \"with\",\n \"you\",\n \"we\",\n \"they\",\n \"he\",\n \"she\",\n \"it\",\n \"his\",\n \"her\",\n \"what\",\n \"how\",\n \"when\",\n \"where\",\n \"why\",\n \"who\",\n \"which\",\n \"a\",\n \"an\",\n \"i\",\n \"my\",\n \"your\",\n ],\n frequent: FREQUENT_GENERATED[\"en\"] ?? [],\n },\n};\n\nexport { uk, ru, be, bg, en };\n\n/** Registry of shipped profiles, keyed by BCP-47 code. 
*/\nexport const PROFILES: Readonly<Record<LanguageCode, LanguageProfile>> = { uk, ru, be, bg, en };\n\n/** BCP-47 codes for which langtell ships a ready-made {@link LanguageProfile}.\n * Handy for narrowing a caller's roster to codes that can actually classify —\n * e.g. `codes.filter(hasProfile)`. Derived from {@link PROFILES}. */\nexport const PROFILED_CODES: readonly LanguageCode[] = Object.keys(PROFILES);\n\n/** Whether langtell ships a ready-made {@link LanguageProfile} for `code`. An\n * own-property check, so inherited names (`\"toString\"`, `\"constructor\"`) read\n * as absent. */\nexport function hasProfile(code: LanguageCode): boolean {\n return Object.prototype.hasOwnProperty.call(PROFILES, code);\n}\n\n/** Resolve profiles for the given codes, skipping any without a shipped profile. */\nexport function getProfiles(codes: readonly LanguageCode[]): LanguageProfile[] {\n return codes.map((c) => PROFILES[c]).filter((p): p is LanguageProfile => p !== undefined);\n}\n"]}
package/dist/text.d.ts CHANGED
@@ -1,48 +1,5 @@
1
- import { L as LanguageProfile, a as LanguageEvidence } from './types-D4Ux-xA6.js';
2
-
3
- /**
4
- * Per-snippet language classification by candidate-set-relative set-difference.
5
- *
6
- * A ladder of rungs; the first rung whose leader clears a lead (margin) of ≥1
7
- * wins; otherwise `"unknown"`:
8
- *
9
- * 1 alphabet — characters distinctive within the candidate set
10
- * 2a function words — curated grammatical markers (highest precision)
11
- * 2b frequent words — corpus content words
12
- * 3 franc — optional trigram backstop for the distinctive-free
13
- * residual, injected as a resolver (this module stays
14
- * franc-free and importable without franc's tables)
15
- *
16
- * "Distinctive" is ALWAYS relative to the candidate set: a signal counts for a
17
- * candidate iff it appears in that candidate's profile and in NO other
18
- * candidate's. So `і` decides {uk, ru} (only uk has it) but is inert in
19
- * {uk, be} (both have it), and the word `и` decides {uk, ru} even though the
20
- * *letter* `и` is shared. Nothing is precomputed — uniqueness is the runtime
21
- * output, never stored.
22
- *
23
- * Adapted to langtell's {@link LanguageProfile} shape: the `words` and `iso6393`
24
- * fields are optional here, so a bare `{ code, alphabet }` profile still
25
- * classifies on rung 1.
26
- */
27
-
28
- declare const FRANC_RUNG = 3;
29
- /** Which rung decided a verdict; `null` when unknown. */
30
- type Rung = 1 | "2a" | "2b" | typeof FRANC_RUNG | null;
31
- interface SnippetVerdict {
32
- /** Winning language code, or the sentinel `"unknown"`. */
33
- language: string;
34
- /** Lead of the winner over the runner-up, in the rung's own unit (distinctive
35
- * char/word count for rungs 1–2; franc score-gap for rung 3). 0 when unknown. */
36
- margin: number;
37
- /** Which rung decided; `null` when unknown. */
38
- rung: Rung;
39
- }
40
- /** Resolver for rung 3 (the optional trigram backstop), injected into
41
- * {@link classifyBySnippet} by callers that have franc available. Kept as an
42
- * injected seam — not a direct import — so this module stays franc-free and
43
- * importable without pulling franc's tables. Returns a rung-3 verdict or
44
- * `null` (abstain). */
45
- type Rung3Resolver = (text: string, scoped: readonly LanguageProfile[]) => SnippetVerdict | null;
1
+ import { L as LanguageProfile, a as LanguageEvidence } from './types-BIXrkuAr.js';
2
+ import { Rung3Resolver } from './classify.js';
46
3
 
47
4
  /**
48
5
  * Producer: candidate-relative script + lexical signals from the title text.
package/dist/text.js CHANGED
@@ -1,4 +1,4 @@
1
- export { evidenceFromText } from './chunk-3SO2WI75.js';
2
- import './chunk-RFR5I7P7.js';
1
+ export { evidenceFromText } from './chunk-PT7R2BRQ.js';
2
+ import './chunk-NCGZPEDA.js';
3
3
  //# sourceMappingURL=text.js.map
4
4
  //# sourceMappingURL=text.js.map
@@ -13,6 +13,13 @@ interface LanguageEvidence {
13
13
  source: string;
14
14
  /** The raw signal value, for debugging the verdict. */
15
15
  value: string;
16
+ /** Set to `false` on a script read whose winning script is owned by ≤1 roster
17
+ * candidate — the script alone selected the language (a lone-candidate
18
+ * default), not the distinctive-letter/word machinery, so the read carries no
19
+ * evidence that the text is *distinctively* that language. Omitted (treated as
20
+ * discriminating) when ≥2 same-script candidates were in play. Consumed by
21
+ * {@link fuse}'s `nonDiscriminatingScript` option; otherwise informational. */
22
+ discriminating?: boolean;
16
23
  }
17
24
  /** The verdict: the winning language, a confidence, and the evidence behind it. */
18
25
  interface Classification {
@@ -80,6 +87,18 @@ type EvidenceSource = SyncSource | AsyncSource;
80
87
  type HasAsync<E extends readonly EvidenceSource[]> = Extract<E[number], AsyncSource> extends never ? false : true;
81
88
  /** Weights keyed by evidence `source` id or `kind`; missing keys use defaults. */
82
89
  type Weights = Partial<Record<string, number>>;
90
+ /** How {@link fuse} resolves a *non-discriminating* script read — one whose
91
+ * winning script is owned by ≤1 roster candidate, so the script alone (not the
92
+ * distinctive-signal machinery) picked the language:
93
+ *
94
+ * - `"candidate"` (default) keeps the lone candidate, preserving today's
95
+ * behavior: a closed roster where the script is taken to imply the language.
96
+ * - `"unknown"` drops such a read from the verdict *unless* non-script evidence
97
+ * (a page tag, a `Content-Language` header) corroborates the same language —
98
+ * the conservative "name a language only on real evidence" policy. In a
99
+ * `[uk, en]` roster a Latin-only title then resolves to `unknown`, while a
100
+ * Latin title plus an explicit `en` `Content-Language` stays `en`. */
101
+ type NonDiscriminatingScript = "candidate" | "unknown";
83
102
  interface EarlyExit {
84
103
  /** Stop running further (cheaper-first) sources once confidence clears this. */
85
104
  minConfidence: number;
@@ -89,9 +108,13 @@ interface DetectorConfig<E extends readonly EvidenceSource[] = []> {
89
108
  engines?: E;
90
109
  weights?: Weights;
91
110
  earlyExit?: EarlyExit;
111
+ /** Forwarded to {@link fuse}. See {@link NonDiscriminatingScript}. Defaults to
112
+ * `"candidate"` (current behavior); opt into `"unknown"` for a roster-closed,
113
+ * evidence-only policy. */
114
+ nonDiscriminatingScript?: NonDiscriminatingScript;
92
115
  }
93
116
  /** The compiled detector. Synchronous when every source is sync; `Promise`-typed
94
117
  * the moment an async engine is registered — so callers never guess `await`. */
95
118
  type DetectFn<E extends readonly EvidenceSource[]> = HasAsync<E> extends true ? (input: DetectInput, ctx?: DetectContext) => Promise<Classification> : (input: DetectInput) => Classification;
96
119
 
97
- export type { AsyncSource as A, Classification as C, DetectorConfig as D, EvidenceSource as E, HasAsync as H, LanguageProfile as L, SyncSource as S, Weights as W, LanguageEvidence as a, DetectFn as b, DetectContext as c, DetectInput as d, EarlyExit as e, EvidenceKind as f, HeaderBag as g, LanguageCode as h, SourceInput as i };
120
+ export type { AsyncSource as A, Classification as C, DetectorConfig as D, EvidenceSource as E, HeaderBag as H, LanguageProfile as L, NonDiscriminatingScript as N, SyncSource as S, Weights as W, LanguageEvidence as a, LanguageCode as b, DetectFn as c, DetectContext as d, DetectInput as e, EarlyExit as f, EvidenceKind as g, HasAsync as h, SourceInput as i };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langtell",
3
- "version": "0.1.0",
3
+ "version": "0.3.0",
4
4
  "description": "Tell me the language — evidence-fusion language detection for short strings, with an auditable confidence trail.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -53,6 +53,10 @@
53
53
  "types": "./dist/fuse.d.ts",
54
54
  "import": "./dist/fuse.js"
55
55
  },
56
+ "./classify": {
57
+ "types": "./dist/classify.d.ts",
58
+ "import": "./dist/classify.js"
59
+ },
56
60
  "./franc": {
57
61
  "types": "./dist/franc.d.ts",
58
62
  "import": "./dist/franc.js"
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/text.ts"],"names":[],"mappings":";;;AAmBO,SAAS,gBAAA,CACd,IAAA,EACA,UAAA,EACA,KAAA,EACoB;AACpB,EAAA,IAAI,IAAA,KAAS,UAAa,IAAA,CAAK,IAAA,GAAO,MAAA,KAAW,CAAA,SAAU,EAAC;AAC5D,EAAA,IAAI,eAAe,MAAA,IAAa,UAAA,CAAW,MAAA,KAAW,CAAA,SAAU,EAAC;AAEjE,EAAA,MAAM,OAAA,GAAU,iBAAA,CAAkB,IAAA,EAAM,UAAA,EAAY,KAAK,CAAA;AACzD,EAAA,IAAI,OAAA,CAAQ,QAAA,KAAa,SAAA,EAAW,OAAO,EAAC;AAE5C,EAAA,OAAO;AAAA,IACL;AAAA,MACE,IAAA,EAAM,cAAA;AAAA,MACN,UAAU,OAAA,CAAQ,QAAA;AAAA,MAClB,UAAA,EAAY,kBAAA,CAAmB,OAAA,CAAQ,MAAA,EAAQ,QAAQ,IAAI,CAAA;AAAA,MAC3D,MAAA,EAAQ,cAAA;AAAA,MACR,OAAO,IAAA,CAAK,IAAA,EAAK,CAAE,KAAA,CAAM,GAAG,EAAE;AAAA;AAChC,GACF;AACF;AAWA,SAAS,kBAAA,CAAmB,QAAgB,IAAA,EAAoB;AAC9D,EAAA,IAAI,SAAS,CAAA,EAAG;AAEd,IAAA,OAAO,OAAA,CAAQ,GAAA,GAAM,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,GAAA,CAAI,MAAA,EAAQ,CAAC,CAAA,EAAG,CAAC,CAAA,GAAI,IAAI,CAAA;AAAA,EAC9D;AACA,EAAA,MAAM,IAAA,GAAO,IAAA,CAAK,GAAA,CAAI,MAAA,EAAQ,CAAC,CAAA;AAC/B,EAAA,OAAO,OAAA,CAAQ,MAAO,IAAA,CAAK,GAAA,CAAI,MAAM,CAAC,CAAA,GAAI,IAAK,IAAI,CAAA;AACrD;AAEA,SAAS,QAAQ,KAAA,EAAuB;AACtC,EAAA,IAAI,CAAC,MAAA,CAAO,QAAA,CAAS,KAAK,GAAG,OAAO,CAAA;AACpC,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,OAAO,KAAA;AACT","file":"chunk-3SO2WI75.js","sourcesContent":["import type { LanguageEvidence, LanguageProfile } from \"./types.js\";\nimport { classifyBySnippet, type Rung, type Rung3Resolver } from \"./internal/classify.js\";\n\n/**\n * Producer: candidate-relative script + lexical signals from the title text.\n *\n * Wraps the ported snippet classifier ({@link classifyBySnippet}): noise strip →\n * dominant-script scope → distinctive letters (rung 1) → function words (2a) →\n * frequent words (2b). The `candidates` roster makes scoring roster-relative —\n * `і` decides Ukrainian only when Russian is also a candidate. Sync and\n * zero-dependency; the optional franc rung is injected via `rung3`.\n *\n * Emits at most one `kind: \"title-script\"` evidence item. 
The classifier's\n * integer `margin` (the winner's lead over the runner-up) maps to a 0..1\n * `confidence`: a verdict at all means the dominant script and the deciding rung\n * agreed, so the floor is high; a wider lead nudges it up. With no candidates\n * (or no usable distinctive signal) it abstains — emitting nothing rather than a\n * coarse \"unknown\", since the roster decides relevance.\n */\nexport function evidenceFromText(\n text: string | undefined,\n candidates?: readonly LanguageProfile[],\n rung3?: Rung3Resolver,\n): LanguageEvidence[] {\n if (text === undefined || text.trim().length === 0) return [];\n if (candidates === undefined || candidates.length === 0) return [];\n\n const verdict = classifyBySnippet(text, candidates, rung3);\n if (verdict.language === \"unknown\") return [];\n\n return [\n {\n kind: \"title-script\",\n language: verdict.language,\n confidence: marginToConfidence(verdict.margin, verdict.rung),\n source: \"title-script\",\n value: text.trim().slice(0, 80),\n },\n ];\n}\n\n/**\n * Map the classifier's per-rung lead to a 0..1 confidence.\n *\n * Rungs 1–2 carry an integer count of distinctive items (≥1). A verdict already\n * means script + rung agreed, so the floor is high (0.6) and each extra\n * distinctive item adds up to a 0.35 bonus, saturating by a lead of 4. Rung 3\n * (franc) carries franc's own 0..1 score-gap, which is weaker evidence, so it is\n * scaled into a 0.4..0.75 band.\n */\nfunction marginToConfidence(margin: number, rung: Rung): number {\n if (rung === 3) {\n // franc score-gap is already 0..1; weaker than the distinctive rungs.\n return clamp01(0.4 + Math.min(Math.max(margin, 0), 1) * 0.35);\n }\n const lead = Math.max(margin, 1);\n return clamp01(0.6 + (Math.min(lead, 4) / 4) * 0.35);\n}\n\nfunction clamp01(value: number): number {\n if (!Number.isFinite(value)) return 0;\n if (value < 0) return 0;\n if (value > 1) return 1;\n return value;\n}\n"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/internal/classify.ts"],"names":[],"mappings":";AAyCA,IAAM,UAA0B,EAAE,QAAA,EAAU,WAAW,MAAA,EAAQ,CAAA,EAAG,MAAM,IAAA,EAAK;AAY7E,IAAM,WAAA,GAAc,sBAAA;AACpB,IAAM,QAAA,GAAW,mBAAA;AAkBjB,IAAM,cAAA,GAAoC;AAAA,EACxC,oBAAA;AAAA;AAAA,EACA,cAAA;AAAA;AAAA,EACA,2CAAA;AAAA;AAAA,EACA;AAAA;AACF,CAAA;AAIO,SAAS,WAAW,IAAA,EAAsB;AAC/C,EAAA,IAAI,GAAA,GAAM,IAAA;AACV,EAAA,KAAA,MAAW,MAAM,cAAA,EAAgB,GAAA,GAAM,GAAA,CAAI,OAAA,CAAQ,IAAI,GAAG,CAAA;AAC1D,EAAA,OAAO,GAAA;AACT;AAKA,SAAS,eAAe,IAAA,EAA2C;AACjE,EAAA,IAAI,GAAA,GAAM,CAAA;AACV,EAAA,IAAI,GAAA,GAAM,CAAA;AACV,EAAA,KAAA,MAAW,EAAA,IAAM,UAAA,CAAW,IAAI,CAAA,EAAG;AACjC,IAAA,IAAI,WAAA,CAAY,IAAA,CAAK,EAAE,CAAA,EAAG,GAAA,IAAO,CAAA;AAAA,SAAA,IACxB,QAAA,CAAS,IAAA,CAAK,EAAE,CAAA,EAAG,GAAA,IAAO,CAAA;AAAA,EACrC;AACA,EAAA,IAAI,GAAA,KAAQ,CAAA,IAAK,GAAA,KAAQ,CAAA,EAAG,OAAO,IAAA;AACnC,EAAA,OAAO,GAAA,IAAO,MAAM,UAAA,GAAa,OAAA;AACnC;AAGA,SAAS,cAAc,OAAA,EAAuD;AAC5E,EAAA,KAAA,MAAW,EAAA,IAAM,QAAQ,QAAA,EAAU;AACjC,IAAA,IAAI,WAAA,CAAY,IAAA,CAAK,EAAE,CAAA,EAAG,OAAO,UAAA;AACjC,IAAA,IAAI,QAAA,CAAS,IAAA,CAAK,EAAE,CAAA,EAAG,OAAO,OAAA;AAAA,EAChC;AACA,EAAA,OAAO,IAAA;AACT;AAIO,SAAS,eAAA,CACd,MACA,UAAA,EACmB;AACnB,EAAA,MAAM,MAAA,GAAS,eAAe,IAAI,CAAA;AAClC,EAAA,IAAI,MAAA,KAAW,IAAA,EAAM,OAAO,EAAC;AAI7B,EAAA,MAAM,IAAA,uBAAW,GAAA,EAAY;AAC7B,EAAA,MAAM,SAA4B,EAAC;AACnC,EAAA,KAAA,MAAW,KAAK,UAAA,EAAY;AAC1B,IAAA,IAAI,aAAA,CAAc,CAAC,CAAA,KAAM,MAAA,IAAU,KAAK,GAAA,CAAI,CAAA,CAAE,IAAI,CAAA,EAAG;AACrD,IAAA,IAAA,CAAK,GAAA,CAAI,EAAE,IAAI,CAAA;AACf,IAAA,MAAA,CAAO,KAAK,CAAC,CAAA;AAAA,EACf;AACA,EAAA,OAAO,MAAA;AACT;AA8BA,SAAS,SAAS,IAAA,EAAwB;AACxC,EAAA,OAAO,KAAK,WAAA,EAAY,CAAE,KAAA,CAAM,UAAU,KAAK,EAAC;AAClD;AAOA,SAAS,KAAA,CAAM,OAAyB,UAAA,EAAwD;AAC9F,EAAA,MAAM,MAAA,GAAS,IAAI,GAAA,CAAoB,UAAA,CAAW,GAAA,CAAI,CAAC,CAAA,KAAM,CAAC,CAAA,CAAE,IAAA,EAAM,CAAC,CAAC,CAAC,CAAA;AACzE,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,IAAA,IAAI,KAAA,GAAuB,IAAA;AAC3B,IAAA,IAAI,MAAA,GAAS,CAAA;AACb,IAAA,KAAA,MAAW,KAAK,UAAA,EAAY;AAC1B,MAAA,IAAI,CAAA,CAAE,GAAA,CAAI,GAAA,CAAI,IAAI,CAAA,EAAG;AACnB,QA
AA,MAAA,IAAU,CAAA;AACV,QAAA,IAAI,SAAS,CAAA,EAAG;AACd,UAAA,KAAA,GAAQ,IAAA;AACR,UAAA;AAAA,QACF;AACA,QAAA,KAAA,GAAQ,CAAA,CAAE,IAAA;AAAA,MACZ;AAAA,IACF;AACA,IAAA,IAAI,KAAA,KAAU,IAAA,EAAM,MAAA,CAAO,GAAA,CAAI,KAAA,EAAA,CAAQ,OAAO,GAAA,CAAI,KAAK,CAAA,IAAK,CAAA,IAAK,CAAC,CAAA;AAAA,EACpE;AACA,EAAA,OAAO,MAAA;AACT;AAGA,SAAS,OAAO,MAAA,EAAsE;AACpF,EAAA,IAAI,GAAA,GAAM,EAAA;AACV,EAAA,IAAI,MAAA,GAAS,EAAA;AACb,EAAA,IAAI,IAAA,GAAsB,IAAA;AAC1B,EAAA,KAAA,MAAW,CAAC,CAAA,EAAG,KAAK,CAAA,IAAK,MAAA,EAAQ;AAC/B,IAAA,IAAI,QAAQ,GAAA,EAAK;AACf,MAAA,MAAA,GAAS,GAAA;AACT,MAAA,GAAA,GAAM,KAAA;AACN,MAAA,IAAA,GAAO,CAAA;AAAA,IACT,CAAA,MAAA,IAAW,QAAQ,MAAA,EAAQ;AACzB,MAAA,MAAA,GAAS,KAAA;AAAA,IACX;AAAA,EACF;AACA,EAAA,IAAI,IAAA,KAAS,IAAA,IAAQ,GAAA,GAAM,CAAA,EAAG,OAAO,IAAA;AACrC,EAAA,MAAM,MAAA,GAAS,GAAA,GAAM,IAAA,CAAK,GAAA,CAAI,QAAQ,CAAC,CAAA;AACvC,EAAA,OAAO,MAAA,IAAU,CAAA,GAAI,EAAE,IAAA,EAAM,QAAO,GAAI,IAAA;AAC1C;AAEA,SAAS,aAAA,CACP,YACA,IAAA,EACc;AACd,EAAA,OAAO,UAAA,CAAW,GAAA,CAAI,CAAC,CAAA,MAAO,EAAE,IAAA,EAAM,CAAA,CAAE,IAAA,EAAM,GAAA,EAAK,IAAI,GAAA,CAAI,IAAA,CAAK,CAAC,CAAC,GAAE,CAAE,CAAA;AACxE;AAIA,SAAS,UAAA,CAAW,MAAc,MAAA,EAA2D;AAC3F,EAAA,MAAM,CAAA,GAAI,MAAA;AAAA,IACR,KAAA;AAAA,MACE,KAAK,WAAA,EAAY;AAAA,MACjB,aAAA,CAAc,QAAQ,CAAC,CAAA,KAAM,EAAE,QAAA,IAAY,CAAA,CAAE,SAAS,EAAA,CAAG;AAAA;AAC3D,GACF;AACA,EAAA,OAAO,CAAA,GAAI,EAAE,QAAA,EAAU,CAAA,CAAE,IAAA,EAAM,QAAQ,CAAA,CAAE,MAAA,EAAQ,IAAA,EAAM,CAAA,EAAE,GAAI,IAAA;AAC/D;AAGA,SAAS,QAAA,CACP,MAAA,EACA,MAAA,EACA,IAAA,EACA,IAAA,EACuB;AACvB,EAAA,MAAM,CAAA,GAAI,MAAA;AAAA,IACR,KAAA;AAAA,MACE,MAAA;AAAA,MACA,aAAA,CAAc,QAAQ,CAAC,CAAA,KAAM,EAAE,KAAA,GAAQ,IAAI,CAAA,IAAK,EAAE;AAAA;AACpD,GACF;AACA,EAAA,OAAO,CAAA,GAAI,EAAE,QAAA,EAAU,CAAA,CAAE,MAAM,MAAA,EAAQ,CAAA,CAAE,MAAA,EAAQ,IAAA,EAAK,GAAI,IAAA;AAC5D;AAOO,SAAS,iBAAA,CACd,IAAA,EACA,UAAA,EACA,KAAA,EACgB;AAChB,EAAA,IAAI,CAAC,IAAA,IAAQ,UAAA,CAAW,MAAA,KAAW,GAAG,OAAO,OAAA;AAI7C,EAAA,MAAM,OAAA,GAAU,WAAW,IAAI,CAAA;AAG/B,EAAA,MAAM,MAAA,GAAS,eAAA,CAAgB,OAAA,EAAS,UAAU,CAAA;AAClD,EAAA,IAAI,MAAA,CAAO,MAAA,KAAW,CAAA,EAAG,OAAO,OAAA;AAEhC,EAAA
,MAAM,QAAA,GAAW,UAAA,CAAW,OAAA,EAAS,MAAM,CAAA;AAC3C,EAAA,IAAI,UAAU,OAAO,QAAA;AAErB,EAAA,MAAM,MAAA,GAAS,SAAS,OAAO,CAAA;AAC/B,EAAA,IAAI,MAAA,CAAO,MAAA,KAAW,CAAA,EAAG,OAAO,OAAA;AAEhC,EAAA,OACE,QAAA,CAAS,MAAA,EAAQ,MAAA,EAAQ,UAAA,EAAY,IAAI,CAAA,IACzC,QAAA,CAAS,MAAA,EAAQ,MAAA,EAAQ,YAAY,IAAI,CAAA,IACzC,KAAA,GAAQ,OAAA,EAAS,MAAM,CAAA,IACvB,OAAA;AAEJ","file":"chunk-RFR5I7P7.js","sourcesContent":["/**\n * Per-snippet language classification by candidate-set-relative set-difference.\n *\n * A ladder of rungs; the first rung whose leader clears a lead (margin) of ≥1\n * wins; otherwise `\"unknown\"`:\n *\n * 1 alphabet — characters distinctive within the candidate set\n * 2a function words — curated grammatical markers (highest precision)\n * 2b frequent words — corpus content words\n * 3 franc — optional trigram backstop for the distinctive-free\n * residual, injected as a resolver (this module stays\n * franc-free and importable without franc's tables)\n *\n * \"Distinctive\" is ALWAYS relative to the candidate set: a signal counts for a\n * candidate iff it appears in that candidate's profile and in NO other\n * candidate's. So `і` decides {uk, ru} (only uk has it) but is inert in\n * {uk, be} (both have it), and the word `и` decides {uk, ru} even though the\n * *letter* `и` is shared. Nothing is precomputed — uniqueness is the runtime\n * output, never stored.\n *\n * Adapted to langtell's {@link LanguageProfile} shape: the `words` and `iso6393`\n * fields are optional here, so a bare `{ code, alphabet }` profile still\n * classifies on rung 1.\n */\nimport type { LanguageProfile } from \"../types.js\";\n\nexport const FRANC_RUNG = 3;\n\n/** Which rung decided a verdict; `null` when unknown. */\nexport type Rung = 1 | \"2a\" | \"2b\" | typeof FRANC_RUNG | null;\n\nexport interface SnippetVerdict {\n /** Winning language code, or the sentinel `\"unknown\"`. 
*/\n language: string;\n /** Lead of the winner over the runner-up, in the rung's own unit (distinctive\n * char/word count for rungs 1–2; franc score-gap for rung 3). 0 when unknown. */\n margin: number;\n /** Which rung decided; `null` when unknown. */\n rung: Rung;\n}\n\nconst UNKNOWN: SnippetVerdict = { language: \"unknown\", margin: 0, rung: null };\n\n/** Resolver for rung 3 (the optional trigram backstop), injected into\n * {@link classifyBySnippet} by callers that have franc available. Kept as an\n * injected seam — not a direct import — so this module stays franc-free and\n * importable without pulling franc's tables. Returns a rung-3 verdict or\n * `null` (abstain). */\nexport type Rung3Resolver = (\n text: string,\n scoped: readonly LanguageProfile[],\n) => SnippetVerdict | null;\n\nconst CYRILLIC_RE = /\\p{Script=Cyrillic}/u;\nconst LATIN_RE = /\\p{Script=Latin}/u;\n\n/** Below this length, trigrams are too noisy to justify a rung-3 verdict. */\nexport const RUNG3_MIN_LENGTH = 24;\n\n/**\n * Trailing/inline Latin \"noise\" tokens — URLs, @handles, #hashtags — that a\n * Cyrillic title commonly carries (a headline followed by a link or a social\n * handle). These are almost always Latin even on Cyrillic-language content, so\n * left in they can flip {@link dominantScript} to Latin and let genuinely\n * Cyrillic content scope to the wrong roster. Stripped before the script vote\n * AND before the rung tallies so the URL's letters never contribute either.\n *\n * Kept as separate simple patterns (applied in order — schemes/www before bare\n * domains) rather than one big alternation, so each stays readable. 
ASCII-only\n * `[a-z0-9-]` in the domain pattern means a Cyrillic word is never mistaken for\n * a domain.\n */\nconst NOISE_PATTERNS: readonly RegExp[] = [\n /\\bhttps?:\\/\\/\\S+/gi, // full URLs\n /\\bwww\\.\\S+/gi, // www.… without a scheme\n /\\b[a-z0-9-]+(?:\\.[a-z0-9-]+)+(?:\\/\\S*)?/gi, // bare domains (example.com/path)\n /[@#][\\p{L}\\p{N}_]+/gu, // @handles and #hashtags\n];\n\n/** Drop URLs / @handles / #hashtags so trailing Latin noise can't outvote the\n * prose's script or pollute the per-rung tallies. */\nexport function stripNoise(text: string): string {\n let out = text;\n for (const re of NOISE_PATTERNS) out = out.replace(re, \" \");\n return out;\n}\n\n/** The script most of `text` is written in, or `null` if it carries no letters.\n * Noise (URLs/handles/hashtags) is stripped first so a single trailing link\n * can't flip a multi-word Cyrillic title's vote to Latin. */\nfunction dominantScript(text: string): \"cyrillic\" | \"latin\" | null {\n let cyr = 0;\n let lat = 0;\n for (const ch of stripNoise(text)) {\n if (CYRILLIC_RE.test(ch)) cyr += 1;\n else if (LATIN_RE.test(ch)) lat += 1;\n }\n if (cyr === 0 && lat === 0) return null;\n return cyr >= lat ? \"cyrillic\" : \"latin\";\n}\n\n/** The script of a profile's alphabet. */\nfunction profileScript(profile: LanguageProfile): \"cyrillic\" | \"latin\" | null {\n for (const ch of profile.alphabet) {\n if (CYRILLIC_RE.test(ch)) return \"cyrillic\";\n if (LATIN_RE.test(ch)) return \"latin\";\n }\n return null;\n}\n\n/** Candidates whose script matches the text's dominant script (others can't tip\n * the verdict). Empty when the text carries no letters. */\nexport function scopeCandidates(\n text: string,\n candidates: readonly LanguageProfile[],\n): LanguageProfile[] {\n const script = dominantScript(text);\n if (script === null) return [];\n // Keep one profile per code. 
A language listed twice would otherwise make its\n // own distinctive chars/words read as \"owned by ≥2 candidates\" in `tally`,\n // cancelling them out and collapsing the verdict to \"unknown\".\n const seen = new Set<string>();\n const scoped: LanguageProfile[] = [];\n for (const c of candidates) {\n if (profileScript(c) !== script || seen.has(c.code)) continue;\n seen.add(c.code);\n scoped.push(c);\n }\n return scoped;\n}\n\n/**\n * Per-language set of characters globally unique within `profiles` — present in\n * exactly one profile's alphabet. Relative to the given profile set: the unique\n * set shrinks as languages are added (a second Latin language un-uniques a–z).\n */\nexport function distinctiveChars(profiles: readonly LanguageProfile[]): Map<string, Set<string>> {\n const owners = new Map<string, string[]>();\n for (const p of profiles) {\n for (const ch of new Set(p.alphabet)) {\n const list = owners.get(ch);\n if (list) list.push(p.code);\n else owners.set(ch, [p.code]);\n }\n }\n const result = new Map<string, Set<string>>(profiles.map((p) => [p.code, new Set()]));\n for (const [ch, codes] of owners) {\n const [only] = codes;\n if (codes.length === 1 && only !== undefined) result.get(only)?.add(ch);\n }\n return result;\n}\n\ninterface Membership {\n code: string;\n set: ReadonlySet<string>;\n}\n\n/** Lowercased Unicode letter-run tokens. Keeps single-char tokens (`і`, `и`). */\nfunction tokenize(text: string): string[] {\n return text.toLowerCase().match(/\\p{L}+/gu) ?? [];\n}\n\n/**\n * Tally how many items (characters or word tokens) are distinctive to each\n * candidate — present in exactly one candidate's set. 
Items owned by zero or by\n * ≥2 candidates contribute nothing.\n */\nfunction tally(items: Iterable<string>, membership: readonly Membership[]): Map<string, number> {\n const scores = new Map<string, number>(membership.map((m) => [m.code, 0]));\n for (const item of items) {\n let owner: string | null = null;\n let owners = 0;\n for (const m of membership) {\n if (m.set.has(item)) {\n owners += 1;\n if (owners > 1) {\n owner = null;\n break;\n }\n owner = m.code;\n }\n }\n if (owner !== null) scores.set(owner, (scores.get(owner) ?? 0) + 1);\n }\n return scores;\n}\n\n/** The leading candidate and its lead over the runner-up, or `null` if <1. */\nfunction leader(scores: Map<string, number>): { code: string; margin: number } | null {\n let max = -1;\n let second = -1;\n let code: string | null = null;\n for (const [c, score] of scores) {\n if (score > max) {\n second = max;\n max = score;\n code = c;\n } else if (score > second) {\n second = score;\n }\n }\n if (code === null || max < 1) return null;\n const margin = max - Math.max(second, 0);\n return margin >= 1 ? { code, margin } : null;\n}\n\nfunction membershipFor(\n candidates: readonly LanguageProfile[],\n pick: (p: LanguageProfile) => Iterable<string>,\n): Membership[] {\n return candidates.map((c) => ({ code: c.code, set: new Set(pick(c)) }));\n}\n\n/** Rung 1 — characters (alphabet + orthographic {@link LanguageProfile.marks})\n * distinctive within the scoped candidate set. */\nfunction letterRung(text: string, scoped: readonly LanguageProfile[]): SnippetVerdict | null {\n const r = leader(\n tally(\n text.toLowerCase(),\n membershipFor(scoped, (p) => p.alphabet + (p.marks ?? \"\")),\n ),\n );\n return r ? { language: r.code, margin: r.margin, rung: 1 } : null;\n}\n\n/** Rung 2 — distinctive words from the given tier (2a function, 2b frequent). 
*/\nfunction wordRung(\n tokens: readonly string[],\n scoped: readonly LanguageProfile[],\n tier: \"function\" | \"frequent\",\n rung: \"2a\" | \"2b\",\n): SnippetVerdict | null {\n const r = leader(\n tally(\n tokens,\n membershipFor(scoped, (p) => p.words?.[tier] ?? []),\n ),\n );\n return r ? { language: r.code, margin: r.margin, rung } : null;\n}\n\n/**\n * Classify `text` among `candidates`. Synchronous and allocation-light. Returns\n * `\"unknown\"` on empty evidence, on a tie inside the candidate set, or when\n * nothing is distinctive.\n */\nexport function classifyBySnippet(\n text: string,\n candidates: readonly LanguageProfile[],\n rung3?: Rung3Resolver,\n): SnippetVerdict {\n if (!text || candidates.length === 0) return UNKNOWN;\n\n // Drop URLs / @handles / #hashtags once, up front: trailing Latin noise must\n // not flip the dominant-script vote nor pollute the per-rung tallies.\n const cleaned = stripNoise(text);\n\n // Restrict to candidates in the text's dominant script.\n const scoped = scopeCandidates(cleaned, candidates);\n if (scoped.length === 0) return UNKNOWN;\n\n const byLetter = letterRung(cleaned, scoped);\n if (byLetter) return byLetter;\n\n const tokens = tokenize(cleaned);\n if (tokens.length === 0) return UNKNOWN;\n\n return (\n wordRung(tokens, scoped, \"function\", \"2a\") ??\n wordRung(tokens, scoped, \"frequent\", \"2b\") ??\n rung3?.(cleaned, scoped) ??\n UNKNOWN\n );\n}\n"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/fuse.ts"],"names":[],"mappings":";;;AAcA,IAAM,mBAAA,GAA8C;AAAA,EAClD,cAAA,EAAgB,CAAA;AAAA,EAChB,iBAAA,EAAmB,CAAA;AAAA,EACnB,WAAA,EAAa,CAAA;AAAA,EACb,cAAA,EAAgB,GAAA;AAAA,EAChB,KAAA,EAAO,GAAA;AAAA,EACP,uBAAA,EAAyB,GAAA;AAAA,EACzB,uBAAA,EAAyB,IAAA;AAAA,EACzB,gBAAA,EAAkB,IAAA;AAAA,EAClB,WAAA,EAAa;AACf,CAAA;AAKA,IAAM,+BAAe,IAAI,GAAA,CAAY,CAAC,cAAA,EAAgB,OAAA,EAAS,WAAW,CAAC,CAAA;AAI3E,IAAM,uBAAA,GAA0B,GAAA;AAEhC,IAAM,iBAAA,GAAoB,IAAA;AAC1B,IAAM,UAAA,GAAa,IAAA;AAcZ,SAAS,IAAA,CACd,QAAA,EACA,OAAA,GAAuB,EAAC,EACR;AAChB,EAAA,MAAM,OAAA,GAAU,OAAA,CAAQ,OAAA,IAAW,EAAC;AACpC,EAAA,MAAM,UAAA,GAAa,iBAAA,CAAkB,QAAA,EAAU,OAAA,CAAQ,UAAU,CAAA;AAEjE,EAAA,MAAM,MAAA,uBAAa,GAAA,EAAoB;AACvC,EAAA,KAAA,MAAW,QAAQ,UAAA,EAAY;AAC7B,IAAA,IAAI,IAAA,CAAK,aAAa,SAAA,EAAW;AACjC,IAAA,MAAM,MAAA,GACJ,OAAA,CAAQ,IAAA,CAAK,MAAM,CAAA,IAAK,OAAA,CAAQ,IAAA,CAAK,IAAI,CAAA,IAAK,mBAAA,CAAoB,IAAA,CAAK,IAAI,CAAA,IAAK,GAAA;AAClF,IAAA,MAAA,CAAO,GAAA,CAAI,IAAA,CAAK,QAAA,EAAA,CAAW,MAAA,CAAO,GAAA,CAAI,IAAA,CAAK,QAAQ,CAAA,IAAK,CAAA,IAAK,OAAA,CAAQ,IAAA,CAAK,UAAU,IAAI,MAAM,CAAA;AAAA,EAChG;AAGA,EAAA,MAAM,MAAA,GAAS,wBAAwB,UAAU,CAAA;AAEjD,EAAA,MAAM,EAAE,IAAA,EAAM,SAAA,EAAW,aAAY,GAAI,MAAA,CAAO,QAAQ,MAAM,CAAA;AAE9D,EAAA,IAAI,SAAS,IAAA,IAAQ,SAAA,GAAY,iBAAA,IAAqB,SAAA,GAAY,cAAc,UAAA,EAAY;AAG1F,IAAA,IAAI,MAAA,KAAW,IAAA,IAAQ,MAAA,CAAO,GAAA,CAAI,MAAM,CAAA,EAAG;AACzC,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,GAAA,CAAI,MAAM,CAAA,IAAK,CAAA;AACpC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,UAAA,EAAY,OAAA,CAAQ,KAAA,IAAS,KAAA,GAAQ,IAAA,CAAK,CAAA;AAAA,QAC1C,QAAA,EAAU,CAAC,GAAG,UAAU;AAAA,OAC1B;AAAA,IACF;AACA,IAAA,OAAO,EAAE,QAAA,EAAU,SAAA,EAAW,UAAA,EAAY,OAAA,CAAQ,SAAS,CAAA,EAAG,QAAA,EAAU,CAAC,GAAG,UAAU,CAAA,EAAE;AAAA,EAC1F;AAEA,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,IAAA;AAAA,IACV,UAAA,EAAY,OAAA,CAAQ,SAAA,IAAa,SAAA,GAAY,cAAc,IAAA,CAAK,CAAA;AAAA,IAChE,QAAA,EAAU,CAAC,GAAG,UAAU;AAAA,GAC1B;AACF;AAYA,SAAS,iBAAA,CACP,UACA,WAAA,EACoB;AACpB,EAAA,OAAO,QAAA,CAAS,GAAA,CAAI,CAAC,IAAA,KAAS;AAC5B,IAAA,IAAI,IAAA,CAAK,QAAA,KAAa,SAAA,EAAW,OAAO,IAAA
;AACxC,IAAA,MAAM,UAAA,GAAa,cAAA,CAAe,IAAA,CAAK,QAAQ,KAAK,IAAA,CAAK,QAAA;AACzD,IAAA,IAAI,UAAA,KAAe,IAAA,CAAK,QAAA,EAAU,OAAO,IAAA;AACzC,IAAA,OAAO,EAAE,GAAG,IAAA,EAAM,QAAA,EAAU,UAAA,EAAW;AAAA,EACzC,CAAC,CAAA;AACH;AAKA,SAAS,wBAAwB,QAAA,EAAsD;AACrF,EAAA,IAAI,IAAA,GAAsB,IAAA;AAC1B,EAAA,IAAI,cAAA,GAAiB,CAAA;AACrB,EAAA,KAAA,MAAW,QAAQ,QAAA,EAAU;AAC3B,IAAA,IAAI,IAAA,CAAK,aAAa,SAAA,IAAa,CAAC,aAAa,GAAA,CAAI,IAAA,CAAK,IAAI,CAAA,EAAG;AACjE,IAAA,MAAM,CAAA,GAAI,OAAA,CAAQ,IAAA,CAAK,UAAU,CAAA;AACjC,IAAA,IAAI,IAAI,uBAAA,EAAyB;AACjC,IAAA,IAAI,IAAI,cAAA,EAAgB;AACtB,MAAA,cAAA,GAAiB,CAAA;AACjB,MAAA,IAAA,GAAO,IAAA,CAAK,QAAA;AAAA,IACd,CAAA,MAAA,IAAW,CAAA,KAAM,cAAA,IAAkB,IAAA,CAAK,aAAa,IAAA,EAAM;AAEzD,MAAA,IAAA,GAAO,IAAA;AAAA,IACT;AAAA,EACF;AACA,EAAA,OAAO,IAAA;AACT;AAQA,SAAS,MAAA,CACP,QACA,MAAA,EACiE;AACjE,EAAA,IAAI,IAAA,GAAsB,IAAA;AAC1B,EAAA,IAAI,SAAA,GAAY,CAAA;AAChB,EAAA,IAAI,WAAA,GAAc,CAAA;AAClB,EAAA,MAAM,cAAc,MAAA,KAAW,IAAA,GAAQ,OAAO,GAAA,CAAI,MAAM,KAAK,CAAA,GAAK,CAAA;AAElE,EAAA,KAAA,MAAW,CAAC,QAAA,EAAU,GAAG,CAAA,IAAK,MAAA,EAAQ;AAEpC,IAAA,MAAM,KAAA,GAAQ,WAAW,IAAA,IAAQ,QAAA,KAAa,SAAS,IAAA,CAAK,GAAA,CAAI,GAAA,EAAK,WAAW,CAAA,GAAI,GAAA;AACpF,IAAA,IAAI,QAAQ,SAAA,EAAW;AACrB,MAAA,WAAA,GAAc,SAAA;AACd,MAAA,SAAA,GAAY,KAAA;AACZ,MAAA,IAAA,GAAO,QAAA;AAAA,IACT,CAAA,MAAA,IAAW,QAAQ,WAAA,EAAa;AAC9B,MAAA,WAAA,GAAc,KAAA;AAAA,IAChB;AAAA,EACF;AAEA,EAAA,IAAI,WAAW,IAAA,IAAQ,IAAA,KAAS,UAAU,SAAA,KAAc,WAAA,IAAe,cAAc,CAAA,EAAG;AACtF,IAAA,WAAA,GAAc,SAAA;AACd,IAAA,IAAA,GAAO,MAAA;AACP,IAAA,SAAA,GAAY,WAAA;AAAA,EACd;AACA,EAAA,OAAO,EAAE,IAAA,EAAM,SAAA,EAAW,WAAA,EAAY;AACxC;AAEA,SAAS,QAAQ,KAAA,EAAuB;AACtC,EAAA,IAAI,CAAC,MAAA,CAAO,QAAA,CAAS,KAAK,GAAG,OAAO,CAAA;AACpC,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,IAAI,KAAA,GAAQ,GAAG,OAAO,CAAA;AACtB,EAAA,OAAO,KAAA;AACT","file":"chunk-TYSRYQN7.js","sourcesContent":["import type { Classification, LanguageEvidence, LanguageProfile, Weights } from \"./types.js\";\nimport { normalizeBCP47 } from \"./internal/bcp47.js\";\n\nexport interface FuseOptions {\n weights?: Weights;\n /** 
The candidate roster. When present, incoming evidence tags are normalized\n * into it (`uk-UA` → `uk`, `ua` → `uk`) so context signals (page/header\n * locale) land on the same code the text rungs use. */\n candidates?: readonly LanguageProfile[];\n}\n\n/** Default per-kind weights. Clear lexical signal (script, explicit locale)\n * outweighs contextual signal (page tags, headers). Callers override per\n * `source` id or `kind` via {@link FuseOptions.weights}. */\nconst DEFAULT_KIND_WEIGHT: Record<string, number> = {\n \"title-script\": 1,\n \"explicit-locale\": 1,\n \"chrome-ai\": 1,\n \"source-prior\": 0.7,\n franc: 0.7,\n \"http-content-language\": 0.6,\n \"meta-content-language\": 0.55,\n \"meta-og-locale\": 0.55,\n \"html-lang\": 0.5,\n};\n\n/** Evidence kinds that constitute *clear script evidence* — a verdict the text\n * classifier or an on-device model reached by actually reading the string. The\n * guard below forbids weaker page/header *context* from flipping these. */\nconst SCRIPT_KINDS = new Set<string>([\"title-script\", \"franc\", \"chrome-ai\"]);\n\n/** A script verdict this confident is treated as settled — context may add to it\n * but must not flip the winner to a different language. */\nconst SCRIPT_CONFIDENCE_FLOOR = 0.6;\n\nconst MIN_WINNING_SCORE = 0.35;\nconst MIN_MARGIN = 0.12;\n\n/**\n * Combine evidence into a single weighted verdict with an audit trail.\n *\n * Three steps:\n * 1. Normalize each item's language tag into the candidate roster (BCP-47:\n * `uk-UA`/`ua` → `uk`) so text, page, and header signals agree on a code.\n * 2. Weighted argmax over languages (caller weights override per `source`/`kind`).\n * 3. 
Apply the guard **context must never override clear script evidence**: when\n * the text classifier (or an on-device model) confidently read one language,\n * weaker page/header context for a *different* language cannot win — a\n * Ukrainian page chrome does not make a Latin/English title Ukrainian.\n */\nexport function fuse(\n evidence: readonly LanguageEvidence[],\n options: FuseOptions = {},\n): Classification {\n const weights = options.weights ?? {};\n const normalized = normalizeEvidence(evidence, options.candidates);\n\n const scores = new Map<string, number>();\n for (const item of normalized) {\n if (item.language === \"unknown\") continue;\n const weight =\n weights[item.source] ?? weights[item.kind] ?? DEFAULT_KIND_WEIGHT[item.kind] ?? 0.5;\n scores.set(item.language, (scores.get(item.language) ?? 0) + clamp01(item.confidence) * weight);\n }\n\n // The context-vs-script guard: a confident script read pins the winner.\n const pinned = confidentScriptLanguage(normalized);\n\n const { best, bestScore, secondScore } = argmax(scores, pinned);\n\n if (best === null || bestScore < MIN_WINNING_SCORE || bestScore - secondScore < MIN_MARGIN) {\n // A pinned script language still wins even on a thin margin — clear script\n // evidence is never demoted to \"unknown\" by competing context.\n if (pinned !== null && scores.has(pinned)) {\n const score = scores.get(pinned) ?? 0;\n return {\n language: pinned,\n confidence: clamp01(score / (score + 0.15)),\n evidence: [...normalized],\n };\n }\n return { language: \"unknown\", confidence: clamp01(bestScore), evidence: [...normalized] };\n }\n\n return {\n language: best,\n confidence: clamp01(bestScore / (bestScore + secondScore + 0.15)),\n evidence: [...normalized],\n };\n}\n\n/** Normalize each item's tag into the roster's code space (BCP-47-aware). Items\n * already `\"unknown\"` pass through untouched. 
Tags are BCP-47-normalized\n * (`en-US` → `en`, `ua` → `uk`) so text, page, and header signals land on the\n * same code. The normalized code is kept even when it falls outside the roster —\n * argmax simply won't favor an out-of-roster context tag, but it stays in the\n * audit trail.\n *\n * The roster is accepted (and reserved) so a future revision can fold roster\n * aliasing in without a signature change; today BCP-47 normalization alone\n * reconciles the codes the producers emit. */\nfunction normalizeEvidence(\n evidence: readonly LanguageEvidence[],\n _candidates: readonly LanguageProfile[] | undefined,\n): LanguageEvidence[] {\n return evidence.map((item) => {\n if (item.language === \"unknown\") return item;\n const normalized = normalizeBCP47(item.language) ?? item.language;\n if (normalized === item.language) return item;\n return { ...item, language: normalized };\n });\n}\n\n/** The language of a *clear script* read confident enough to pin the verdict, or\n * `null` when none qualifies. When two script reads disagree, the higher-\n * confidence one pins (a tie leaves nothing pinned — argmax decides normally). */\nfunction confidentScriptLanguage(evidence: readonly LanguageEvidence[]): string | null {\n let best: string | null = null;\n let bestConfidence = 0;\n for (const item of evidence) {\n if (item.language === \"unknown\" || !SCRIPT_KINDS.has(item.kind)) continue;\n const c = clamp01(item.confidence);\n if (c < SCRIPT_CONFIDENCE_FLOOR) continue;\n if (c > bestConfidence) {\n bestConfidence = c;\n best = item.language;\n } else if (c === bestConfidence && item.language !== best) {\n // Two equally-confident script reads for different languages — ambiguous.\n best = null;\n }\n }\n return best;\n}\n\n/**\n * Weighted argmax. When `pinned` is set (a confident script language), any\n * *other* language's score may only come from context kinds; that score is\n * capped so it can never exceed the pinned language. 
This enforces the guard\n * without discarding the context from the audit trail.\n */\nfunction argmax(\n scores: Map<string, number>,\n pinned: string | null,\n): { best: string | null; bestScore: number; secondScore: number } {\n let best: string | null = null;\n let bestScore = 0;\n let secondScore = 0;\n const pinnedScore = pinned !== null ? (scores.get(pinned) ?? 0) : 0;\n\n for (const [language, raw] of scores) {\n // Guard: a non-pinned language cannot out-score the pinned one.\n const score = pinned !== null && language !== pinned ? Math.min(raw, pinnedScore) : raw;\n if (score > bestScore) {\n secondScore = bestScore;\n bestScore = score;\n best = language;\n } else if (score > secondScore) {\n secondScore = score;\n }\n }\n // On a pinned tie (pinned capped equal to a context language), prefer pinned.\n if (pinned !== null && best !== pinned && bestScore === pinnedScore && pinnedScore > 0) {\n secondScore = bestScore;\n best = pinned;\n bestScore = pinnedScore;\n }\n return { best, bestScore, secondScore };\n}\n\nfunction clamp01(value: number): number {\n if (!Number.isFinite(value)) return 0;\n if (value < 0) return 0;\n if (value > 1) return 1;\n return value;\n}\n"]}