langtell 0.0.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ import { b as LanguageCode, L as LanguageProfile } from './types-BIXrkuAr.js';
2
+
3
+ /**
4
+ * `langtell/profiles` — ready-to-use {@link LanguageProfile} data.
5
+ *
6
+ * This is the heavy DATA half of the library: alphabets, curated function-word
7
+ * lists, and corpus-frequent word lists. It is deliberately kept behind its own
8
+ * subpath, OUT of the zero-dependency core, so `import { compile } from
9
+ * "langtell"` never drags the word corpora into a bundle that only needs the
10
+ * script/letter rungs. Pass these into `compile({ candidates: [...] })`.
11
+ *
12
+ * Each profile is declarative and auditable:
13
+ * - `alphabet` — the language's lowercased alphabet (raw; distinctiveness
14
+ * is computed at runtime per candidate set).
15
+ * - `marks` — orthographic marks that count as rung-1 evidence but
16
+ * are not alphabet letters (the intra-word apostrophe).
17
+ * - `words.function` — curated grammatical markers, hand-verified.
18
+ * - `words.frequent` — common everyday words from a subtitle-frequency corpus.
19
+ * - `iso6393` — ISO 639-3 code for the optional franc engine.
20
+ *
21
+ * Curation rule for `function`: a token may appear in exactly one candidate's
22
+ * list ONLY if that form is genuinely used by only that language among those we
23
+ * support. Shared forms must be in every list that uses them (set-difference
24
+ * then cancels them) or omitted from all. When in doubt, omit: a missing marker
25
+ * only costs recall.
26
+ */
27
+
28
+ declare const uk: LanguageProfile; // presumably the Ukrainian profile (BCP-47 `uk`)
29
+ declare const ru: LanguageProfile; // presumably the Russian profile (BCP-47 `ru`)
30
+ declare const be: LanguageProfile; // presumably the Belarusian profile (BCP-47 `be`)
31
+ declare const bg: LanguageProfile; // presumably the Bulgarian profile (BCP-47 `bg`)
32
+ declare const en: LanguageProfile; // presumably the English profile (BCP-47 `en`)
33
+
34
+ /** Registry of shipped profiles, keyed by BCP-47 code. NOTE(review): typed as a total `Record` over `LanguageCode`, yet `hasProfile`/`getProfiles` imply some codes have no shipped profile — confirm that indexing by an arbitrary `LanguageCode` cannot yield `undefined`. */
35
+ declare const PROFILES: Readonly<Record<LanguageCode, LanguageProfile>>;
36
+ /** BCP-47 codes for which langtell ships a ready-made {@link LanguageProfile}.
37
+ * Handy for narrowing a caller's roster to codes that can actually classify —
38
+ * e.g. `codes.filter(hasProfile)`. Derived from {@link PROFILES}. Typed `readonly`: copy the array before sorting or otherwise mutating it. */
39
+ declare const PROFILED_CODES: readonly LanguageCode[];
40
+ /** Whether langtell ships a ready-made {@link LanguageProfile} for `code`. An
41
+ * own-property check, so inherited names (`"toString"`, `"constructor"`) read
42
+ * as absent. Declared as returning plain `boolean` (not a type predicate), so a `true` result does not narrow the type of `code`. */
43
+ declare function hasProfile(code: LanguageCode): boolean;
44
+ /** Resolve profiles for the given codes, skipping any without a shipped profile — so the returned array may be shorter than `codes` (and empty if none match). */
45
+ declare function getProfiles(codes: readonly LanguageCode[]): LanguageProfile[];
46
+
47
+ export { PROFILED_CODES, PROFILES, be, bg, en, getProfiles, hasProfile, ru, uk };