@ingglish/ipa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3881 @@
1
+ // src/index.ts
2
+ import { registerFormat } from "@ingglish/phonemes";
3
+
4
+ // src/to-ipa.ts
5
+ import { findOnsetStart, getStress, isVowel, stripStress } from "@ingglish/phonemes";
6
+
7
+ // src/ipa-maps.ts
8
// ARPABET vowel (without stress digit) -> IPA symbol.
// Entries are in alphabetical key order, so monophthongs and diphthongs are
// interleaved; each diphthong is marked individually below.
var IPA_VOWEL_MAP = {
  AA: "\u0251", // ɑ — father, hot, bother
  AE: "\xE6", // æ — cat, bat, had
  AH: "\u028C", // ʌ — but, cup, son (stressed; unstressed AH is emitted as ə by the converters)
  AO: "\u0254", // ɔ — thought, caught, law
  AW: "a\u028A", // aʊ (diphthong) — cow, how, out
  AY: "a\u026A", // aɪ (diphthong) — my, eye, time
  EH: "\u025B", // ɛ — bed, red, said
  ER: "\u025D", // ɝ — bird, her, nurse
  EY: "e\u026A", // eɪ (diphthong) — say, day, make
  IH: "\u026A", // ɪ — bit, sit, gym
  IY: "i", // i — bee, see, machine
  OW: "o\u028A", // oʊ (diphthong) — go, show, coat
  OY: "\u0254\u026A", // ɔɪ (diphthong) — boy, toy, coin
  UH: "\u028A", // ʊ — book, put, could
  UW: "u" // u — too, blue, food
};
42
// ARPABET consonant -> IPA symbol.
// Entries are in alphabetical key order; the manner of articulation is noted
// per entry because the classes are interleaved.
var IPA_CONSONANT_MAP = {
  B: "b", // stop
  CH: "t\u0283", // tʃ, affricate — chat, batch
  D: "d", // stop
  DH: "\xF0", // ð, fricative — the, this (voiced dental)
  F: "f", // fricative
  G: "\u0261", // stop — note: IPA uses ɡ (U+0261), not ASCII g
  HH: "h", // fricative
  JH: "d\u0292", // dʒ, affricate — just, edge
  K: "k", // stop
  L: "l", // liquid
  M: "m", // nasal
  N: "n", // nasal
  NG: "\u014B", // ŋ, nasal — sing, thing
  P: "p", // stop
  R: "\u0279", // ɹ, liquid — alveolar approximant
  S: "s", // fricative
  SH: "\u0283", // ʃ, fricative — ship
  T: "t", // stop
  TH: "\u03B8", // θ, fricative — think (voiceless dental)
  V: "v", // fricative
  W: "w", // glide (semivowel)
  Y: "j", // glide (semivowel)
  Z: "z", // fricative
  ZH: "\u0292" // ʒ, fricative — measure, beige
};
83
// Complete forward table: every ARPABET base symbol (vowels and consonants,
// without stress digits) -> its IPA spelling. The two source maps have
// disjoint keys, so spread order does not matter here.
var ARPABET_TO_IPA_MAP = {
  ...IPA_VOWEL_MAP,
  ...IPA_CONSONANT_MAP
};
87
// Alternative IPA spellings of sounds that the forward map writes differently.
// These are merged into the reverse (IPA -> ARPABET) table so that common
// notational variants are still recognised.
var IPA_VARIANT_MAP = {
  a: "AE", // plain /a/ — maps to "a" (cat) for recognizable foreign word output
  e: "EH", // plain /e/ — mid front vowel, like "bed"
  \u0259: "AH0", // ə schwa (unstressed) — forward map uses ʌ→AH for the stressed variant
  \u025A: "ER", // ɚ r-colored schwa variant — forward map uses ɝ→ER
  g: "G", // ASCII g — forward map uses ɡ (U+0261)
  \u026B: "L", // ɫ dark l
  o: "OW", // some IPA uses plain o for the goat vowel
  r: "R" // common variant — forward map uses ɹ (alveolar approximant)
  // Note: IPA /y/ is the close front rounded vowel (French "tu", German "über").
  // It is handled in IPA_APPROXIMATION_MAP as y→UW. The consonant /j/ (palatal
  // approximant) is already covered by reversing the forward map (j→Y).
};
108
+ var IPA_APPROXIMATION_MAP = {
109
+ // --- Open vowels ---
110
+ \u00E4: "AA",
111
+ // /ä/ open central — IPA diacritic variant ≈ "father"
112
+ // --- Open vowel diphthongs (Finnish, etc.) ---
113
+ // English uses /aɪ/ and /aʊ/ (with plain 'a'), but Finnish/other languages
114
+ // use /ɑi/ and /ɑu/ (with open back 'ɑ'). Treat as diphthongs, not two vowels.
115
+ \u00E6i: "AY",
116
+ // /æi/ — Finnish "päivä" ≈ "my" diphthong (uses near-open front æ)
117
+ \u0250: "AH",
118
+ // /ɐ/ near-open central — Portuguese unstressed "a" ≈ "but"
119
+ \u0251i: "AY",
120
+ // /ɑi/ — Finnish "taivas" ≈ "my" diphthong
121
+ \u0251u: "AW",
122
+ // /ɑu/ — Finnish "sauna" ≈ "cow" diphthong
123
+ \u0251\u028A: "AW",
124
+ // /ɑʊ/ — Chinese 好 /xɑʊ/ ≈ "cow" diphthong (uses IPA ʊ not plain u)
125
+ // Nasal vowels (ɑ̃, ɛ̃, ɔ̃, etc.) are handled in from-ipa.ts by
126
+ // converting vowel+combining-tilde to vowel+"n" before map lookup.
127
+ \u0252: "AO",
128
+ // /ɒ/ open back rounded — British "lot" ≈ "thought"
129
+ // --- Implosives and other stops ---
130
+ \u0253: "B",
131
+ // /ɓ/ voiced bilabial implosive ≈ B
132
+ \u00E7: "SH",
133
+ // /ç/ voiceless palatal fricative — "ich" (German) ≈ "sh"
134
+ // --- Alveolo-palatal (Mandarin, Japanese, Polish) ---
135
+ \u0255: "SH",
136
+ // /ɕ/ voiceless alveolo-palatal fricative — Mandarin "xi" ≈ "sh"
137
+ d\u0291: "JH",
138
+ // /dʑ/ voiced alveolo-palatal affricate — Japanese "ji" ≈ "j"
139
+ // --- Retroflex (Hindi, Mandarin) ---
140
+ \u0256: "D",
141
+ // /ɖ/ voiced retroflex stop — Hindi ≈ D
142
+ \u0257: "D",
143
+ // /ɗ/ voiced alveolar implosive ≈ D
144
+ // --- Diphthong sequences (non-English vowel pairs) ---
145
+ // These use standard IPA vowels that don't appear in English diphthongs
146
+ // (English uses ɪ/ʊ as second element, not i/u/o).
147
+ ei: "EY",
148
+ // /ei/ — Finnish "ei", various ≈ "say" diphthong
149
+ \u0258: "AH0",
150
+ // /ɘ/ close-mid central ≈ schwa
151
+ \u025C: "ER",
152
+ // /ɜ/ open-mid central — non-rhotic "bird" ≈ "er"
153
+ \u025E: "ER",
154
+ // /ɞ/ open-mid central rounded ≈ "er"
155
+ \u0264: "AH",
156
+ // /ɤ/ close-mid back unrounded — Korean "ㅓ" ≈ "but"
157
+ \u0262: "G",
158
+ // /ɢ/ voiced uvular stop ≈ G
159
+ \u0263: "G",
160
+ // /ɣ/ voiced velar fricative — Spanish "lago" ≈ G
161
+ // --- Pharyngeal (Arabic) ---
162
+ \u0127: "HH",
163
+ // /ħ/ voiceless pharyngeal fricative — Arabic "ha" ≈ H
164
+ \u0266: "HH",
165
+ // /ɦ/ voiced glottal fricative — Korean 합 ≈ H
166
+ // --- Central/back vowels not in English ---
167
+ \u0268: "IH",
168
+ // /ɨ/ close central — Russian "ы" ≈ "bit"
169
+ \u026C: "L",
170
+ // /ɬ/ voiceless lateral fricative — Welsh "ll" ≈ L
171
+ \u026D: "L",
172
+ // /ɭ/ retroflex lateral ≈ L
173
+ // --- Laterals ---
174
+ \u028E: "L Y",
175
+ // /ʎ/ palatal lateral — Italian "figlio", Spanish "ll" ≈ LY
176
+ \u0271: "M",
177
+ // /ɱ/ labiodental nasal ≈ M
178
+ \u0274: "N",
179
+ // /ɴ/ uvular nasal — Japanese moraic ん ≈ "n" (not "ng")
180
+ // --- Nasals ---
181
+ \u0272: "N Y",
182
+ // /ɲ/ palatal nasal — Spanish "ñ", Italian "gn" ≈ NY
183
+ // tɕ and dʑ are handled as two-char sequences below
184
+ \u0273: "N",
185
+ // /ɳ/ retroflex nasal ≈ N
186
+ \u00F8: "UH",
187
+ // /ø/ close-mid front rounded — "peu" (French), "schön" (German) ≈ "u"
188
+ \u0153: "AH1",
189
+ // /œ/ open-mid front rounded — "peur" (French) ≈ "uh"
190
+ \u0153y: "OY",
191
+ // /œy/ — Dutch "huis" diphthong ≈ "boy"
192
+ oi: "OY",
193
+ // /oi/ — Finnish "koira", Portuguese "coisa" ≈ "boy" diphthong
194
+ ou: "OW",
195
+ // /ou/ — Finnish "koulu" ≈ "go" diphthong
196
+ o\u026F: "OW",
197
+ // /oɯ/ — Japanese long /oː/ (dict convention: 東京 /toɯkjoɯ/) ≈ "oh"
198
+ \u0278: "F",
199
+ // /ɸ/ voiceless bilabial fricative — Japanese "fu" ≈ F
200
+ q: "K",
201
+ // /q/ voiceless uvular stop — Arabic "Quran" ≈ K
202
+ \u0280: "R",
203
+ // /ʀ/ uvular trill — some German dialects ≈ English R
204
+ // --- Taps, trills, and approximants (Spanish, Italian, Mandarin, etc.) ---
205
+ \u027B: "R",
206
+ // /ɻ/ voiced retroflex approximant — Mandarin "er" (二) ≈ R
207
+ \u027D: "D",
208
+ // /ɽ/ retroflex flap — Hindi ≈ D
209
+ \u027E: "R",
210
+ // /ɾ/ alveolar tap — Spanish "pero" ≈ R (also like "butter" flap)
211
+ // --- Uvular consonants (French, German, Arabic) ---
212
+ \u0281: "R",
213
+ // /ʁ/ voiced uvular fricative — French/German R ≈ English R
214
+ \u0282: "SH",
215
+ // /ʂ/ voiceless retroflex fricative — Mandarin "shi" ≈ "sh"
216
+ // --- Affricates (two-char sequences matched by the converter) ---
217
+ t\u0255: "CH",
218
+ // /tɕ/ voiceless alveolo-palatal affricate — Mandarin "ji", Korean "ㅈ" ≈ "ch"
219
+ \u0288: "T",
220
+ // /ʈ/ voiceless retroflex stop — Hindi ≈ T
221
+ \u0288\u0282: "CH",
222
+ // /ʈʂ/ voiceless retroflex affricate — Mandarin "zhi" ≈ "ch"
223
+ // --- Labial-velar ---
224
+ \u0265: "W",
225
+ // /ɥ/ labial-palatal approximant — French "lui" ≈ W
226
+ \u026F: "UH",
227
+ // /ɯ/ close back unrounded — Japanese "u", Turkish "ı" ≈ "book" (shorter than "oo")
228
+ \u0270: "W",
229
+ // /ɰ/ voiced velar approximant — Japanese 川 /kaɰᵝa/ ≈ W
230
+ \u028B: "V",
231
+ // /ʋ/ labiodental approximant — Hindi, Dutch ≈ V
232
+ // --- Velar/palatal fricatives (German, Mandarin, etc.) ---
233
+ x: "HH",
234
+ // /x/ voiceless velar fricative — default H (override to K for German)
235
+ // --- Front rounded vowels (French, German, Turkish, etc.) ---
236
+ y: "UW",
237
+ // /y/ close front rounded — "tu" (French) ≈ "too"
238
+ \u028F: "UH",
239
+ // /ʏ/ near-close front rounded — "Glück" (German) ≈ "book"
240
+ \u0290: "ZH",
241
+ // /ʐ/ voiced retroflex fricative — Mandarin "ri" ≈ "zh"
242
+ \u0291: "ZH",
243
+ // /ʑ/ voiced alveolo-palatal fricative ≈ "zh"
244
+ // --- Glottal ---
245
+ \u0294: "",
246
+ // /ʔ/ glottal stop — often silent in approximation
247
+ // Note: length mark ː and combining diacritics are stripped in from-ipa.ts
248
+ // before lookup. Nasal vowels (◌̃) are converted to vowel+n there.
249
+ // --- Dental fricatives already in English ---
250
+ // θ and ð are in the main map
251
+ \u0295: "AH",
252
+ // /ʕ/ voiced pharyngeal fricative — Arabic "ain" ≈ "uh" (rough)
253
+ // --- Bilabial fricatives (Spanish) ---
254
+ \u03B2: "V",
255
+ // /β/ voiced bilabial fricative — Spanish "b" between vowels ≈ V
256
+ \u03C7: "HH"
257
+ // /χ/ voiceless uvular fricative — default H (override to K for German)
258
+ };
259
// Reverse table: IPA string (one or two characters) -> ARPABET symbol(s).
// Built from three layers; on a key collision the LATER spread wins, so the
// precedence is: IPA_VARIANT_MAP > reversed ARPABET_TO_IPA_MAP >
// IPA_APPROXIMATION_MAP. Values may hold several space-separated phonemes
// (e.g. "N Y") or an empty string for sounds that are dropped.
var IPA_TO_ARPABET_MAP = {
  ...IPA_APPROXIMATION_MAP,
  ...Object.fromEntries(Object.entries(ARPABET_TO_IPA_MAP).map(([arpabet, ipa]) => [ipa, arpabet])),
  ...IPA_VARIANT_MAP
};
264
// Per-language replacements for individual IPA -> ARPABET entries, keyed by
// language code. NOTE(review): each value has the shape of the `overrides`
// argument accepted by ipaToArpabet; presumably that is where they are passed
// — confirm against the callers.
var IPA_LANGUAGE_OVERRIDES = {
  // German: /x/ and /χ/ are the "ach-Laut" — English speakers say "bahk" for Bach
  de: { x: "K", \u03C7: "K" },
  // Dutch: /ɣ/ is a breathy G (closer to H than hard G) — "goed" ≈ "hood" not "good"
  nl: { \u0263: "HH" }
};
270
+
271
+ // src/to-ipa.ts
272
// U+2060 WORD JOINER: wrapped around stress marks so they are not separated
// from the neighbouring characters by a line break.
var WORD_JOINER = "\u2060";

// Stress digit -> text inserted in front of the stressed material.
var STRESS_MARKERS = {
  0: "",
  1: `${WORD_JOINER}\u02C8${WORD_JOINER}`, // ˈ primary stress
  2: `${WORD_JOINER}\u02CC${WORD_JOINER}` // ˌ secondary stress
};

/**
 * Convert a single ARPABET phoneme (optionally carrying a stress digit) to IPA.
 *
 * @param phoneme ARPABET symbol such as "AH0", "IY1" or "K".
 * @returns The IPA spelling, prefixed with a stress marker for stress 1 or 2.
 *   Unstressed "AH" becomes schwa (ə). A symbol missing from the table is
 *   returned lower-cased and otherwise unchanged.
 */
function arpabetPhonemeToIPA(phoneme) {
  const bare = stripStress(phoneme);
  const level = getStress(phoneme);
  const mapped = ARPABET_TO_IPA_MAP[bare];

  if (mapped === undefined) {
    return phoneme.toLowerCase();
  }
  if (level === 0 && bare === "AH") {
    return "\u0259";
  }
  return level === 1 || level === 2 ? STRESS_MARKERS[level] + mapped : mapped;
}
293
/**
 * Convert a sequence of ARPABET phonemes into one IPA transcription.
 *
 * Unlike the per-phoneme converter, the stress mark is not placed directly in
 * front of the stressed vowel: it is moved back over the preceding consonant
 * cluster to the position reported by findOnsetStart, i.e. to the start of the
 * stressed syllable.
 *
 * @param arpabet Indexable sequence of ARPABET symbols (e.g. ["HH", "AH0", "L", "OW1"]).
 * @returns The transcription wrapped in slashes, e.g. "/.../".
 */
function arpabetToIPA(arpabet) {
  const segments = [];
  const markers = [];

  // Index (in `segments`) where the stress mark for the vowel at `vowelPos`
  // belongs. Falls back to `fallback` when no consonant precedes the vowel.
  const markerIndexFor = (vowelPos, fallback) => {
    const cluster = [];
    let scan = vowelPos - 1;
    while (scan >= 0 && !isVowel(arpabet[scan])) {
      cluster.unshift(stripStress(arpabet[scan]));
      scan -= 1;
    }
    return cluster.length > 0 ? scan + 1 + findOnsetStart(cluster) : fallback;
  };

  for (let pos = 0; pos < arpabet.length; pos += 1) {
    const symbol = arpabet[pos];
    const base = stripStress(symbol);
    const stress = getStress(symbol);
    const mapped = ARPABET_TO_IPA_MAP[base];

    if (mapped === undefined) {
      // Unknown symbol: pass it through lower-cased.
      segments.push(symbol.toLowerCase());
    } else if (base === "AH" && stress === 0) {
      // Unstressed AH is written as schwa.
      segments.push("\u0259");
    } else {
      if (stress === 1 || stress === 2) {
        markers.push({
          index: markerIndexFor(pos, segments.length),
          marker: STRESS_MARKERS[stress]
        });
      }
      segments.push(mapped);
    }
  }

  // Interleave markers and segments; the sort is stable, so markers that share
  // an index keep their original order.
  const ordered = [...markers].sort((a, b) => a.index - b.index);
  let text = "";
  let next = 0;
  for (let slot = 0; slot <= segments.length; slot += 1) {
    while (next < ordered.length && ordered[next].index === slot) {
      text += ordered[next].marker;
      next += 1;
    }
    if (slot < segments.length) {
      text += segments[slot];
    }
  }
  return `/${text}/`;
}
343
/**
 * Same as arpabetToIPA, but without the surrounding slashes.
 *
 * @param arpabet Sequence of ARPABET symbols.
 * @returns The bare IPA transcription.
 */
function arpabetToIPARaw(arpabet) {
  // arpabetToIPA always wraps its output in "/.../"; drop the first and last character.
  return arpabetToIPA(arpabet).slice(1, -1);
}
347
+
348
+ // src/foreign.ts
349
+ import {
350
+ applyCasePattern,
351
+ detectCasePattern,
352
+ normalizeApostrophes,
353
+ stripDiacritics
354
+ } from "@ingglish/normalize";
355
+ import {
356
+ arpabetToFormat,
357
+ arpabetToIngglish,
358
+ getFormatPreservesCase,
359
+ getStress as getStress2,
360
+ isVowel as isVowel2
361
+ } from "@ingglish/phonemes";
362
+
363
+ // src/from-ipa.ts
364
+ import { stripStress as stripStress2 } from "@ingglish/phonemes";
365
// ARPABET base symbols that are vowels (only these can carry a stress digit).
var ARPABET_VOWELS = new Set(Object.keys(IPA_VOWEL_MAP));
// Vowel followed by a combining tilde (nasal vowel) in NFD form.
var NASAL_VOWEL_RE = /([aeiouɑɛɔəɐɒæøœʌɝɚɘɜɞɤʏʊɪɨɯy])\u0303/g;
// Modifier letters, length marks, tone letters and left-over tildes that are dropped.
var MODIFIER_RE = /[\u02B0\u02D0\u02D1\u02E5-\u02E9\u0303\u1D5D]/g;
// Combining marks to strip after NFD decomposition. Two marks are kept because
// the lookup table has precomposed keys that need them back after NFC:
//   - U+0327 cedilla (ç)
//   - U+0308 diaeresis directly after "a" (ä, mapped to AA)
// Without the second exception "ä" decomposed to "a" + U+0308, lost the mark,
// and was always looked up as plain "a".
var COMBINING_NON_CEDILLA_RE = /(?<!a)\u0308|(?![\u0327\u0308])\p{Mn}/gu;
// Cache of IPA_TO_ARPABET_MAP merged with a given overrides object, keyed by
// the overrides object's identity.
var mergedMapCache = /* @__PURE__ */ new Map();

/**
 * Convert an IPA string into a list of ARPABET phonemes.
 *
 * Steps: normalise to NFD, rewrite nasal vowels as vowel + "n", drop modifier
 * and combining marks, recompose to NFC, then scan left to right preferring
 * two-character table matches over one-character matches. Characters with no
 * table entry are skipped. A stress mark (ˈ or ˌ) is attached as digit 1 or 2
 * to the next vowel that is emitted. Immediately repeated phonemes are
 * collapsed into one.
 *
 * Sounds mapped to the empty string (e.g. the glottal stop) produce no output
 * phoneme, but they still separate their neighbours, so identical phonemes on
 * either side of them are not collapsed.
 *
 * @param ipa IPA transcription.
 * @param overrides Optional object of IPA -> ARPABET entries that take
 *   precedence over the built-in table.
 * @returns Array of non-empty ARPABET phoneme strings.
 */
function ipaToArpabet(ipa, overrides) {
  const clean = ipa
    .normalize("NFD")
    .replaceAll(NASAL_VOWEL_RE, "$1n")
    .replaceAll(MODIFIER_RE, "")
    .replaceAll(COMBINING_NON_CEDILLA_RE, "")
    .normalize("NFC");

  let map = IPA_TO_ARPABET_MAP;
  if (overrides) {
    let merged = mergedMapCache.get(overrides);
    if (!merged) {
      merged = { ...IPA_TO_ARPABET_MAP, ...overrides };
      mergedMapCache.set(overrides, merged);
    }
    map = merged;
  }

  // Raw output; may contain "" entries, which act as separators and are
  // removed in the final pass.
  const result = [];
  let pendingStress = null;

  const push = (phoneme) => {
    const base = stripStress2(phoneme);
    if (ARPABET_VOWELS.has(base) && pendingStress !== null) {
      result.push(base + String(pendingStress));
      pendingStress = null;
    } else {
      result.push(phoneme);
    }
  };
  // A table value may hold several space-separated phonemes (e.g. "N Y").
  const pushAll = (mapped) => {
    for (const phoneme of mapped.split(" ")) {
      push(phoneme);
    }
  };

  let i = 0;
  while (i < clean.length) {
    const ch = clean[i];
    if (ch === "\u02C8" || ch === "\u02CC") {
      pendingStress = ch === "\u02C8" ? 1 : 2;
      i++;
      continue;
    }
    // Longest match first: try the two-character sequence before the single character.
    const twoCharArpabet = i + 1 < clean.length ? map[clean.slice(i, i + 2)] : undefined;
    if (twoCharArpabet !== undefined) {
      pushAll(twoCharArpabet);
      i += 2;
      continue;
    }
    const oneCharArpabet = map[ch];
    if (oneCharArpabet !== undefined) {
      pushAll(oneCharArpabet);
    }
    i++;
  }

  // Collapse immediate repeats and drop the empty separator entries.
  const deduped = [];
  let previous;
  for (const phoneme of result) {
    if (phoneme !== "" && phoneme !== previous) {
      deduped.push(phoneme);
    }
    previous = phoneme;
  }
  return deduped;
}
444
/**
 * Convert IPA to ARPABET with every stress digit removed.
 *
 * @param ipa IPA transcription.
 * @returns Array of stress-free ARPABET symbols, or null when the conversion
 *   produced no phonemes at all.
 */
function ipaToArpabetClean(ipa) {
  const bare = [];
  for (const phoneme of ipaToArpabet(ipa)) {
    bare.push(stripStress2(phoneme));
  }
  if (bare.length === 0) {
    return null;
  }
  return bare;
}
448
+
449
+ // src/g2p.ts
450
// Rule-based grapheme-to-phoneme converters, keyed by language code
// (eo = Esperanto, fi = Finnish, ma = Malay, sw = Swahili). Each converter
// takes a word and returns an IPA string with a stress mark. The functions are
// declared further down in this file; function declarations are hoisted, so
// referencing them here is safe.
var G2P_CONVERTERS = {
  eo: esperantoG2P,
  fi: finnishG2P,
  ma: malayG2P,
  sw: swahiliG2P
};
456
// IPA characters counted as syllable nuclei by the stress-placement helpers.
var IPA_VOWELS = new Set("aeiou\u0251\xE6\xF8y\u025B\u0254\u0259");

/**
 * Put a primary-stress mark (ˈ) at the very start of the transcription.
 *
 * @param ipa IPA string without stress marks.
 * @returns The marked string; an empty input is returned unchanged.
 */
function addFirstSyllableStress(ipa) {
  if (!ipa) {
    return ipa;
  }
  return `\u02C8${ipa}`;
}

/**
 * Put a primary-stress mark (ˈ) on the second-to-last vowel.
 *
 * The mark is placed in front of the consonants that directly precede that
 * vowel; the backwards walk stops at the previous vowel or length mark (ː).
 * With zero or one vowel the mark goes at the start of the string.
 *
 * @param ipa IPA string without stress marks.
 * @returns The marked string; an empty input is returned unchanged.
 */
function addPenultimateStress(ipa) {
  const vowelAt = [];
  let position = 0;
  for (const ch of ipa) {
    if (IPA_VOWELS.has(ch)) {
      vowelAt.push(position);
    }
    position += 1;
  }

  if (vowelAt.length <= 1) {
    return ipa ? "\u02C8" + ipa : ipa;
  }

  let onset = vowelAt[vowelAt.length - 2];
  while (onset > 0) {
    const before = ipa[onset - 1];
    if (IPA_VOWELS.has(before) || before === "\u02D0") {
      break;
    }
    onset -= 1;
  }
  return `${ipa.slice(0, onset)}\u02C8${ipa.slice(onset)}`;
}
477
/**
 * Transliterate a word with an ordered list of [grapheme, phoneme] rules.
 *
 * The word is NFC-normalised and scanned left to right. At each position the
 * FIRST rule whose grapheme matches is applied, so multi-character graphemes
 * must be listed before their single-character prefixes. A character that no
 * rule matches is dropped from the output.
 *
 * @param word Orthographic word.
 * @param rules Ordered [grapheme, phoneme] pairs.
 * @returns Concatenation of the phonemes of all applied rules.
 */
function applyRules(word, rules) {
  const text = word.normalize("NFC");

  // First rule matching at `pos`, or undefined when nothing matches.
  const ruleAt = (pos) => {
    for (const rule of rules) {
      if (text.startsWith(rule[0], pos)) {
        return rule;
      }
    }
    return undefined;
  };

  let output = "";
  let cursor = 0;
  while (cursor < text.length) {
    const rule = ruleAt(cursor);
    if (rule === undefined) {
      cursor += 1;
      continue;
    }
    output += rule[1];
    cursor += rule[0].length;
  }
  return output;
}
497
+ var FINNISH_RULES = [
498
+ // Digraphs (must precede single-char rules)
499
+ ["nk", "\u014Bk"],
500
+ ["ng", "\u014B\u02D0"],
501
+ // Long vowels (double letters → vowel + length mark)
502
+ ["aa", "\u0251\u02D0"],
503
+ ["ee", "e\u02D0"],
504
+ ["ii", "i\u02D0"],
505
+ ["oo", "o\u02D0"],
506
+ ["uu", "u\u02D0"],
507
+ ["yy", "y\u02D0"],
508
+ ["\xE4\xE4", "\xE6\u02D0"],
509
+ ["\xF6\xF6", "\xF8\u02D0"],
510
+ // Geminate consonants
511
+ ["pp", "p\u02D0"],
512
+ ["tt", "t\u02D0"],
513
+ ["kk", "k\u02D0"],
514
+ ["mm", "m\u02D0"],
515
+ ["nn", "n\u02D0"],
516
+ ["ll", "l\u02D0"],
517
+ ["rr", "r\u02D0"],
518
+ ["ss", "s\u02D0"],
519
+ // Single vowels
520
+ ["a", "\u0251"],
521
+ ["e", "e"],
522
+ ["i", "i"],
523
+ ["o", "o"],
524
+ ["u", "u"],
525
+ ["y", "y"],
526
+ ["\xE4", "\xE6"],
527
+ ["\xF6", "\xF8"],
528
+ // Single consonants
529
+ ["b", "b"],
530
+ ["d", "d"],
531
+ ["f", "f"],
532
+ ["g", "\u0261"],
533
+ ["h", "h"],
534
+ ["j", "j"],
535
+ ["k", "k"],
536
+ ["l", "l"],
537
+ ["m", "m"],
538
+ ["n", "n"],
539
+ ["p", "p"],
540
+ ["r", "r"],
541
+ ["s", "s"],
542
+ ["t", "t"],
543
+ ["v", "\u028B"],
544
+ ["w", "\u028B"],
545
+ ["z", "ts"]
546
+ ];
547
/**
 * Finnish grapheme-to-phoneme conversion: apply FINNISH_RULES, then mark
 * stress at the start of the word.
 *
 * @param word Finnish word.
 * @returns IPA string with a leading primary-stress mark (empty if nothing matched).
 */
function finnishG2P(word) {
  const ipa = applyRules(word, FINNISH_RULES);
  return addFirstSyllableStress(ipa);
}
550
+ var ESPERANTO_RULES = [
551
+ // Special Esperanto characters (must precede base letters)
552
+ ["\u0109", "t\u0283"],
553
+ ["\u011D", "d\u0292"],
554
+ ["\u0125", "x"],
555
+ ["\u0135", "\u0292"],
556
+ ["\u015D", "\u0283"],
557
+ ["\u016D", "w"],
558
+ ["c", "ts"],
559
+ // Vowels
560
+ ["a", "a"],
561
+ ["e", "e"],
562
+ ["i", "i"],
563
+ ["o", "o"],
564
+ ["u", "u"],
565
+ // Consonants
566
+ ["b", "b"],
567
+ ["d", "d"],
568
+ ["f", "f"],
569
+ ["g", "\u0261"],
570
+ ["h", "h"],
571
+ ["j", "j"],
572
+ ["k", "k"],
573
+ ["l", "l"],
574
+ ["m", "m"],
575
+ ["n", "n"],
576
+ ["p", "p"],
577
+ ["r", "r"],
578
+ ["s", "s"],
579
+ ["t", "t"],
580
+ ["v", "v"],
581
+ ["z", "z"]
582
+ ];
583
/**
 * Esperanto grapheme-to-phoneme conversion: apply ESPERANTO_RULES, then mark
 * stress on the second-to-last vowel.
 *
 * @param word Esperanto word.
 * @returns IPA string with a primary-stress mark.
 */
function esperantoG2P(word) {
  const ipa = applyRules(word, ESPERANTO_RULES);
  return addPenultimateStress(ipa);
}
586
+ var SWAHILI_RULES = [
587
+ // Trigraph (must precede digraph 'ng')
588
+ ["ng'", "\u014B"],
589
+ // Digraphs
590
+ ["ch", "t\u0283"],
591
+ ["dh", "\xF0"],
592
+ ["gh", "\u0263"],
593
+ ["ng", "\u014B\u0261"],
594
+ ["nj", "nd\u0292"],
595
+ ["ny", "\u0272"],
596
+ ["sh", "\u0283"],
597
+ ["th", "\u03B8"],
598
+ // Vowels
599
+ ["a", "a"],
600
+ ["e", "\u025B"],
601
+ ["i", "i"],
602
+ ["o", "\u0254"],
603
+ ["u", "u"],
604
+ // Consonants
605
+ ["b", "b"],
606
+ ["d", "d"],
607
+ ["f", "f"],
608
+ ["g", "\u0261"],
609
+ ["h", "h"],
610
+ ["j", "d\u0292"],
611
+ ["k", "k"],
612
+ ["l", "l"],
613
+ ["m", "m"],
614
+ ["n", "n"],
615
+ ["p", "p"],
616
+ ["r", "\u027E"],
617
+ ["s", "s"],
618
+ ["t", "t"],
619
+ ["v", "v"],
620
+ ["w", "w"],
621
+ ["y", "j"],
622
+ ["z", "z"]
623
+ ];
624
/**
 * Swahili grapheme-to-phoneme conversion: apply SWAHILI_RULES, then mark
 * stress on the second-to-last vowel.
 *
 * @param word Swahili word.
 * @returns IPA string with a primary-stress mark.
 */
function swahiliG2P(word) {
  const ipa = applyRules(word, SWAHILI_RULES);
  return addPenultimateStress(ipa);
}
627
+ var MALAY_RULES = [
628
+ // Digraphs
629
+ ["gh", "\u0263"],
630
+ ["kh", "x"],
631
+ ["ng", "\u014B"],
632
+ ["ny", "\u0272"],
633
+ ["sy", "\u0283"],
634
+ // Vowels
635
+ ["a", "a"],
636
+ ["e", "\u0259"],
637
+ ["i", "i"],
638
+ ["o", "o"],
639
+ ["u", "u"],
640
+ // Consonants
641
+ ["b", "b"],
642
+ ["c", "t\u0283"],
643
+ ["d", "d"],
644
+ ["f", "f"],
645
+ ["g", "\u0261"],
646
+ ["h", "h"],
647
+ ["j", "d\u0292"],
648
+ ["k", "k"],
649
+ ["l", "l"],
650
+ ["m", "m"],
651
+ ["n", "n"],
652
+ ["p", "p"],
653
+ ["r", "\u027E"],
654
+ ["s", "s"],
655
+ ["t", "t"],
656
+ ["v", "v"],
657
+ ["w", "w"],
658
+ ["y", "j"],
659
+ ["z", "z"]
660
+ ];
661
/**
 * Put a primary-stress mark (ˈ) following the Malay rule used here: stress the
 * second-to-last vowel, unless that vowel is schwa (ə), in which case the last
 * vowel is stressed instead.
 *
 * The mark is placed in front of the consonants directly preceding the
 * stressed vowel; the backwards walk stops at the previous vowel or length
 * mark (ː). With zero or one vowel the mark goes at the start of the string.
 *
 * @param ipa IPA string without stress marks.
 * @returns The marked string; an empty input is returned unchanged.
 */
function addMalayStress(ipa) {
  const vowelAt = [];
  let position = 0;
  for (const ch of ipa) {
    if (IPA_VOWELS.has(ch)) {
      vowelAt.push(position);
    }
    position += 1;
  }

  if (vowelAt.length <= 1) {
    return ipa ? "\u02C8" + ipa : ipa;
  }

  const [penult, final] = vowelAt.slice(-2);
  let onset = ipa[penult] === "\u0259" ? final : penult;
  while (onset > 0) {
    const before = ipa[onset - 1];
    if (IPA_VOWELS.has(before) || before === "\u02D0") {
      break;
    }
    onset -= 1;
  }
  return `${ipa.slice(0, onset)}\u02C8${ipa.slice(onset)}`;
}
679
/**
 * Malay grapheme-to-phoneme conversion: apply MALAY_RULES, then place stress
 * with the schwa-aware Malay rule.
 *
 * @param word Malay word.
 * @returns IPA string with a primary-stress mark.
 */
function malayG2P(word) {
  const ipa = applyRules(word, MALAY_RULES);
  return addMalayStress(ipa);
}
682
+
683
+ // src/lemmatizers.ts
684
// Dictionary-backed lemmatizers, keyed by language code. Each function takes
// (dict, word) and returns the dictionary value found for a base form derived
// from `word`, or undefined when no candidate is in the dictionary. All
// referenced functions are hoisted declarations in this file; lemmatizeFa,
// lemmatizeMa and lemmatizeNb are defined further down.
var LEMMATIZERS = {
  eo: lemmatizeEo,
  fa: lemmatizeFa,
  fi: lemmatizeFi,
  ma: lemmatizeMa,
  nb: lemmatizeNb,
  ro: lemmatizeRo,
  sv: lemmatizeSv,
  sw: lemmatizeSw
};
694
/**
 * Return the dictionary value of the first candidate that has one.
 *
 * Only the dictionary's OWN properties are considered. A plain `dict[c]` read
 * also resolves names inherited from Object.prototype, so a candidate such as
 * "constructor" or "__proto__" would otherwise be reported as a hit and hand a
 * function or object back to the caller.
 *
 * @param dict Object mapping word -> value (e.g. an IPA string).
 * @param candidates Candidate keys in priority order; empty candidates are skipped.
 * @returns The first truthy own value found, or undefined when none matches.
 */
function tryLookup(dict, ...candidates) {
  for (const candidate of candidates) {
    if (!candidate || !Object.prototype.hasOwnProperty.call(dict, candidate)) {
      continue;
    }
    const value = dict[candidate];
    if (value) {
      return value;
    }
  }
  return undefined;
}
702
+ var SV_SUFFIXES = [
703
+ // 4+ char suffixes
704
+ ["erna", ["", "e"]],
705
+ ["orna", ["", "a"]],
706
+ ["ande", ["", "a"]],
707
+ ["ende", ["", "a"]],
708
+ ["aste", [""]],
709
+ // 3 char
710
+ ["ade", ["", "a"]],
711
+ ["igt", ["ig"]],
712
+ // 2 char
713
+ ["en", [""]],
714
+ ["et", ["", "e"]],
715
+ ["an", ["", "a"]],
716
+ ["ar", [""]],
717
+ ["er", ["", "a"]],
718
+ ["de", ["", "a"]],
719
+ ["te", ["", "a"]],
720
+ // 1 char
721
+ ["a", [""]],
722
+ ["t", ["", "a"]],
723
+ ["s", [""]],
724
+ ["r", ["", "a"]],
725
+ ["n", [""]]
726
+ ];
727
/**
 * Swedish lemmatizer: look the word up after undoing common inflection endings.
 *
 * Every SV_SUFFIXES entry whose ending matches is tried in table order; for
 * each, the ending is replaced by the listed restorations and the first
 * dictionary hit is returned. If nothing matches and the word ends in "s",
 * that "s" is removed and the whole procedure repeats on the shorter word.
 *
 * @param dict Object mapping word -> value.
 * @param word Inflected word.
 * @returns The dictionary value of the recovered base form, or undefined.
 */
function lemmatizeSv(dict, word) {
  for (const [ending, restorations] of SV_SUFFIXES) {
    if (word.length <= ending.length || !word.endsWith(ending)) {
      continue;
    }
    const root = word.slice(0, -ending.length);
    const found = tryLookup(dict, ...restorations.map((tail) => root + tail));
    if (found) {
      return found;
    }
  }

  if (!word.endsWith("s") || word.length <= 2) {
    return undefined;
  }
  // Strip the trailing "s" and retry on the remainder.
  const withoutS = word.slice(0, -1);
  return dict[withoutS] || lemmatizeSv(dict, withoutS);
}
747
+ var RO_SUFFIXES = [
748
+ // 4+ char
749
+ ["ului", [""]],
750
+ ["ilor", [""]],
751
+ ["e\u0219te", ["", "i"]],
752
+ // 3 char
753
+ ["ele", ["", "\u0103"]],
754
+ ["uri", [""]],
755
+ ["eau", ["", "i", "ea"]],
756
+ // 2 char
757
+ ["ul", [""]],
758
+ ["ii", ["", "ie", "iu"]],
759
+ ["ea", ["", "e"]],
760
+ ["\u021Bi", ["t"]],
761
+ ["c\u0103", ["c", "ca"]],
762
+ // 1 char
763
+ ["a", ["", "\u0103"]],
764
+ ["e", ["", "\u0103"]],
765
+ ["i", ["", "e"]]
766
+ ];
767
// [prefix, restored] pairs used by lemmatizeRo: when a word starts with
// `prefix`, that prefix is replaced by `restored` and the result is looked up.
// NOTE(review): presumably these undo elided clitics (e.g. "n" for "în") —
// confirm against the tokenizer that produces the input words.
var RO_PREFIX_RESTORE = [
  ["n", "\xEEn"],
  ["l", "\xEEl"],
  ["m", "m\u0103"]
];
772
/**
 * Romanian lemmatizer. Tries, in order:
 *   1. every matching RO_SUFFIXES ending, replaced by its listed restorations;
 *   2. every matching RO_PREFIX_RESTORE prefix, replaced by its restored form;
 *   3. the word with "î" put in front.
 *
 * @param dict Object mapping word -> value.
 * @param word Inflected word.
 * @returns The dictionary value of the first form found, or undefined.
 */
function lemmatizeRo(dict, word) {
  for (const [ending, restorations] of RO_SUFFIXES) {
    if (word.length <= ending.length || !word.endsWith(ending)) {
      continue;
    }
    const root = word.slice(0, -ending.length);
    const found = tryLookup(dict, ...restorations.map((tail) => root + tail));
    if (found) {
      return found;
    }
  }

  for (const [clipped, full] of RO_PREFIX_RESTORE) {
    if (!word.startsWith(clipped)) {
      continue;
    }
    const expanded = full + word.slice(clipped.length);
    if (dict[expanded]) {
      return dict[expanded];
    }
  }

  // Last resort: the word may be missing only its initial "î".
  return dict["\xEE" + word] || undefined;
}
797
+ var SW_VERB_PREFIXES = [
798
+ // 5+ char
799
+ "hatuku",
800
+ "hawaku",
801
+ "haiku",
802
+ "hatua",
803
+ "hatui",
804
+ // 4 char
805
+ "wali",
806
+ "tuli",
807
+ "nili",
808
+ "aali",
809
+ "wame",
810
+ "tume",
811
+ "nime",
812
+ "ame",
813
+ "wana",
814
+ "tuna",
815
+ "nina",
816
+ "ana",
817
+ "wata",
818
+ "tuta",
819
+ "nita",
820
+ "ata",
821
+ "yame",
822
+ "yata",
823
+ "yana",
824
+ "yali",
825
+ "kime",
826
+ "kita",
827
+ "kina",
828
+ "kili",
829
+ "lime",
830
+ "lita",
831
+ "lina",
832
+ "lili",
833
+ "vime",
834
+ "vita",
835
+ "vina",
836
+ "vili",
837
+ "zime",
838
+ "zita",
839
+ "zina",
840
+ "zili",
841
+ "haku",
842
+ "hatu",
843
+ "hani",
844
+ "hawa",
845
+ // 3 char
846
+ "ali",
847
+ "uli",
848
+ "ume",
849
+ "una",
850
+ "uta",
851
+ "tua",
852
+ "tui",
853
+ "wai",
854
+ "wal",
855
+ "iku",
856
+ "ina",
857
+ "hue",
858
+ "huj",
859
+ "hui",
860
+ "yat",
861
+ "yam",
862
+ "yan",
863
+ "kum",
864
+ "kui",
865
+ "kua",
866
+ // 2 char
867
+ "wa",
868
+ "tu",
869
+ "ni",
870
+ "li",
871
+ "ki",
872
+ "vi",
873
+ "zi",
874
+ "ya",
875
+ "ku",
876
+ "hu"
877
+ ];
878
// Swahili derivational verb endings: [suffix, replacements]. When a form ends
// in `suffix`, the suffix is replaced by each replacement in turn to build a
// candidate base form.
//
// "ika" was previously listed twice (once with ["a"], once with ["a", "ea"]).
// No other suffix in this table can match a word ending in "ika", so the two
// entries were always visited back to back; merging them yields the same
// candidates in the same order without repeating the "a" lookup.
var SW_DERIV_SUFFIXES = [
  ["ika", ["a", "ea"]],
  ["isha", ["a"]],
  ["ana", ["a"]],
  ["wa", ["a"]],
  ["ia", ["a"]]
];
886
// Esperanto endings tried by lemmatizeEo, in order: [ending, replacements].
// An ending applies when the word ends with it and at least two characters
// remain in front of it; the ending is then swapped for each replacement.
var EO_ENDING_RULES = [
  // Finite verb endings
  ["as", ["i", "o", ""]],
  ["is", ["i", "o", ""]],
  ["os", ["i", "o", ""]],
  ["us", ["i", "o", ""]],
  // Imperative
  ["u", ["i", "o", ""]],
  // Participles
  ["anta", ["i", "o", ""]],
  ["inta", ["i", "o", ""]],
  ["onta", ["i", "o", ""]],
  ["ata", ["i", "o", ""]],
  ["ita", ["i", "o", ""]],
  ["ota", ["i", "o", ""]],
  ["ante", ["i", "o", ""]],
  ["inte", ["i", "o", ""]],
  ["onte", ["i", "o", ""]],
  // Adverb
  ["e", ["o", "a", "i", ""]],
  // Derivational suffixes
  ["isto", ["o", "i", ""]],
  ["ejo", ["o", "i", ""]],
  ["ilo", ["o", "i", ""]],
  ["eco", ["o", "a", ""]],
  ["ado", ["o", "i", ""]],
  ["igo", ["o", "a", ""]],
  ["i\u011Do", ["o", "a", ""]]
];

/**
 * Esperanto lemmatizer.
 *
 * 1. Removes a final "n" and then a final "j", returning as soon as a
 *    shortened form is in the dictionary.
 * 2. Walks EO_ENDING_RULES, swapping a matching ending for each replacement.
 * 3. Removes one of the prefixes mal-/ek-/re-/ne-/sen- and looks up the rest,
 *    recursing on it if it is not a direct hit.
 *
 * @param dict Object mapping word -> value.
 * @param word Inflected or derived word.
 * @returns The dictionary value of the first form found, or undefined.
 */
function lemmatizeEo(dict, word) {
  let w = word;
  for (const marker of ["n", "j"]) {
    if (w.endsWith(marker) && w.length > 2) {
      w = w.slice(0, -1);
      if (dict[w]) {
        return dict[w];
      }
    }
  }
  if (dict[w]) {
    return dict[w];
  }

  for (const [ending, replacements] of EO_ENDING_RULES) {
    if (!w.endsWith(ending) || w.length <= ending.length + 1) {
      continue;
    }
    const root = w.slice(0, -ending.length);
    const found = tryLookup(dict, ...replacements.map((tail) => root + tail));
    if (found) {
      return found;
    }
  }

  for (const prefix of ["mal", "ek", "re", "ne", "sen"]) {
    if (!w.startsWith(prefix) || w.length <= prefix.length + 1) {
      continue;
    }
    const rest = w.slice(prefix.length);
    const found = dict[rest] || lemmatizeEo(dict, rest);
    if (found) {
      return found;
    }
  }
  return undefined;
}
978
/**
 * Swahili lemmatizer.
 *
 * For every SW_VERB_PREFIXES entry that matches (leaving at least two
 * characters), the remainder is looked up as-is and with the infinitive prefix
 * "ku"; then the same is done for each base form obtained by undoing a
 * derivational suffix from SW_DERIV_SUFFIXES. If no prefix leads to a hit, the
 * derivational suffixes are undone on the whole word (without the "ku" variant).
 *
 * @param dict Object mapping word -> value.
 * @param word Inflected word.
 * @returns The dictionary value of the first form found, or undefined.
 */
function lemmatizeSw(dict, word) {
  // Base forms produced by swapping a matching derivational suffix of `form`.
  function* derivedBases(form) {
    for (const [ending, replacements] of SW_DERIV_SUFFIXES) {
      if (form.length <= ending.length || !form.endsWith(ending)) {
        continue;
      }
      const root = form.slice(0, -ending.length);
      for (const tail of replacements) {
        yield root + tail;
      }
    }
  }

  for (const prefix of SW_VERB_PREFIXES) {
    if (word.length <= prefix.length + 1 || !word.startsWith(prefix)) {
      continue;
    }
    const remainder = word.slice(prefix.length);
    for (const form of [remainder, ...derivedBases(remainder)]) {
      if (dict[form]) {
        return dict[form];
      }
      const infinitive = "ku" + form;
      if (dict[infinitive]) {
        return dict[infinitive];
      }
    }
  }

  for (const form of derivedBases(word)) {
    if (dict[form]) {
      return dict[form];
    }
  }
  return undefined;
}
1018
+ var FI_SUFFIXES = [
1019
+ // Possessive + case combinations
1020
+ ["ssani", ["", "nen"]],
1021
+ ["ss\xE4ni", ["", "nen"]],
1022
+ ["llani", ["", "nen"]],
1023
+ ["ll\xE4ni", ["", "nen"]],
1024
+ ["stani", ["", "nen"]],
1025
+ ["st\xE4ni", ["", "nen"]],
1026
+ ["ssaan", ["", "nen"]],
1027
+ ["ss\xE4\xE4n", ["", "nen"]],
1028
+ // Plural case endings (4+ chars)
1029
+ ["issa", ["", "a"]],
1030
+ ["iss\xE4", ["", "\xE4"]],
1031
+ ["illa", ["", "a"]],
1032
+ ["ill\xE4", ["", "\xE4"]],
1033
+ ["ista", ["", "a"]],
1034
+ ["ist\xE4", ["", "\xE4"]],
1035
+ ["ihin", ["", "i"]],
1036
+ ["ojen", ["o"]],
1037
+ ["ujen", ["u"]],
1038
+ ["yjen", ["y"]],
1039
+ ["iden", ["i"]],
1040
+ ["jen", [""]],
1041
+ // Inessive -ssa/-ssä
1042
+ ["ssa", ["", "s"]],
1043
+ ["ss\xE4", ["", "s"]],
1044
+ // Elative -sta/-stä
1045
+ ["sta", ["", "s"]],
1046
+ ["st\xE4", ["", "s"]],
1047
+ // Adessive -lla/-llä
1048
+ ["lla", ["", "a"]],
1049
+ ["ll\xE4", ["", "\xE4"]],
1050
+ // Ablative -lta/-ltä
1051
+ ["lta", ["", "a"]],
1052
+ ["lt\xE4", ["", "\xE4"]],
1053
+ // Allative -lle
1054
+ ["lle", ["", "i"]],
1055
+ // Essive -na/-nä
1056
+ ["na", ["", "nen"]],
1057
+ ["n\xE4", ["", "nen"]],
1058
+ // Translative -ksi
1059
+ ["ksi", ["", "si"]],
1060
+ // Possessive -ni, -si, -nsa/-nsä, -mme, -nne
1061
+ ["nsa", [""]],
1062
+ ["ns\xE4", [""]],
1063
+ ["mme", [""]],
1064
+ ["nne", [""]],
1065
+ ["ni", ["", "n"]],
1066
+ ["si", ["", "s"]],
1067
+ // Partitive -a/-ä, -ta/-tä, -tta/-ttä
1068
+ ["tta", [""]],
1069
+ ["tt\xE4", [""]],
1070
+ ["ta", ["", "nen"]],
1071
+ ["t\xE4", ["", "nen"]],
1072
+ // Genitive -n, plural -t
1073
+ ["en", ["", "i"]],
1074
+ ["ot", ["o"]],
1075
+ ["ut", ["u"]],
1076
+ ["yt", ["y"]],
1077
+ ["\xE4t", ["\xE4"]],
1078
+ ["at", ["a"]],
1079
+ ["et", ["e", "i"]],
1080
+ // Verb past -i
1081
+ ["oi", ["o", "oa"]],
1082
+ ["ui", ["u", "ua"]],
1083
+ // General fallbacks
1084
+ ["a", [""]],
1085
+ ["\xE4", [""]],
1086
+ ["n", [""]],
1087
+ ["t", [""]]
1088
+ ];
1089
+ var FI_VERB_SUFFIXES = [
1090
+ // Archaic Kalevala -(tt)elevi/-(tt)avi patterns
1091
+ ["ttelevi", ["tella", "della"]],
1092
+ ["televi", ["tella", "della"]],
1093
+ ["ttavi", ["ttaa", "t\xE4\xE4"]],
1094
+ ["ttevi", ["tt\xE4\xE4", "ttaa"]],
1095
+ ["elevi", ["ella", "ell\xE4"]],
1096
+ ["alevi", ["alla", "all\xE4"]],
1097
+ ["evi", ["", "a", "\xE4"]],
1098
+ ["avi", ["", "a", "aa"]],
1099
+ ["ovi", ["", "o", "oa"]],
1100
+ ["uvi", ["", "u", "ua"]],
1101
+ // Past participle -nut/-nyt, -neet
1102
+ ["neet", ["", "a", "\xE4"]],
1103
+ ["nut", ["", "a", "da"]],
1104
+ ["nyt", ["", "\xE4", "d\xE4"]],
1105
+ // Present participle -va/-vä
1106
+ ["va", ["", "a"]],
1107
+ ["v\xE4", ["", "\xE4"]],
1108
+ // Past tense 3rd person
1109
+ ["tui", ["tua", "ty\xE4"]],
1110
+ ["lui", ["la", "l\xE4"]],
1111
+ // Conditional
1112
+ ["isi", ["", "a", "\xE4"]],
1113
+ // Agent noun -ja/-jä
1114
+ ["ja", ["", "a"]],
1115
+ ["j\xE4", ["", "\xE4"]]
1116
+ ];
1117
// Stem-final two-letter clusters and the form each one is rewritten to.
var FI_WEAK_GRADE_BY_ENDING = new Map([
  ["nt", "nn"],
  ["lt", "ll"],
  ["rt", "rr"],
  ["nk", "ng"],
  ["mp", "mm"],
  ["lk", "l"],
  ["rk", "r"],
  ["hk", "h"]
]);
/**
 * Rewrites the final consonant cluster of a stem (nt→nn, lt→ll, rt→rr,
 * nk→ng, mp→mm, lk→l, rk→r, hk→h).
 *
 * @param stem Stem to transform.
 * @returns The stem with its last two characters replaced when they form one
 *   of the listed clusters; otherwise the stem unchanged.
 */
function applyFiGradation(stem) {
  // Every cluster is exactly two characters long, so a single keyed lookup
  // on the tail is equivalent to testing each ending in turn.
  const weak = FI_WEAK_GRADE_BY_ENDING.get(stem.slice(-2));
  return weak === void 0 ? stem : stem.slice(0, -2) + weak;
}
1144
// Stem-final doubled/assimilated clusters and the form each one is rewritten to.
var FI_STRONG_GRADE_BY_ENDING = new Map([
  ["nn", "nt"],
  ["ll", "lt"],
  ["rr", "rt"],
  ["ng", "nk"],
  ["mm", "mp"]
]);
/**
 * Rewrites the final consonant cluster of a stem in the opposite direction
 * to `applyFiGradation` (nn→nt, ll→lt, rr→rt, ng→nk, mm→mp).
 *
 * @param stem Stem to transform.
 * @returns The stem with its last two characters replaced when they form one
 *   of the listed clusters; otherwise the stem unchanged.
 */
function applyFiStrengthening(stem) {
  const strong = FI_STRONG_GRADE_BY_ENDING.get(stem.slice(-2));
  return strong === void 0 ? stem : stem.slice(0, -2) + strong;
}
1162
/**
 * Tries to find a dictionary entry for an inflected word by stripping known
 * suffixes and looking up the resulting base-form candidates.
 *
 * Lookup order:
 *   1. every entry of FI_VERB_SUFFIXES;
 *   2. every entry of FI_SUFFIXES, each followed by the same candidates built
 *      on the `applyFiGradation` and then the `applyFiStrengthening` variant
 *      of the stem;
 *   3. the word minus a possessive ending, looked up directly and then run
 *      through FI_SUFFIXES once more (without the stem variants).
 *
 * @param dict Object mapping words to IPA strings.
 * @param word Word that had no direct dictionary entry.
 * @returns The first truthy lookup result, or `undefined` when nothing matched.
 */
function lemmatizeFi(dict, word) {
  // Stem left after removing `suffix`, or undefined when the suffix does not
  // apply. At least two characters must remain, so a stem is never empty.
  const stemAfter = (form, suffix) => form.length > suffix.length + 1 && form.endsWith(suffix) ? form.slice(0, -suffix.length) : void 0;
  // tryLookup is defined elsewhere in the file; presumably it returns the
  // first dictionary hit among the candidates passed to it.
  const lookupForms = (stem, replacements) => tryLookup(dict, ...replacements.map((r) => stem + r));

  for (const [suffix, replacements] of FI_VERB_SUFFIXES) {
    const stem = stemAfter(word, suffix);
    if (stem === void 0) {
      continue;
    }
    const verbIpa = lookupForms(stem, replacements);
    if (verbIpa) {
      return verbIpa;
    }
  }

  for (const [suffix, replacements] of FI_SUFFIXES) {
    const stem = stemAfter(word, suffix);
    if (stem === void 0) {
      continue;
    }
    const plainIpa = lookupForms(stem, replacements);
    if (plainIpa) {
      return plainIpa;
    }
    if (stem.length < 2) {
      continue;
    }
    // Retry with the stem's final cluster rewritten, first one direction and
    // then the other; a variant equal to the stem would only repeat the
    // lookup above, so it is skipped.
    for (const regrade of [applyFiGradation, applyFiStrengthening]) {
      const variant = regrade(stem);
      if (variant === stem) {
        continue;
      }
      const variantIpa = lookupForms(variant, replacements);
      if (variantIpa) {
        return variantIpa;
      }
    }
  }

  for (const poss of ["ni", "si", "nsa", "ns\xE4", "mme", "nne"]) {
    if (!word.endsWith(poss) || word.length <= poss.length + 2) {
      continue;
    }
    const inner = word.slice(0, -poss.length);
    if (dict[inner]) {
      return dict[inner];
    }
    // The part before the possessive ending may itself carry a suffix.
    for (const [suffix, replacements] of FI_SUFFIXES) {
      const stem = stemAfter(inner, suffix);
      if (stem === void 0) {
        continue;
      }
      const innerIpa = lookupForms(stem, replacements);
      if (innerIpa) {
        return innerIpa;
      }
    }
  }
  return void 0;
}
1218
/**
 * Builds alternative spellings of a word by applying a few fixed
 * respellings: "aa" → "å", "Aa" → "Å", and the whole-word substitutions
 * "af" → "av", "efter" → "etter", "imod" → "imot".
 *
 * When the word contains both "Aa" and "aa" (e.g. "Aasgaard"), the two
 * single-replacement variants are followed by a variant with both
 * replacements applied ("Åsgård"). Without it the fully respelled form would
 * never be offered. It is appended last so earlier variants keep their order.
 *
 * @param word Word to respell; it is not modified.
 * @returns The respelled variants in the order described above; an empty
 *   array when no rule applies.
 */
function modernizeNb(word) {
  const variants = [];
  const hasLowerDigraph = word.includes("aa");
  const hasCapitalDigraph = word.includes("Aa");
  if (hasLowerDigraph) {
    variants.push(word.replaceAll("aa", "\xE5"));
  }
  if (hasCapitalDigraph) {
    variants.push(word.replaceAll("Aa", "\xC5"));
  }
  if (hasLowerDigraph && hasCapitalDigraph) {
    const fullyRespelled = word.replaceAll("Aa", "\xC5").replaceAll("aa", "\xE5");
    // Overlapping matches such as "Aaa" can collapse to an existing variant.
    if (!variants.includes(fullyRespelled)) {
      variants.push(fullyRespelled);
    }
  }
  if (word === "af") {
    variants.push("av");
  }
  if (word === "efter") {
    variants.push("etter");
  }
  if (word === "imod") {
    variants.push("imot");
  }
  return variants;
}
1237
+ var NB_SUFFIXES = [
1238
+ // 4+ char
1239
+ ["erne", ["", "e"]],
1240
+ ["enes", ["", "e"]],
1241
+ ["ande", ["", "e"]],
1242
+ ["ende", ["", "e"]],
1243
+ ["else", ["", "e"]],
1244
+ // Definite plural
1245
+ ["ene", ["", "e"]],
1246
+ ["ane", ["", "e"]],
1247
+ // 2 char definite singular
1248
+ ["en", ["", "e"]],
1249
+ ["et", ["", "e"]],
1250
+ // Past tense / participle
1251
+ ["te", ["", "e"]],
1252
+ ["de", ["", "e"]],
1253
+ ["dde", [""]],
1254
+ // Indefinite plural
1255
+ ["er", ["", "e"]],
1256
+ // Comparative / superlative
1257
+ ["ere", [""]],
1258
+ ["est", [""]],
1259
+ ["este", [""]],
1260
+ // Present tense
1261
+ ["ar", ["", "e"]],
1262
+ // General
1263
+ ["t", ["", "e"]],
1264
+ ["a", ["", "e"]],
1265
+ ["s", [""]],
1266
+ ["e", [""]],
1267
+ ["n", ["", "e"]],
1268
+ ["r", ["", "e"]]
1269
+ ];
1270
/**
 * Tries to find a dictionary entry for a word by respelling it and/or
 * stripping inflection suffixes.
 *
 * Passes, in order:
 *   1. each `modernizeNb` variant of the whole word, as-is and lower-cased;
 *   2. each NB_SUFFIXES entry: strip the suffix, append each replacement, and
 *      look the candidates up via `tryLookup` (defined elsewhere);
 *   3. the same suffix candidates, each run through `modernizeNb` and looked
 *      up as-is and lower-cased.
 *
 * Pass 3 previously skipped the lower-cased retry that pass 1 performs, so a
 * capitalised word needing both a suffix strip and a respelling (stem
 * "Aar" → "År") could not reach a lower-case dictionary key. Both passes now
 * share one lookup.
 *
 * @param dict Object mapping words to IPA strings.
 * @param word Word that had no direct dictionary entry.
 * @returns The first truthy dictionary value found, or `undefined`.
 */
function lemmatizeNb(dict, word) {
  // Looks up every respelling of `form`, exact case before lower case.
  const lookupModernized = (form) => {
    for (const variant of modernizeNb(form)) {
      if (dict[variant]) {
        return dict[variant];
      }
      const lower = variant.toLowerCase();
      if (dict[lower]) {
        return dict[lower];
      }
    }
    return void 0;
  };

  const wholeWord = lookupModernized(word);
  if (wholeWord) {
    return wholeWord;
  }

  for (const [suffix, replacements] of NB_SUFFIXES) {
    // At least two characters of stem must remain after stripping.
    if (word.length > suffix.length + 1 && word.endsWith(suffix)) {
      const stem = word.slice(0, -suffix.length);
      const ipa = tryLookup(dict, ...replacements.map((r) => stem + r));
      if (ipa) {
        return ipa;
      }
    }
  }

  // Second sweep over the suffix table: plain candidates all failed above,
  // so now try the respelled form of each candidate.
  for (const [suffix, replacements] of NB_SUFFIXES) {
    if (word.length > suffix.length + 1 && word.endsWith(suffix)) {
      const stem = word.slice(0, -suffix.length);
      for (const r of replacements) {
        const respelled = lookupModernized(stem + r);
        if (respelled) {
          return respelled;
        }
      }
    }
  }
  return void 0;
}
1306
// Suffixes tried, in this order, by lemmatizeMa.
var MA_SUFFIXES = ["nya", "mu", "ku", "kan", "an", "lah", "kah", "i"];
// [prefix, restorations] pairs tried, in this order, by tryMaPrefixStrip.
// Each non-empty restoration is a consonant that is prepended to what is
// left after the prefix to form one more lookup candidate.
var MA_PREFIXES = [
  ["memper", [""]],
  ["member", [""]],
  ["menge", [""]],
  ["meny", ["s", "c"]],
  ["meng", ["k", "g", "h", ""]],
  ["mem", ["p", "b", "f", ""]],
  ["men", ["t", "d", "c", "j", ""]],
  ["me", [""]],
  ["diper", [""]],
  ["ber", [""]],
  ["per", [""]],
  ["ter", [""]],
  ["di", [""]],
  ["ke", [""]],
  ["se", [""]],
  ["ku", [""]]
];
/**
 * Tries to find a dictionary entry for an affixed word.
 *
 * For every suffix in MA_SUFFIXES that the word ends with (leaving at least
 * three characters), the remaining stem is looked up directly and then via
 * `tryMaPrefixStrip`. If no suffix produces a hit, prefix stripping is
 * applied to the whole word.
 *
 * @param dict Object mapping words to IPA strings.
 * @param word Word that had no direct dictionary entry.
 * @returns The first truthy dictionary value found, or `undefined`.
 */
function lemmatizeMa(dict, word) {
  const applicable = MA_SUFFIXES.filter((ending) => word.length > ending.length + 2 && word.endsWith(ending));
  for (const ending of applicable) {
    const base = word.slice(0, -ending.length);
    const hit = dict[base] || tryMaPrefixStrip(dict, base);
    if (hit) {
      return hit;
    }
  }
  return tryMaPrefixStrip(dict, word);
}
/**
 * Strips each matching MA_PREFIXES prefix from the word and looks up what is
 * left, first as-is and then with each restoration consonant prepended.
 *
 * @param dict Object mapping words to IPA strings.
 * @param word Word (or stem) to strip a prefix from.
 * @returns The first truthy dictionary value found, or `undefined`.
 */
function tryMaPrefixStrip(dict, word) {
  for (const [prefix, restorations] of MA_PREFIXES) {
    // At least two characters must remain after the prefix.
    if (!word.startsWith(prefix) || word.length <= prefix.length + 1) {
      continue;
    }
    const tail = word.slice(prefix.length);
    // The empty restoration is dropped: it would only repeat the bare tail.
    const forms = [tail, ...restorations.filter(Boolean).map((c) => c + tail)];
    const known = forms.find((form) => dict[form]);
    if (known !== void 0) {
      return dict[known];
    }
  }
  return void 0;
}
1359
// U+200C ZERO WIDTH NON-JOINER, used here as the separator between the parts
// of a compound word.
var ZWNJ = "\u200C";
/**
 * Tries to find a dictionary entry for a word that has no direct entry.
 *
 * Words containing a ZWNJ are split on it and resolved in this order:
 *   1. if there are exactly two parts and the first is the prefix
 *      "\u0645\u06CC" or "\u0646\u0645\u06CC", the second part is looked up
 *      as-is and then with each personal ending removed;
 *   2. each part on its own, left to right;
 *   3. all parts joined without the separator.
 * Afterwards (and for words without ZWNJ) a fixed list of suffixes is
 * stripped from the original word and the remainder looked up.
 *
 * Step 1 used to run after step 2. Whenever the prefix itself was a
 * dictionary key, step 2 returned the prefix's entry and the verb handling
 * was never reached. Running it first fixes that; step 2 remains as the
 * fallback, so any lookup that only the prefix can satisfy still resolves
 * as before.
 *
 * @param dict Object mapping words to IPA strings.
 * @param word Word to resolve.
 * @returns The first truthy dictionary value found, or `undefined`.
 */
function lemmatizeFa(dict, word) {
  if (word.includes(ZWNJ)) {
    const parts = word.split(ZWNJ);
    if (parts.length === 2 && (parts[0] === "\u0645\u06CC" || parts[0] === "\u0646\u0645\u06CC")) {
      const verb = parts[1];
      if (dict[verb]) {
        return dict[verb];
      }
      for (const ending of ["\u0646\u062F", "\u0645", "\u06CC", "\u062F", "\u06CC\u0645", "\u06CC\u062F"]) {
        // Never strip the whole verb: something must remain to look up.
        if (verb.endsWith(ending) && verb.length > ending.length) {
          const stem = verb.slice(0, -ending.length);
          if (dict[stem]) {
            return dict[stem];
          }
        }
      }
    }
    for (const part of parts) {
      if (dict[part]) {
        return dict[part];
      }
    }
    const joined = parts.join("");
    if (dict[joined]) {
      return dict[joined];
    }
  }
  for (const suffix of [
    "\u0647\u0627\u06CC\u06CC",
    "\u0647\u0627\u06CC",
    "\u0647\u0627",
    "\u0627\u06CC",
    "\u0627\u0646",
    "\u0627\u062A",
    "\u06CC\u0646",
    "\u062A\u0631",
    "\u062A\u0631\u06CC\u0646",
    "\u0634",
    "\u0645",
    "\u062A"
  ]) {
    // At least two characters of stem must remain after stripping.
    if (word.endsWith(suffix) && word.length > suffix.length + 1) {
      const stem = word.slice(0, -suffix.length);
      if (dict[stem]) {
        return dict[stem];
      }
    }
  }
  return void 0;
}
1410
+
1411
+ // src/overrides/ar.ts
1412
+ var ar = {
1413
+ \u0623\u062D\u062F\u0627: "/\u0294a\u02C8\u0127adan/",
1414
+ // anyone
1415
+ \u0623\u062D\u062F\u0647\u0645\u0627: "/\u0294a\u0127adu\u02C8huma\u02D0/",
1416
+ // one of them two
1417
+ \u0623\u062D\u0631\u0627\u0631\u0627: "/\u0294a\u0127\u02C8ra\u02D0ran/",
1418
+ \u0623\u0633\u062E\u064A\u0644\u064A\u0648\u0633: "/\u0294asxi\u02D0\u02C8li\u02D0ju\u02D0s/",
1419
+ // Aeschylus
1420
+ \u0623\u0645\u0647\u0627\u062A: "/um\u02D0a\u02C8ha\u02D0t/",
1421
+ \u0623\u0646\u0633\u064A\u062A: "/\u0294un\u02C8si\u02D0t/",
1422
+ \u0623\u0646\u0639\u0645\u062A: "/\u0294an\u02C8\u0295amta/",
1423
+ \u0623\u0648\u0631\u0641\u0644\u064A\u0633: "/\u0294ur\u02C8fali\u02D0s/",
1424
+ // Orphalese (Gibran)
1425
+ \u0623\u064A\u062F\u064A\u0647\u0645: "/\u0294aj\u02C8di\u02D0him/",
1426
+ \u0627\u0633\u062A\u064A\u0642\u0638\u062A: "/istaj\u02C8qa\xF0\u02E4tu/",
1427
+ \u0627\u0633\u0645\u0647: "/\u02C8ismuhu/",
1428
+ // his name
1429
+ \u0627\u0639\u062A\u0632\u0627\u0644\u0647: "/i\u0295ti\u02C8za\u02D0lihi/",
1430
+ // his seclusion
1431
+ \u0627\u0644\u0623\u0642\u0627\u0648\u0627\u0644: "/al\u0294aqa\u02D0\u02C8wi\u02D0l/",
1432
+ // the sayings/talk
1433
+ \u0627\u0644\u0623\u0644\u064A\u0641\u0629: "/al\u0294a\u02C8li\u02D0fah/",
1434
+ \u0627\u0644\u0623\u0648\u0644\u0649: "/al\u02C8\u0294u\u02D0la\u02D0/",
1435
+ // the first (fem.)
1436
+ \u0627\u0644\u062D\u064A\u0627\u0629: "/al\u0127a\u02C8ja\u02D0h/",
1437
+ \u0627\u0644\u062E\u0644\u064A\u0641\u0629: "/alxa\u02C8li\u02D0fah/",
1438
+ \u0627\u0644\u062E\u0644\u064A\u0642\u0629: "/alxa\u02C8li\u02D0qah/",
1439
+ // creation/nature
1440
+ \u0627\u0644\u0630\u0643\u0631\u064A\u0627\u062A: "/a\xF0\u02D0ikra\u02C8ja\u02D0t/",
1441
+ \u0627\u0644\u0633\u0645\u0627\u0648\u0627\u062A: "/as\u02D0ama\u02D0\u02C8wa\u02D0t/",
1442
+ \u0627\u0644\u0636\u0627\u0644\u064A\u0646: "/ad\u02E4\u02D0a\u02D0\u02C8li\u02D0n/",
1443
+ \u0627\u0644\u0639\u0627\u0644\u0645\u064A\u0646: "/al\u0295a\u02D0la\u02C8mi\u02D0n/",
1444
+ \u0627\u0644\u0639\u0634\u0631\u0629: "/al\u02C8\u0295a\u0283ara/",
1445
+ // the ten
1446
+ \u0627\u0644\u063A\u0631\u0628\u0629: "/al\u02C8\u0263urbah/",
1447
+ \u0627\u0644\u063A\u0632\u0627\u0629: "/al\u0263u\u02C8za\u02D0h/",
1448
+ \u0627\u0644\u0643\u0631\u0627\u0645\u0629: "/alka\u02C8ra\u02D0mah/",
1449
+ \u0627\u0644\u0644\u0648\u0649: "/al\u02C8liwa\u02D0/",
1450
+ \u0627\u0644\u0645\u0635\u0637\u0641\u0649: "/almus\u02C8t\u02E4afa\u02D0/",
1451
+ \u0627\u0644\u0646\u062F\u0648\u0627\u062A: "/annada\u02C8wa\u02D0t/",
1452
+ // the seminars/forums
1453
+ \u0627\u0645\u0631\u0623\u0629: "/im\u02C8ra\u0294ah/",
1454
+ \u0627\u0647\u062F\u0646\u0627: "/ih\u02C8dina\u02D0/",
1455
+ \u0628\u0623\u0648\u0642\u0627\u0641\u0647: "/bi\u0294aw\u02C8qa\u02D0fihi/",
1456
+ // with his endowments
1457
+ \u0628\u0625\u0630\u0646\u0647: "/bi\u0294i\xF0\u02C8nihi/",
1458
+ \u0628\u0628\u0631\u0642\u0629: "/bi\u02C8barqah/",
1459
+ // in Barqa (place)
1460
+ \u0628\u062C\u0632\u0627\u0626\u0631: "/bid\u0292a\u02C8za\u02D0\u0294ir/",
1461
+ // with islands
1462
+ \u0628\u062F\u0627\u064A\u0629: "/bi\u02C8da\u02D0jah/",
1463
+ \u0628\u0630\u064A: "/bi\u02C8\xF0i\u02D0/",
1464
+ // of importance
1465
+ \u0628\u0634\u064A\u0621: "/bi\u02C8\u0283aj\u0294/",
1466
+ \u0628\u0639\u0636\u0627: "/\u02C8ba\u0295d\u02E4an/",
1467
+ \u0628\u0639\u0636\u0647\u0645: "/ba\u0295\u02C8d\u02E4uhum/",
1468
+ \u0628\u0639\u064A\u0646\u0647: "/bi\u02C8\u0295ajnihi/",
1469
+ \u0628\u0639\u064A\u0646\u064A\u0647\u0627: "/bi\u0295aj\u02C8najha\u02D0/",
1470
+ // Egyptian National Anthem — بلادي بلادي
1471
+ \u0628\u0644\u0627\u062F\u064A: "/bi\u02C8la\u02D0di\u02D0/",
1472
+ // my country
1473
+ \u0628\u064A\u062F\u0628\u0627: "/bajda\u02C8ba\u02D0/",
1474
+ // Bidpai
1475
+ \u062A\u0623\u062E\u0630\u0647: "/ta\u0294\u02C8xu\xF0uhu/",
1476
+ \u062A\u0633\u0642\u0646\u064A: "/tas\u02C8qini\u02D0/",
1477
+ \u062A\u0639\u0644\u0645\u062A: "/ta\u0295al\u02D0\u02C8amtu/",
1478
+ \u062A\u0642\u0641\u0646: "/taqif\u02C8na/",
1479
+ \u062A\u0642\u0644\u0628\u062A: "/taqal\u02D0a\u02C8bat/",
1480
+ // it turned/fluctuated
1481
+ \u062A\u0643\u0644\u0645\u064A: "/takal\u02D0a\u02C8mi\u02D0/",
1482
+ // speak! (fem.)
1483
+ \u062B\u0647\u0645\u062F: "/\u02C8\u03B8ahmad/",
1484
+ // Thahmad (place name)
1485
+ \u062C\u0644\u0633\u062A: "/d\u0292a\u02C8lasat/",
1486
+ \u062D\u0627\u0631\u062A\u0646\u0627: "/\u0127a\u02D0\u02C8ratna\u02D0/",
1487
+ \u062D\u0641\u0638\u0647\u0645\u0627: "/\u0127if\u02C8\xF0\u02E4ahuma\u02D0/",
1488
+ \u062D\u0643\u0627\u064A\u0629: "/\u0127i\u02C8ka\u02D0jah/",
1489
+ \u062D\u064A\u0627\u062A\u0647: "/\u0127a\u02C8ja\u02D0tahu/",
1490
+ \u062E\u0641\u064A\u0641\u0627: "/xa\u02C8fi\u02D0fan/",
1491
+ \u062E\u0644\u0627\u0644\u0647\u0627: "/xi\u02C8la\u02D0laha\u02D0/",
1492
+ \u062E\u0644\u0641\u0647\u0645: "/xal\u02C8fahum/",
1493
+ \u062E\u0645\u0631\u0627: "/\u02C8xamran/",
1494
+ \u062F\u0628\u0634\u0644\u064A\u0645: "/dab\u0283a\u02C8li\u02D0m/",
1495
+ // King Dabshelim
1496
+ \u0631\u0627\u0626\u062D\u0629: "/\u02C8ra\u02D0\u0294i\u0127ah/",
1497
+ \u0631\u0627\u0641\u0639\u0627: "/\u02C8ra\u02D0fi\u0295an/",
1498
+ \u0631\u0633\u0645\u0647\u0627: "/ras\u02C8muha\u02D0/",
1499
+ \u0631\u064A\u0627\u062D\u0643: "/ri\u02C8ja\u02D0\u0127aka/",
1500
+ \u0632\u0639\u0645\u0648\u0627: "/za\u02C8\u0295amu\u02D0/",
1501
+ \u0632\u0645\u0627\u0646\u0647: "/za\u02C8ma\u02D0nihi/",
1502
+ \u0633\u0627\u062F\u062A\u064A: "/sa\u02D0\u02C8dati\u02D0/",
1503
+ \u0633\u0627\u0633\u0627\u0646: "/sa\u02D0\u02C8sa\u02D0n/",
1504
+ // Sasanian dynasty
1505
+ \u0633\u0628\u0639\u0629: "/\u02C8sab\u0295ah/",
1506
+ \u0633\u062A\u062D\u0645\u0644\u0647: "/sata\u0127\u02C8miluhu/",
1507
+ \u0633\u0641\u064A\u0646\u062A\u0647: "/sa\u02C8fi\u02D0natuhu/",
1508
+ \u0633\u0643\u0631\u0629: "/\u02C8sakrah/",
1509
+ \u0633\u0646\u0629: "/\u02C8sanah/",
1510
+ \u0633\u0646\u064A\u0646: "/si\u02C8ni\u02D0n/",
1511
+ \u0634\u0647\u0631\u064A\u0627\u0631: "/\u0283ahri\u02C8ja\u02D0r/",
1512
+ // King Shahryar (1001 Nights)
1513
+ \u0634\u064A\u0626\u0627: "/\u02C8\u0283aj\u0294an/",
1514
+ \u0635\u0628\u0627\u062D\u0627: "/s\u02E4a\u02C8ba\u02D0\u0127an/",
1515
+ // morning
1516
+ \u0635\u062D\u0628\u064A: "/\u02C8s\u02E4a\u0127bi\u02D0/",
1517
+ // my companions
1518
+ \u0635\u063A\u0627\u0631\u0647\u0627: "/s\u02E4i\u02C8\u0263a\u02D0ruha\u02D0/",
1519
+ \u0637\u0627\u0644\u0639\u0627: "/\u02C8t\u02E4a\u02D0li\u0295an/",
1520
+ \u0637\u0648\u064A\u0644\u0629: "/t\u02E4a\u02C8wi\u02D0lah/",
1521
+ // Ibn Khaldun — المقدمة
1522
+ \u0638\u0627\u0647\u0631\u0647: "/\xF0\u02E4a\u02D0\u02C8hirihi/",
1523
+ // in its outward appearance
1524
+ \u0639\u0627\u0645\u0627: "/\u02C8\u0295a\u02D0man/",
1525
+ \u0639\u0628\u0644\u0629: "/\u02C8\u0295ablah/",
1526
+ // Abla (name)
1527
+ \u0639\u062C\u064A\u0628\u0629: "/\u0295a\u02C8d\u0292i\u02D0bah/",
1528
+ \u0639\u062F\u062A: "/\u02C8\u0295udtu/",
1529
+ \u0639\u0631\u0641\u062A: "/\u0295a\u02C8rafta/",
1530
+ // you knew
1531
+ \u0639\u0635\u0631\u0647: "/\u02C8\u0295as\u02E4rihi/",
1532
+ \u0639\u0644\u0645\u0647: "/\u02C8\u0295ilmihi/",
1533
+ \u0639\u0644\u064A\u0643: "/\u0295a\u02C8lajka/",
1534
+ \u0639\u0644\u064A\u0647\u0645: "/\u0295a\u02C8lajhim/",
1535
+ \u0639\u0646\u062F\u0647: "/\u02C8\u0295indahu/",
1536
+ \u0639\u0646\u0647\u0627: "/\u0295an\u02C8ha\u02D0/",
1537
+ // about it
1538
+ \u063A\u0627\u064A\u062A\u064A: "/\u0263a\u02D0\u02C8jati\u02D0/",
1539
+ // my goal
1540
+ \u063A\u064A\u0628\u0629: "/\u02C8\u0263ajbah/",
1541
+ \u0641\u0625\u0646: "/fa\u02C8\u0294in/",
1542
+ \u0641\u0627\u0633\u0642\u0646\u064A: "/fas\u02C8qini\u02D0/",
1543
+ \u0641\u0627\u063A\u062A\u0646\u0645\u0647\u0627: "/fa\u0263\u02C8tanimha\u02D0/",
1544
+ \u0641\u0627\u0644\u0645\u0642\u0631\u0627\u0629: "/fal\u02C8miqra\u02D0h/",
1545
+ \u0641\u062C\u0631\u0627: "/\u02C8fad\u0292ran/",
1546
+ \u0641\u062D\u0648\u0645\u0644: "/fa\u02C8\u0127awmal/",
1547
+ \u0641\u0645\u0627: "/fa\u02C8ma\u02D0/",
1548
+ \u0641\u064A\u0647: "/fi\u02D0hi/",
1549
+ \u0641\u064A\u0647\u0627: "/\u02C8fi\u02D0ha\u02D0/",
1550
+ \u0642\u0627\u0644\u062A: "/\u02C8qa\u02D0lat/",
1551
+ \u0642\u0635\u0629: "/\u02C8qis\u02E4\u02D0ah/",
1552
+ \u0642\u0644\u062A: "/\u02C8qultu/",
1553
+ // I said
1554
+ \u0643\u062A\u0627\u0628\u0627\u062A: "/kita\u02D0\u02C8ba\u02D0t/",
1555
+ \u0643\u0631\u0633\u064A\u0647: "/kur\u02C8sij\u02D0uhu/",
1556
+ \u0644\u062E\u0648\u0644\u0629: "/lixaw\u02C8latah/",
1557
+ // for Khawla (name)
1558
+ // Tarafa ibn al-Abd — المعلقة
1559
+ \u0644\u0643\u0650: "/\u02C8laki/",
1560
+ // to you (fem.)
1561
+ \u0644\u0646\u064A\u0644\u0643: "/li\u02C8najlika/",
1562
+ // for your Nile
1563
+ \u0644\u0648\u0647\u0644\u0629: "/li\u02C8wahlah/",
1564
+ \u0645\u0628\u0646\u064A\u0629: "/mab\u02C8nij\u02D0ah/",
1565
+ // Antara ibn Shaddad — المعلقة
1566
+ \u0645\u062A\u0631\u062F\u0645: "/muta\u02C8rad\u02D0im/",
1567
+ // worn out/in ruins
1568
+ \u0645\u062A\u0642\u0637\u0639\u0627: "/mutaqat\u02E4\u02C8t\u02E4i\u0295an/",
1569
+ \u0645\u062F\u064A\u0646\u0629: "/ma\u02C8di\u02D0nah/",
1570
+ \u0645\u0637\u064A\u0647\u0645: "/mat\u02E4aj\u02D0a\u02C8hum/",
1571
+ // their riding beasts
1572
+ // Abu al-Ala al-Ma'arri — غير مجد في ملتي
1573
+ \u0645\u0644\u062A\u064A: "/mil\u02D0a\u02C8ti\u02D0/",
1574
+ // my religion/creed
1575
+ \u0645\u0647\u064A\u0627\u0631: "/mah\u02C8ja\u02D0r/",
1576
+ // Mahyar (Adonis)
1577
+ \u0645\u064A\u0644\u0627\u062F\u0647: "/mi\u02D0\u02C8la\u02D0dihi/",
1578
+ \u0646\u0633\u062C\u062A\u0647\u0627: "/nasa\u02C8d\u0292atha\u02D0/",
1579
+ \u0646\u0648\u0645\u0627: "/\u02C8nawman/",
1580
+ \u0647\u0627\u062F\u0645\u0627: "/\u02C8ha\u02D0diman/",
1581
+ \u0647\u0648\u0630\u0627: "/ha\u02D0\u02C8\xF0a\u02D0/",
1582
+ \u0648\u0623\u0639\u0648\u0627\u0646: "/wa\u0294a\u0295\u02C8wa\u02D0n/",
1583
+ // and followers
1584
+ \u0648\u0625\u0646\u0645\u0627: "/wa\u0294in\u02D0a\u02C8ma\u02D0/",
1585
+ \u0648\u0625\u064A\u0627\u0643: "/wa\u0294ij\u02C8ja\u02D0ka/",
1586
+ \u0648\u0627\u0633\u0644\u0645\u064A: "/wasla\u02C8mi\u02D0/",
1587
+ // and farewell/be safe (fem.)
1588
+ \u0648\u0627\u0639\u062A\u0642\u0627\u062F\u064A: "/wa\u0294i\u0295tiqa\u02D0\u02C8di\u02D0/",
1589
+ // and my belief
1590
+ \u0648\u0627\u0644\u0633\u0646\u0629: "/was\u02D0\u02C8anah/",
1591
+ \u0648\u0627\u0644\u0642\u0627\u0644: "/wal\u02C8qa\u02D0l/",
1592
+ // and the talk
1593
+ \u0648\u0627\u0644\u0645\u062D\u0628\u0629: "/walma\u02C8\u0127ab\u02D0ah/",
1594
+ \u0648\u0628\u0634\u0631\u0648\u0637\u0647: "/wabi\u0283u\u02C8ru\u02D0tihi/",
1595
+ // and his conditions
1596
+ \u0648\u062D\u0643\u0627\u064A\u0627\u062A: "/wa\u0127ika\u02D0\u02C8ja\u02D0t/",
1597
+ \u0648\u0634\u0628\u064A\u0647: "/wa\u0283a\u02C8bi\u02D0h/",
1598
+ // and similar to
1599
+ \u0648\u0634\u0645\u0623\u0644: "/wa\u0283a\u02C8ma\u0294l/",
1600
+ \u0648\u0636\u0639\u0647: "/\u02C8wad\u02E4\u0295ihi/",
1601
+ \u0648\u0639\u0644\u0649: "/wa\u02C8\u0295ala\u02D0/",
1602
+ // and upon
1603
+ \u0648\u0639\u0644\u064A\u0647\u0645: "/wa\u0295a\u02C8lajhim/",
1604
+ \u0648\u063A\u0627\u0628: "/wa\u02C8\u0263a\u02D0b/",
1605
+ \u0648\u0641\u0624\u0627\u062F\u064A: "/wafu\u02C8\u0294a\u02D0di\u02D0/",
1606
+ // and my heart
1607
+ \u0648\u0641\u0637\u0646\u0629: "/wa\u02C8fit\u02E4nah/",
1608
+ \u0648\u0642\u0648\u0641\u0627: "/wu\u02C8qu\u02D0fan/",
1609
+ // standing
1610
+ \u0648\u0645\u0627: "/wa\u02C8ma\u02D0/",
1611
+ \u0648\u0645\u062A\u0633\u0627\u0648\u064A\u0646: "/wamutasa\u02D0\u02C8wi\u02D0n/",
1612
+ \u0648\u0645\u0646\u0632\u0644: "/wa\u02C8manzil/",
1613
+ \u0648\u0647\u0628\u0648\u0627: "/wu\u02C8hibu\u02D0/",
1614
+ \u064A\u0624\u0648\u062F\u0647: "/ja\u0294u\u02D0\u02C8duhu/",
1615
+ \u064A\u062A\u062E\u0637\u0649: "/jata\u02C8xat\u02E4\u02D0a\u02D0/",
1616
+ \u064A\u062D\u064A\u0637\u0648\u0646: "/ju\u0127i\u02D0\u02C8t\u02E4u\u02D0n/",
1617
+ \u064A\u0636\u0639\u0647: "/jad\u02E4a\u02C8\u0295uhu/",
1618
+ \u064A\u0639\u0627\u0645\u0644\u0648\u0627: "/ju\u02C8\u0295a\u02D0milu\u02D0/",
1619
+ \u064A\u0642\u0648\u0644\u0648\u0646: "/jaqu\u02D0\u02C8lu\u02D0n/",
1620
+ // they say
1621
+ \u064A\u0647\u062A\u0645\u0648\u0627: "/jahtam\u02C8mu\u02D0/"
1622
+ // they cared
1623
+ };
1624
+
1625
+ // src/overrides/de.ts
1626
+ var de = {
1627
+ abendsonnenschein: "/\u02C8a\u02D0b\u0259nt\u02CCz\u0254n\u0259n\u0283a\u026A\u032Fn/",
1628
+ // Foreign city names in German texts
1629
+ Aires: "/\u02C8a\u026A\u032F\u0281\u025Bs/",
1630
+ andrer: "/\u02C8and\u0281\u0250/",
1631
+ balde: "/\u02C8bald\u0259/",
1632
+ bewu\u00DFtsein: "/b\u0259\u02C8v\u028Ast\u02CCza\u026A\u032Fn/",
1633
+ bl\u00FCtenschimmer: "/\u02C8bly\u02D0t\u0259n\u02CC\u0283\u026Am\u0250/",
1634
+ brahmanensohn: "/\u02C8b\u0281a\u02D0ma\u02D0n\u0259n\u02CCzo\u02D0n/",
1635
+ Buenos: "/\u02C8bue\u02D0n\u0254s/",
1636
+ "c\u2019est": "/s\u025B/",
1637
+ // French in Mann's Buddenbrooks
1638
+ ch\u00E8re: "/\u0283\u025B\u02D0\u0281/",
1639
+ // French in Mann's Buddenbrooks
1640
+ da\u00DF: "/das/",
1641
+ "davos-platz": "/\u02C8da\u02D0v\u0254s\u02CCplats/",
1642
+ // Swiss resort area
1643
+ demoiselle: "/d\u0259mwa\u02C8z\u025Bl/",
1644
+ // French in Mann's Buddenbrooks
1645
+ d\u00FCwel: "/\u02C8dy\u02D0v\u0259l/",
1646
+ // Low German: devil
1647
+ fa\u00DFt: "/fast/",
1648
+ feuertrunken: "/\u02C8f\u0254\u026A\u032F\u0250\u02CCt\u0281\u028A\u014Bk\u0259n/",
1649
+ flu\u00DFufers: "/\u02C8fl\u028As\u02CC\u0294u\u02D0f\u0250s/",
1650
+ fr\u00FChlingsnachmittag: "/\u02C8f\u0281y\u02D0l\u026A\u014Bs\u02CCna\u02D0xm\u026Ata\u02D0k/",
1651
+ gefahrdrohende: "/\u0261\u0259\u02C8fa\u02D0\u0250\u032F\u02CCd\u0281o\u02D0\u0259nd\u0259/",
1652
+ gek\u00FC\u00DFt: "/\u0261\u0259\u02C8k\u028Fst/",
1653
+ g\u00F6tterfunken: "/\u02C8\u0261\u0153t\u0250\u02CCf\u028A\u014Bk\u0259n/",
1654
+ govinda: "/\u0261o\u02C8v\u026Anda/",
1655
+ // Sanskrit name
1656
+ graub\u00FCndischen: "/\u0261\u0281a\u028A\u032F\u02C8b\u028Fnd\u026A\u0283\u0259n/",
1657
+ guizot: "/\u0261i\u02C8zo\u02D0/",
1658
+ // French name
1659
+ h\u00E4tt: "/h\u025Bt/",
1660
+ // contraction of hätte
1661
+ "ich\u2019s": "/\u026A\xE7s/",
1662
+ k\u00FC\u00DFnacht: "/\u02C8k\u028Fsnaxt/",
1663
+ macheath: "/m\u0259\u02C8ki\u02D0\u03B8/",
1664
+ // English name
1665
+ metternich: "/\u02C8m\u025Bt\u0250n\u026A\xE7/",
1666
+ m\u00FC\u00DFt: "/m\u028Fst/",
1667
+ mu\u00DFte: "/\u02C8m\u028Ast\u0259/",
1668
+ New: "/nju\u02D0/",
1669
+ ook: "/o\u02D0k/",
1670
+ // Low German: also
1671
+ "prinz-regentenstra\xDFe": "/\u02C8p\u0281\u026Ants\u0281e\u02CC\u0261\u025Bnt\u0259n\u02CC\u0283t\u0281a\u02D0s\u0259/",
1672
+ question: "/k\u025Bs\u02C8tj\u0254\u0303/",
1673
+ // French in Mann's Buddenbrooks
1674
+ salwaldes: "/\u02C8zal\u02CCvald\u0259s/",
1675
+ // Sal forest
1676
+ samsa: "/\u02C8zamza/",
1677
+ // Kafka character
1678
+ schriee: "/\u02C8\u0283\u0281i\u02D0\u0259/",
1679
+ siddhartha: "/z\u026A\u02C8da\u02D0\u0281ta/",
1680
+ sternklar: "/\u02C8\u0283t\u025B\u0281nkla\u02D0\u0250\u032F/",
1681
+ tr\u00E8s: "/t\u0281\u025B/",
1682
+ // French in Mann's Buddenbrooks
1683
+ verliess: "/f\u025B\u0250\u032F\u02C8li\u02D0s/",
1684
+ York: "/j\u0254\u0281k/",
1685
+ zarathustra: "/tsa\u0281a\u02C8t\u028Ast\u0281a/"
1686
+ };
1687
+
1688
+ // src/overrides/eo.ts
1689
+ var eo = {
1690
+ // G2P handles all Esperanto words with correct penultimate stress.
1691
+ // Overrides here are only needed for words where the IPA dictionary
1692
+ // has an incorrect entry that needs correction.
1693
+ };
1694
+
1695
+ // src/overrides/es.ts
1696
+ var es = {
1697
+ arts: "/a\u0281/",
1698
+ // French "Arts" in Cortázar (Pont des Arts)
1699
+ aureliano: "/aw\u027Ee\u02C8ljano/",
1700
+ beatriz: "/bea\u02C8t\u027Eis/",
1701
+ buend\u00EDa: "/bwen\u02C8dia/",
1702
+ ca\u00F1abrava: "/ka\u0272a\u02C8\u03B2\u027Ea\u03B2a/",
1703
+ conti: "/\u02C8konti/",
1704
+ // Italian name in Cortázar
1705
+ fierro: "/\u02C8fje\u027Eo/",
1706
+ macondo: "/ma\u02C8kondo/",
1707
+ pont: "/p\u0254\u0303/",
1708
+ // French in Cortázar
1709
+ porfirio: "/po\u027E\u02C8fi\u027Ejo/",
1710
+ quai: "/ke/",
1711
+ // French in Cortázar
1712
+ qued\u00F3se: "/ke\u02C8\xF0ose/",
1713
+ rue: "/\u0281y/",
1714
+ // French in Cortázar
1715
+ seine: "/s\u025Bn/",
1716
+ // French river in Cortázar
1717
+ s\u00E9pase: "/\u02C8sepase/",
1718
+ urbino: "/u\u027E\u02C8\u03B2ino/",
1719
+ vacilante: "/basi\u02C8lante/",
1720
+ viterbo: "/bi\u02C8te\u027E\u03B2o/"
1721
+ };
1722
+
1723
+ // src/overrides/fa.ts
1724
+ var fa = {
1725
+ \u0622\u0628\u0650: "/\u0252\u02D0be/",
1726
+ \u0622\u0631\u0627\u06CC: "/\u0252\u02D0\u027E\u0252\u02D0j/",
1727
+ \u0622\u0641\u0631\u06CC\u0646: "/\u0252\u02D0f\xE6\u027Ei\u02D0n/",
1728
+ // creator
1729
+ \u0622\u0645\u0648\u06CC: "/\u0252\u02D0mu\u02D0j/",
1730
+ \u0622\u0648\u0627\u0631\u0647: "/\u0252\u02D0v\u0252\u02D0\u027Ee/",
1731
+ // wandering, exiled
1732
+ \u0622\u0648\u0627\u0632\u0647: "/\u0252\u02D0v\u0252\u02D0ze/",
1733
+ // fame, renown
1734
+ \u0622\u06CC\u064E\u062F: "/\u0252\u02D0j\xE6d/",
1735
+ // --- Forough Farrokhzad, Another Birth ---
1736
+ \u0622\u06CC\u0647: "/\u0252\u02D0je/",
1737
+ // verse (of scripture)
1738
+ \u0627\u0628\u062F\u06CC: "/\xE6b\xE6di\u02D0/",
1739
+ // eternal
1740
+ \u0627\u0628\u0648\u0627\u0644\u0641\u0636\u0644: "/\xE6bolf\xE6zl/",
1741
+ \u0627\u062C\u062A\u0645\u0627\u0639\u06CC: "/ed\u0361\u0292tem\u0252\u02D0\u0294i\u02D0/",
1742
+ \u0627\u062F\u0631: "/\xE6de\u027E/",
1743
+ // pour! (Arabic imperative)
1744
+ \u0627\u0633\u0627\u0633: "/\xE6s\u0252\u02D0s/",
1745
+ // --- Iranian Constitution ---
1746
+ \u0627\u0633\u0627\u0633\u06CC: "/\xE6s\u0252\u02D0si\u02D0/",
1747
+ \u0627\u0635\u0648\u0644: "/osu\u02D0l/",
1748
+ \u0627\u0641\u062A\u0627\u062F: "/oft\u0252\u02D0d/",
1749
+ // fell, befell
1750
+ // --- UDHR Article 1 ---
1751
+ \u0627\u0641\u0631\u0627\u062F: "/\xE6f\u027E\u0252\u02D0d/",
1752
+ // individuals
1753
+ \u0627\u0642\u062A\u0635\u0627\u062F\u06CC: "/e\u0262tes\u0252\u02D0di\u02D0/",
1754
+ // --- Hafez, Divan (Ghazal 1) ---
1755
+ \u0627\u0644\u0627: "/\xE6l\u0252\u02D0/",
1756
+ // O! behold
1757
+ \u0627\u0644\u0633\u0627\u0642\u06CC: "/\xE6ss\u0252\u02D0\u0262i\u02D0/",
1758
+ // the cupbearer (Arabic definite)
1759
+ \u0627\u0644\u0644\u0651\u0647: "/\xE6ll\u0252\u02D0h/",
1760
+ \u0627\u0646\u062F\u0631\u0634: "/\xE6nd\xE6\u027E\xE6\u0283/",
1761
+ // within it
1762
+ \u0627\u0646\u0632\u0648\u0627: "/enzev\u0252\u02D0/",
1763
+ // seclusion
1764
+ \u0627\u0646\u0639\u06A9\u0627\u0633: "/en\u0294ek\u0252\u02D0s/",
1765
+ // --- Sepehri ---
1766
+ \u0627\u0647\u0644: "/\xE6hl/",
1767
+ \u0627\u06CC: "/ej/",
1768
+ // O! (vocative)
1769
+ \u0627\u06CC\u0647\u0627: "/\xE6jjoh\u0252\u02D0/",
1770
+ // O! (Arabic vocative)
1771
+ \u0628\u0627\u062F\u0647\u0627\u06CC: "/b\u0252\u02D0dh\u0252\u02D0je/",
1772
+ // winds (with ezafeh)
1773
+ "\u0628\u0628\u0631\u06CC\u062F\u0647\u200C\u0627\u0646\u062F": "/bob\u027Ei\u02D0de\xE6nd/",
1774
+ // they have cut
1775
+ // --- Baba Taher ---
1776
+ \u0628\u0628\u0646\u062F\u0645: "/beb\xE6nd\xE6m/",
1777
+ \u0628\u062A\u0627\u0628\u062F: "/bet\u0252\u02D0b\xE6d/",
1778
+ // will shine
1779
+ \u0628\u062C\u0627\u0646: "/bed\u0361\u0292\u0252\u02D0n/",
1780
+ // --- Attar, Conference of the Birds ---
1781
+ \u0628\u062E\u0634\u06CC\u062F: "/b\xE6x\u0283i\u02D0d/",
1782
+ \u0628\u062F\u0627\u0631: "/bed\u0252\u02D0\u027E/",
1783
+ \u0628\u062F\u0627\u0634\u062A: "/bed\u0252\u02D0\u0283t/",
1784
+ \u0628\u0631\u062A\u0631: "/b\xE6\u027Et\xE6\u027E/",
1785
+ // higher, superior
1786
+ \u0628\u0631\u062F: "/bo\u027Ed/",
1787
+ // will carry (subjunctive)
1788
+ \u0628\u0633\u062A\u0646\u062F: "/b\xE6st\xE6nd/",
1789
+ \u0628\u0634\u0627\u06CC\u062F: "/be\u0283\u0252\u02D0j\xE6d/",
1790
+ \u0628\u0634\u0631: "/b\xE6\u0283\xE6\u027E/",
1791
+ // humankind
1792
+ // --- Rumi, Masnavi (Song of the Reed) ---
1793
+ \u0628\u0634\u0646\u0648: "/be\u0283no/",
1794
+ // listen!
1795
+ \u0628\u0634\u0648\u06CC\u0645: "/be\u0283u\u02D0j\xE6m/",
1796
+ \u0628\u06AF\u0631\u062F\u0645: "/be\u0261\xE6\u027Ed\xE6m/",
1797
+ \u0628\u06AF\u0634\u0627\u06CC: "/be\u0261o\u0283\u0252\u02D0j/",
1798
+ \u0628\u06AF\u0648\u06CC\u0645: "/be\u0261u\u02D0j\xE6m/",
1799
+ // let me say
1800
+ \u0628\u0646\u0627\u0632\u0645: "/ben\u0252\u02D0z\xE6m/",
1801
+ // --- Saadi, Bustan ---
1802
+ \u0628\u0646\u0627\u0645: "/ben\u0252\u02D0m/",
1803
+ // in the name of
1804
+ \u0628\u0646\u0634\u0633\u062A\u0647: "/bene\u0283\xE6ste/",
1805
+ // has sat
1806
+ \u0628\u0646\u0645\u0627\u06CC: "/ben\xE6m\u0252\u02D0j/",
1807
+ // --- Rudaki ---
1808
+ \u0628\u0648\u06CC\u0650: "/bu\u02D0je/",
1809
+ \u0628\u06CC\u0647\u0642\u06CC: "/bejh\xE6\u0262i\u02D0/",
1810
+ \u067E\u064E\u0631\u0646\u06CC\u0627\u0646: "/p\xE6\u027Eni\u02D0\u0252\u02D0n/",
1811
+ // --- Sanai ---
1812
+ \u067E\u0631\u0648\u0631: "/p\xE6\u027Ev\xE6\u027E/",
1813
+ \u067E\u0633\u062A\u06CC: "/p\xE6sti\u02D0/",
1814
+ "\u067E\u0648\u0632\u0634\u200C\u067E\u0630\u06CC\u0631": "/pu\u02D0ze\u0283p\xE6zi\u02D0\u027E/",
1815
+ // excuse-accepting
1816
+ \u062A\u0627\u0631\u06CC\u06A9\u06CC\u0633\u062A: "/t\u0252\u02D0\u027Ei\u02D0ki\u02D0st/",
1817
+ // is a darkness (contraction)
1818
+ \u062A\u062D\u0642\u06CC\u0642: "/t\xE6h\u0262i\u02D0\u0262/",
1819
+ \u062A\u0648\u0641\u06CC\u0642: "/to\u028Afi\u02D0\u0262/",
1820
+ "\u062C\u062F\u0627\u06CC\u06CC\u200C\u0647\u0627": "/d\u0361\u0292od\u0252\u02D0ji\u02D0h\u0252\u02D0/",
1821
+ // separations
1822
+ \u062C\u0644: "/d\u0361\u0292\xE6ll/",
1823
+ // majesty
1824
+ \u062C\u0648\u0627\u0628\u0650: "/d\u0361\u0292\xE6v\u0252\u02D0be/",
1825
+ \u062C\u0648\u06CC\u0650: "/d\u0361\u0292u\u02D0je/",
1826
+ \u062D\u0627\u0641\u0638: "/h\u0252\u02D0fez/",
1827
+ \u062D\u0627\u0644: "/h\u0252\u02D0l/",
1828
+ // state, condition
1829
+ \u062D\u0627\u0644\u06CC: "/h\u0252\u02D0li\u02D0/",
1830
+ // pleasant (archaic: now)
1831
+ \u062D\u0642\u0648\u0642: "/ho\u0262u\u02D0\u0262/",
1832
+ // rights
1833
+ \u062D\u06A9\u06CC\u0645: "/h\xE6ki\u02D0m/",
1834
+ // wise
1835
+ \u062D\u06CC\u0627\u062A: "/h\xE6j\u0252\u02D0t/",
1836
+ // life
1837
+ \u062D\u06CC\u062B\u06CC\u062A: "/hejsi\u02D0j\xE6t/",
1838
+ // dignity
1839
+ // --- Nezami, Khosrow o Shirin ---
1840
+ \u062E\u062F\u0627\u0648\u0646\u062F\u0627: "/xod\u0252\u02D0v\xE6nd\u0252\u02D0/",
1841
+ // --- Saadi, Golestan ---
1842
+ \u062E\u062F\u0627\u06CC: "/xod\u0252\u02D0j/",
1843
+ // God (with ezafeh)
1844
+ \u062E\u0631\u062F\u0628\u062E\u0634: "/xe\u027E\xE6db\xE6x\u0283/",
1845
+ \u062E\u0637\u0627\u0628\u062E\u0634: "/x\xE6t\u0252\u02D0b\xE6x\u0283/",
1846
+ // sin-forgiving
1847
+ \u062E\u0650\u0646\u06AF\u0650: "/xen\u0261e/",
1848
+ \u062E\u0648\u0634\u0627: "/xo\u0283\u0252\u02D0/",
1849
+ \u062E\u0648\u0634\u062E\u0648\u0627\u0646: "/xo\u0283x\u0252\u02D0n/",
1850
+ // sweet-singing
1851
+ // --- Behbahani ---
1852
+ \u062E\u0648\u06CC\u0634: "/xi\u02D0\u0283/",
1853
+ \u062E\u06CC\u0632\u0631\u0627\u0646: "/xi\u02D0z\u027E\u0252\u02D0n/",
1854
+ // bamboo
1855
+ \u062F\u0627\u0631\u0627\u06CC: "/d\u0252\u02D0\u027E\u0252\u02D0je/",
1856
+ // possessing
1857
+ \u062F\u0627\u0646\u0646\u062F: "/d\u0252\u02D0n\xE6nd/",
1858
+ // they know
1859
+ \u062F\u0631\u0650: "/d\xE6\u027Ee/",
1860
+ \u062F\u064F\u0631\u0634\u062A\u06CC\u200C\u0647\u0627\u06CC: "/do\u027Eo\u0283ti\u02D0h\u0252\u02D0je/",
1861
+ \u062F\u0631\u06CC\u0686\u0647\u200C\u06CC: "/d\xE6\u027Ei\u02D0t\u0361\u0283eje/",
1862
+ \u062F\u0633\u062A\u06AF\u06CC\u0631: "/d\xE6st\u0261i\u02D0\u027E/",
1863
+ // helper
1864
+ \u062F\u0644\u06CC: "/deli/",
1865
+ \u062F\u0648\u0633\u062A\u0627\u0646\u06CC: "/du\u02D0st\u0252\u02D0ni\u02D0/",
1866
+ \u062F\u06CC\u062F\u0627\u0631: "/di\u02D0d\u0252\u02D0\u027E/",
1867
+ \u0630\u0648\u0642\u06CC: "/zo\u028A\u0262i\u02D0/",
1868
+ \u0631\u0627: "/\u027E\u0252\u02D0/",
1869
+ // object marker
1870
+ \u0631\u0627\u0632\u0642: "/\u027E\u0252\u02D0ze\u0262/",
1871
+ "\u0631\u0633\u062A\u0646\u200C\u0647\u0627\u06CC": "/\u027E\xE6st\xE6nh\u0252\u02D0je/",
1872
+ // growings (with ezafeh)
1873
+ \u0631\u0636\u06CC: "/\u027E\xE6zi\u02D0/",
1874
+ \u0631\u0641\u062A\u0627\u0631: "/\u027E\xE6ft\u0252\u02D0\u027E/",
1875
+ // behavior
1876
+ \u0631\u0647: "/\u027E\xE6h/",
1877
+ \u0631\u0647\u0650: "/\u027E\xE6he/",
1878
+ \u0631\u0647\u0646\u0645\u0627\u06CC: "/\u027E\xE6hn\xE6m\u0252\u02D0j/",
1879
+ // guide
1880
+ \u0631\u0648\u062D\u06CC\u0647: "/\u027Eu\u02D0hi\u02D0je/",
1881
+ // spirit, morale
1882
+ \u0631\u0648\u0632\u06AF\u0627\u0631\u0645: "/\u027Eu\u02D0ze\u0261\u0252\u02D0\u027E\xE6m/",
1883
+ "\u0631\u0648\u0632\u06CC\u200C\u062F\u0647": "/\u027Eu\u02D0zi\u02D0deh/",
1884
+ // sustenance-giver
1885
+ \u0631\u06CC\u06AF\u0650: "/\u027Ei\u02D0\u0261e/",
1886
+ \u0632\u0627\u06CC\u06CC\u062F\u0647: "/z\u0252\u02D0ji\u02D0de/",
1887
+ // born
1888
+ \u0632\u0628\u0627\u0646\u06CC: "/z\xE6b\u0252\u02D0ni\u02D0/",
1889
+ // --- Hedayat, The Blind Owl ---
1890
+ "\u0632\u062E\u0645\u200C\u0647\u0627\u06CC\u06CC": "/z\xE6xmh\u0252\u02D0ji\u02D0/",
1891
+ // wounds (indefinite)
1892
+ \u0632\u06CC: "/zi\u02D0/",
1893
+ \u0632\u06CC\u0652: "/zi\u02D0/",
1894
+ "\u0633\u0627\u062D\u0644\u200C\u0647\u0627": "/s\u0252\u02D0helh\u0252\u02D0/",
1895
+ // shores
1896
+ \u0633\u0628\u06A9\u0628\u0627\u0631\u0627\u0646: "/s\xE6bokb\u0252\u02D0\u027E\u0252\u02D0n/",
1897
+ // light-laden ones
1898
+ \u0633\u0628\u06A9\u062A\u06AF\u06CC\u0646: "/sobokte\u0261i\u02D0n/",
1899
+ \u0633\u062D\u0631\u06AF\u0627\u0647: "/s\xE6h\xE6\u027E\u0261\u0252\u02D0h/",
1900
+ // dawn
1901
+ // --- Nezami, Layli o Majnun ---
1902
+ \u0633\u0631\u0622\u063A\u0627\u0632: "/s\xE6r\u0252\u02D0\u0261\u0252\u02D0z/",
1903
+ \u0633\u0631\u0627\u06CC\u062F: "/so\u027E\u0252\u02D0j\xE6d/",
1904
+ \u0633\u0644\u0627\u0645\u06CC: "/s\xE6l\u0252\u02D0mi\u02D0/",
1905
+ // --- Khayyam, Rubaiyat ---
1906
+ \u0633\u0648\u062F\u0627: "/so\u028Ad\u0252\u02D0/",
1907
+ // passion, melancholy
1908
+ \u0633\u06CC\u0627\u0633\u06CC: "/sij\u0252\u02D0si\u02D0/",
1909
+ \u0634\u0627\u062F\u0645\u0627\u0646: "/\u0283\u0252\u02D0dm\u0252\u02D0n/",
1910
+ \u0634\u0631\u062D: "/\u0283\xE6\u027Eh/",
1911
+ // explanation
1912
+ \u0634\u0631\u062D\u0647: "/\u0283\xE6\u027Ehe/",
1913
+ // explanation (with ezafeh)
1914
+ \u0634\u06A9\u0627\u06CC\u062A: "/\u0283ek\u0252\u02D0j\xE6t/",
1915
+ // complaint
1916
+ "\u0634\u06A9\u0641\u062A\u0646\u200C\u0647\u0627": "/\u0283ekoft\xE6nh\u0252\u02D0/",
1917
+ // blossomings
1918
+ \u0635\u0644\u062A\u0650: "/sele\u02D0te/",
1919
+ \u0635\u0646\u0639: "/son\u0294/",
1920
+ \u0636\u0648\u0627\u0628\u0637: "/z\xE6v\u0252\u02D0bet/",
1921
+ \u0637\u0627\u0639\u062A\u0634: "/t\u0252\u02D0\u0294\xE6t\xE6\u0283/",
1922
+ // his obedience
1923
+ \u0639\u0632: "/\u0294\xE6zz/",
1924
+ // glory
1925
+ \u0639\u0645\u0631: "/om\u027E/",
1926
+ \u0639\u0646\u0647: "/\xE6nho/",
1927
+ \u063A\u0627\u0632\u06CC: "/\u0263\u0252\u02D0zi\u02D0/",
1928
+ \u063A\u0627\u06CC\u062A: "/\u0263\u0252\u02D0j\xE6t/",
1929
+ \u063A\u0632\u0644: "/\u0263\xE6z\xE6l/",
1930
+ \u063A\u0632\u0646\u0648\u06CC: "/\u0263\xE6zn\xE6vi\u02D0/",
1931
+ \u063A\u0632\u0646\u06CC: "/\u0263\xE6zni\u02D0/",
1932
+ \u0641\u0631\u0627\u0642: "/fe\u027E\u0252\u02D0\u0262/",
1933
+ // separation
1934
+ \u0641\u0631\u062F: "/f\xE6\u027Ed/",
1935
+ // alone, single
1936
+ \u0641\u0631\u0647\u0646\u06AF\u06CC: "/f\xE6rh\xE6n\u0261i\u02D0/",
1937
+ \u0641\u0631\u0648: "/fo\u027Eu\u02D0/",
1938
+ // down
1939
+ \u0641\u0644\u06A9: "/f\xE6l\xE6k/",
1940
+ \u0642\u062F\u06A9: "/\u0262\xE6d\xE6k/",
1941
+ \u0642\u0631\u0628\u062A: "/\u0262o\u027Eb\xE6t/",
1942
+ // nearness (to God)
1943
+ \u0642\u0635\u06CC\u062F\u0647\u200C\u0627\u06CC: "/\u0262\xE6si\u02D0de\xE6j/",
1944
+ // --- Nima Yushij, The Phoenix ---
1945
+ \u0642\u0642\u0646\u0648\u0633: "/\u0262o\u0262nu\u02D0s/",
1946
+ // phoenix
1947
+ \u06A9\u0622\u0641\u0631\u06CC\u0646\u062A: "/k\u0252\u02D0f\xE6\u027Ei\u02D0n\xE6t/",
1948
+ \u06A9\u0627\u0631\u06AF\u0634\u0627\u06CC: "/k\u0252\u02D0\u027E\u0261o\u0283\u0252\u02D0j/",
1949
+ \u06A9\u0627\u0633\u0627: "/k\xE6\u0294s\u0252\u02D0/",
1950
+ // a cup (Arabic accusative, Persianized)
1951
+ \u06A9\u0627\u0634\u0627\u0646\u0645: "/k\u0252\u02D0\u0283\u0252\u02D0n\xE6m/",
1952
+ \u06A9\u0627\u0648: "/k\u0252\u02D0w/",
1953
+ \u06A9\u0631\u06CC\u0645: "/k\xE6\u027Ei\u02D0m/",
1954
+ // generous
1955
+ // --- Ferdowsi, Shahnameh ---
1956
+ \u06A9\u0632: "/k\xE6z/",
1957
+ // contraction of که از (that from)
1958
+ \u06A9\u0634\u06CC\u062F\u0645: "/ke\u0283i\u02D0d\xE6m/",
1959
+ // I drew (a sigh)
1960
+ \u06A9\u0644\u0627\u0645: "/k\xE6l\u0252\u02D0m/",
1961
+ \u06A9\u0646\u0627\u0646: "/kon\u0252\u02D0n/",
1962
+ // doing (present participle suffix)
1963
+ \u06A9\u0646\u0645: "/kon\xE6m/",
1964
+ \u06A9\u0646\u0646\u062F: "/kon\xE6nd/",
1965
+ // they do (subjunctive)
1966
+ \u06AF\u0631\u062F\u0627\u0628\u06CC: "/\u0261e\u027Ed\u0252\u02D0bi\u02D0/",
1967
+ // a whirlpool (indefinite)
1968
+ // --- Akhavan-Sales ---
1969
+ \u06AF\u0641\u062A: "/\u0261oft/",
1970
+ \u06AF\u064F\u0644: "/\u0261ol/",
1971
+ \u06AF\u0648\u06CC\u062F: "/\u0261u\u02D0j\xE6d/",
1972
+ // --- Beyhaqi ---
1973
+ \u06AF\u0648\u06CC\u0646\u062F\u0647\u0654: "/\u0261u\u02D0j\xE6ndeje/",
1974
+ \u0644\u063A\u0632\u0627\u0646: "/l\xE6\u0263z\u0252\u02D0n/",
1975
+ \u0645\u0627\u062F\u0631\u06CC: "/m\u0252\u02D0d\xE6\u027Ei\u02D0/",
1976
+ \u0645\u0627\u0646\u062F\u0647: "/m\u0252\u02D0nde/",
1977
+ // remaining
1978
+ \u0645\u0627\u0647\u062A\u0627\u0628: "/m\u0252\u02D0ht\u0252\u02D0b/",
1979
+ // moonlight
1980
+ \u0645\u0628\u06CC\u0646: "/mob\xE6jjen/",
1981
+ \u0645\u062D\u0645\u0648\u062F: "/m\xE6hmu\u02D0d/",
1982
+ \u0645\u062F\u0647: "/m\xE6deh/",
1983
+ \u0645\u0632\u06CC\u062F: "/m\xE6zi\u02D0d/",
1984
+ // increase
1985
+ \u0645\u0641\u0631\u062D: "/mof\xE6\u027E\u027Eeh/",
1986
+ // gladdening
1987
+ \u0645\u0645\u062F: "/momedd/",
1988
+ // sustainer
1989
+ \u0645\u0648\u062C\u0628: "/mo\u028Ad\u0361\u0292eb/",
1990
+ // cause of
1991
+ \u0645\u0648\u0644\u06CC\u0627\u0646: "/mu\u02D0li\u02D0\u0252\u02D0n/",
1992
+ \u0645\u0648\u0646\u0633: "/mu\u02D0nes/",
1993
+ \u0645\u0648\u0647\u0628\u062A: "/mo\u028Aheb\xE6t/",
1994
+ // gift, endowment
1995
+ "\u0645\u06CC\u200C\u0622\u06CC\u062F": "/mi\u02D0\u0252\u02D0j\xE6d/",
1996
+ // comes (up)
1997
+ "\u0645\u06CC\u200C\u062A\u0631\u0627\u0634\u062F": "/mi\u02D0t\xE6\u027E\u0252\u02D0\u0283\xE6d/",
1998
+ // scrapes, carves
1999
+ "\u0645\u06CC\u200C\u062E\u0648\u0631\u062F": "/mi\u02D0xo\u027E\xE6d/",
2000
+ // eats, erodes
2001
+ "\u0645\u06CC\u200C\u0631\u0648\u062F": "/mi\u02D0\u027E\xE6v\xE6d/",
2002
+ // goes (down)
2003
+ "\u0645\u06CC\u200C\u0634\u0648\u0646\u062F": "/mi\u02D0\u0283\xE6v\xE6nd/",
2004
+ // they become (passive)
2005
+ "\u0645\u06CC\u200C\u06A9\u0646\u062F": "/mi\u02D0kon\xE6d/",
2006
+ // does, is doing
2007
+ \u0646\u0627\u067E\u0633\u0646\u062F\u200C\u0645: "/n\u0252\u02D0p\xE6s\xE6nd\xE6m/",
2008
+ \u0646\u0627\u062E\u0648\u0628: "/n\u0252\u02D0xu\u02D0b/",
2009
+ \u0646\u0627\u0635\u0631: "/n\u0252\u02D0se\u027E/",
2010
+ "\u0646\u0627\u0644\u06CC\u062F\u0647\u200C\u0627\u0646\u062F": "/n\u0252\u02D0li\u02D0de\xE6nd/",
2011
+ // they have lamented
2012
+ \u0646\u0627\u0646\u06CC: "/n\u0252\u02D0ni\u02D0/",
2013
+ \u0646\u0627\u0648\u0644\u0647\u0627: "/n\u0252\u02D0velh\u0252\u02D0/",
2014
+ // pass it around
2015
+ \u0646\u062A\u0648\u0627\u0646\u062F: "/n\xE6t\xE6v\u0252\u02D0n\xE6d/",
2016
+ \u0646\u0634\u0627\u0637\u0650: "/ne\u0283\u0252\u02D0te/",
2017
+ \u0646\u0638\u0627\u0645\u06CC: "/nez\u0252\u02D0mi\u02D0/",
2018
+ \u0646\u0638\u0631\u0628\u0627\u0632\u06CC\u0627: "/n\xE6z\xE6\u027Eb\u0252\u02D0zi\u02D0\u0252\u02D0/",
2019
+ \u0646\u0639\u0645\u062A: "/ne\u0294m\xE6t/",
2020
+ // blessing
2021
+ \u0646\u0641\u0633\u06CC: "/n\xE6f\xE6si\u02D0/",
2022
+ // a breath (indefinite)
2023
+ \u0646\u0641\u06CC\u0631\u0645: "/n\xE6fi\u02D0\u027E\xE6m/",
2024
+ // my wailing
2025
+ \u0646\u06AF\u0627\u0647\u0650: "/ne\u0261\u0252\u02D0he/",
2026
+ \u0646\u06AF\u0630\u0631\u062F: "/n\xE6\u0261oz\xE6\u027E\xE6d/",
2027
+ // does not pass
2028
+ \u0646\u06AF\u0647: "/ne\u0261\xE6h/",
2029
+ \u0646\u0645\u0648\u062F: "/nemu\u02D0d/",
2030
+ // appeared, showed
2031
+ "\u0646\u0645\u06CC\u200C\u0634\u0648\u062F": "/n\xE6mi\u02D0\u0283\xE6v\xE6d/",
2032
+ // is not possible
2033
+ // --- Shared classical forms ---
2034
+ \u0646\u0647\u0627\u062F: "/neh\u0252\u02D0d/",
2035
+ \u0646\u0647\u0627\u062F\u0647\u0627\u06CC: "/n\xE6h\u0252\u02D0dh\u0252\u02D0je/",
2036
+ \u0646\u0648\u0634: "/nu\u02D0\u0283/",
2037
+ // drink!
2038
+ \u0646\u06CC\u0627\u0628\u062F: "/n\xE6j\u0252\u02D0b\xE6d/",
2039
+ // will not find
2040
+ \u0646\u06CC\u0627\u0631\u062F: "/n\xE6j\u0252\u02D0\u027E\xE6d/",
2041
+ \u0646\u06CC\u0633\u062A\u0627\u0646: "/nejest\u0252\u02D0n/",
2042
+ // reed bed
2043
+ // --- Shamlou ---
2044
+ \u0646\u06CC\u0633\u062A\u06CC: "/ni\u02D0sti\u02D0/",
2045
+ \u0647\u0627\u06CC\u0644: "/h\u0252\u02D0jel/",
2046
+ // terrifying
2047
+ \u0647\u0633\u062A\u0646\u062F: "/h\xE6st\xE6nd/",
2048
+ // they are
2049
+ \u0647\u064E\u0645\u06CC: "/h\xE6mi\u02D0/",
2050
+ \u0647\u0648\u0634\u06CC: "/hu\u02D0\u0283i\u02D0/",
2051
+ \u0648\u062C\u062F\u0627\u0646: "/ved\u0361\u0292d\u0252\u02D0n/",
2052
+ // conscience
2053
+ \u0648\u062F\u06CC\u0639\u062A: "/v\xE6di\u02D0\xE6t/",
2054
+ \u0648\u0632\u0634: "/v\xE6ze\u0283/",
2055
+ // blowing
2056
+ \u0648\u06CC: "/v\xE6j/",
2057
+ \u06CC\u0627\u062F\u0650: "/j\u0252\u02D0de/",
2058
+ \u06CC\u0627\u0631\u0650: "/j\u0252\u02D0\u027Ee/",
2059
+ \u06CC\u0627\u0641\u062A: "/j\u0252\u02D0ft/",
2060
+ \u06CC\u06A9\u062F\u06CC\u06AF\u0631: "/jekdi\u02D0\u0261\xE6\u027E/"
2061
+ // one another
2062
+ };
2063
+
2064
+ // src/overrides/fi.ts
2065
+ var fi = {
2066
+ // Finnish pronunciation overrides. Intentionally empty: per the original note, G2P handles
2067
+ // Finnish words correctly. Add an entry here only for a word where G2P produces an incorrect result.
2068
+ };
2069
+
2070
+ // src/overrides/fr.ts
2071
+ var fr = { // French pronunciation overrides: word (lowercase in every entry here) -> IPA transcription wrapped in slashes. Gloss comments describe the entry directly above them. NOTE(review): presumably consulted before the dictionary/G2P result — confirm at the lookup site.
2072
+ conflans: "/k\u0254\u0303fl\u0251\u0303/",
2073
+ // place name
2074
+ est: "/\u025B/",
2075
+ // verb "is" — the "st" is silent (the dictionary has /ɛst/)
2076
+ jolies: "/\u0292\u0254li/",
2077
+ luit: "/l\u0265i/",
2078
+ marchiennes: "/ma\u0281\u0283j\u025Bn/",
2079
+ // place name (Zola)
2080
+ "m\xE9taphysico-th\xE9ologo-cosmolonigologie": "/metafizikoteol\u0254\u0261\u0254k\u0254sm\u0254l\u0254ni\u0261\u0254l\u0254\u0292i/",
2081
+ // Voltaire
2082
+ montsou: "/m\u0254\u0303su/",
2083
+ // fictional town (Zola)
2084
+ morgion: "/m\u0254\u0281\u0292j\u0254\u0303/",
2085
+ // place name (Dumas)
2086
+ myriel: "/mi\u0281j\u025Bl/",
2087
+ // character name (Hugo)
2088
+ "neuve-sainte-genevi\xE8ve": "/n\u0153vs\u025B\u0303t\u0292\u0259nvj\u025Bv/",
2089
+ // Paris street (Balzac)
2090
+ nicole: "/nik\u0254l/",
2091
+ pangloss: "/p\u0251\u0303\u0261l\u0254s/",
2092
+ // Voltaire character
2093
+ rainur\u00E9e: "/\u0281\u025Bny\u0281e/",
2094
+ rion: "/\u0281j\u0254\u0303/",
2095
+ // place name (Dumas)
2096
+ "saint-marcel": "/s\u025B\u0303ma\u0281s\u025Bl/",
2097
+ // Paris quarter (Dumas)
2098
+ "thunder-ten-tronckh": "/t\u0254n\u025B\u0281t\u025Bnt\u0281\u0254nk/",
2099
+ // Voltaire
2100
+ trieste: "/t\u0281ij\u025Bst/",
2101
+ // city name (Dumas)
2102
+ vauquer: "/voke/",
2103
+ // character name (Balzac)
2104
+ y: "/i/"
2105
+ // pronoun "there" — the dictionary has the letter name /igʁɛk/ instead
2106
+ };
2107
+
2108
+ // src/overrides/is.ts
2109
+ var is = { // Icelandic pronunciation overrides: word -> IPA transcription wrapped in slashes. Keys keep their original capitalisation (proper names are capitalised). NOTE(review): whether lookup is case-sensitive is not visible here — confirm at the lookup site.
2110
+ \u00E1sir: "/\u02C8au\u02D0s\u026Ar/",
2111
+ \u00E1synjur: "/\u02C8au\u02D0s\u026Anj\u028Fr/",
2112
+ ballir: "/\u02C8patl\u026Ar/",
2113
+ beytils: "/\u02C8pei\u02D0t\u02B0\u026Als/",
2114
+ Bj\u00E1lfa: "/\u02C8pjaulva/",
2115
+ Bjarnar: "/\u02C8pjartnar/",
2116
+ bl\u00F3t: "/plou\u02D0t\u02B0/",
2117
+ // sacrifice/ritual
2118
+ b\u00F3klaus: "/\u02C8pouk\u02B0l\u0153ys/",
2119
+ br\u00E9fberi: "/\u02C8prj\u025B\u02D0vp\u025Br\u026A/",
2120
+ // letter carrier
2121
+ burluf\u00F3ts: "/\u02C8p\u028Frtl\u028Ffouts/",
2122
+ efla\u00F0i: "/\u02C8\u025Bpla\xF0\u026A/",
2123
+ // past tense: strengthened/performed
2124
+ ek: "/\u025Bk\u02B0/",
2125
+ fannhv\u00EDtir: "/\u02C8fan\u02D0kvit\u02B0\u026Ar/",
2126
+ farandi: "/\u02C8farant\u026A/",
2127
+ fars\u00E6lda: "/\u02C8farsailta/",
2128
+ fira: "/\u02C8f\u026Ara/",
2129
+ flatnefur: "/\u02C8flatn\u025Bv\u028Fr/",
2130
+ fl\u00FD\u00F0u: "/\u02C8fli\u02D0\xF0\u028F/",
2131
+ fornaldar: "/\u02C8f\u0254rtnaltar/",
2132
+ fornrit: "/\u02C8f\u0254rtnr\u026At\u02B0/",
2133
+ galdursmenn: "/\u02C8kalt\u028Frsm\u025Bn\u02D0/",
2134
+ gautr: "/\u02C8k\u0153y\u02D0tr/",
2135
+ // New saga/literary overrides
2136
+ gengr: "/k\u025B\u014Bkr/",
2137
+ go\u00F0or\u00F0sma\u00F0ur: "/\u02C8k\u0254\u02D0\xF0\u0254r\xF0sma\u02D0\xF0\u028Fr/",
2138
+ h\u00E6ngs: "/\u02C8hai\u014Bs/",
2139
+ hags\u00E6lda: "/\u02C8haksailta/",
2140
+ H\u00E1lfdanarsonar: "/\u02C8haulf\u02CCtanar\u02CCs\u0254nar/",
2141
+ h\u00E1lftr\u00F6lls: "/\u02C8haulftr\u0153tls/",
2142
+ Hallbjarnar: "/\u02C8hatlpjartnar/",
2143
+ Hallfre\u00F0s: "/\u02C8hatlfr\u025B\xF0s/",
2144
+ // genitive of Hallfreður
2145
+ Hallfre\u00F0ur: "/\u02C8hatlfr\u025B\xF0\u028Fr/",
2146
+ // proper name
2147
+ h\u00E1rfagra: "/\u02C8haur\u02CCfa\u0263ra/",
2148
+ // fair-haired (epithet)
2149
+ h\u00E1rfagri: "/\u02C8haur\u02CCfa\u0263r\u026A/",
2150
+ hersir: "/\u02C8h\u025Brs\u026Ar/",
2151
+ hersis: "/\u02C8h\u025Brs\u026As/",
2152
+ herskarar: "/\u02C8h\u025Brsk\u02B0arar/",
2153
+ Hrafnkelsdal: "/\u02C8r\u0325apnk\u02B0\u025Blstalr/", // NOTE(review): transcription ends in "-talr" although the key ends in "-dal" (compare Raumsdal -> "-tal" below) — confirm
2154
+ // place name
2155
+ Hreggvi\u00F0sson: "/\u02C8r\u0325\u025Bk\u02D0v\u026A\xF0s\u02D0\u0254n/",
2156
+ hr\u00EDmhv\u00EDta: "/\u02C8r\u0325imkvit\u02B0a/",
2157
+ Ing\u00F3lfr: "/\u02C8\u026A\u014Bkoulfr/",
2158
+ ins: "/\u026Ans/",
2159
+ \u00CDvarssonar: "/\u02C8ivars\u02D0\u0254nar/",
2160
+ j\u00F6klanna: "/\u02C8j\u0153k\u02B0lan\u02D0a/",
2161
+ J\u00F3rsalalands: "/\u02C8jourtsalalants/",
2162
+ Ketils: "/\u02C8k\u02B0\u025Bt\u02B0\u026Als/",
2163
+ kindir: "/\u02C8k\u02B0\u026Ant\u026Ar/",
2164
+ K\u00F3lumkilli: "/\u02C8k\u02B0ou\u02D0l\u028Fmk\u02B0\u026Atl\u026A/",
2165
+ konungd\u00E6mi: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014B\u02CCtai\u02D0m\u026A/",
2166
+ konungd\u00F3mur: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014B\u02CCdou\u02D0m\u028Fr/", // NOTE(review): uses "d" after the secondary stress where konungdæmi above uses "t" — confirm which is intended
2167
+ konungr: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014Bkr/",
2168
+ konungseigninni: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014Bkseikn\u026An\u02D0\u026A/",
2169
+ kotb\u00F3ndi: "/\u02C8k\u02B0\u0254t\u02B0pount\u026A/",
2170
+ kunnigt: "/\u02C8k\u02B0\u028Fn\u02D0\u026Ak\u02B0t\u02B0/",
2171
+ kvonga\u00F0ur: "/\u02C8k\u02B0v\u0254\u014Bka\xF0\u028Fr/",
2172
+ kynst\u00F3r: "/\u02C8k\u02B0\u026Anstou\u02D0r/",
2173
+ ma\u00F0r: "/ma\u02D0\xF0r/",
2174
+ magra: "/\u02C8ma\u0263ra/",
2175
+ mannd\u00E1\u00F0in: "/\u02C8man\u02D0tau\u02D0\xF0\u026An/",
2176
+ mannf\u00F3lkit: "/\u02C8man\u02D0fouk\u02B0\u026At\u02B0/",
2177
+ mj\u00F6k: "/mj\u0153k\u02B0/",
2178
+ m\u00F6gu: "/\u02C8m\u0153\u02D0k\u028F/",
2179
+ n\u00F3rr\u00E6nn: "/\u02C8nou\u02D0rrai\u02D0n\u02D0/",
2180
+ N\u00F6rvasundum: "/\u02C8n\u0153rvas\u028Fnt\u028Fm/",
2181
+ Norvegi: "/\u02C8n\u0254rv\u025Bj\u026A/",
2182
+ n\u00FDu: "/\u02C8ni\u02D0\u028F/",
2183
+ \u00F3arga: "/\u02C8ou\u02D0arka/",
2184
+ \u00D6nundur: "/\u02C8\u0153\u02D0n\u028Fnt\u028Fr/",
2185
+ or: "/\u0254r/",
2186
+ \u00F3r: "/ou\u02D0r/",
2187
+ \u00F6xn: "/\u02C8\u0153ksn/",
2188
+ pl\u00F3gsland: "/\u02C8p\u02B0lou\u02D0kslant/",
2189
+ r\u00E1\u00F0u: "/\u02C8rau\u02D0\xF0\u028F/",
2190
+ r\u00E6kir: "/\u02C8rai\u02D0k\u02B0\u026Ar/",
2191
+ Raumar\u00EDki: "/\u02C8r\u0153y\u02D0mar\u026A\u02D0k\u02B0\u026A/",
2192
+ Raumsd\u00E6lafylki: "/\u02C8r\u0153ymstailaf\u026Alk\u02B0\u026A/",
2193
+ Raumsdal: "/\u02C8r\u0153ymstal/",
2194
+ raumur: "/\u02C8r\u0153y\u02D0m\u028Fr/",
2195
+ Rein: "/rei\u02D0n/",
2196
+ sannliga: "/\u02C8san\u02D0l\u026A\u0263a/",
2197
+ Sk\u00ED\u00F0ason: "/\u02C8ski\u02D0\xF0as\u0254n/",
2198
+ s\u00F6\u00F0ul: "/\u02C8s\u0153\u02D0\xF0\u028Fl/",
2199
+ s\u00F3lkerfum: "/\u02C8soulk\u02B0\u025Brv\u028Fm/",
2200
+ tindar: "/\u02C8t\u02B0\u026Antar/",
2201
+ \u00FAtsj\u00E1num: "/\u02C8u\u02D0t\u02B0sjau\u02D0n\u028Fm/",
2202
+ v\u00E1gskorin: "/\u02C8vau\u0263sk\u0254r\u026An/",
2203
+ Valf\u00F6\u00F0r: "/\u02C8valv\u0153\xF0r/",
2204
+ v\u00EDkverskur: "/\u02C8vi\u02D0kv\u025Brsk\u028Fr/",
2205
+ Yngveldi: "/\u02C8\u026A\u014Bkv\u025Blt\u026A/",
2206
+ \u00FEat: "/\u03B8at\u02B0/"
2207
+ };
2208
+
2209
+ // src/overrides/ja.ts
2210
+ var ja = { // Japanese pronunciation overrides: surface form (hiragana, katakana or kanji, written as \u escapes) -> IPA transcription wrapped in slashes. Gloss comments describe the entry directly above them. NOTE(review): presumably consulted before the dictionary/G2P result — confirm at the lookup site.
2211
+ \u3042\u3051\u307C\u306E: "/akebono/",
2212
+ \u3042\u3063\u305F: "/at\u02D0a/",
2213
+ \u3042\u3068: "/ato/",
2214
+ \u3042\u307E\u305F: "/amata/",
2215
+ \u3042\u3089\u306C: "/a\u027Ean\u026F/",
2216
+ \u3042\u308A: "/a\u027Ei/",
2217
+ \u3042\u308B: "/a\u027E\u026F/",
2218
+ \u3042\u308B\u304F: "/a\u027E\u026Fk\u026F/",
2219
+ \u3044\u3046: "/i\u026F/",
2220
+ \u30A4\u30AE\u30EA\u30B9: "/i\u0261i\u027Eis\u026F/",
2221
+ // England
2222
+ \u3044\u305F: "/ita/",
2223
+ \u3044\u305F\u3046: "/ita\u026F/",
2224
+ // archaic: greatly
2225
+ // New sample overrides
2226
+ \u3044\u3064\u3082: "/its\u026Fmo/",
2227
+ \u3044\u3065\u308C: "/id\u026F\u027Ee/",
2228
+ // archaic: which
2229
+ \u3044\u308B: "/i\u027E\u026F/",
2230
+ \u3046\u307F: "/\u026Fmi/",
2231
+ \u3046\u3089: "/\u026F\u027Ea/",
2232
+ \u304A\u308B: "/o\u027E\u026F/",
2233
+ \u304B\u305F: "/kata/",
2234
+ \u304B\u305F\u3061: "/katat\u0255i/",
2235
+ \u304B\u3064\u3050: "/kats\u026F\u0261\u026F/",
2236
+ \u304B\u306E: "/kano/",
2237
+ \u304F: "/k\u026F/",
2238
+ // archaic verb stem
2239
+ \u3054: "/\u0261o/",
2240
+ \u3053\u3053: "/koko/",
2241
+ \u3054\u3056\u3044\u307E\u3059: "/\u0261ozaimas\u026F/",
2242
+ \u3053\u306E: "/kono/",
2243
+ \u3055\u3056\u308C: "/saza\u027Ee/",
2244
+ \u3055\u3073\u3057\u3044: "/sabi\u0255i\u02D0/",
2245
+ \u3055\u3089: "/sa\u027Ea/",
2246
+ // archaic: furthermore
2247
+ \u3057\u3088\u3046: "/\u0255ijo\u02D0/",
2248
+ \u3059\u3050\u308C\u308B: "/s\u026F\u0261\u026F\u027Ee\u027E\u026F/",
2249
+ \u3059\u3053\u3057: "/s\u026Fko\u0255i/",
2250
+ \u3059\u308B: "/s\u026F\u027E\u026F/",
2251
+ \u305D\u3053: "/soko/",
2252
+ \u305D\u306E: "/sono/",
2253
+ \u3060\u3044\u3076: "/daib\u026F/",
2254
+ \u3060\u3051: "/dake/",
2255
+ \u305F\u3060: "/tada/",
2256
+ \u3060\u3064: "/dats\u026F/",
2257
+ // archaic suffix: -ish
2258
+ \u305F\u306A\u3073\u304F: "/tanabik\u026F/",
2259
+ \u3064\u304F: "/ts\u026Fk\u026F/",
2260
+ \u3064\u308C\u308B: "/ts\u026F\u027Ee\u027E\u026F/",
2261
+ \u3069\u3053: "/doko/",
2262
+ \u3068\u3053\u308D: "/toko\u027Eo/",
2263
+ \u3068\u3089\u3048\u308B: "/to\u027Eae\u027E\u026F/",
2264
+ \u306A\u304A: "/nao/",
2265
+ \u306A\u304B: "/naka/",
2266
+ \u306A\u304F: "/nak\u026F/",
2267
+ \u306A\u3063\u305F: "/nat\u02D0a/",
2268
+ \u306A\u3073\u304F: "/nabik\u026F/",
2269
+ \u306E\u3051: "/noke/",
2270
+ \u3070\u304B\u308A: "/baka\u027Ei/",
2271
+ \u306F\u305F: "/hata/",
2272
+ \u30CF\u30F3\u30D6\u30EB\u30AF: "/hamb\u026F\u027E\u026Fk\u026F/",
2273
+ \u3075\u3046: "/\u0278\u026F\u02D0/",
2274
+ \u3075\u3061: "/\u0278\u026Ft\u0255i/",
2275
+ \u307B\u304B: "/hoka/",
2276
+ \u307B\u3069: "/hodo/",
2277
+ \u307B\u3093\u3068\u3046: "/honto\u02D0/",
2278
+ \u307E\u305F: "/mata/",
2279
+ \u307E\u3060: "/mada/",
2280
+ \u307E\u3067: "/made/",
2281
+ \u307F\u306A\u3055\u3093: "/minasa\u0274/",
2282
+ \u307F\u3093\u306A: "/min\u02D0a/",
2283
+ \u3080\u3059: "/m\u026Fs\u026F/",
2284
+ \u30E1\u30ED\u30B9: "/me\u027Eos\u026F/",
2285
+ \u3084\u3046\u3084\u3046: "/ja\u026Fja\u026F/",
2286
+ // archaic: gradually
2287
+ \u3084\u307F: "/jami/",
2288
+ \u3084\u3080: "/jam\u026F/",
2289
+ \u3084\u3093\u3054\u3068\u306A\u3057: "/ja\u014B\u0261otona\u0255i/",
2290
+ // archaic: noble
2291
+ \u3088\u304F: "/jok\u026F/",
2292
+ \u3088\u307B\u3069: "/johodo/",
2293
+ \u308B: "/\u027E\u026F/",
2294
+ // classical auxiliary
2295
+ \u308F\u304B\u308B: "/waka\u027E\u026F/",
2296
+ \u4E00\u8336: "/it\u02D0\u0255a/",
2297
+ // Issa (poet)
2298
+ \u4E09\u5341\u4E03: "/sa\u0274d\u0291\u026F\u02D0\u0255it\u0255i/",
2299
+ \u5019\u3046: "/so\u02D0\u027Eo\u02D0/",
2300
+ // archaic polite auxiliary
2301
+ \u547C\u3093\u3067: "/jo\u0274de/",
2302
+ \u6B62\u307E\u3063\u305F: "/tomat\u02D0a/",
2303
+ \u75E9: "/jase/",
2304
+ // thin
2305
+ \u767D\u304F: "/\u0255i\u027Eok\u026F/",
2306
+ \u77E5\u3063\u3066: "/\u0255it\u02D0e/",
2307
+ \u7A4D\u307F: "/ts\u026Fmi/",
2308
+ \u7F85\u751F\u9580: "/\u027Ea\u0255o\u02D0mo\u0274/",
2309
+ // Rashomon
2310
+ \u821E\u9DB4: "/maiz\u026F\u027E\u026F/",
2311
+ // Maizuru (place)
2312
+ \u91D1\u95A3: "/ki\u0274kak\u026F/", // NOTE(review): nasal written \u0274 before "k" here, while the override with "\u014B\u0261" above uses \u014B before a velar — confirm the intended convention
2313
+ // Golden Pavilion
2314
+ \u9759\u304B\u3055: "/\u0255iz\u026Fkasa/"
2315
+ };
2316
+
2317
+ // src/overrides/km.ts
2318
+ var km = {
2319
+ // --- UDHR Article 1 ---
2320
+ \u1780\u17C6\u178E\u17BE\u178F: "/k\u0251mna\u0259t/",
2321
+ // birth
2322
+ // --- Reamker (Ramayana) ---
2323
+ \u1780\u17D2\u178A\u17B8: "/kd\u0259y/",
2324
+ // matter, affair
2325
+ \u1780\u17D2\u179A\u17C1\u179C: "/kreew/",
2326
+ // furious
2327
+ \u1780\u17D2\u179A\u17C1\u179C\u1780\u17D2\u179A\u17C4\u1792: "/kreew krout/",
2328
+ // furiously angry
2329
+ \u1780\u17D2\u179A\u17C4\u1792: "/krout/",
2330
+ // anger
2331
+ \u1780\u17D2\u179F: "/ks/",
2332
+ // consonant cluster (browser splits ក្សត្រី)
2333
+ \u1780\u17D2\u179F\u178F\u17D2\u179A: "/ksaat/",
2334
+ // king
2335
+ // --- Nokor Reach (National Anthem) ---
2336
+ \u1780\u17D2\u179F\u178F\u17D2\u179A\u17B6: "/ksaatraa/",
2337
+ // king (literary form)
2338
+ \u1780\u17D2\u179F\u178F\u17D2\u179A\u17B8: "/ksaatr\u0259y/",
2339
+ // queen
2340
+ \u1781\u17D2\u1798\u17B8: "/km\u0259y/",
2341
+ // ogre (literary)
2342
+ \u1781\u17D2\u179C\u17BE\u1780: "/kwa\u0259k/",
2343
+ // to stir, disturb
2344
+ \u1782\u17B6\u1794\u17CB: "/koap/",
2345
+ // beloved
2346
+ // --- Tum Teav ---
2347
+ \u1782\u17BC: "/kuu/",
2348
+ // partner
2349
+ \u1782\u17BC\u1782\u17B6\u1794\u17CB: "/kuu koap/",
2350
+ // partner, beloved
2351
+ \u1783\u17D2\u179B\u17B6\u178F: "/kliet/",
2352
+ // separated
2353
+ \u1785\u179A: "/c\u0251\u0251/",
2354
+ // to walk, go
2355
+ \u1785\u17D2\u1793\u17C1\u17C7: "/cneh/",
2356
+ // this (literary)
2357
+ \u1787\u17D0\u1799: "/cey/",
2358
+ // victory
2359
+ \u1787\u17D0\u1799\u1798\u1784\u17D2\u1782\u179B: "/cey m\u0254\u014Bk\u0254l/",
2360
+ // victory, auspicious
2361
+ // --- Proverbs ---
2362
+ \u1787\u17B6\u1780\u17CB: "/ceak/",
2363
+ // certain, sure
2364
+ \u1787\u17B6\u179B\u17C6\u178A\u17B6\u1794\u17CB: "/cie l\u0254m\u0257aap/",
2365
+ // successively
2366
+ // --- Preah Chinawong ---
2367
+ \u1787\u17B7\u1793: "/c\u0268n/",
2368
+ // Jin (proper name element)
2369
+ \u1787\u17B7\u1793\u179C\u1784\u17D2\u0E2A: "/c\u0268n w\u0254\u014B/",
2370
+ // Chinawong (proper name)
2371
+ \u1787\u17BC: "/cuu/",
2372
+ // proper (literary)
2373
+ \u1789\u17B6\u1780\u17CB: "/\u0272eak/",
2374
+ // to startle, twitch
2375
+ \u1789\u17B6\u1780\u17CB\u1785\u17B7\u1789\u17D2\u1785\u17BE\u1798: "/\u0272eak c\u0259\u0272ca\u0259m/",
2376
+ // to raise eyebrows
2377
+ \u178A\u17C2\u1793: "/daen/",
2378
+ // territory
2379
+ \u178E\u17B6\u1799: "/naay/",
2380
+ // to yearn
2381
+ \u178E\u17B6\u1799\u1785\u17B7\u178F\u17D2\u178F: "/naay c\u0259t/",
2382
+ // heart yearns
2383
+ \u178F\u1794: "/t\u0251p/",
2384
+ // to reply
2385
+ \u1790\u17D2\u1780\u17B6\u1793: "/tkaan/",
2386
+ // magnificent (literary)
2387
+ \u1790\u17D2\u1780\u17BE\u1784: "/tk\u0259\u0259\u014B/",
2388
+ // to glorify
2389
+ \u1790\u17D2\u1780\u17BE\u1784\u1790\u17D2\u1780\u17B6\u1793: "/tk\u0259\u0259\u014B tkaan/",
2390
+ // glorious, magnificent
2391
+ \u1790\u17D2\u1793\u17BC\u179A: "/tnoo/",
2392
+ // dignity, nobility
2393
+ \u1790\u17D2\u1793\u17C2: "/tnae/",
2394
+ // aspect (partial segmentation of ថ្នែក)
2395
+ \u1790\u17D2\u1793\u17C2\u1780: "/tnaek/",
2396
+ // aspect, class
2397
+ \u1790\u17D2\u179C\u17B6\u178F\u17CB: "/twat/",
2398
+ // harshly
2399
+ \u1791\u178F: "/t\u0254t/",
2400
+ // to look, behold (royal)
2401
+ \u1791\u17B6\u179C: "/tiew/",
2402
+ // Teav (proper name)
2403
+ \u1791\u17BB\u1780\u17D2\u1781: "/tuk/",
2404
+ // suffering (dukkha)
2405
+ \u1791\u17BB\u1780\u17D2\u1781\u179C\u17C1\u1791\u1793\u17B6: "/tukweet\u0251\u0251nie/",
2406
+ // suffering
2407
+ \u1791\u17BC\u179B: "/tuul/",
2408
+ // to inform (royal register)
2409
+ \u1791\u17C1\u1796\u17D2\u178F\u17B6: "/teep\u0257aa/",
2410
+ // devas, celestial beings
2411
+ \u1793\u179A\u1794\u178F\u17B8: "/n\u0254r\u0254pa\u0257\u0259y/",
2412
+ // sovereign, king
2413
+ \u1793\u17B6\u179C: "/niew/",
2414
+ // Nav (proper name)
2415
+ \u1793\u17B7\u1798\u17CC\u179B: "/n\u0268mm\u0254l/",
2416
+ // pure, immaculate
2417
+ \u1794\u17C6\u1795\u17D2\u179B\u17B6\u1789: "/b\u0251mp\u02B0lie\u0272/",
2418
+ // to destroy
2419
+ \u1794\u17C6\u1795\u17D2\u179B\u17B7\u1785: "/b\u0251mp\u02B0l\u0259c/",
2420
+ // to demolish
2421
+ \u1794\u1796\u17B7\u178F\u17D2\u179A: "/b\u0251p\u0268t/",
2422
+ // lord, sir (polite address)
2423
+ \u1794\u17BB\u178F\u17D2\u179A: "/\u0253ot/",
2424
+ // son
2425
+ // --- Chbab Srey (Code for Women) ---
2426
+ \u1794\u17D2\u179A\u178A\u17C5: "/pr\u0251\u0257aw/",
2427
+ // to advise, counsel
2428
+ \u1794\u17D2\u179A\u178E\u17B8: "/pr\u0251n\u0259y/",
2429
+ // loving, affectionate
2430
+ \u1794\u17D2\u179A\u17B6\u1787\u17D2\u1789: "/praac/",
2431
+ // wisdom
2432
+ \u1794\u17D2\u179A\u17B6\u179F\u17B6\u1791: "/praasaat/",
2433
+ // temple, palace
2434
+ \u1796\u17B7\u179A\u17C4\u1792: "/piroot/",
2435
+ // anger
2436
+ \u1796\u17BB\u17C6: "/pum/",
2437
+ // not (literary)
2438
+ \u1796\u17BB\u17C6\u1787\u17BC: "/pum cuu/",
2439
+ // not proper
2440
+ \u1796\u17D2\u1799\u17B6\u1794\u17B6\u1791: "/pjiebaat/",
2441
+ // malice
2442
+ \u1796\u17D2\u179A\u17C7\u1798\u17A0\u17C1\u179F\u17B8: "/preah m\u0254hees\u0259y/",
2443
+ // queen consort (with ព្រះ)
2444
+ \u1796\u17D2\u179A\u17C7\u179A\u17B6\u1787\u1794\u17BB\u178F\u17D2\u179A: "/preah riec \u0253ot/",
2445
+ // prince (with ព្រះ)
2446
+ \u1796\u17D2\u179A\u17C7\u179A\u17B6\u1787\u17B6: "/preah riecie/",
2447
+ // the king
2448
+ \u1797\u17D0\u1780\u17D2\u179A\u17D2\u178F: "/p\u02B0eak/",
2449
+ // face (royal register)
2450
+ \u1797\u17B6\u178F\u179A: "/p\u02B0iet\u0251\u0251/",
2451
+ // brother (standalone; final រ silent)
2452
+ \u1797\u17B6\u178F\u179A\u1797\u17B6\u1796: "/p\u02B0iet\u0251\u0251r\u0251p\u02B0iep/",
2453
+ // brotherhood
2454
+ \u1797\u17D2\u1789\u17B6: "/p\u0272ie/",
2455
+ // to awaken
2456
+ \u1798\u1784\u17D2\u1782\u179B: "/m\u0254\u014Bk\u0254l/",
2457
+ // auspicious
2458
+ \u1798\u17A0\u17B6: "/m\u0254haa/",
2459
+ // great (prefix)
2460
+ \u1798\u17A0\u17B6\u1780\u17D2\u179F\u178F\u17D2\u179A: "/m\u0254haa ksaat/",
2461
+ // great king, maharaja
2462
+ \u1798\u17A0\u17B6\u1787\u17B6\u178F\u17B7: "/m\u0254haa ciet/",
2463
+ // great nation
2464
+ \u1798\u17A0\u17C1\u179F\u17B8: "/m\u0254hees\u0259y/",
2465
+ // queen consort
2466
+ \u179A\u17B6\u1787: "/riec/",
2467
+ // royal
2468
+ \u179A\u17B6\u1787\u1794\u17BB\u178F\u17D2\u179A: "/riec \u0253ot/",
2469
+ // prince
2470
+ \u179A\u17B6\u1787\u17B6: "/riecie/",
2471
+ // king
2472
+ \u179A\u17BB\u1784: "/ru\u014B/",
2473
+ // to shine
2474
+ \u179A\u17BB\u1784\u179A\u17BF\u1784: "/ru\u014B r\u0268\u0259\u014B/",
2475
+ // brilliant, prosperous
2476
+ \u179B\u17C6\u178A\u17B6\u1794\u17CB: "/l\u0254m\u0257aap/",
2477
+ // successively, in order
2478
+ \u179C\u1784\u17D2\u179F: "/w\u0254\u014B/",
2479
+ // dynasty, lineage
2480
+ \u179C\u17B7\u1785\u17B6\u179A\u178E\u1789\u17D2\u1789\u17B6\u178E: "/wicaar\u0251\u0272\u0272aan/",
2481
+ // discernment
2482
+ \u179C\u17C1\u1791\u1793\u17B6: "/weet\u0251\u0251nie/",
2483
+ // pain (vedana)
2484
+ \u179F\u178F\u17B7: "/sa\u0294te\u0294/",
2485
+ // consciousness, mindfulness
2486
+ \u179F\u178F\u17B7\u179F\u1798\u17D2\u1794\u1787\u1789\u17D2\u1789\u17C8: "/sa\u0294te\u0294 sampa\u0294c\u028A\u0259\u0272\u0272ea\u0294/",
2487
+ // conscience
2488
+ \u179F\u1798\u17D2\u1794\u1787\u1789\u17D2\u1789\u17C8: "/sampa\u0294c\u028A\u0259\u0272\u0272ea\u0294/",
2489
+ // awareness
2490
+ \u179F\u17B6\u1791\u179A: "/saat\u0254\u0254/",
2491
+ // to welcome
2492
+ \u179F\u17B7\u179A\u17B8: "/ser\u0259y/",
2493
+ // glory (from Pali)
2494
+ \u179F\u17BD: "/su\u0259/",
2495
+ // (first syllable of សួស្តី)
2496
+ \u179F\u17BD\u179F\u17D2\u178F\u17B8: "/su\u0259sd\u0259y/",
2497
+ // greeting, well-being
2498
+ \u179F\u17C1\u1785\u1780\u17D2\u178A\u17B8: "/sac kd\u0259y/",
2499
+ // matter, affair
2500
+ \u179F\u17C1\u1785\u1780\u17D2\u178A\u17B8\u1790\u17D2\u179B\u17C3\u1790\u17D2\u1793\u17BC\u179A: "/sac kd\u0259y tlay tnoo/",
2501
+ // dignity
2502
+ \u179F\u17D2\u178A\u17C1\u1785: "/sdac/",
2503
+ // king
2504
+ \u179F\u17D2\u178F\u17B6\u1794\u17CB: "/sdaap/",
2505
+ // to listen
2506
+ \u179F\u17D2\u178F\u17B8: "/sd\u0259y/",
2507
+ // (second syllable of សួស្តី)
2508
+ \u179F\u17D2\u1791\u17BB\u17C7: "/stuh/",
2509
+ // to rush
2510
+ \u179F\u17D2\u1793\u17C6: "/sn\u0251m/",
2511
+ // concubine
2512
+ \u17A0\u178F\u17D2\u1790\u17B6: "/hat\u0251\u0251t\u02B0aa/",
2513
+ // hand (literary)
2514
+ // --- Constitution Preamble ---
2515
+ \u17A2\u1784\u17D2\u1782\u179A: "/\u0294\u0251\u014Bk\u0254\u0254/",
2516
+ // Angkor
2517
+ \u17A2\u179A: "/\u0294\u0251\u0251/",
2518
+ // to rejoice
2519
+ \u17A2\u179F\u17D2\u1785\u17B6\u179A\u17D2\u1799: "/\u0294\u0251scaa/",
2520
+ // wonderful, marvelous
2521
+ \u17A2\u17B6\u179B: "/\u0294aal/",
2522
+ // to rush, boast
2523
+ \u17A2\u17BD\u178F: "/\u0294u\u0259t/",
2524
+ // to boast
2525
+ \u17B1\u17D2\u1799: "/\u0294aoy/"
2526
+ // to give, let
2527
+ };
2528
+
2529
+ // src/overrides/ko.ts
2530
+ var ko = { // Korean pronunciation overrides: Hangul word (written as \u escapes) -> IPA transcription wrapped in slashes, with "." separating syllables inside the value. Gloss comments describe the entry directly above them. NOTE(review): presumably consulted before the dictionary/G2P result — confirm at the lookup site.
2531
+ \uAC10\uB098\uBB34: "/kam.na.mu/",
2532
+ \uAC1C\uCC9C: "/k\u025B.t\u0255\u02B0\u028Cn/",
2533
+ \uACBD\uC131: "/kj\u028C\u014B.s\u028C\u014B/",
2534
+ // old name for Seoul
2535
+ // New sample overrides
2536
+ \uACE0\uB2EC\uD504\uB2E4: "/ko.dal.p\u02B0\u026F.da/",
2537
+ \uACE0\uC774: "/ko.i/",
2538
+ \uAD11\uC74C: "/kwa\u014B.\u026Fm/",
2539
+ \uAE08\uBE5B: "/k\u026Fm.pit\u031A/",
2540
+ \uAE30\uB098\uAE30\uB2E4: "/ki.na.\u0261i.da/",
2541
+ // very long
2542
+ \uAE38\uB3D9: "/kil.do\u014B/",
2543
+ // Hong Gildong (character name)
2544
+ \uAF79\uACFC\uB9AC: "/k\u0348w\u025B\u014B.gwa.\u027Ei/",
2545
+ // kkwaenggwari (percussion instrument)
2546
+ \uB04A\uC784: "/k\u0348\u026Fn.im/",
2547
+ \uB098\uD0C0\uC0E4: "/na.t\u02B0a.\u0255a/",
2548
+ // Natasha
2549
+ \uB17C\uAC00: "/non.\u0261a/",
2550
+ \uB2C8\uAE4C: "/ni.k\u0348a/",
2551
+ // because (suffix)
2552
+ \uB3C4\uC6B0\uB2E4: "/to.u.da/",
2553
+ // to help
2554
+ \uB3D9\uC9D3\uB2EC: "/to\u014B.d\u0291it\u031A.t\u0348al/",
2555
+ \uB9E4\uC5B4\uB2EC\uB9AC\uB2E4: "/m\u025B.\u028C.dal.li.da/",
2556
+ \uBB3C\uB4E4\uB2E4: "/mul.d\u026Fl.da/",
2557
+ \uBC31\uB450\uC0B0: "/p\u025Bk\u031A.t\u0348u.san/",
2558
+ // Mt. Baekdu
2559
+ \uBC94\uD558\uB2E4: "/p\u028Cm.ha.da/",
2560
+ \uC0B0\uBAA8\uD241\uC774: "/san.mo.t\u02B0u\u014B.i/",
2561
+ \uC18C\uC90F\uC9D1: "/so.d\u0291ut\u031A.t\u0348\u0255ip\u031A/",
2562
+ \uC544\uB77C\uB9AC\uC694: "/a.\u027Ea.\u027Ei.jo/",
2563
+ // arirang refrain
2564
+ \uC544\uC2DC\uB2E4: "/a.\u0255i.da/",
2565
+ // to know (honorific)
2566
+ \uC57D\uC0B0: "/jak\u031A.s\u0348an/",
2567
+ // Yaksan (place)
2568
+ \uC5B4\uB450: "/\u028C.du/",
2569
+ \uC5B4\uB860: "/\u028C.\u027Eon/",
2570
+ // archaic: elder
2571
+ \uC5ED\uACB9\uB2E4: "/j\u028Ck\u031A.kj\u028Cp\u031A.t\u0348a/",
2572
+ \uC601\uBCC0: "/j\u028C\u014B.bj\u028Cn/",
2573
+ // Yeongbyeon (place)
2574
+ \uC624\uB3D9\uB098\uBB34: "/o.do\u014B.na.mu/",
2575
+ \uC624\uB3D9\uC78E: "/o.do\u014B.ip\u031A/",
2576
+ \uC624\uC2DC\uB2E4: "/o.\u0255i.da/",
2577
+ // honorific: to come
2578
+ \uC654\uB2E4: "/wat\u031A.t\u0348a/",
2579
+ \uC6B0\uB7EC\uB974\uB2E4: "/u.\u027E\u028C.\u027E\u026F.da/",
2580
+ \uC6D0\uD1B5\uD558\uB2E4: "/w\u028Cn.t\u02B0o\u014B.ha.da/",
2581
+ \uC774\uB77C: "/i.\u027Ea/",
2582
+ // copula ending
2583
+ \uC774\uC5B4\uB4E0: "/i.\u028C.d\u026Fn/",
2584
+ // archaic conditional
2585
+ \uC774\uC5D0: "/i.e/",
2586
+ \uC78A\uD788\uB2E4: "/i.t\u02B0i.da/",
2587
+ \uC78E\uC0C8: "/ip\u031A.s\u0348\u025B/",
2588
+ \uC7A5\uB0A0: "/t\u0255a\u014B.nal/",
2589
+ \uC7A5\uD130: "/t\u0255a\u014B.t\u02B0\u028C/",
2590
+ \uC810\uC21C\uC774: "/t\u0255\u028Cm.su.ni/",
2591
+ // character name (diminutive)
2592
+ \uC9C0\uB9AC\uB2E4: "/t\u0255i.\u027Ei.da/",
2593
+ \uC9C0\uC904\uB300\uB2E4: "/t\u0255i.d\u0291ul.d\u025B.da/",
2594
+ \uCC44\uC2DD: "/t\u0255\u02B0\u025B.\u0255ik\u031A/",
2595
+ \uCCAD\uCC9C: "/t\u0255\u02B0\u028C\u014B.t\u0255\u02B0\u028Cn/",
2596
+ \uCD5C: "/t\u0255\u02B0we/",
2597
+ // surname Choi
2598
+ \uCD98\uD48D: "/t\u0255\u02B0un.p\u02B0u\u014B/",
2599
+ \uD0C0\uC791\uB9C8\uB2F9: "/t\u02B0a.d\u0291ak\u031A.ma.da\u014B/",
2600
+ // threshing ground
2601
+ \uD2F0\uB04C: "/t\u02B0i.k\u0348\u026Fl/",
2602
+ \uD53C\uC5B4\uC624\uB974\uB2E4: "/p\u02B0i.\u028C.o.\u027E\u026F.da/",
2603
+ \uD55C\uD14C: "/han.t\u02B0e/",
2604
+ \uD574\uC124\uD53C: "/h\u025B.s\u028Cl.p\u02B0i/",
2605
+ // poetic: at sunset
2606
+ \uD5E4\uB2E4: "/he.da/",
2607
+ // archaic: to count
2608
+ \uD718\uB2EC\uB9AC\uB2E4: "/hwi.dal.li.da/",
2609
+ \uD718\uB3CC\uB2E4: "/hwi.dol.da/"
2610
+ };
2611
+
2612
+ // src/overrides/ma.ts
2613
+ var ma = {
2614
+ Abdul: "abdul",
2615
+ bebas: "bebas",
2616
+ boleh: "boleh",
2617
+ gemawan: "\u0261\u0259mawan",
2618
+ Hamid: "hamid",
2619
+ ibni: "ibni",
2620
+ Jebat: "d\u0292\u0259bat",
2621
+ Johor: "d\u0292oho\u027E",
2622
+ kebebasan: "k\u0259bebasan",
2623
+ Lekir: "l\u0259ki\u027E",
2624
+ Lekiu: "l\u0259kiw",
2625
+ Malaysia: "malejsia",
2626
+ merdeka: "m\u0259\u027Edeka",
2627
+ mereka: "m\u0259reka",
2628
+ Oleh: "oleh",
2629
+ samarata: "samarata",
2630
+ Sarawak: "sa\u027Eawak",
2631
+ seekor: "s\u0259eko\u027E",
2632
+ seksaan: "seksa\u0294an",
2633
+ Selangor: "s\u0259la\u014Bo\u027E",
2634
+ selendang: "s\u0259l\u0259nda\u014B",
2635
+ Singapura: "si\u014B\u0261apu\u027Ea",
2636
+ Terengganu: "t\u0259\u027Ee\u014B\u0261anu",
2637
+ Tuhan: "tuhan"
2638
+ };
2639
+
2640
+ // src/overrides/nb.ts
2641
+ var nb = {
2642
+ // Old orthography (Riksmål/Danish) and common words
2643
+ al: "\u0251\u02D0l",
2644
+ \u00E5rsalderen: "o\u02D0\u027Es\u0251l\u02D0d\u0259\u027E\u0259n",
2645
+ arv: "\u0251\u027Ev",
2646
+ behold: "b\u0259h\u0254l",
2647
+ Bj\u00F8rgulfson: "bj\xF8\u02D0\u027E\u0261\u0289lfs\u0254n",
2648
+ Blik: "bl\u026Ak",
2649
+ Bliv: "bli\u02D0",
2650
+ b\u00F8r: "b\xF8\u02D0\u027E",
2651
+ B\u00F8rnene: "b\xF8\u02D0\u0273\u0259n\u0259",
2652
+ brorskapets: "b\u027Eu\u02D0\u027Esk\u0251\u02D0p\u0259ts",
2653
+ demokratiet: "d\u025Bmok\u027E\u0251ti\u02D0\u0259",
2654
+ dig: "d\u0251j",
2655
+ Dyb: "dy\u02D0p",
2656
+ Eftermiddag: "\u025Bft\u025B\u027Em\u026Ad\u0251\u02D0\u0261",
2657
+ ej: "\u0251j",
2658
+ fandens: "f\u0251n\u02D0\u0259ns",
2659
+ f\u00E5tt: "f\u0254t",
2660
+ fiender: "fi\u02D0\u025Bnd\u0259\u027E",
2661
+ Fjeldbygden: "fj\u025Blb\u028F\u0261d\u0259n",
2662
+ fordrukken: "f\u0254\u027Ed\u027E\u0289k\u02D0\u0259n",
2663
+ fornuft: "f\u0254\u027En\u0289ft",
2664
+ frem: "f\u027E\u025Bm",
2665
+ Fremmedkarl: "f\u027E\u025Bm\u02D0\u0259k\u0251\u027El",
2666
+ frostblaa: "f\u027E\u0254stblo\u02D0",
2667
+ frygt: "f\u027E\u028Fkt",
2668
+ gamlingen: "\u0261\u0251ml\u026A\u014B\u0259n",
2669
+ Gem: "j\u025Bm",
2670
+ gikk: "j\u026Ak",
2671
+ Gjesling: "j\u025Bsl\u026A\u014B",
2672
+ gjorde: "ju\u02D0\u027E\u0259",
2673
+ Glands: "\u0261l\u0251ns",
2674
+ gnistred: "\u0261n\u026Ast\u027E\u0259d",
2675
+ gr\u00E5spr\u00E6ngt: "\u0261\u027Eo\u02D0sp\u027E\u025B\u014Bt",
2676
+ Grunnlov: "\u0261\u027E\u0289n\u02D0lo\u02D0v",
2677
+ Guld: "\u0261\u0289l",
2678
+ Gyldenlak: "j\u028Fl\u02D0\u0259nl\u0251k",
2679
+ Gyldentop: "j\u028Fl\u02D0\u0259nt\u0254p",
2680
+ ham: "h\u0251m",
2681
+ Hej: "h\u0251j",
2682
+ h\u00F8r: "h\xF8\u02D0\u027E",
2683
+ Horisonten: "h\u0254\u027E\u026As\u0254nt\u0259n",
2684
+ humanistiske: "h\u0289m\u0251n\u026Ast\u026Ask\u0259",
2685
+ husbond: "h\u0289\u02D0sb\u0254n",
2686
+ Hvad: "v\u0251",
2687
+ hvem: "v\u025Bm",
2688
+ hverandre: "v\u025B\u027E\u0251nd\u027E\u0259",
2689
+ hverken: "v\u025B\u027Ek\u0259n",
2690
+ Hvor: "vu\u02D0\u027E",
2691
+ hvoraf: "vu\u02D0\u027E\u0251v",
2692
+ Hvorfor: "v\u0254\u027Ef\u0254\u027E",
2693
+ Hvormeget: "vu\u02D0\u027Em\u0251j\u02D0\u0259",
2694
+ iaften: "i\u0251ft\u0259n",
2695
+ Idet: "i\u02D0de\u02D0t",
2696
+ ihob: "iho\u02D0b",
2697
+ imellem: "im\u025Bl\u02D0\u0259m",
2698
+ imod: "i\u02D0mu\u02D0t",
2699
+ inn: "\u026An",
2700
+ intet: "\u026Ant\u0259",
2701
+ Intet: "\u026Ant\u0259",
2702
+ Ivar: "i\u02D0v\u0251\u027E",
2703
+ Jammer: "j\u0251m\u025B\u027E",
2704
+ Jensen: "j\u025Bns\u0259n",
2705
+ jordegods: "ju\u02D0\u027E\u0259\u0261\u0254ts",
2706
+ juletr\u00E6et: "j\u0289\u02D0l\u0259t\u027E\u025B\u02D0\u0259",
2707
+ just: "j\u0289st",
2708
+ kan: "k\u0251n",
2709
+ kanske: "k\u0251n\u0283\u0259",
2710
+ Kj\u00F8ttmeisene: "\xE7\xF8tm\xE6\u026As\u0259n\u0259",
2711
+ kold: "k\u0254l",
2712
+ kom: "k\u0254m",
2713
+ Kongeriket: "k\u0254\u014B\u0259\u027Ei\u02D0k\u0259",
2714
+ Kringsatt: "k\u027E\u026A\u014Bs\u0251t",
2715
+ Kristiania: "k\u027E\u026Ast\u026A\u0251\u02D0n\u026A\u0251",
2716
+ kristne: "k\u027E\u026Astn\u0259",
2717
+ Landets: "l\u0251n\u0259ts",
2718
+ Lavrans: "l\u0251\u02D0v\u027E\u0251ns",
2719
+ lektor: "l\u025Bkt\u0254\u027E",
2720
+ lig: "li\u02D0\u0261",
2721
+ m\u00E5: "mo\u02D0",
2722
+ maa: "mo\u02D0",
2723
+ Mandemagt: "m\u0251n\u02D0\u0259m\u0251kt",
2724
+ menneskerettighetene: "m\u025Bn\u02D0\u0259sk\u0259\u027E\u025Bt\u02D0\u026A\u0261he\u02D0t\u0259n\u0259",
2725
+ menneskerettigheter: "m\u025Bn\u02D0\u0259sk\u0259\u027E\u025Bt\u02D0\u026A\u0261he\u02D0t\u0259\u027E",
2726
+ menneskeverd: "m\u025Bn\u02D0\u0259sk\u0259ve\u02D0\u027Ed",
2727
+ mig: "m\u0251j",
2728
+ mod: "mo\u02D0d",
2729
+ monarkisk: "m\u0254n\u0251\u027Ek\u026Ask",
2730
+ Muld: "m\u0289l",
2731
+ Nej: "n\u0251j",
2732
+ noget: "no\u02D0\u0259",
2733
+ n\u00F8gne: "n\xF8jn\u0259",
2734
+ Norge: "n\u0254\u027E\u0261\u0259",
2735
+ nu: "n\u0289\u02D0",
2736
+ nysn\u00F8en: "ny\u02D0sn\xF8\u02D0\u0259n",
2737
+ \u00F8jne: "\u0254jn\u0259",
2738
+ op: "\u0254p",
2739
+ Peer: "pe\u02D0\u027E",
2740
+ Plads: "pl\u0251s",
2741
+ proppenerer: "p\u027E\u0254p\u0259ne\u02D0\u027E\u0259\u027E",
2742
+ Puslinger: "p\u0289\u02D0sl\u026A\u014B\u0259\u027E",
2743
+ raaber: "\u027Eo\u02D0b\u0259\u027E",
2744
+ R\u00E6kke: "\u027E\u025Bk\u0259",
2745
+ r\u00E6kker: "\u027E\u025Bk\u02D0\u0259\u027E",
2746
+ Ragnfrid: "\u027E\u0251\u014Bnf\u027Ei\u02D0d",
2747
+ regjeringsform: "\u027Eeje\u02D0\u027E\u026A\u014Bsf\u0254\u027Em",
2748
+ rettsstaten: "\u027E\u025Bt\u02D0s\u02D0t\u0251\u02D0t\u0259n",
2749
+ Rosentinter: "\u027Eu\u02D0s\u0259nt\u026Ant\u025B\u027E",
2750
+ rummeligt: "\u027E\u0289m\u02D0\u0259l\u026A\u0261t",
2751
+ Ryg: "\u027E\u028F\u0261",
2752
+ saganatt: "s\u0251\u02D0\u0261\u0251n\u0251t",
2753
+ samvittighet: "s\u0251mv\u026At\u02D0\u026A\u0261he\u02D0t",
2754
+ sidste: "s\u026Ast\u0259",
2755
+ siger: "si\u02D0\u0259\u027E",
2756
+ skabt: "sk\u0251pt",
2757
+ skal: "sk\u0251l",
2758
+ Skar: "sk\u0251\u02D0\u027E",
2759
+ skj\u00E6ms: "\u0283\u025Bms",
2760
+ Skodden: "sk\u0254d\u02D0\u0259n",
2761
+ Skydsskiftet: "\u0283\u028Ftssk\u026Aft\u0259",
2762
+ slig: "sli\u02D0\u0261",
2763
+ smaa: "smo\u02D0",
2764
+ Snees: "sne\u02D0s",
2765
+ Sneskavler: "sne\u02D0sk\u0251\u02D0vl\u025B\u027E",
2766
+ S\u00F8rby: "s\xF8\u02D0\u027Eby\u02D0",
2767
+ sp\u00F8r: "sp\xF8\u02D0\u027E",
2768
+ steget: "ste\u02D0\u0261\u0259",
2769
+ stundom: "st\u0289nd\u0254m",
2770
+ stygt: "st\u028Fkt",
2771
+ Sundbu: "s\u0289nb\u0289\u02D0",
2772
+ Syd: "sy\u02D0d",
2773
+ tabt: "t\u0251pt",
2774
+ Tant: "t\u0251nt",
2775
+ Terje: "t\u025B\u027Ej\u0259",
2776
+ T\u00F8v: "t\xF8\u02D0v",
2777
+ Tvi: "tvi\u02D0",
2778
+ tykkner: "t\u028Fk\u02D0n\u0259\u027E",
2779
+ uavhendelig: "\u0289\u02D0\u0251vh\u025Bnd\u0259l\u026A\u0261",
2780
+ udekket: "\u0289\u02D0d\u025Bk\u02D0\u0259",
2781
+ udelelig: "\u0289\u02D0de\u02D0l\u0259\u0261",
2782
+ uden: "\u0289\u02D0d\u0259n",
2783
+ underjordisk: "\u0289n\u02D0\u0259\u027Eju\u02D0\u027Ed\u026Ask",
2784
+ v\u00E6rbitt: "v\xE6\u02D0\u027Eb\u026At",
2785
+ vejr: "ve\u02D0\u027E",
2786
+ Verdigrunnlaget: "ve\u02D0\u027Ed\u026A\u0261\u027E\u0289n\u02D0l\u0251\u02D0\u0261\u0259",
2787
+ Vigen: "vi\u02D0\u0261\u0259n",
2788
+ vil: "v\u026Al",
2789
+ Vindvet: "v\u026Andv\u0259",
2790
+ visst: "v\u026Ast",
2791
+ Vorherres: "vo\u02D0\u027Eh\u025B\u027E\u0259s",
2792
+ yderste: "y\u02D0d\u0259\u027Est\u0259"
2793
+ };
2794
+
2795
+ // src/overrides/nl.ts
2796
+ var nl = {
2797
+ aandelen: "/\u02C8a\u02D0nd\u0259l\u0259n/",
2798
+ achterhaalt: "/\u02C8\u0251xt\u0259rha\u02D0lt/",
2799
+ ademde: "/\u02C8a\u02D0d\u0259md\u0259/",
2800
+ // past tense: breathed
2801
+ alchemie: "/\u0251lx\u0259\u02C8mi/",
2802
+ allen: "/\u02C8\u0251l\u0259n/",
2803
+ alsoo: "/\u0251l\u02C8so\u02D0/",
2804
+ // archaic: thus
2805
+ Anton: "/\u02C8\u0251nt\u0254n/",
2806
+ // proper name
2807
+ balkons: "/b\u0251l\u02C8k\u0254ns/",
2808
+ bange: "/\u02C8b\u0251\u014B\u0259/",
2809
+ benaeuwde: "/b\u0259\u02C8na\u02D0ud\u0259/",
2810
+ // archaic: oppressed
2811
+ // New sample overrides
2812
+ benoemd: "/b\u0259\u02C8nu\u02D0mt/",
2813
+ bevonden: "/b\u0259\u02C8v\u0254nd\u0259n/",
2814
+ bleeke: "/\u02C8ble\u02D0k\u0259/",
2815
+ // archaic: pale
2816
+ bloedroze: "/\u02C8blud\u02CCro\u02D0z\u0259/",
2817
+ boomen: "/\u02C8bo\u02D0m\u0259n/",
2818
+ // archaic: bomen (trees)
2819
+ braamstoelen: "/\u02C8bra\u02D0m\u02CCstu\u02D0l\u0259n/",
2820
+ // blackberry bushes
2821
+ bureautje: "/by\u02C8ro\u02D0tj\u0259/",
2822
+ burgery: "/b\u028Fr\u0263\u0259\u02C8r\u025Bi/",
2823
+ // archaic: burgerij
2824
+ dagelix: "/\u02C8da\u02D0\u0263\u0259l\u026Aks/",
2825
+ // archaic: dagelijks
2826
+ dagschemer: "/\u02C8d\u0251\u0263\u02CCsxe\u02D0m\u0259r/",
2827
+ dengenen: "/d\u025Bn\u02C8\u0263e\u02D0n\u0259n/",
2828
+ // archaic: those
2829
+ dese: "/\u02C8de\u02D0z\u0259/",
2830
+ // archaic: deze
2831
+ deselve: "/d\u0259\u02C8z\u025Blv\u0259/",
2832
+ // archaic: dezelfde
2833
+ dien: "/di\u02D0n/",
2834
+ // archaic dative: that
2835
+ draaide: "/\u02C8dra\u02D0id\u0259/",
2836
+ duitsen: "/\u02C8d\u0153yts\u0259n/",
2837
+ // archaic: German
2838
+ eenvouds: "/\u02C8e\u02D0nv\u0251uts/",
2839
+ // archaic: simplicity
2840
+ eert: "/e\u02D0rt/",
2841
+ egters: "/\u02C8\u025B\u0263t\u0259rs/",
2842
+ // proper name
2843
+ engelen: "/\u02C8\u025B\u014B\u0259l\u0259n/",
2844
+ erbarremt: "/\u025Br\u02C8b\u0251r\u0259mt/",
2845
+ // archaic: have mercy
2846
+ erembodegem: "/\u02C8e\u02D0r\u0259m\u02CCbo\u02D0d\u025B\u0263\u0259m/",
2847
+ erkers: "/\u02C8\u025Brk\u0259rs/",
2848
+ flauwe: "/\u02C8fl\u0251u\u0259/",
2849
+ frits: "/fr\u026Ats/",
2850
+ gekend: "/\u0263\u0259\u02C8k\u025Bnt/",
2851
+ // past participle: known
2852
+ gekomen: "/\u0263\u0259\u02C8ko\u02D0m\u0259n/",
2853
+ gekund: "/\u0263\u0259\u02C8k\u028Fnt/",
2854
+ gemeenschappelijks: "/\u0263\u0259\u02C8me\u02D0nsx\u0251p\u0259l\u0259ks/",
2855
+ // archaic genitive
2856
+ geschrey: "/\u0263\u0259\u02C8sxr\u025Bi/",
2857
+ // archaic: outcry
2858
+ gestelt: "/\u0263\u0259\u02C8st\u025Blt/",
2859
+ // archaic: placed
2860
+ geverfd: "/\u0263\u0259\u02C8v\u025Brft/",
2861
+ // painted, dyed
2862
+ gewone: "/\u0263\u0259\u02C8wo\u02D0n\u0259/",
2863
+ ghewelt: "/\u0263\u0259\u02C8w\u025Blt/",
2864
+ // archaic: violence
2865
+ godt: "/\u0263\u0254t/",
2866
+ // archaic: God
2867
+ goedkope: "/\u02C8\u0263utk\u02B0o\u02D0p\u0259/",
2868
+ graaft: "/\u0263ra\u02D0ft/",
2869
+ groeiden: "/\u02C8\u0263ru\u02D0id\u0259n/",
2870
+ grooten: "/\u02C8\u0263ro\u02D0t\u0259n/",
2871
+ // archaic: great
2872
+ Haarlem: "/\u02C8ha\u02D0rl\u0259m/",
2873
+ // city name
2874
+ had: "/h\u0251t/",
2875
+ hadden: "/\u02C8h\u0251d\u0259n/",
2876
+ hare: "/\u02C8ha\u02D0r\u0259/",
2877
+ // archaic: her
2878
+ hele: "/\u02C8he\u02D0l\u0259/",
2879
+ hemelsche: "/\u02C8he\u02D0m\u0259lsx\u0259/",
2880
+ // archaic: heavenly
2881
+ henri: "/\u0251\u0303\u02C8ri/",
2882
+ // French name
2883
+ herinner: "/h\u025B\u02C8r\u026An\u0259r/",
2884
+ hispanje: "/h\u026As\u02C8p\u0251\u0272\u0259/",
2885
+ // archaic: Spain
2886
+ hooft: "/ho\u02D0ft/",
2887
+ // archaic: hoofd
2888
+ hooren: "/\u02C8ho\u02D0r\u0259n/",
2889
+ // archaic: horen
2890
+ immense: "/\u026A\u02C8m\u025Bns\u0259/",
2891
+ inni: "/\u02C8\u026Ani/",
2892
+ // proper name (Mulisch)
2893
+ kapellekensbaan: "/ka\u02C8p\u025Bl\u0259k\u0259ns\u02CCba\u02D0n/",
2894
+ kennelick: "/\u02C8k\u025Bn\u0259l\u0259k/",
2895
+ // archaic: evident
2896
+ kraanwagentje: "/\u02C8kra\u02D0n\u02CCwa\u02D0\u0263\u0259ntj\u0259/",
2897
+ laatsten: "/\u02C8la\u02D0tst\u0259n/",
2898
+ lande: "/\u02C8l\u0251nd\u0259/",
2899
+ // archaic dative: land
2900
+ lauriergracht: "/l\u0251u\u02C8ri\u02D0r\u0263r\u0251xt/",
2901
+ lesen: "/\u02C8le\u02D0z\u0259n/",
2902
+ // archaic: lezen
2903
+ lesten: "/\u02C8l\u025Bst\u0259n/",
2904
+ // archaic: last
2905
+ louterende: "/\u02C8l\u0251ut\u0259r\u025Bnd\u0259/",
2906
+ mooiste: "/\u02C8mo\u02D0ist\u0259/",
2907
+ // superlative: prettiest
2908
+ my: "/m\u025Bi/",
2909
+ // archaic: mij
2910
+ nassouwe: "/n\u0251\u02C8s\u0251u\u0259/",
2911
+ // archaic: Nassau
2912
+ niemandsbos: "/\u02C8ni\u02D0m\u0251nts\u02CCb\u0254s/",
2913
+ // nobody's forest
2914
+ oeroeg: "/\u02C8uru\u0263/",
2915
+ // proper name (Haasse)
2916
+ ondersaten: "/\u02C8\u0254nd\u0259r\u02CCza\u02D0t\u0259n/",
2917
+ // archaic: subjects
2918
+ ontwaakte: "/\u0254nt\u02C8wa\u02D0kt\u0259/",
2919
+ onverveerd: "/\u02CC\u0254nv\u0259r\u02C8ve\u02D0rt/",
2920
+ opgerezen: "/\u02C8\u0254p\u0263\u0259\u02CCre\u02D0z\u0259n/",
2921
+ osewoudt: "/\u02C8o\u02D0z\u0259\u02CCw\u0251ut/",
2922
+ // proper name (Hermans)
2923
+ ouders: "/\u02C8\u0251ud\u0259rs/",
2924
+ philips: "/\u02C8fil\u026Aps/",
2925
+ plachten: "/\u02C8pl\u0251xt\u0259n/",
2926
+ // archaic: used to
2927
+ pleegde: "/\u02C8ple\u02D0\u0263d\u0259/",
2928
+ po\u00EBtische: "/po\u02C8e\u02D0t\u026Asx\u0259/",
2929
+ prince: "/\u02C8pr\u026Ans\u0259/",
2930
+ // archaic: prins
2931
+ prinse: "/\u02C8pr\u026Ans\u0259/",
2932
+ // archaic variant
2933
+ reed: "/re\u02D0t/",
2934
+ romans: "/ro\u02C8m\u0251ns/",
2935
+ saluyt: "/sa\u02D0\u02C8l\u0153yt/",
2936
+ // archaic: greeting
2937
+ sarphatistraat: "/s\u0251r\u02C8fa\u02D0tistra\u02D0t/",
2938
+ // street name (Nescio)
2939
+ scherpste: "/\u02C8sx\u025Brpst\u0259/",
2940
+ schilderskade: "/\u02C8sx\u026Ald\u0259rs\u02CCka\u02D0d\u0259/",
2941
+ // street name
2942
+ Seynaeve: "/\u02C8s\u025Bina\u02D0v\u0259/",
2943
+ // Belgian proper name
2944
+ sien: "/si\u02D0n/",
2945
+ // archaic: zien
2946
+ smalle: "/\u02C8sm\u0251l\u0259/",
2947
+ // inflected: narrow
2948
+ spoorwegzate: "/\u02C8spo\u02D0r\u02CCw\u025Bxza\u02D0t\u0259/",
2949
+ // archaic: railway junction
2950
+ Steenwijk: "/\u02C8ste\u02D0n\u028B\u025Bik/",
2951
+ // place name
2952
+ stonden: "/\u02C8st\u0254nd\u0259n/",
2953
+ stopte: "/\u02C8st\u0254pt\u0259/",
2954
+ tamarindeboomen: "/ta\u02D0ma\u02D0\u02C8r\u026And\u0259\u02CCbo\u02D0m\u0259n/",
2955
+ // archaic: tamarind trees
2956
+ tegenwoordighe: "/\u02C8te\u02D0\u0263\u0259n\u02CCwo\u02D0rd\u0259\u0263\u0259/",
2957
+ // archaic
2958
+ terechtgekomen: "/t\u0259\u02C8r\u025Bxt\u0263\u0259\u02CCko\u02D0m\u0259n/",
2959
+ termurenlaan: "/t\u025Br\u02C8my\u02D0r\u0259n\u02CCla\u02D0n/",
2960
+ terugdenk: "/t\u0259\u02C8r\u028Fxd\u025B\u014Bk/",
2961
+ // compound: think back
2962
+ torentjes: "/\u02C8to\u02D0r\u0259ntj\u0259s/",
2963
+ toverplaatjes: "/\u02C8to\u02D0v\u0259r\u02CCpla\u02D0tj\u0259s/",
2964
+ // magic pictures
2965
+ tracht: "/tr\u0251xt/",
2966
+ // verb: try
2967
+ tragische: "/\u02C8tra\u02D0\u0263\u026Asx\u0259/",
2968
+ tusschen: "/\u02C8t\u028Fsx\u0259n/",
2969
+ // archaic: tussen (between)
2970
+ velden: "/\u02C8v\u025Bld\u0259n/",
2971
+ vermoordde: "/v\u0259r\u02C8mo\u02D0rd\u0259/",
2972
+ verschijnt: "/v\u0259r\u02C8sx\u025Bint/",
2973
+ // appears
2974
+ vlamde: "/\u02C8vl\u0251md\u0259/",
2975
+ vloog: "/vlo\u02D0x/",
2976
+ volcx: "/v\u0254lks/",
2977
+ // archaic: volks
2978
+ volle: "/\u02C8v\u0254l\u0259/",
2979
+ voorschoten: "/\u02C8vo\u02D0rsxo\u02D0t\u0259n/",
2980
+ // place name
2981
+ voorzomermiddag: "/\u02C8vo\u02D0r\u02CCzo\u02D0m\u0259r\u02CCm\u026Ad\u0251x/",
2982
+ vroege: "/\u02C8vru\u0263\u0259/",
2983
+ warme: "/\u02C8v\u0251rm\u0259/",
2984
+ weerd: "/we\u02D0rt/",
2985
+ // archaic: waard
2986
+ weggegaan: "/\u02C8v\u025Bx\u0263\u0259\u02CC\u0263a\u02D0n/",
2987
+ werd: "/w\u025Brt/",
2988
+ werkmenschen: "/\u02C8w\u025Brk\u02CCm\u025Bnsx\u0259n/",
2989
+ // archaic: working people
2990
+ "west-java": "/\u02CCw\u025Bst\u02C8ja\u02D0va/",
2991
+ wintrop: "/\u02C8v\u026Antr\u0254p/",
2992
+ // proper name (Mulisch)
2993
+ wonderlijker: "/\u02C8\u028B\u0254nd\u0259rl\u025Bik\u0259r/",
2994
+ // comparative: more wonderful
2995
+ woonde: "/\u02C8wo\u02D0nd\u0259/",
2996
+ wordt: "/w\u0254rt/",
2997
+ yegelick: "/\u02C8je\u02D0\u0263\u0259l\u026Ak/",
2998
+ // archaic: iedereen
2999
+ zakkende: "/\u02C8z\u0251k\u0259nd\u0259/",
3000
+ // sinking
3001
+ zekere: "/\u02C8ze\u02D0k\u0259r\u0259/",
3002
+ // a certain
3003
+ zijne: "/\u02C8z\u025Bin\u0259/",
3004
+ // archaic: zijn
3005
+ zocht: "/z\u0254xt/",
3006
+ zuivere: "/\u02C8z\u0153yv\u0259r\u0259/",
3007
+ // pure (inflected)
3008
+ zulke: "/\u02C8z\u028Flk\u0259/"
3009
+ };
3010
+
3011
+ // src/overrides/or.ts
3012
+ var or_ = {
3013
+ // Bande Utkala Janani (Odia anthem)
3014
+ \u0B05\u0B19\u0B4D\u0B17\u0B47: "\u0254\u014B\u0261e",
3015
+ // Jagannath Das — Odia Bhagabata
3016
+ \u0B05\u0B28\u0B3E\u0B26\u0B3F: "\u0254na\u02D0d\u032Ai",
3017
+ \u0B05\u0B2A\u0B4D\u0B30\u0B2E\u0B3F\u0B24: "\u0254p\u027E\u0254mit\u032A\u0254",
3018
+ \u0B05\u0B30\u0B4D\u0B25\u0B47: "\u0254\u027Et\u032A\u02B0e",
3019
+ \u0B05\u0B36\u0B47\u0B37: "\u0254\u0283e\u0282\u0254",
3020
+ \u0B06\u0B17\u0B41\u0B01: "a\u02D0\u0261\u0169",
3021
+ \u0B06\u0B24\u0B4D\u0B2E\u0B40\u0B5F: "a\u02D0t\u032Ami\u02D0j\u0254",
3022
+ \u0B06\u0B30\u0B24: "a\u02D0\u027E\u0254t\u032A\u0254",
3023
+ // Gangadhar Meher — Tapaswinee
3024
+ \u0B06\u0B39\u0B4D\u0B32\u0B3E\u0B26: "a\u02D0hl\u032Aa\u02D0d\u032A\u0254",
3025
+ \u0B07\u0B28\u0B4D\u0B26\u0B4D\u0B30\u0B28\u0B40\u0B33: "ind\u032A\u027E\u0254ni\u02D0l\u032A\u0254",
3026
+ \u0B09\u0B24\u0B4D\u0B15\u0B33\u0B2D\u0B41\u0B2C\u0B28\u0B47: "ut\u032Ak\u0254l\u032A\u0254b\u02B1ub\u0254ne",
3027
+ \u0B09\u0B24\u0B4D\u0B15\u0B33\u0B30: "ut\u032Ak\u0254l\u032A\u0254\u027E\u0254",
3028
+ // Upendra Bhanja — Baidehisha Bilasa
3029
+ \u0B09\u0B26\u0B4D\u0B2D\u0B2C: "ud\u032Ab\u02B1\u0254b\u0254",
3030
+ \u0B0F\u0B23\u0B47: "e\u0273e",
3031
+ \u0B14\u0B30\u0B38\u0B41: "\u0254w\u027E\u0254su",
3032
+ // Rangabati (folk song)
3033
+ \u0B15\u0B28\u0B15\u0B32\u0B24\u0B3E: "k\u0254n\u0254k\u0254l\u032A\u0254t\u032Aa\u02D0",
3034
+ \u0B15\u0B33: "k\u0254l\u032A\u0254",
3035
+ \u0B15\u0B39\u0B32\u0B4B: "k\u0254h\u0254l\u032Ao\u02D0",
3036
+ \u0B15\u0B3E\u0B24\u0B30: "ka\u02D0t\u032A\u0254\u027E\u0254",
3037
+ // Fakir Mohan Senapati — Chha Mana Atha Guntha
3038
+ \u0B15\u0B3E\u0B30\u0B2C\u0B3E\u0B30: "ka\u02D0\u027E\u0254ba\u02D0\u027E\u0254",
3039
+ \u0B15\u0B3E\u0B30\u0B3E\u0B26\u0B23\u0B4D\u0B21\u0B47: "ka\u02D0\u027Ea\u02D0d\u032A\u0254\u0273\u0256e",
3040
+ \u0B15\u0B3F\u0B2E\u0B4D\u0B2A\u0B3E: "kimpa\u02D0",
3041
+ \u0B15\u0B43\u0B37\u0B4D\u0B23\u0B2A\u0B3E\u0B26: "k\u027Eu\u0282\u0273\u0254pa\u02D0d\u032A\u0254",
3042
+ \u0B15\u0B47\u0B2C\u0B3E: "keba\u02D0",
3043
+ \u0B15\u0B47\u0B36\u0B3E: "ke\u0283a\u02D0",
3044
+ \u0B15\u0B4B\u0B36: "ko\u02D0\u0283\u0254",
3045
+ // Madhusudan Das — Utkala Santana
3046
+ \u0B17\u0B19\u0B4D\u0B17\u0B3E: "\u0261\u0254\u014B\u0261a\u02D0",
3047
+ \u0B17\u0B33\u0B41\u0B1B\u0B3F: "\u0261\u0254l\u032Aut\u0361\u0283\u02B0i",
3048
+ \u0B17\u0B40\u0B24\u0B28\u0B3E\u0B26: "\u0261i\u02D0t\u032A\u0254na\u02D0d\u032A\u0254",
3049
+ \u0B17\u0B41\u0B23\u0B3E\u0B33\u0B5F: "\u0261u\u0273a\u02D0l\u032A\u0254j\u0254",
3050
+ \u0B17\u0B41\u0B01\u0B25\u0B3E: "\u0261\u0169t\u032A\u02B0a\u02D0",
3051
+ \u0B17\u0B4B: "\u0261o\u02D0",
3052
+ \u0B17\u0B4B\u0B26\u0B3E\u0B2C\u0B30\u0B40: "\u0261o\u02D0d\u032Aa\u02D0b\u0254\u027Ei\u02D0",
3053
+ \u0B18\u0B1F\u0B47: "\u0261\u02B1\u0254\u0288e",
3054
+ \u0B18\u0B41\u0B2E\u0B41\u0B38\u0B30\u0B05\u0B27\u0B3F\u0B2A: "\u0261\u02B1umus\u0254\u027E\u0254d\u032A\u02B1ip\u0254",
3055
+ \u0B1A\u0B33\u0B47: "t\u0361\u0283\u0254l\u032Ae",
3056
+ \u0B1A\u0B33\u0B47\u0B28\u0B3E\u0B39\u0B3F\u0B01: "t\u0361\u0283\u0254l\u032Aena\u02D0h\u0129",
3057
+ \u0B1A\u0B3E\u0B30\u0B41: "t\u0361\u0283a\u02D0\u027Eu",
3058
+ \u0B1A\u0B3E\u0B32\u0B3F\u0B2F\u0B3E\u0B06\u0B28\u0B4D\u0B24\u0B41: "t\u0361\u0283a\u02D0l\u032Aija\u02D0a\u02D0nt\u032Au",
3059
+ \u0B1C\u0B17\u0B28\u0B4D\u0B28\u0B3E\u0B25\u0B47: "d\u0361\u0292\u0254\u0261\u0254nna\u02D0t\u032A\u02B0e",
3060
+ \u0B1C\u0B28\u0B4D\u0B2E\u0B15\u0B3E\u0B33\u0B30\u0B41: "d\u0361\u0292\u0254nm\u0254ka\u02D0l\u032A\u0254\u027Eu",
3061
+ \u0B1C\u0B2E\u0B3F\u0B26\u0B3E\u0B30: "d\u0361\u0292\u0254mid\u032Aa\u02D0\u027E\u0254",
3062
+ \u0B1C\u0B3F\u0B24: "d\u0361\u0292it\u032A\u0254",
3063
+ \u0B1C\u0B4D\u0B5F\u0B4B\u0B24\u0B3F\u0B30\u0B4D\u0B2E\u0B5F\u0B40: "d\u0361\u0292jo\u02D0t\u032Ai\u027Em\u0254ji\u02D0",
3064
+ \u0B24\u0B26\u0B4D\u0B27\u0B24: "t\u032A\u0254d\u032Ad\u032A\u02B1\u0254t\u032A\u0254",
3065
+ \u0B24\u0B28\u0B41\u0B15\u0B3E\u0B28\u0B4D\u0B24\u0B3F: "t\u032A\u0254nuka\u02D0nt\u032Ai",
3066
+ \u0B24\u0B30\u0B19\u0B4D\u0B17\u0B47: "t\u032A\u0254\u027E\u0254\u014B\u0261e",
3067
+ \u0B24\u0B30\u0B3F\u0B32\u0B47: "t\u032A\u0254\u027Eil\u032Ae",
3068
+ \u0B24\u0B3E\u0B17\u0B47: "t\u032Aa\u02D0\u0261e",
3069
+ \u0B24\u0B41\u0B39\u0B3F: "t\u032Auhi",
3070
+ \u0B24\u0B4B\u0B39\u0B30: "t\u032Ao\u02D0h\u0254\u027E\u0254",
3071
+ // Fakir Mohan Senapati — Odia Kahiki Daridra
3072
+ \u0B26\u0B30\u0B3F\u0B26\u0B4D\u0B30\u0B24\u0B3E: "d\u032A\u0254\u027Eid\u032A\u027E\u0254t\u032Aa\u02D0",
3073
+ \u0B26\u0B40\u0B30\u0B4D\u0B18\u0B3F\u0B15\u0B3E: "d\u032Ai\u02D0\u027E\u0261\u02B1ika\u02D0",
3074
+ \u0B26\u0B40\u0B30\u0B4D\u0B18\u0B47: "d\u032Ai\u02D0\u027E\u0261\u02B1e",
3075
+ \u0B26\u0B41\u0B03\u0B16: "d\u032Auk\u02B1\u0254",
3076
+ \u0B26\u0B41\u0B30\u0B4D\u0B17\u0B24\u0B3F\u0B30: "d\u032Au\u027E\u0261\u0254t\u032Ai\u027E\u0254",
3077
+ \u0B26\u0B47\u0B36\u0B2C\u0B3E\u0B38\u0B40: "d\u032Ae\u0283\u0254ba\u02D0si\u02D0",
3078
+ \u0B26\u0B47\u0B36\u0B2E\u0B3E\u0B1F\u0B3F\u0B30\u0B47: "d\u032Ae\u0283\u0254ma\u02D0\u0288i\u027Ee",
3079
+ \u0B26\u0B4D\u0B5F\u0B41\u0B24\u0B3F: "d\u032Ajut\u032Ai",
3080
+ \u0B27\u0B28\u0B1E\u0B4D\u0B1C\u0B5F: "d\u032A\u02B1\u0254n\u0254\u0272d\u0361\u0292\u0254j\u0254",
3081
+ \u0B27\u0B3E\u0B28\u0B30: "d\u032A\u02B1a\u02D0n\u0254\u027E\u0254",
3082
+ \u0B28: "n\u0254",
3083
+ \u0B28\u0B17\u0B26: "n\u0254\u0261\u0254d\u032A\u0254",
3084
+ \u0B28\u0B28\u0B4D\u0B26\u0B28: "n\u0254nd\u032A\u0254n\u0254",
3085
+ \u0B28\u0B2E\u0B07\u0B01: "n\u0254m\u0254\u0129",
3086
+ \u0B28\u0B30\u0B4D\u0B15\u0B47: "n\u0254\u027Eke",
3087
+ \u0B28\u0B3E\u0B30\u0B40\u0B19\u0B4D\u0B15: "na\u02D0\u027Ei\u02D0\u014Bk\u0254",
3088
+ \u0B28\u0B3F\u0B30\u0B28\u0B4D\u0B24\u0B30\u0B47: "ni\u027E\u0254nt\u032A\u0254\u027Ee",
3089
+ \u0B28\u0B3F\u0B39\u0B3F\u0B24: "nihit\u032A\u0254",
3090
+ \u0B28\u0B40\u0B33\u0B3E\u0B2E\u0B4D\u0B2C\u0B41: "ni\u02D0l\u032Aa\u02D0mbu",
3091
+ \u0B28\u0B43\u0B2A: "n\u027Eup\u0254",
3092
+ \u0B2A\u0B21\u0B3C\u0B3F\u0B25\u0B3E\u0B09: "p\u0254\u0256\u032Ait\u032A\u02B0a\u02D0u",
3093
+ \u0B2A\u0B26\u0B41: "p\u0254d\u032Au",
3094
+ \u0B2A\u0B26\u0B47: "p\u0254d\u032Ae",
3095
+ \u0B2A\u0B5F\u0B30\u0B47: "p\u0254j\u0254\u027Ee",
3096
+ \u0B2A\u0B30\u0B3E: "p\u0254\u027Ea\u02D0",
3097
+ \u0B2A\u0B42\u0B30\u0B4D\u0B2C\u0B2A\u0B41\u0B30\u0B41\u0B37: "pu\u02D0\u027Eb\u0254pu\u027Eu\u0282\u0254",
3098
+ \u0B2A\u0B4B\u0B37\u0B23: "po\u02D0\u0282\u0254\u0273\u0254",
3099
+ \u0B2A\u0B4D\u0B30\u0B1C\u0B4D\u0B1E\u0B3E: "p\u027E\u0254d\u0361\u0292\u0272a\u02D0",
3100
+ \u0B2A\u0B4D\u0B30\u0B2C\u0B28\u0B4D\u0B27\u0B47: "p\u027E\u0254b\u0254nd\u032A\u02B1e",
3101
+ \u0B2A\u0B4D\u0B30\u0B3E\u0B23\u0B40\u0B2E\u0B3E\u0B28\u0B19\u0B4D\u0B15: "p\u027Ea\u02D0\u0273i\u02D0ma\u02D0n\u0254\u014Bk\u0254",
3102
+ \u0B2C\u0B28\u0B2D\u0B42\u0B2E\u0B3F: "b\u0254n\u0254b\u02B1u\u02D0mi",
3103
+ \u0B2C\u0B28\u0B4D\u0B26\u0B28: "b\u0254nd\u032A\u0254n\u0254",
3104
+ \u0B2C\u0B28\u0B4D\u0B26\u0B47: "b\u0254nd\u032Ae",
3105
+ \u0B2C\u0B30\u0B39\u0B3F\u0B2C\u0B02\u0B36\u0B47: "b\u0254\u027E\u0254hib\u0254\u014B\u0283e",
3106
+ \u0B2C\u0B30\u0B4D\u0B23\u0B4D\u0B23\u0B07: "b\u0254\u027E\u0273\u0273\u0254i",
3107
+ \u0B2C\u0B3F\u0B1A\u0B3E\u0B30\u0B47: "bit\u0361\u0283a\u02D0\u027Ee",
3108
+ \u0B2C\u0B3F\u0B28\u0B41: "binu",
3109
+ \u0B2C\u0B3F\u0B2C\u0B47\u0B15: "bibek",
3110
+ \u0B2C\u0B3F\u0B33\u0B3E\u0B38: "bil\u032Aa\u02D0s\u0254",
3111
+ \u0B2C\u0B3F\u0B36\u0B3F\u0B37\u0B4D\u0B1F\u0B47: "bi\u0283i\u0282\u0288e",
3112
+ \u0B2C\u0B3F\u0B39\u0B19\u0B4D\u0B17\u0B47: "bih\u0254\u014B\u0261e",
3113
+ \u0B2C\u0B47\u0B28\u0B3F: "beni",
3114
+ \u0B2C\u0B47\u0B36\u0B3E: "be\u0283a\u02D0",
3115
+ \u0B2C\u0B4B\u0B32\u0B3E\u0B09: "bo\u02D0l\u032Aa\u02D0u",
3116
+ \u0B2C\u0B4D\u0B5F\u0B25\u0B3E: "bj\u0254t\u032A\u02B0a\u02D0",
3117
+ \u0B2C\u0B4D\u0B30\u0B1C: "b\u027E\u0254d\u0361\u0292\u0254",
3118
+ \u0B2D\u0B23\u0B4D\u0B21\u0B3E\u0B30: "b\u02B1\u0254\u0273\u0256a\u02D0\u027E\u0254",
3119
+ \u0B2D\u0B3E\u0B24\u0B43\u0B2D\u0B3E\u0B2C: "b\u02B1a\u02D0t\u032A\u027Eub\u02B1a\u02D0b\u0254",
3120
+ \u0B2D\u0B3E\u0B37\u0B2E\u0B5F\u0B40: "b\u02B1a\u02D0\u0282\u0254m\u0254ji\u02D0",
3121
+ \u0B2D\u0B40\u0B30\u0B41: "b\u02B1i\u02D0\u027Eu",
3122
+ \u0B2D\u0B42\u0B27\u0B30\u0B2E\u0B3E\u0B33\u0B3E: "b\u02B1u\u02D0d\u032A\u02B1\u0254\u027E\u0254ma\u02D0l\u032Aa\u02D0",
3123
+ \u0B2D\u0B47\u0B26\u0B3F: "b\u02B1ed\u032Ai",
3124
+ \u0B2E\u0B15\u0B30\u0B28\u0B4D\u0B26: "m\u0254k\u0254\u027E\u0254nd\u032A\u0254",
3125
+ \u0B2E\u0B19\u0B4D\u0B17\u0B30\u0B3E\u0B1C: "m\u0254\u014B\u0261\u0254\u027Ea\u02D0d\u0361\u0292\u0254",
3126
+ \u0B2E\u0B28\u0B30\u0B47: "m\u0254n\u0254\u027Ee",
3127
+ \u0B2E\u0B28\u0B4B\u0B39\u0B30: "m\u0254no\u02D0h\u0254\u027E\u0254",
3128
+ \u0B2E\u0B2B\u0B38\u0B32\u0B30: "m\u0254p\u02B0\u0254s\u0254l\u032A\u0254\u027E\u0254",
3129
+ \u0B2E\u0B30\u0B3E\u0B33\u0B2E\u0B3E\u0B33\u0B3F\u0B28\u0B40: "m\u0254\u027Ea\u02D0l\u032A\u0254ma\u02D0l\u032Aini\u02D0",
3130
+ \u0B2E\u0B30\u0B4D\u0B2F\u0B4D\u0B5F\u0B3E\u0B26\u0B3E: "m\u0254\u027Ed\u0292ja\u02D0d\u032Aa\u02D0",
3131
+ \u0B2E\u0B39\u0B3E\u0B1C\u0B28: "m\u0254ha\u02D0d\u0361\u0292\u0254n\u0254",
3132
+ \u0B2E\u0B39\u0B3E\u0B1C\u0B28\u0B40: "m\u0254ha\u02D0d\u0361\u0292\u0254ni\u02D0",
3133
+ \u0B2E\u0B3E\u0B32\u0B47: "ma\u02D0l\u032Ae",
3134
+ \u0B2E\u0B3F\u0B36\u0B41: "mi\u0283u",
3135
+ \u0B2E\u0B41\u0B16\u0B30\u0B3F\u0B24: "muk\u02B1\u0254\u027Eit\u032A\u0254",
3136
+ \u0B30\u0B19\u0B4D\u0B17\u0B2C\u0B24\u0B40: "\u027E\u0254\u014B\u0261\u0254b\u0254t\u032Ai\u02D0",
3137
+ \u0B30\u0B1C\u0B3E: "\u027E\u0254d\u0361\u0292a\u02D0",
3138
+ \u0B30\u0B3E\u0B1C\u0B3F\u0B24: "\u027Ea\u02D0d\u0361\u0292it\u032A\u0254",
3139
+ \u0B30\u0B3E\u0B28\u0B3F: "\u027Ea\u02D0ni",
3140
+ \u0B30\u0B4B\u0B26\u0B28: "\u027Eo\u02D0d\u032A\u0254n\u0254",
3141
+ \u0B36\u0B41\u0B23\u0B3E\u0B2F\u0B3E\u0B0F: "\u0283u\u0273a\u02D0ja\u02D0e",
3142
+ \u0B36\u0B41\u0B26\u0B4D\u0B27: "\u0283ud\u032Ad\u032A\u02B1\u0254",
3143
+ \u0B36\u0B41\u0B2D: "\u0283ub\u02B1\u0254",
3144
+ \u0B36\u0B41\u0B2D\u0B4D\u0B30: "\u0283ub\u02B1\u027E\u0254",
3145
+ \u0B36\u0B4B\u0B2D\u0B3E\u0B30: "\u0283ob\u02B1a\u02D0\u027E\u0254",
3146
+ \u0B38\u0B2A\u0B28: "s\u0254p\u0254n\u0254",
3147
+ \u0B38\u0B39\u0B41: "s\u0254hu",
3148
+ \u0B38\u0B3E\u0B1C\u0B47: "sa\u02D0d\u0361\u0292e",
3149
+ \u0B38\u0B41\u0B24\u0B3E: "sut\u032Aa\u02D0",
3150
+ \u0B38\u0B41\u0B2A\u0B4D\u0B30\u0B15\u0B3E\u0B36: "sup\u027E\u0254ka\u02D0\u0283\u0254",
3151
+ \u0B38\u0B47\u0B2E\u0B3E\u0B28\u0B19\u0B4D\u0B15\u0B20\u0B3E\u0B30\u0B47: "sema\u02D0n\u0254\u014Bk\u0254\u0288\u02B0a\u02D0\u027Ee",
3152
+ \u0B38\u0B4B\u0B26\u0B30: "so\u02D0d\u032A\u0254\u027E\u0254",
3153
+ \u0B38\u0B4D\u0B2B\u0B41\u0B30\u0B4D\u0B24\u0B4D\u0B24\u0B3F: "sp\u02B0u\u027Et\u032At\u032Ai",
3154
+ "\u0B38\u0B4D\u0B71\u0B3E\u0B27\u0B40\u0B28": "swa\u02D0d\u032A\u02B1i\u02D0n\u0254",
3155
+ \u0B39\u0B3E\u0B38\u0B2E\u0B5F\u0B40: "ha\u02D0s\u0254m\u0254ji\u02D0",
3156
+ \u0B39\u0B3F\u0B24\u0B47: "hit\u032Ae",
3157
+ \u0B39\u0B43\u0B26\u0B5F\u0B30\u0B47: "h\u027Eud\u032A\u0254j\u0254\u027Ee",
3158
+ \u0B39\u0B43\u0B26\u0B47: "h\u027Eud\u032Ae",
3159
+ \u0B39\u0B47\u0B32: "hel\u032A\u0254",
3160
+ \u0B39\u0B47\u0B32\u0B3E\u0B24: "hel\u032Aa\u02D0t\u032A\u0254"
3161
+ };
3162
+
3163
+ // src/overrides/pt.ts
3164
+ var pt = {
3165
+ a: "/a/",
3166
+ agora: "/a\u02C8\u0261o\u027Ea/",
3167
+ ainda: "/a\u02C8\u0129da/",
3168
+ alto: "/\u02C8awtu/",
3169
+ alvejei: "/awve\u02C8\u0292ej/",
3170
+ antes: "/\u02C8\xE3t\u0283is/",
3171
+ // New sample overrides
3172
+ Antigamente: "/\xE3t\u0283i\u0261a\u02C8m\u1EBDt\u0283i/",
3173
+ aparecesse: "/apa\u027Ee\u02C8sesi/",
3174
+ assembleia: "/as\u1EBD\u02C8bleja/",
3175
+ autom\u00F3veis: "/awto\u02C8m\u0254vejs/",
3176
+ autor: "/aw\u02C8to\u027E/",
3177
+ aventura: "/av\u1EBD\u02C8tu\u027Ea/",
3178
+ bater: "/ba\u02C8tex/",
3179
+ beija: "/\u02C8bej\u0292a/",
3180
+ bem: "/b\u1EBDj/",
3181
+ calmo: "/\u02C8kawmu/",
3182
+ cama: "/\u02C8k\xE3ma/",
3183
+ certa: "/\u02C8s\u025B\u027Eta/",
3184
+ chamam: "/\u02C8\u0283\xE3m\xE3w/",
3185
+ como: "/\u02C8komu/",
3186
+ contraparente: "/k\xF5t\u027Eapa\u02C8\u027E\u1EBDt\u0283i/",
3187
+ costume: "/kos\u02C8t\u0169mi/",
3188
+ cotovelo: "/koto\u02C8velu/",
3189
+ cruzarmos: "/k\u027Euz\u02C8a\u027Emus/",
3190
+ d: "/de/",
3191
+ // abbreviation for Dom
3192
+ de: "/d\u0292i/",
3193
+ dei: "/dej/",
3194
+ dele: "/\u02C8deli/",
3195
+ dentro: "/\u02C8d\u1EBDt\u027Eu/",
3196
+ deu: "/dew/",
3197
+ dia: "/\u02C8d\u0292ia/",
3198
+ direita: "/d\u0292i\u02C8\u027Eejta/",
3199
+ direito: "/d\u0292i\u02C8\u027Eejtu/",
3200
+ disco: "/\u02C8d\u0292isku/",
3201
+ disposto: "/d\u0292is\u02C8postu/",
3202
+ disse: "/\u02C8d\u0292isi/",
3203
+ dois: "/dojs/",
3204
+ \u00E9: "/\u025B/",
3205
+ emprenhou: "/\u1EBDp\u027Ee\u02C8\u0272ow/",
3206
+ encante: "/\u1EBD\u02C8k\xE3t\u0283i/",
3207
+ entre: "/\u02C8\u1EBDt\u027Ei/",
3208
+ escolherei: "/isko\u028Ee\u02C8\u027Eej/",
3209
+ esf\u00EDngico: "/es\u02C8f\u0129\u0292iku/",
3210
+ eterna: "/e\u02C8t\u025B\u027Ena/",
3211
+ eu: "/ew/",
3212
+ Europa: "/ew\u02C8\u027E\u0254pa/",
3213
+ face: "/\u02C8fasi/",
3214
+ faixas: "/\u02C8faj\u0283as/",
3215
+ falsa: "/\u02C8fawsa/",
3216
+ faz: "/fas/",
3217
+ fim: "/f\u0129/",
3218
+ Fita: "/\u02C8fita/",
3219
+ fita: "/\u02C8fita/",
3220
+ fitando: "/fi\u02C8t\xE3du/",
3221
+ flameja: "/fla\u02C8me\u0292a/",
3222
+ focinhando: "/fosi\u02C8\u0272\xE3du/",
3223
+ foi: "/foj/",
3224
+ fossem: "/\u02C8fos\u1EBDj/",
3225
+ fosses: "/\u02C8fosis/",
3226
+ fragorosas: "/f\u027Ea\u0261o\u02C8\u027Eozas/",
3227
+ francisco: "/f\u027E\xE3\u02C8sisku/",
3228
+ frente: "/\u02C8f\u027E\u1EBDt\u0283i/",
3229
+ f\u00FAlgidos: "/\u02C8fuw\u0292idus/",
3230
+ gente: "/\u02C8\u0292\u1EBDt\u0283i/",
3231
+ girassol: "/\u0292i\u027Ea\u02C8s\u0254w/",
3232
+ gosto: "/\u02C8\u0261ostu/",
3233
+ grandes: "/\u02C8\u0261\u027E\xE3d\u0292is/",
3234
+ havia: "/a\u02C8via/",
3235
+ hei: "/ej/",
3236
+ hesitei: "/ezi\u02C8tej/",
3237
+ homens: "/\u02C8\xF5m\u1EBDjs/",
3238
+ houve: "/\u02C8ovi/",
3239
+ individuais: "/\u0129d\u0292ividu\u02C8ajs/",
3240
+ instante: "/\u0129s\u02C8t\xE3t\u0283i/",
3241
+ instituir: "/\u0129st\u0283itu\u02C8i\u027E/",
3242
+ isso: "/\u02C8isu/",
3243
+ jos\u00E9: "/\u0292o\u02C8z\u025B/",
3244
+ liberdade: "/libe\u027E\u02C8dad\u0292i/",
3245
+ lisboa: "/liz\u02C8boa/",
3246
+ louca: "/\u02C8loka/",
3247
+ louvor: "/lo\u02C8vo\u027E/",
3248
+ lua: "/\u02C8lua/",
3249
+ maior: "/ma\u02C8jo\u027E/",
3250
+ mais: "/majs/",
3251
+ mal: "/maw/",
3252
+ margens: "/\u02C8ma\u027E\u0292\u1EBDjs/",
3253
+ maus: "/maws/",
3254
+ mesti\u00E7ara: "/mesti\u02C8sa\u027Ea/",
3255
+ m\u00E9todo: "/\u02C8m\u025Btudu/",
3256
+ mil: "/miw/",
3257
+ mim: "/m\u0129/",
3258
+ morder: "/mo\u027E\u02C8dex/",
3259
+ morte: "/\u02C8m\u0254\u027Et\u0283i/",
3260
+ morto: "/\u02C8mo\u027Etu/",
3261
+ mostra: "/\u02C8m\u0254st\u027Ea/",
3262
+ mulher: "/mu\u02C8\u028E\u025Bx/",
3263
+ nacional: "/nasio\u02C8naw/",
3264
+ nascimento: "/nasi\u02C8m\u1EBDtu/",
3265
+ nele: "/\u02C8neli/",
3266
+ nem: "/n\u1EBDj/",
3267
+ nesse: "/\u02C8nesi/",
3268
+ noite: "/\u02C8nojt\u0283i/",
3269
+ nonada: "/no\u02C8nada/",
3270
+ // nothing (Guimarães Rosa)
3271
+ nosso: "/\u02C8nosu/",
3272
+ novo: "/\u02C8novu/",
3273
+ num: "/n\u0169/",
3274
+ o: "/u/",
3275
+ \u00F3: "/\u0254/",
3276
+ ondas: "/\u02C8\xF5das/",
3277
+ onde: "/\u02C8\xF5d\u0292i/",
3278
+ ou: "/ow/",
3279
+ ousadia: "/oza\u02C8d\u0292ia/",
3280
+ outono: "/ow\u02C8tonu/",
3281
+ outra: "/\u02C8owt\u027Ea/",
3282
+ outro: "/\u02C8owt\u027Eu/",
3283
+ para: "/\u02C8pa\u027Ea/",
3284
+ parte: "/\u02C8pa\u027Et\u0283i/",
3285
+ passadeira: "/pasa\u02C8dej\u027Ea/",
3286
+ pode: "/\u02C8p\u0254d\u0292i/",
3287
+ portugal: "/po\u027Etu\u02C8\u0261aw/",
3288
+ posso: "/\u02C8posu/",
3289
+ posta: "/\u02C8p\u0254sta/",
3290
+ povo: "/\u02C8povu/",
3291
+ praia: "/\u02C8p\u027Eaja/",
3292
+ prometia: "/p\u027Eome\u02C8t\u0283ia/",
3293
+ quanto: "/\u02C8kw\xE3tu/",
3294
+ quarto: "/\u02C8kwa\u027Etu/",
3295
+ quer: "/k\u025B\u027E/",
3296
+ querer: "/ke\u02C8\u027Eex/",
3297
+ ramalhete: "/xama\u02C8\u028Eet\u0283i/",
3298
+ // mansion name (Eça de Queirós)
3299
+ real: "/xe\u02C8aw/",
3300
+ rebentavam: "/xeb\u1EBD\u02C8tav\xE3w/",
3301
+ rei: "/xej/",
3302
+ reino: "/\u02C8xejnu/",
3303
+ remota: "/xe\u02C8m\u0254ta/",
3304
+ retumbante: "/xet\u0169\u02C8b\xE3t\u0283i/",
3305
+ rir: "/xi\u027E/",
3306
+ riso: "/\u02C8xizu/",
3307
+ rosto: "/\u02C8xostu/",
3308
+ rua: "/\u02C8xua/",
3309
+ s: "/s\xE3w/",
3310
+ // abbreviation for São
3311
+ saber: "/sa\u02C8bex/",
3312
+ sal: "/saw/",
3313
+ sei: "/sej/",
3314
+ sem: "/s\u1EBDj/",
3315
+ sempre: "/\u02C8s\u1EBDp\u027Ei/",
3316
+ senhor: "/se\u02C8\u0272o\u027E/",
3317
+ sequer: "/se\u02C8k\u025B\u027E/",
3318
+ ser: "/\u02C8sex/",
3319
+ sim: "/s\u0129/",
3320
+ simples: "/\u02C8s\u0129plis/",
3321
+ Sob: "/s\u0254b/",
3322
+ sociais: "/sosi\u02C8ajs/",
3323
+ sol: "/s\u0254w/",
3324
+ sua: "/\u02C8sua/",
3325
+ sublimaram: "/subli\u02C8ma\u027E\xE3w/",
3326
+ tanto: "/\u02C8t\xE3tu/",
3327
+ taprobana: "/tap\u027Eo\u02C8b\xE3na/",
3328
+ // ancient Sri Lanka
3329
+ te: "/t\u0283i/",
3330
+ tempo: "/\u02C8t\u1EBDpu/",
3331
+ ter: "/tex/",
3332
+ tive: "/\u02C8t\u0283ivi/",
3333
+ "toldam-lhe": "/\u02C8towd\xE3w\u028Ei/",
3334
+ trapiche: "/t\u027Ea\u02C8pi\u0283i/",
3335
+ triste: "/\u02C8t\u027Eist\u0283i/",
3336
+ um: "/\u0169/",
3337
+ uma: "/\u02C8\u0169ma/",
3338
+ universo: "/uni\u02C8v\u025B\u027Esu/",
3339
+ uso: "/\u02C8uzu/",
3340
+ vale: "/\u02C8vali/",
3341
+ verde: "/\u02C8ve\u027Ed\u0292i/",
3342
+ verdes: "/\u02C8ve\u027Ed\u0292is/",
3343
+ vez: "/ves/",
3344
+ viagem: "/vi\u02C8a\u0292\u1EBDj/",
3345
+ "viv\xEA-lo": "/vi\u02C8velu/",
3346
+ "vou-me": "/\u02C8vomi/",
3347
+ zelo: "/\u02C8zelu/",
3348
+ zomba: "/\u02C8z\xF5ba/"
3349
+ };
3350
+
3351
+ // src/overrides/ro.ts
3352
+ var ro = { // Romanian (ro) word -> IPA override table; registered under the ro key of IPA_WORD_OVERRIDES
3353
+ // Keys are tried with the input's exact casing first and then lower-cased (see lookupIpa), so capitalised keys only match that exact casing. Values are slash-delimited IPA; U+02C8 marks primary stress.
3354
+ admirabil\u0103: "/admira\u02C8bil\u0259/",
3355
+ alearg\u0103: "/a\u02C8le\u032Farg\u0259/",
3356
+ alergi: "/a\u02C8lerd\u0292\u02B2/",
3357
+ Armadia: "/ar\u02C8madja/",
3358
+ b\u0103ie\u0219a\u0219: "/b\u0259je\u02C8\u0283a\u0283/",
3359
+ "ca-ntr-un": "/kan\u02C8trun/",
3360
+ C\u00E2rlibaba: "/k\u0268rli\u02C8baba/",
3361
+ ciob\u0103nei: "/t\u0283ob\u0259\u02C8nej/",
3362
+ ciople\u0219te: "/t\u0283o\u02C8ple\u0283te/",
3363
+ cirezilor: "/t\u0283i\u02C8rezilor/",
3364
+ cl\u0103di: "/kl\u0259\u02C8d\u02B2i/",
3365
+ codrul: "/\u02C8kodrul/",
3366
+ crengi: "/krend\u0292\u02B2/",
3367
+ "croie\u0219te-\u021Bi": "/kro\u02C8je\u0283tet\u0283i/",
3368
+ curat\u0103: "/ku\u02C8rat\u0259/",
3369
+ deshame: "/des\u02C8hame/",
3370
+ desprind: "/des\u02C8prind/",
3371
+ "De\u0219teapt\u0103-te": "/de\u0283\u02C8te\u032Fapt\u0259te/",
3372
+ fecioara: "/fet\u0283o\u02C8ara/",
3373
+ ferestrelor: "/fe\u02C8restrelor/",
3374
+ furtunoas\u0103: "/furtu\u02C8no\u032Fas\u0259/",
3375
+ gem\u00E2nd: "/d\u0292e\u02C8m\u0268nd/",
3376
+ grupurile: "/\u02C8\u0261rupurile/",
3377
+ homer: "/\u02C8homer/",
3378
+ humule\u0219tii: "/humu\u02C8le\u0283ti.i/",
3379
+ \u00EEncepusem: "/\u0268nt\u0283e\u02C8pusem/",
3380
+ \u00EEntinsese: "/\u0268ntin\u02C8sese/",
3381
+ \u00EEntov\u0103r\u0103\u0219ind: "/\u0268ntov\u0259r\u0259\u02C8\u0283ind/",
3382
+ intrat\u0103: "/in\u02C8trat\u0259/",
3383
+ Jidovi\u021Ba: "/\u0292ido\u02C8vits\u0259/",
3384
+ "l-a": "/la/",
3385
+ "le-nt\xE2lnesc": "/lent\u0268l\u02C8nesk/",
3386
+ luasem: "/lu\u02C8asem/",
3387
+ lustruie\u0219te: "/lustru\u02C8je\u0283te/",
3388
+ "m\xE2n\u0103-n": "/\u02C8m\u0268n\u0259n/",
3389
+ "m\xE2ndr\u0103-n": "/\u02C8m\u0268ndr\u0259n/",
3390
+ morminte: "/mor\u02C8minte/",
3391
+ muceg\u0103it\u0103: "/mut\u0283e\u0261\u0259\u02C8it\u0259/",
3392
+ na\u021Bional\u0103: "/natsio\u02C8nal\u0259/",
3393
+ Obloanele: "/oblo\u032Fa\u02C8nele/",
3394
+ "ochiu-adormit": "/\u02C8okju ador\u02C8mit/",
3395
+ paraschi: "/para\u02C8ski/",
3396
+ p\u0103rin\u021Bilor: "/p\u0259\u02C8rintsilor/",
3397
+ "pe-ntinsele": "/pentin\u02C8sele/",
3398
+ Prahovei: "/pra\u02C8hovej/",
3399
+ r\u0103sar: "/r\u0259\u02C8sar/",
3400
+ r\u0103zuie\u0219te: "/r\u0259zu\u02C8je\u0283te/",
3401
+ r\u0103zvr\u0103tit\u0103: "/r\u0259zvr\u0259\u02C8tit\u0259/",
3402
+ sc\u00E2r\u021B\u00E2iau: "/sk\u0268rts\u0268\u02C8jaw/",
3403
+ sc\u00E2r\u021B\u00E2ie: "/sk\u0268r\u02C8ts\u0268je/",
3404
+ scoal\u0103: "/\u02C8sko\u032Fal\u0259/",
3405
+ scrumit: "/skru\u02C8mit/",
3406
+ scuipat: "/skui\u02C8pat/",
3407
+ "se-nchine": "/sen\u02C8kine/",
3408
+ sfin\u021Bi: "/sfints\u02B2/",
3409
+ sicriele: "/si\u02C8krijele/",
3410
+ socoate: "/so\u02C8ko\u032Fate/",
3411
+ Some\u0219ul: "/\u02C8some\u0283ul/",
3412
+ \u0219oseaua: "/\u0283o\u02C8se\u032Fawa/",
3413
+ stam: "/stam/",
3414
+ strivesc: "/stri\u02C8vesk/",
3415
+ sublocotenent: "/sublokote\u02C8nent/",
3416
+ t\u0103ind: "/t\u0259\u02C8ind/",
3417
+ tainele: "/\u02C8tajnele/",
3418
+ "te-ad\xE2ncir\u0103": "/tead\u0268n\u02C8t\u0283ir\u0259/",
3419
+ "toate-s": "/\u02C8to\u032Fates/",
3420
+ tr\u0103snesc: "/tr\u0259s\u02C8nesk/",
3421
+ ucid: "/u\u02C8t\u0283id/",
3422
+ uneltele: "/u\u02C8neltele/",
3423
+ v\u0103ii: "/\u02C8v\u0259ji/",
3424
+ v\u00E2nturat\u0103: "/v\u0268ntu\u02C8rat\u0259/",
3425
+ ve\u0219tm\u00E2nt: "/ve\u0283t\u02C8m\u0268nt/",
3426
+ "vijelia-ngrozitoare": "/vi\u02C8\u0292eli.a \u014B\u0261rozi\u02C8to\u032Fare/",
3427
+ "Vino-n": "/vi\u02C8non/"
3428
+ };
3429
+
3430
+ // src/overrides/sv.ts
3431
+ var sv = { // Swedish (sv) word -> IPA override table; registered under the sv key of IPA_WORD_OVERRIDES
3432
+ \u00E4dlaste: "/\u02C8\u025B\u02D0dlast\u025B/",
3433
+ \u00E4ngder: "/\u02C8\u025B\u014B\u02D0d\u025Br/",
3434
+ // Entries are listed alphabetically with diacritics ignored. Values are slash-delimited IPA using U+02C8 / U+02CC for primary / secondary stress and U+02D0 for length.
3435
+ anl\u00E4nt: "/\u02C8an\u02D0l\u025Bnt/",
3436
+ arbetsrum: "/\u02C8ar\u02D0bets\u02CCr\u0275m/",
3437
+ arvedel: "/\u02C8ar\u02D0ve\u02CCde\u02D0l/",
3438
+ atenare: "/at\u025B\u02C8n\u0251\u02D0r\u025B/",
3439
+ \u00E5ttiotv\u00E5: "/\u02C8\u0254t\u02D0\u026A\u0254\u02CCtvo\u02D0/",
3440
+ \u00E5ttitalet: "/\u02C8\u0254t\u02D0\u026A\u02CCt\u0251\u02D0l\u025Bt/",
3441
+ barnkullarnas: "/\u02C8b\u0251\u02D0\u0273\u02CCk\u0275l\u02D0arnas/",
3442
+ // Capitalised keys (such as the next one) only match input with exactly that casing; see the lookup order in lookupIpa.
3443
+ Birck: "/b\u026Ark/",
3444
+ b\u00F6nderna: "/\u02C8b\u0153nd\u025B\u0273a/",
3445
+ brynja: "/\u02C8bryn\u02D0ja/",
3446
+ bryt: "/bry\u02D0t/",
3447
+ d\u00E4rn\u00E4st: "/\u02C8d\xE6\u02D0\u0273\u025Bst/",
3448
+ dimmors: "/\u02C8d\u026Am\u02D0\u0254\u0282/",
3449
+ eget: "/\u02C8e\u02D0\u0261\u025Bt/",
3450
+ f\u00E5gels\u00E5ng: "/\u02C8fo\u02D0\u0261\u025Bl\u02CCs\u0254\u014B\u02D0/",
3451
+ fj\u00E4llh\u00F6ga: "/\u02C8fj\u025Bl\u02D0\u02CCh\xF8\u02D0\u0261a/",
3452
+ fjol\u00E5rets: "/\u02C8fju\u02D0l\u02CCo\u02D0r\u025Bts/",
3453
+ fornstora: "/\u02C8f\u0254\u02D0\u0273\u02CCstu\u02D0ra/",
3454
+ f\u00F6rt\u00E4tas: "/f\u0153r\u02C8t\u025B\u02D0tas/",
3455
+ fredagskv\u00E4ll: "/\u02C8fre\u02D0da\u0261s\u02CCkv\u025Bl\u02D0/",
3456
+ gamla: "/\u02C8\u0261am\u02D0la/",
3457
+ gl\u00E4djerika: "/\u02C8\u0261l\u025Bd\u02D0j\u025B\u02CCri\u02D0ka/",
3458
+ haga: "/\u02C8h\u0251\u02D0\u0261a/",
3459
+ // place name
3460
+ halvm\u00F6rkret: "/\u02C8halv\u02CCm\u0153rkr\u025Bt/",
3461
+ "hedeby-\xF6n": "/\u02C8he\u02D0d\u025Bby\u02D0\u02CC\xF8\u02D0n/",
3462
+ // place name
3463
+ henrik: "/\u02C8h\u025Bnr\u026Ak/",
3464
+ // proper name
3465
+ h\u00F6llo: "/\u02C8h\xF8l\u02D0\u0254/",
3466
+ improvisationer: "/\u026Ampr\u0254v\u026Asa\u02C8\u0267u\u02D0n\u025Br/",
3467
+ Ingmarss\u00F6nerna: "/\u02C8\u026A\u014B\u02D0mar\u02CCs\xF8\u02D0n\u025B\u0273a/",
3468
+ j\u00E4ttestaden: "/\u02C8j\u025Bt\u02D0\u025B\u02CCst\u0251\u02D0d\u025Bn/",
3469
+ kriminalkommissarie: "/kr\u026Am\u026A\u02C8n\u0251\u02D0lk\u0254m\u026A\u02CCs\u0251\u02D0r\u026A\u025B/",
3470
+ kristina: "/kr\u026A\u02C8sti\u02D0na/",
3471
+ // proper name
3472
+ kv\u00E4llsm\u00F6rkret: "/\u02C8kv\u025Bl\u02D0s\u02CCm\u0153rkr\u025Bt/",
3473
+ l\u00E5ngstrump: "/\u02C8l\u0254\u014B\u02D0\u02CCstr\u0275mp/",
3474
+ linh\u00E5rig: "/\u02C8li\u02D0n\u02CCho\u02D0r\u026A\u0261/",
3475
+ ljusl\u00E5gor: "/\u02C8j\u0289\u02D0s\u02CClo\u02D0\u0261\u0254r/",
3476
+ l\u00F6vsamlingar: "/\u02C8l\xF8\u02D0v\u02CCsam\u02D0l\u026A\u014B\u02D0ar/",
3477
+ lyfte: "/\u02C8l\u028Fft\u025B/",
3478
+ lyftes: "/\u02C8l\u028Fft\u025Bs/",
3479
+ m\u00E4nniskor: "/\u02C8m\u025Bn\u02D0\u026A\u02CC\u0267\u0254r/",
3480
+ m\u00E4nniskors: "/\u02C8m\u025Bn\u02D0\u026A\u02CC\u0267\u0254\u0282/",
3481
+ Martin: "/\u02C8mar\u02D0t\u026An/",
3482
+ m\u00E4ssfall: "/\u02C8m\u025Bs\u02D0\u02CCfal\u02D0/",
3483
+ mosebacke: "/\u02C8mu\u02D0s\u025B\u02CCbak\u02D0\u025B/",
3484
+ new: "/nju\u02D0/",
3485
+ nilsson: "/\u02C8n\u026Al\u02D0s\u0254n/",
3486
+ // proper name
3487
+ nordamerika: "/\u02C8nu\u02D0\u0256a\u02CCme\u02D0r\u026Aka/",
3488
+ numret: "/\u02C8n\u0275m\u02D0r\u025Bt/",
3489
+ nytt: "/n\u028Ft\u02D0/",
3490
+ odygd: "/\u02C8u\u02D0\u02CCdy\u0261d/",
3491
+ ofruktsamt: "/\u02C8u\u02D0\u02CCfr\u0275ktsamt/",
3492
+ \u00F6mt\u00E5ligare: "/\u02C8\xF8m\u02D0\u02CCto\u02D0l\u026A\u0261ar\u025B/",
3493
+ \u00F6ppnad: "/\u02C8\u0153p\u02D0nad/",
3494
+ orimligheter: "/u\u02D0\u02C8ri\u02D0ml\u026A\u0261\u02CChe\u02D0t\u025Br/",
3495
+ oskar: "/\u02C8\u0254skar/",
3496
+ // proper name
3497
+ page: "/\u02C8p\u0251\u02D0\u0267\u0259/",
3498
+ p\u00E4rontr\u00E4d: "/\u02C8p\xE6\u02D0r\u0254n\u02CCtr\u025B\u02D0d/",
3499
+ paulun: "/pa\u02C8l\u0289\u02D0n/",
3500
+ presentpappret: "/pr\u025B\u02C8s\u025Bnt\u02CCpap\u02D0r\u025Bt/",
3501
+ proportionerad: "/pr\u0254p\u0254\u0282\u0254\u02C8ne\u02D0rad/",
3502
+ roligast: "/\u02C8ru\u02D0l\u026A\u0261ast/",
3503
+ r\u00F6tm\u00E5nadshetta: "/\u02C8r\xF8\u02D0t\u02CCmo\u02D0nads\u02CCh\u025Bt\u02D0a/",
3504
+ runor: "/\u02C8r\u0289\u02D0n\u0254r/",
3505
+ saffransblommorna: "/\u02C8saf\u02D0rans\u02CCbl\u0254m\u02D0\u0254\u0273a/",
3506
+ scharlakanssol: "/\u02C8\u0267\u0251\u02D0rlakans\u02CCsu\u02D0l/",
3507
+ si: "/si\u02D0/",
3508
+ siljan: "/\u02C8s\u026Al\u02D0jan/",
3509
+ silverne: "/\u02C8s\u026Alv\u025B\u0273\u025B/",
3510
+ sk\u00E4ms: "/\u0267\u025Bm\u02D0s/",
3511
+ sn\u00F6dropparna: "/\u02C8sn\xF8\u02D0\u02CCdr\u0254p\u02D0a\u0273a/",
3512
+ sommarmorgon: "/\u02C8s\u0254mar\u02CCm\u0254r\u0261\u0254n/",
3513
+ spiralgalax: "/sp\u026A\u02C8r\u0251\u02D0l\u0261a\u02CClaks/",
3514
+ t\u00E4r: "/t\u025B\u02D0r/",
3515
+ telefonsamtal: "/t\u025Bl\u025B\u02C8fo\u02D0n\u02CCsam\u02CCt\u0251\u02D0l/",
3516
+ tillaga: "/t\u026Al\u02C8l\u0251\u02D0\u0261a/",
3517
+ tjugosex: "/\u02C8\u0255\u0289\u02D0\u0261\u0254\u02CCs\u025Bks/",
3518
+ upp\u00E5: "/\u02C8\u0275p\u02D0o\u02D0/",
3519
+ uppgr\u00E4vda: "/\u02C8\u0275p\u02D0\u02CC\u0261r\u025B\u02D0vda/",
3520
+ utflyttade: "/\u02C8\u0289\u02D0t\u02CCfl\u028Ft\u02D0ad\u025B/",
3521
+ utsiktspunkt: "/\u02C8\u0289\u02D0t\u02CCs\u026Akts\u02CCp\u0275\u014Bkt/",
3522
+ v\u00E4lvuxen: "/\u02C8v\u025B\u02D0l\u02CCv\u0275ks\u025Bn/",
3523
+ vanger: "/\u02C8va\u014B\u02D0\u025Br/",
3524
+ // surname
3525
+ verklighetsgrund: "/\u02C8v\u025B\u02D0kl\u026A\u0261\u02CChe\u02D0ts\u02CC\u0261r\u0275nd/",
3526
+ vilka: "/\u02C8v\u026Al\u02D0ka/",
3527
+ vingad: "/\u02C8v\u026A\u014B\u02D0ad/",
3528
+ voro: "/\u02C8vu\u02D0r\u0254/",
3529
+ york: "/j\u0254rk/",
3530
+ zephyrens: "/s\u025B\u02C8fy\u02D0r\u025Bns/"
3531
+ };
3532
+
3533
+ // src/overrides/sw.ts
3534
+ var sw = { // Swahili (sw) word -> IPA override table; registered under the sw key of IPA_WORD_OVERRIDES
3535
+ // G2P handles most Swahili words. These overrides cover edge cases
3536
+ // that G2P can't handle (Arabic loanwords, foreign names).
3537
+ // Arabic loanwords containing "qi" (the next two entries): G2P doesn't have a rule for 'q'
3538
+ sadiqi: "/sa\u02C8di\u02D0ki/",
3539
+ ulithiqi: "/uli\u02C8\u03B8iki/",
3540
+ // Arabic "kh" = /x/ (voiceless velar fricative), not /kh/
3541
+ usiikhini: "/usii\u02C8xini/",
3542
+ // Foreign name: 'c' alone is not a Swahili phoneme. The key is capitalised, so it only matches input with exactly that casing.
3543
+ Victoria: "/vikt\u0254\u02C8\u027Eia/"
3544
+ };
3545
+
3546
+ // src/overrides/vi.ts
3547
+ var vi = { // Vietnamese (vi) word -> IPA override table; each value ends in tone-bar modifier letters (U+02E7, U+02E5)
3548
+ chinh: "/t\u0255\u026A\u014B\u02E7/",
3549
+ li\u00EAu: "/li\u0259w\u02E7/",
3550
+ ng\u00E1c: "/\u014Bak\u02E7\u02E5/",
3551
+ tru\u00E2n: "/\u0288w\u0259n\u02E7/"
3552
+ };
3553
+
3554
+ // src/foreign.ts
3555
+ var IPA_SLASH_RE = /^\/|\/$/g; // a slash at the very start or very end of a transcription; global, so replaceAll strips both
3556
+ var WHITESPACE_SPLIT_RE = /(\s+)/; // capturing group, so split() keeps each whitespace run as its own array item
3557
+ var WHITESPACE_RE = /^\s+$/; // a segment consisting only of whitespace
3558
+ var LEADING_NON_LETTER_RE = /^[^\p{L}\p{M}]/u; // first code point is neither a letter nor a combining mark
3559
+ var TRAILING_NON_LETTER_RE = /[^\p{L}\p{M}]$/u; // last code point is neither a letter nor a combining mark
3560
+ var CONTRACTION_SPLIT_RE = /(?<=['-])|(?=['-])/; // zero-width split before and after every apostrophe or hyphen, so each mark becomes its own part
3561
/**
 * Word-granularity segmenter for the "km" locale, or `undefined` on runtimes
 * that do not provide `Intl.Segmenter`.
 */
var khmerSegmenter = typeof Intl !== "undefined" && typeof Intl.Segmenter === "function" ? new Intl.Segmenter("km", { granularity: "word" }) : void 0;

/**
 * Makes Khmer word boundaries explicit as ASCII spaces.
 *
 * Zero-width spaces (U+200B) are always converted to regular spaces. When a
 * segmenter is available, a space is additionally inserted between every two
 * adjacent word-like segments; all other segments (spaces, punctuation) are
 * copied through unchanged.
 *
 * @param {string} text Raw input text.
 * @returns {string} The text with word boundaries marked by spaces.
 */
function segmentKhmerText(text) {
  // Normalise before the capability check so the fallback path no longer
  // leaves U+200B embedded inside words (it is not matched by \s, so the
  // whitespace tokenizer downstream would never split on it).
  const normalized = text.replaceAll("\u200B", " ");
  if (khmerSegmenter === void 0) {
    return normalized;
  }
  let result = "";
  let previousWasWord = false;
  for (const seg of khmerSegmenter.segment(normalized)) {
    const isWord = seg.isWordLike === true;
    // Two word-like segments in a row means the segmenter found a boundary
    // that has no separator character in the text.
    if (previousWasWord && isWord) {
      result += " ";
    }
    result += seg.segment;
    previousWasWord = isWord;
  }
  return result;
}
3579
+ var LANGUAGES = [ // supported source languages as { code, label } pairs, ordered alphabetically by label
3580
+ { code: "ar", label: "Arabic" },
3581
+ { code: "yue", label: "Cantonese" }, // has no table in IPA_WORD_OVERRIDES
3582
+ { code: "nl", label: "Dutch" },
3583
+ { code: "eo", label: "Esperanto" },
3584
+ { code: "fi", label: "Finnish" },
3585
+ { code: "fr", label: "French" },
3586
+ { code: "de", label: "German" },
3587
+ { code: "is", label: "Icelandic" },
3588
+ { code: "ja", label: "Japanese" },
3589
+ { code: "km", label: "Khmer" },
3590
+ { code: "ko", label: "Korean" },
3591
+ { code: "ma", label: "Malay" }, // NOTE(review): ISO 639-1 for Malay is ms; ma matches the key used in IPA_WORD_OVERRIDES, so confirm all consumers before changing
3592
+ { code: "zh", label: "Mandarin" }, // has no table in IPA_WORD_OVERRIDES
3593
+ { code: "nb", label: "Norwegian" },
3594
+ { code: "or", label: "Odia" },
3595
+ { code: "fa", label: "Persian" },
3596
+ { code: "pt", label: "Portuguese" },
3597
+ { code: "ro", label: "Romanian" },
3598
+ { code: "es", label: "Spanish" },
3599
+ { code: "sw", label: "Swahili" },
3600
+ { code: "sv", label: "Swedish" },
3601
+ { code: "vi", label: "Vietnamese" }
3602
+ ];
3603
+ var IPA_WORD_OVERRIDES = { // language code -> per-word IPA override table; read by getIpaOverride and, for km, by lookupKhmerCompound
3604
+ ar,
3605
+ de,
3606
+ eo,
3607
+ es,
3608
+ fa,
3609
+ fi,
3610
+ fr,
3611
+ is,
3612
+ ja,
3613
+ km,
3614
+ ko,
3615
+ ma,
3616
+ nb,
3617
+ nl,
3618
+ or: or_, // the only entry whose key differs from the identifier it is bound to
3619
+ pt,
3620
+ ro,
3621
+ sv,
3622
+ sw,
3623
+ vi
3624
+ };
3625
/**
 * Converts an IPA transcription to Ingglish spelling.
 *
 * The enclosing slashes and every "." are removed before the transcription is
 * handed to `ipaToArpabet`, whose result is rendered by `arpabetToIngglish`.
 *
 * @param {string} ipa IPA transcription, optionally wrapped in slashes.
 * @returns The value produced by `arpabetToIngglish`.
 */
function ipaToIngglish(ipa) {
  const withoutSlashes = ipa.replaceAll(IPA_SLASH_RE, "");
  const withoutDots = withoutSlashes.replaceAll(".", "");
  return arpabetToIngglish(ipaToArpabet(withoutDots));
}
3630
/**
 * Resolves the IPA transcription for a single word.
 *
 * Sources are consulted in this order, and the first hit wins:
 *   1. per-language overrides (exact casing, then lower-cased);
 *   2. the dictionary, keyed by the word as given, lower-cased, title-cased,
 *      and lower-cased with diacritics stripped;
 *   3. for words containing U+00DF, the dictionary with it rewritten as "ss";
 *   4. for words containing a straight apostrophe, the dictionary with it
 *      rewritten as U+2019;
 *   5. the language's lemmatizer, if one is registered;
 *   6. Khmer compound decomposition (lang "km" only);
 *   7. the language's grapheme-to-phoneme converter, if one is registered.
 *
 * @param {{ entries: Record<string, string>, lang: string }} dict Dictionary and its language code.
 * @param {string} word Word to look up (no surrounding punctuation).
 * @returns {string | undefined} The transcription, or `undefined` when nothing matched.
 */
function lookupIpa(dict, word) {
  const { entries, lang } = dict;
  // Read own properties only, so words such as "constructor" or "toString"
  // cannot resolve to members inherited from Object.prototype.
  const ownEntry = (key) => Object.hasOwn(entries, key) ? entries[key] : void 0;
  const lower = word.toLowerCase();

  const override = getIpaOverride(lang, word) ?? getIpaOverride(lang, lower);
  // Callers apply string methods to the result, so anything else is not a hit.
  if (typeof override === "string" && override !== "") {
    return override;
  }

  const title = lower.charAt(0).toUpperCase() + lower.slice(1);
  const stripped = stripDiacritics(lower);
  const direct = ownEntry(word) ?? ownEntry(lower) ?? ownEntry(title) ?? ownEntry(stripped);
  if (direct) {
    return direct;
  }

  if (lower.includes("\xDF")) {
    const ssLower = lower.replaceAll("\xDF", "ss");
    const ssTitle = ssLower.charAt(0).toUpperCase() + ssLower.slice(1);
    const ssResult = ownEntry(ssLower) ?? ownEntry(ssTitle);
    // Only return on a hit: a miss must still fall through to the
    // lemmatizer and G2P fallbacks below instead of ending the lookup.
    if (ssResult) {
      return ssResult;
    }
  }

  if (word.includes("'")) {
    const curly = word.replaceAll("'", "\u2019");
    const curlyResult = ownEntry(curly) ?? ownEntry(curly.toLowerCase());
    if (curlyResult) {
      return curlyResult;
    }
  }

  if (Object.hasOwn(LEMMATIZERS, lang)) {
    const lemmaResult = LEMMATIZERS[lang](entries, lower);
    if (lemmaResult) {
      return lemmaResult;
    }
  }

  if (lang === "km") {
    const compound = lookupKhmerCompound(entries, word);
    if (compound !== void 0) {
      return compound;
    }
  }

  if (Object.hasOwn(G2P_CONVERTERS, lang)) {
    return G2P_CONVERTERS[lang](lower);
  }
  return void 0;
}
3672
+ var khmerMergedDict; // lazily built by lookupKhmerCompound: the dictionary entries with the km overrides applied on top
3672
+ var khmerDictKeys; // keys of khmerMergedDict sorted longest-first; also filled in by lookupKhmerCompound
3674
/**
 * Guarantees that a phoneme sequence carries a stress mark.
 *
 * If any vowel already reports a stress (per `getStress2`), the input array is
 * returned as-is. Otherwise a copy is returned in which "1" has been appended
 * to the last vowel; a sequence without vowels is copied unchanged.
 *
 * @param {string[]} arpabet Phoneme sequence.
 * @returns {string[]} The input array itself, or a stressed copy.
 */
function applyDefaultStress(arpabet) {
  const noVowelIsStressed = arpabet.every(
    (phoneme) => !isVowel2(phoneme) || getStress2(phoneme) === null
  );
  if (!noVowelIsStressed) {
    return arpabet;
  }
  const stressed = [...arpabet];
  const lastVowel = stressed.findLastIndex((phoneme) => isVowel2(phoneme));
  if (lastVowel !== -1) {
    stressed[lastVowel] = stressed[lastVowel] + "1";
  }
  return stressed;
}
3688
/**
 * Splits `remaining` into a sequence of dictionary keys by depth-first search
 * and returns the transcriptions of those keys.
 *
 * Keys are tried in the order given by `keys` (callers pass them
 * longest-first), and the first complete decomposition found is returned.
 *
 * @param {Record<string, string>} dict Key -> transcription map.
 * @param {string[]} keys Candidate keys, in the order they should be tried.
 * @param {string} remaining Text that still has to be covered.
 * @param {string[]} acc Transcriptions collected so far; returned unchanged when `remaining` is empty.
 * @param {Set<string>} [deadEnds] Suffixes already proven undecomposable. Internal; callers normally omit it.
 * @returns {string[] | null} The transcriptions in order, or `null` if no decomposition exists.
 */
function decomposeKhmer(dict, keys, remaining, acc, deadEnds = new Set()) {
  if (remaining.length === 0) {
    return acc;
  }
  // Whether a suffix can be decomposed does not depend on `acc`, so a suffix
  // that failed once never needs to be searched again. Without this the
  // backtracking is exponential on inputs such as "aaaa...b".
  if (deadEnds.has(remaining)) {
    return null;
  }
  for (const key of keys) {
    // An empty key would match without consuming anything and recurse forever.
    if (key.length === 0 || !remaining.startsWith(key)) {
      continue;
    }
    const ipa = dict[key];
    if (ipa === void 0) {
      continue;
    }
    const result = decomposeKhmer(dict, keys, remaining.slice(key.length), [...acc, ipa], deadEnds);
    if (result !== null) {
      return result;
    }
  }
  deadEnds.add(remaining);
  return null;
}
3703
/**
 * Returns the hand-written IPA override for `word` in language `lang`.
 *
 * Only own properties are consulted. The override tables are plain object
 * literals, so an unguarded read for a word such as "constructor" would
 * return a member inherited from Object.prototype instead of `undefined`.
 *
 * @param {string} lang Language code (a key of IPA_WORD_OVERRIDES).
 * @param {string} word Word exactly as it should be matched against the table keys.
 * @returns {string | undefined} The override, or `undefined` when there is none.
 */
function getIpaOverride(lang, word) {
  if (!Object.hasOwn(IPA_WORD_OVERRIDES, lang)) {
    return void 0;
  }
  const table = IPA_WORD_OVERRIDES[lang];
  if (table == null || !Object.hasOwn(table, word)) {
    return void 0;
  }
  return table[word];
}
3706
/**
 * Renders an IPA transcription in the requested output format.
 *
 * Enclosing slashes and "." characters are removed, the result is converted
 * with `ipaToArpabet` (using the language's entry in IPA_LANGUAGE_OVERRIDES
 * when `lang` is truthy), passed through `applyDefaultStress`, and finally
 * formatted by `arpabetToFormat` with `disableRColoring` set.
 *
 * @param {string} ipa IPA transcription, optionally wrapped in slashes.
 * @param {string} format Output format name.
 * @param {string} [lang] Language code used to select conversion overrides.
 * @returns The value produced by `arpabetToFormat`.
 */
function ipaToFormat(ipa, format, lang) {
  const bare = ipa.replaceAll(IPA_SLASH_RE, "").replaceAll(".", "");
  let languageOverrides;
  if (lang) {
    languageOverrides = IPA_LANGUAGE_OVERRIDES[lang];
  }
  const phonemes = ipaToArpabet(bare, languageOverrides);
  const stressed = applyDefaultStress(phonemes);
  return arpabetToFormat(stressed, format, { disableRColoring: true });
}
3712
// The `entries` object that khmerMergedDict / khmerDictKeys were built from.
var khmerMergedSource;

/**
 * Transcribes a Khmer compound by splitting it into known dictionary words.
 *
 * The search dictionary is `entries` with the km overrides applied on top.
 * It is cached in module state together with its longest-first key list,
 * and rebuilt whenever a different `entries` object is passed, so a cache
 * built for one dictionary is never reused for another.
 *
 * @param {Record<string, string>} entries Dictionary entries (word -> IPA).
 * @param {string} word Unsegmented Khmer word.
 * @returns {string | undefined} The parts' transcriptions with enclosing slashes removed and
 *   joined by spaces, or `undefined` when the word cannot be split into at least two known parts.
 */
function lookupKhmerCompound(entries, word) {
  if (khmerMergedDict === void 0 || khmerMergedSource !== entries) {
    // Later spreads win, so overrides replace dictionary entries with the same key.
    khmerMergedDict = { ...entries, ...IPA_WORD_OVERRIDES.km };
    khmerDictKeys = Object.keys(khmerMergedDict).toSorted((a, b) => b.length - a.length);
    khmerMergedSource = entries;
  }
  const parts = decomposeKhmer(khmerMergedDict, khmerDictKeys, word, []);
  // A single part means the word itself is a key, which is not a compound.
  if (parts === null || parts.length < 2) {
    return void 0;
  }
  return parts.map((p) => p.replaceAll(IPA_SLASH_RE, "")).join(" ");
}
3729
+ var NOT_FOUND_MARKER = "\uFFFD"; // U+FFFD replacement character; prefixed to words for which no transcription was found
3729
+ var SENTENCE_END_RE = /[.!?。!?]$/; // string ends in a sentence terminator. NOTE(review): ! and ? appear twice in the class; possibly the second pair was meant to be the full-width forms. Confirm against the original source.
3731
/**
 * Translates foreign-language text and returns it as a single string.
 *
 * Word tokens that were not matched are emitted as NOT_FOUND_MARKER followed
 * by their original text; every other token contributes its translation.
 *
 * @param {string} text Text to translate.
 * @param {{ entries: object, lang: string }} dict Dictionary for the text's language.
 * @param {string} [format="ingglish"] Output format name.
 * @returns {string} The translated text.
 */
function translateForeign(text, dict, format = "ingglish") {
  const pieces = [];
  for (const token of translateForeignWithMapping(text, dict, format)) {
    const unresolvedWord = !token.matched && token.isWord;
    pieces.push(unresolvedWord ? NOT_FOUND_MARKER + token.original : token.translated);
  }
  return pieces.join("");
}
3735
/**
 * Translates foreign-language text token by token.
 *
 * The text is split on whitespace (after Khmer segmentation for lang "km" and
 * apostrophe normalisation). Each non-whitespace segment is stripped of
 * leading and trailing non-letter characters, and the remaining core is
 * resolved as follows:
 *   - a direct `lookupIpa` hit is rendered with `ipaToFormat`;
 *   - otherwise the core is split at apostrophes and hyphens; if every part
 *     resolves, parts are concatenated per hyphen-separated group, rendered,
 *     and re-joined with "-";
 *   - if only some parts resolve, the rendered parts are kept, unresolved
 *     parts are kept verbatim, and the token is flagged `matched: false`;
 *   - otherwise the segment is passed through with `matched: false`.
 *
 * @param {string} text Text to translate.
 * @param {{ entries: object, lang: string }} dict Dictionary for the text's language.
 * @param {string} [format="ingglish"] Output format name.
 * @returns {{ isWord: boolean, matched: boolean, original: string, translated: string }[]}
 *   One token per whitespace run and per non-empty segment, in input order.
 */
function translateForeignWithMapping(text, dict, format = "ingglish") {
  const { lang } = dict;
  const isJoiner = (piece) => piece === "'" || piece === "-";
  const source = lang === "km" ? segmentKhmerText(text) : text;
  const out = [];
  let sentenceStart = true;

  for (const chunk of normalizeApostrophes(source).split(WHITESPACE_SPLIT_RE)) {
    if (WHITESPACE_RE.test(chunk)) {
      out.push({ isWord: false, matched: true, original: chunk, translated: chunk });
      continue;
    }
    if (!chunk) {
      continue;
    }

    // Peel punctuation off both ends, one UTF-16 unit at a time.
    let prefix = "";
    let suffix = "";
    let core = chunk;
    while (core.length > 0 && LEADING_NON_LETTER_RE.test(core)) {
      prefix += core[0];
      core = core.slice(1);
    }
    while (core.length > 0 && TRAILING_NON_LETTER_RE.test(core)) {
      suffix = core.at(-1) + suffix;
      core = core.slice(0, -1);
    }
    if (!core) {
      out.push({ isWord: false, matched: true, original: chunk, translated: chunk });
      continue;
    }

    let casePattern = detectCasePattern(core);
    const preservesCase = getFormatPreservesCase(format);
    // Words in caseless scripts are capitalised when they open a sentence.
    if (sentenceStart && preservesCase && casePattern === "lower" && isCaselessWord(core)) {
      casePattern = "capitalized";
    }
    sentenceStart = SENTENCE_END_RE.test(suffix);

    const wholeIpa = lookupIpa(dict, core);
    if (wholeIpa) {
      const rendered = ipaToFormat(wholeIpa, format, lang);
      const shaped = preservesCase ? applyCasePattern(rendered, casePattern) : rendered;
      out.push({
        isWord: true,
        matched: true,
        original: chunk,
        translated: prefix + shaped + suffix
      });
      continue;
    }

    const pieces = core.split(CONTRACTION_SPLIT_RE);
    if (pieces.length > 1) {
      // Transcription per piece; joiners stay undefined. A piece followed by
      // an apostrophe is tried with the apostrophe attached first.
      const pieceIpas = pieces.map((piece, idx) => {
        if (isJoiner(piece)) {
          return void 0;
        }
        const elided = pieces[idx + 1] === "'" ? lookupIpa(dict, piece + "'") : void 0;
        return elided ?? lookupIpa(dict, piece);
      });
      const everyPieceFound = pieces.every(
        (piece, idx) => isJoiner(piece) || pieceIpas[idx] !== void 0
      );

      if (everyPieceFound) {
        // Apostrophes fuse neighbours into one group; hyphens start a new one.
        const groups = [[]];
        pieces.forEach((piece, idx) => {
          if (piece === "'") {
            return;
          }
          if (piece === "-") {
            groups.push([]);
            return;
          }
          groups.at(-1).push(pieceIpas[idx].replaceAll(IPA_SLASH_RE, "").replaceAll(".", ""));
        });
        const rendered = groups.map((group) => ipaToFormat(group.join(""), format, lang)).join("-");
        const shaped = preservesCase ? applyCasePattern(rendered, casePattern) : rendered;
        out.push({
          isWord: true,
          matched: true,
          original: chunk,
          translated: prefix + shaped + suffix
        });
        continue;
      }

      // Partial match: render what resolved, mark the rest.
      let onFirstPiece = true;
      const renderedPieces = pieces.map((piece, idx) => {
        if (isJoiner(piece)) {
          return piece;
        }
        const pieceCase = onFirstPiece ? casePattern : detectCasePattern(piece);
        onFirstPiece = false;
        const pieceIpa = pieceIpas[idx];
        if (!pieceIpa) {
          return NOT_FOUND_MARKER + piece;
        }
        const rendered = ipaToFormat(pieceIpa, format, lang);
        return preservesCase ? applyCasePattern(rendered, pieceCase) : rendered;
      });
      const somethingResolved = renderedPieces.some(
        (rendered, idx) => !isJoiner(pieces[idx]) && !rendered.startsWith(NOT_FOUND_MARKER)
      );
      if (somethingResolved) {
        const combined = renderedPieces.map((rendered) => rendered.replaceAll(NOT_FOUND_MARKER, "")).join("");
        out.push({
          isWord: true,
          matched: false,
          original: chunk,
          translated: prefix + combined + suffix
        });
        continue;
      }
    }

    out.push({ isWord: true, matched: false, original: chunk, translated: chunk });
  }
  return out;
}
3854
/**
 * Reports whether a word starts with a character that has no case
 * distinction, i.e. whose upper- and lower-case forms are identical.
 *
 * The first full code point is inspected rather than the first UTF-16 code
 * unit, so cased letters outside the Basic Multilingual Plane are not
 * mistaken for caseless ones.
 *
 * @param {string} word Word to inspect.
 * @returns {boolean} `true` if the first character is caseless; `false` for cased characters and for the empty string.
 */
function isCaselessWord(word) {
  const codePoint = word.codePointAt(0);
  if (codePoint === void 0) {
    return false;
  }
  const first = String.fromCodePoint(codePoint);
  return first.toUpperCase() === first.toLowerCase();
}
3858
+
3859
+ // src/index.ts
3860
/**
 * Registers the "ipa" output format with the phoneme library.
 *
 * The format converts with `arpabetToIPARaw`, is flagged as Latin script,
 * joins with a single space, is labelled "IPA", and does not preserve case.
 */
function registerIPA() {
  const ipaFormat = {
    forward: arpabetToIPARaw,
    isLatinScript: true,
    joinSeparator: " ",
    label: "IPA",
    preservesCase: false
  };
  registerFormat("ipa", ipaFormat);
}
3869
+ export {
3870
+ LANGUAGES,
3871
+ NOT_FOUND_MARKER,
3872
+ arpabetPhonemeToIPA,
3873
+ arpabetToIPARaw,
3874
+ ipaToArpabetClean,
3875
+ ipaToIngglish,
3876
+ lookupIpa,
3877
+ registerIPA,
3878
+ segmentKhmerText,
3879
+ translateForeign,
3880
+ translateForeignWithMapping
3881
+ };