@ingglish/ipa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3905 -0
- package/dist/index.d.cts +66 -0
- package/dist/index.d.ts +66 -0
- package/dist/index.js +3881 -0
- package/package.json +51 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,3905 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
LANGUAGES: () => LANGUAGES,
|
|
24
|
+
NOT_FOUND_MARKER: () => NOT_FOUND_MARKER,
|
|
25
|
+
arpabetPhonemeToIPA: () => arpabetPhonemeToIPA,
|
|
26
|
+
arpabetToIPARaw: () => arpabetToIPARaw,
|
|
27
|
+
ipaToArpabetClean: () => ipaToArpabetClean,
|
|
28
|
+
ipaToIngglish: () => ipaToIngglish,
|
|
29
|
+
lookupIpa: () => lookupIpa,
|
|
30
|
+
registerIPA: () => registerIPA,
|
|
31
|
+
segmentKhmerText: () => segmentKhmerText,
|
|
32
|
+
translateForeign: () => translateForeign,
|
|
33
|
+
translateForeignWithMapping: () => translateForeignWithMapping
|
|
34
|
+
});
|
|
35
|
+
module.exports = __toCommonJS(index_exports);
|
|
36
|
+
var import_phonemes4 = require("@ingglish/phonemes");
|
|
37
|
+
|
|
38
|
+
// src/to-ipa.ts
|
|
39
|
+
var import_phonemes = require("@ingglish/phonemes");
|
|
40
|
+
|
|
41
|
+
// src/ipa-maps.ts
|
|
42
|
+
var IPA_VOWEL_MAP = {
|
|
43
|
+
// Monophthongs
|
|
44
|
+
AA: "\u0251",
|
|
45
|
+
// father, hot, bother
|
|
46
|
+
AE: "\xE6",
|
|
47
|
+
// cat, bat, had
|
|
48
|
+
AH: "\u028C",
|
|
49
|
+
// but, cup, son (stressed)
|
|
50
|
+
AO: "\u0254",
|
|
51
|
+
// thought, caught, law
|
|
52
|
+
// Diphthongs
|
|
53
|
+
AW: "a\u028A",
|
|
54
|
+
// cow, how, out
|
|
55
|
+
AY: "a\u026A",
|
|
56
|
+
// my, eye, time
|
|
57
|
+
EH: "\u025B",
|
|
58
|
+
// bed, red, said
|
|
59
|
+
ER: "\u025D",
|
|
60
|
+
// bird, her, nurse
|
|
61
|
+
EY: "e\u026A",
|
|
62
|
+
// say, day, make
|
|
63
|
+
IH: "\u026A",
|
|
64
|
+
// bit, sit, gym
|
|
65
|
+
IY: "i",
|
|
66
|
+
// bee, see, machine
|
|
67
|
+
OW: "o\u028A",
|
|
68
|
+
// go, show, coat
|
|
69
|
+
OY: "\u0254\u026A",
|
|
70
|
+
// boy, toy, coin
|
|
71
|
+
UH: "\u028A",
|
|
72
|
+
// book, put, could
|
|
73
|
+
UW: "u"
|
|
74
|
+
// too, blue, food
|
|
75
|
+
};
|
|
76
|
+
var IPA_CONSONANT_MAP = {
|
|
77
|
+
// Stops (plosives)
|
|
78
|
+
B: "b",
|
|
79
|
+
// Affricates
|
|
80
|
+
CH: "t\u0283",
|
|
81
|
+
// chat, batch
|
|
82
|
+
D: "d",
|
|
83
|
+
// Fricatives
|
|
84
|
+
DH: "\xF0",
|
|
85
|
+
// the, this (voiced dental)
|
|
86
|
+
F: "f",
|
|
87
|
+
G: "\u0261",
|
|
88
|
+
// Note: IPA uses ɡ (U+0261), not g
|
|
89
|
+
HH: "h",
|
|
90
|
+
JH: "d\u0292",
|
|
91
|
+
// just, edge
|
|
92
|
+
K: "k",
|
|
93
|
+
// Liquids
|
|
94
|
+
L: "l",
|
|
95
|
+
// Nasals
|
|
96
|
+
M: "m",
|
|
97
|
+
N: "n",
|
|
98
|
+
NG: "\u014B",
|
|
99
|
+
// sing, thing
|
|
100
|
+
P: "p",
|
|
101
|
+
R: "\u0279",
|
|
102
|
+
// alveolar approximant
|
|
103
|
+
S: "s",
|
|
104
|
+
SH: "\u0283",
|
|
105
|
+
// ship
|
|
106
|
+
T: "t",
|
|
107
|
+
TH: "\u03B8",
|
|
108
|
+
// think (voiceless dental)
|
|
109
|
+
V: "v",
|
|
110
|
+
// Glides (semivowels)
|
|
111
|
+
W: "w",
|
|
112
|
+
Y: "j",
|
|
113
|
+
Z: "z",
|
|
114
|
+
ZH: "\u0292"
|
|
115
|
+
// measure, beige
|
|
116
|
+
};
|
|
117
|
+
var ARPABET_TO_IPA_MAP = {
|
|
118
|
+
...IPA_VOWEL_MAP,
|
|
119
|
+
...IPA_CONSONANT_MAP
|
|
120
|
+
};
|
|
121
|
+
var IPA_VARIANT_MAP = {
|
|
122
|
+
a: "AE",
|
|
123
|
+
// plain /a/ — maps to "a" (cat) for recognizable foreign word output
|
|
124
|
+
e: "EH",
|
|
125
|
+
// plain /e/ — mid front vowel, like "bed"
|
|
126
|
+
\u0259: "AH0",
|
|
127
|
+
// schwa (unstressed) — forward map uses ʌ→AH for the stressed variant
|
|
128
|
+
\u025A: "ER",
|
|
129
|
+
// r-colored schwa variant — forward map uses ɝ→ER
|
|
130
|
+
g: "G",
|
|
131
|
+
// ASCII g — forward map uses ɡ (U+0261)
|
|
132
|
+
\u026B: "L",
|
|
133
|
+
// dark l
|
|
134
|
+
o: "OW",
|
|
135
|
+
// some IPA uses plain o for goat vowel
|
|
136
|
+
r: "R"
|
|
137
|
+
// common variant — forward map uses ɹ (alveolar approximant)
|
|
138
|
+
// Note: IPA /y/ is the close front rounded vowel (French "tu", German "über").
|
|
139
|
+
// It's handled in IPA_APPROXIMATION_MAP as y→UW. The consonant /j/ (palatal
|
|
140
|
+
// approximant) is already mapped via the forward map reversal (j→Y).
|
|
141
|
+
};
|
|
142
|
+
var IPA_APPROXIMATION_MAP = {
|
|
143
|
+
// --- Open vowels ---
|
|
144
|
+
\u00E4: "AA",
|
|
145
|
+
// /ä/ open central — IPA diacritic variant ≈ "father"
|
|
146
|
+
// --- Open vowel diphthongs (Finnish, etc.) ---
|
|
147
|
+
// English uses /aɪ/ and /aʊ/ (with plain 'a'), but Finnish/other languages
|
|
148
|
+
// use /ɑi/ and /ɑu/ (with open back 'ɑ'). Treat as diphthongs, not two vowels.
|
|
149
|
+
\u00E6i: "AY",
|
|
150
|
+
// /æi/ — Finnish "päivä" ≈ "my" diphthong (uses near-open front æ)
|
|
151
|
+
\u0250: "AH",
|
|
152
|
+
// /ɐ/ near-open central — Portuguese unstressed "a" ≈ "but"
|
|
153
|
+
\u0251i: "AY",
|
|
154
|
+
// /ɑi/ — Finnish "taivas" ≈ "my" diphthong
|
|
155
|
+
\u0251u: "AW",
|
|
156
|
+
// /ɑu/ — Finnish "sauna" ≈ "cow" diphthong
|
|
157
|
+
\u0251\u028A: "AW",
|
|
158
|
+
// /ɑʊ/ — Chinese 好 /xɑʊ/ ≈ "cow" diphthong (uses IPA ʊ not plain u)
|
|
159
|
+
// Nasal vowels (ɑ̃, ɛ̃, ɔ̃, etc.) are handled in from-ipa.ts by
|
|
160
|
+
// converting vowel+combining-tilde to vowel+"n" before map lookup.
|
|
161
|
+
\u0252: "AO",
|
|
162
|
+
// /ɒ/ open back rounded — British "lot" ≈ "thought"
|
|
163
|
+
// --- Implosives and other stops ---
|
|
164
|
+
\u0253: "B",
|
|
165
|
+
// /ɓ/ voiced bilabial implosive ≈ B
|
|
166
|
+
\u00E7: "SH",
|
|
167
|
+
// /ç/ voiceless palatal fricative — "ich" (German) ≈ "sh"
|
|
168
|
+
// --- Alveolo-palatal (Mandarin, Japanese, Polish) ---
|
|
169
|
+
\u0255: "SH",
|
|
170
|
+
// /ɕ/ voiceless alveolo-palatal fricative — Mandarin "xi" ≈ "sh"
|
|
171
|
+
d\u0291: "JH",
|
|
172
|
+
// /dʑ/ voiced alveolo-palatal affricate — Japanese "ji" ≈ "j"
|
|
173
|
+
// --- Retroflex (Hindi, Mandarin) ---
|
|
174
|
+
\u0256: "D",
|
|
175
|
+
// /ɖ/ voiced retroflex stop — Hindi ≈ D
|
|
176
|
+
\u0257: "D",
|
|
177
|
+
// /ɗ/ voiced alveolar implosive ≈ D
|
|
178
|
+
// --- Diphthong sequences (non-English vowel pairs) ---
|
|
179
|
+
// These use standard IPA vowels that don't appear in English diphthongs
|
|
180
|
+
// (English uses ɪ/ʊ as second element, not i/u/o).
|
|
181
|
+
ei: "EY",
|
|
182
|
+
// /ei/ — Finnish "ei", various ≈ "say" diphthong
|
|
183
|
+
\u0258: "AH0",
|
|
184
|
+
// /ɘ/ close-mid central ≈ schwa
|
|
185
|
+
\u025C: "ER",
|
|
186
|
+
// /ɜ/ open-mid central — non-rhotic "bird" ≈ "er"
|
|
187
|
+
\u025E: "ER",
|
|
188
|
+
// /ɞ/ open-mid central rounded ≈ "er"
|
|
189
|
+
\u0264: "AH",
|
|
190
|
+
// /ɤ/ close-mid back unrounded — Korean "ㅓ" ≈ "but"
|
|
191
|
+
\u0262: "G",
|
|
192
|
+
// /ɢ/ voiced uvular stop ≈ G
|
|
193
|
+
\u0263: "G",
|
|
194
|
+
// /ɣ/ voiced velar fricative — Spanish "lago" ≈ G
|
|
195
|
+
// --- Pharyngeal (Arabic) ---
|
|
196
|
+
\u0127: "HH",
|
|
197
|
+
// /ħ/ voiceless pharyngeal fricative — Arabic "ha" ≈ H
|
|
198
|
+
\u0266: "HH",
|
|
199
|
+
// /ɦ/ voiced glottal fricative — Korean 합 ≈ H
|
|
200
|
+
// --- Central/back vowels not in English ---
|
|
201
|
+
\u0268: "IH",
|
|
202
|
+
// /ɨ/ close central — Russian "ы" ≈ "bit"
|
|
203
|
+
\u026C: "L",
|
|
204
|
+
// /ɬ/ voiceless lateral fricative — Welsh "ll" ≈ L
|
|
205
|
+
\u026D: "L",
|
|
206
|
+
// /ɭ/ retroflex lateral ≈ L
|
|
207
|
+
// --- Laterals ---
|
|
208
|
+
\u028E: "L Y",
|
|
209
|
+
// /ʎ/ palatal lateral — Italian "figlio", Spanish "ll" ≈ LY
|
|
210
|
+
\u0271: "M",
|
|
211
|
+
// /ɱ/ labiodental nasal ≈ M
|
|
212
|
+
\u0274: "N",
|
|
213
|
+
// /ɴ/ uvular nasal — Japanese moraic ん ≈ "n" (not "ng")
|
|
214
|
+
// --- Nasals ---
|
|
215
|
+
\u0272: "N Y",
|
|
216
|
+
// /ɲ/ palatal nasal — Spanish "ñ", Italian "gn" ≈ NY
|
|
217
|
+
// tɕ and dʑ are handled as two-char sequences below
|
|
218
|
+
\u0273: "N",
|
|
219
|
+
// /ɳ/ retroflex nasal ≈ N
|
|
220
|
+
\u00F8: "UH",
|
|
221
|
+
// /ø/ close-mid front rounded — "peu" (French), "schön" (German) ≈ "u"
|
|
222
|
+
\u0153: "AH1",
|
|
223
|
+
// /œ/ open-mid front rounded — "peur" (French) ≈ "uh"
|
|
224
|
+
\u0153y: "OY",
|
|
225
|
+
// /œy/ — Dutch "huis" diphthong ≈ "boy"
|
|
226
|
+
oi: "OY",
|
|
227
|
+
// /oi/ — Finnish "koira", Portuguese "coisa" ≈ "boy" diphthong
|
|
228
|
+
ou: "OW",
|
|
229
|
+
// /ou/ — Finnish "koulu" ≈ "go" diphthong
|
|
230
|
+
o\u026F: "OW",
|
|
231
|
+
// /oɯ/ — Japanese long /oː/ (dict convention: 東京 /toɯkjoɯ/) ≈ "oh"
|
|
232
|
+
\u0278: "F",
|
|
233
|
+
// /ɸ/ voiceless bilabial fricative — Japanese "fu" ≈ F
|
|
234
|
+
q: "K",
|
|
235
|
+
// /q/ voiceless uvular stop — Arabic "Quran" ≈ K
|
|
236
|
+
\u0280: "R",
|
|
237
|
+
// /ʀ/ uvular trill — some German dialects ≈ English R
|
|
238
|
+
// --- Taps, trills, and approximants (Spanish, Italian, Mandarin, etc.) ---
|
|
239
|
+
\u027B: "R",
|
|
240
|
+
// /ɻ/ voiced retroflex approximant — Mandarin "er" (二) ≈ R
|
|
241
|
+
\u027D: "D",
|
|
242
|
+
// /ɽ/ retroflex flap — Hindi ≈ D
|
|
243
|
+
\u027E: "R",
|
|
244
|
+
// /ɾ/ alveolar tap — Spanish "pero" ≈ R (also like "butter" flap)
|
|
245
|
+
// --- Uvular consonants (French, German, Arabic) ---
|
|
246
|
+
\u0281: "R",
|
|
247
|
+
// /ʁ/ voiced uvular fricative — French/German R ≈ English R
|
|
248
|
+
\u0282: "SH",
|
|
249
|
+
// /ʂ/ voiceless retroflex fricative — Mandarin "shi" ≈ "sh"
|
|
250
|
+
// --- Affricates (two-char sequences matched by the converter) ---
|
|
251
|
+
t\u0255: "CH",
|
|
252
|
+
// /tɕ/ voiceless alveolo-palatal affricate — Mandarin "ji", Korean "ㅈ" ≈ "ch"
|
|
253
|
+
\u0288: "T",
|
|
254
|
+
// /ʈ/ voiceless retroflex stop — Hindi ≈ T
|
|
255
|
+
\u0288\u0282: "CH",
|
|
256
|
+
// /ʈʂ/ voiceless retroflex affricate — Mandarin "zhi" ≈ "ch"
|
|
257
|
+
// --- Labial-velar ---
|
|
258
|
+
\u0265: "W",
|
|
259
|
+
// /ɥ/ labial-palatal approximant — French "lui" ≈ W
|
|
260
|
+
\u026F: "UH",
|
|
261
|
+
// /ɯ/ close back unrounded — Japanese "u", Turkish "ı" ≈ "book" (shorter than "oo")
|
|
262
|
+
\u0270: "W",
|
|
263
|
+
// /ɰ/ voiced velar approximant — Japanese 川 /kaɰᵝa/ ≈ W
|
|
264
|
+
\u028B: "V",
|
|
265
|
+
// /ʋ/ labiodental approximant — Hindi, Dutch ≈ V
|
|
266
|
+
// --- Velar/palatal fricatives (German, Mandarin, etc.) ---
|
|
267
|
+
x: "HH",
|
|
268
|
+
// /x/ voiceless velar fricative — default H (override to K for German)
|
|
269
|
+
// --- Front rounded vowels (French, German, Turkish, etc.) ---
|
|
270
|
+
y: "UW",
|
|
271
|
+
// /y/ close front rounded — "tu" (French) ≈ "too"
|
|
272
|
+
\u028F: "UH",
|
|
273
|
+
// /ʏ/ near-close front rounded — "Glück" (German) ≈ "book"
|
|
274
|
+
\u0290: "ZH",
|
|
275
|
+
// /ʐ/ voiced retroflex fricative — Mandarin "ri" ≈ "zh"
|
|
276
|
+
\u0291: "ZH",
|
|
277
|
+
// /ʑ/ voiced alveolo-palatal fricative ≈ "zh"
|
|
278
|
+
// --- Glottal ---
|
|
279
|
+
\u0294: "",
|
|
280
|
+
// /ʔ/ glottal stop — often silent in approximation
|
|
281
|
+
// Note: length mark ː and combining diacritics are stripped in from-ipa.ts
|
|
282
|
+
// before lookup. Nasal vowels (◌̃) are converted to vowel+n there.
|
|
283
|
+
// --- Dental fricatives already in English ---
|
|
284
|
+
// θ and ð are in the main map
|
|
285
|
+
\u0295: "AH",
|
|
286
|
+
// /ʕ/ voiced pharyngeal fricative — Arabic "ain" ≈ "uh" (rough)
|
|
287
|
+
// --- Bilabial fricatives (Spanish) ---
|
|
288
|
+
\u03B2: "V",
|
|
289
|
+
// /β/ voiced bilabial fricative — Spanish "b" between vowels ≈ V
|
|
290
|
+
\u03C7: "HH"
|
|
291
|
+
// /χ/ voiceless uvular fricative — default H (override to K for German)
|
|
292
|
+
};
|
|
293
|
+
var IPA_TO_ARPABET_MAP = {
|
|
294
|
+
...IPA_APPROXIMATION_MAP,
|
|
295
|
+
...Object.fromEntries(Object.entries(ARPABET_TO_IPA_MAP).map(([arpabet, ipa]) => [ipa, arpabet])),
|
|
296
|
+
...IPA_VARIANT_MAP
|
|
297
|
+
};
|
|
298
|
+
var IPA_LANGUAGE_OVERRIDES = {
|
|
299
|
+
// German: /x/ and /χ/ are the "ach-Laut" — English speakers say "bahk" for Bach
|
|
300
|
+
de: { x: "K", \u03C7: "K" },
|
|
301
|
+
// Dutch: /ɣ/ is a breathy G (closer to H than hard G) — "goed" ≈ "hood" not "good"
|
|
302
|
+
nl: { \u0263: "HH" }
|
|
303
|
+
};
|
|
304
|
+
|
|
305
|
+
// src/to-ipa.ts
|
|
306
|
+
var WORD_JOINER = "\u2060";
|
|
307
|
+
var STRESS_MARKERS = {
|
|
308
|
+
0: "",
|
|
309
|
+
1: WORD_JOINER + "\u02C8" + WORD_JOINER,
|
|
310
|
+
2: WORD_JOINER + "\u02CC" + WORD_JOINER
|
|
311
|
+
};
|
|
312
|
+
/**
 * Convert one ARPABET phoneme (optionally carrying a stress digit) to IPA.
 *
 * - Symbols that are not keys of ARPABET_TO_IPA_MAP are returned lower-cased.
 * - `AH` with stress 0 is rendered as schwa (U+0259) instead of the mapped vowel.
 * - Stress 1 or 2 prepends the matching entry of STRESS_MARKERS.
 *
 * @param {string} phoneme - ARPABET symbol, e.g. "AH0", "IY1" or "K".
 * @returns {string} IPA rendering of the phoneme.
 */
function arpabetPhonemeToIPA(phoneme) {
  const base = (0, import_phonemes.stripStress)(phoneme);
  const stress = (0, import_phonemes.getStress)(phoneme);
  // Own-property check: plain indexing would resolve inherited members such as
  // "constructor" or "toString" and hand back a function instead of a string.
  if (!Object.hasOwn(ARPABET_TO_IPA_MAP, base)) {
    return phoneme.toLowerCase();
  }
  if (base === "AH" && stress === 0) {
    return "\u0259";
  }
  const ipa = ARPABET_TO_IPA_MAP[base];
  if (stress === 1 || stress === 2) {
    return STRESS_MARKERS[stress] + ipa;
  }
  return ipa;
}
|
|
327
|
+
/**
 * Find the segment index at which the stress marker for the vowel at
 * `vowelIndex` should be inserted.
 *
 * Walks backwards over the run of non-vowels directly before the vowel and
 * passes that cluster to `findOnsetStart`; the returned value is used as an
 * offset into the cluster (presumably the start of the syllable onset).
 * With no preceding consonants the marker goes directly before the vowel.
 */
function arpabetStressOnsetIndex(arpabet, vowelIndex) {
  const consonants = [];
  let j = vowelIndex - 1;
  while (j >= 0 && !(0, import_phonemes.isVowel)(arpabet[j])) {
    consonants.push((0, import_phonemes.stripStress)(arpabet[j]));
    j--;
  }
  if (consonants.length === 0) {
    return vowelIndex;
  }
  // Collected right-to-left; restore reading order before analysing the cluster.
  consonants.reverse();
  return j + 1 + (0, import_phonemes.findOnsetStart)(consonants);
}

/**
 * Render a sequence of ARPABET phonemes as a slash-delimited IPA string.
 *
 * Each input symbol produces exactly one IPA segment (unknown symbols are
 * lower-cased, unstressed `AH` becomes schwa). Stress markers are not placed
 * directly before the stressed vowel but before the consonant onset computed
 * by `arpabetStressOnsetIndex`.
 *
 * @param {string[]} arpabet - ARPABET symbols, e.g. ["HH", "AH0", "L", "OW1"].
 * @returns {string} IPA transcription wrapped in "/" on both sides.
 */
function arpabetToIPA(arpabet) {
  const ipaSegments = [];
  const stressPositions = [];
  for (let i = 0; i < arpabet.length; i++) {
    const symbol = arpabet[i];
    const base = (0, import_phonemes.stripStress)(symbol);
    const stress = (0, import_phonemes.getStress)(symbol);
    // Own-property check so inherited keys ("constructor", ...) count as unknown.
    if (!Object.hasOwn(ARPABET_TO_IPA_MAP, base)) {
      ipaSegments.push(symbol.toLowerCase());
      continue;
    }
    if (base === "AH" && stress === 0) {
      ipaSegments.push("\u0259");
      continue;
    }
    if (stress === 1 || stress === 2) {
      // One segment is pushed per symbol, so symbol indices and segment indices coincide.
      stressPositions.push({
        index: arpabetStressOnsetIndex(arpabet, i),
        marker: STRESS_MARKERS[stress]
      });
    }
    ipaSegments.push(ARPABET_TO_IPA_MAP[base]);
  }
  const sortedStress = stressPositions.toSorted((a, b) => a.index - b.index);
  const result = [];
  let stressIdx = 0;
  // Iterate one past the end so a marker positioned after the last segment is still emitted.
  for (let i = 0; i <= ipaSegments.length; i++) {
    while (stressIdx < sortedStress.length && sortedStress[stressIdx].index === i) {
      result.push(sortedStress[stressIdx].marker);
      stressIdx++;
    }
    if (i < ipaSegments.length) {
      result.push(ipaSegments[i]);
    }
  }
  return `/${result.join("")}/`;
}
|
|
377
|
+
/**
 * Convert ARPABET phonemes to IPA without the surrounding delimiters.
 *
 * @param {string[]} arpabet - ARPABET symbols.
 * @returns {string} Output of `arpabetToIPA` minus its first and last character.
 */
function arpabetToIPARaw(arpabet) {
  // arpabetToIPA wraps its output in "/.../"; drop one character from each end.
  return arpabetToIPA(arpabet).slice(1, -1);
}
|
|
381
|
+
|
|
382
|
+
// src/foreign.ts
|
|
383
|
+
var import_normalize = require("@ingglish/normalize");
|
|
384
|
+
var import_phonemes3 = require("@ingglish/phonemes");
|
|
385
|
+
|
|
386
|
+
// src/from-ipa.ts
|
|
387
|
+
var import_phonemes2 = require("@ingglish/phonemes");
|
|
388
|
+
var ARPABET_VOWELS = new Set(Object.keys(IPA_VOWEL_MAP));
|
|
389
|
+
var NASAL_VOWEL_RE = /([aeiouɑɛɔəɐɒæøœʌɝɚɘɜɞɤʏʊɪɨɯy])\u0303/g;
|
|
390
|
+
var MODIFIER_RE = /[\u02B0\u02D0\u02D1\u02E5-\u02E9\u0303\u1D5D]/g;
|
|
391
|
+
var COMBINING_NON_CEDILLA_RE = /(?!\u0327)\p{Mn}/gu;
|
|
392
|
+
var mergedMapCache = /* @__PURE__ */ new Map();
|
|
393
|
+
/**
 * Approximate an IPA transcription as a list of ARPABET phonemes.
 *
 * Pipeline:
 *  1. NFD-normalise, turn vowel + combining tilde into vowel + "n", strip the
 *     modifier characters in MODIFIER_RE and every combining mark except the
 *     cedilla (U+0327), then re-compose with NFC.
 *  2. Scan left to right, preferring a two-code-unit match over a single one.
 *     Characters without a mapping are skipped.
 *  3. A primary (U+02C8) or secondary (U+02CC) stress mark is remembered and
 *     attached as digit 1/2 to the next vowel phoneme.
 *  4. Immediately repeated phonemes are collapsed, then empty phonemes are
 *     dropped (the glottal stop maps to "").
 *
 * @param {string} ipa - IPA transcription.
 * @param {Object<string, string>} [overrides] - Extra IPA→ARPABET entries that
 *   take precedence over IPA_TO_ARPABET_MAP. The merged table is cached per
 *   `overrides` object identity.
 * @returns {string[]} ARPABET phonemes; may be empty.
 */
function ipaToArpabet(ipa, overrides) {
  const clean = ipa
    .normalize("NFD")
    .replaceAll(NASAL_VOWEL_RE, "$1n")
    .replaceAll(MODIFIER_RE, "")
    .replaceAll(COMBINING_NON_CEDILLA_RE, "")
    .normalize("NFC");

  let map = IPA_TO_ARPABET_MAP;
  if (overrides) {
    let cached = mergedMapCache.get(overrides);
    if (!cached) {
      cached = { ...IPA_TO_ARPABET_MAP, ...overrides };
      mergedMapCache.set(overrides, cached);
    }
    map = cached;
  }

  const result = [];
  let pendingStress = null;

  // Append one phoneme, transferring any pending stress onto it if it is a vowel.
  const push = (phoneme) => {
    const base = (0, import_phonemes2.stripStress)(phoneme);
    if (ARPABET_VOWELS.has(base) && pendingStress !== null) {
      result.push(base + String(pendingStress));
      pendingStress = null;
    } else {
      result.push(phoneme);
    }
  };

  // A mapping value may hold several space-separated phonemes (e.g. "N Y").
  const pushMapped = (arpabet) => {
    for (const phoneme of arpabet.split(" ")) {
      push(phoneme);
    }
  };

  let i = 0;
  while (i < clean.length) {
    const ch = clean[i];
    if (ch === "\u02C8") {
      pendingStress = 1;
      i++;
      continue;
    }
    if (ch === "\u02CC") {
      pendingStress = 2;
      i++;
      continue;
    }
    // Longest match first: affricates and diphthongs are two-character keys.
    const twoCharArpabet = i + 1 < clean.length ? map[clean.slice(i, i + 2)] : undefined;
    if (twoCharArpabet !== undefined) {
      pushMapped(twoCharArpabet);
      i += 2;
      continue;
    }
    const oneCharArpabet = map[ch];
    if (oneCharArpabet !== undefined) {
      pushMapped(oneCharArpabet);
    }
    i++;
  }

  const deduped = [];
  for (const phoneme of result) {
    if (phoneme !== deduped.at(-1)) {
      deduped.push(phoneme);
    }
  }
  // Silent mappings ("") still separate neighbours during de-duplication above,
  // but must not surface as empty phonemes in the returned list.
  return deduped.filter((phoneme) => phoneme !== "");
}
|
|
467
|
+
/**
 * Convert IPA to ARPABET with stress removed from every phoneme.
 *
 * @param {string} ipa - IPA transcription.
 * @returns {string[] | null} Stress-free phonemes, or `null` when none were produced.
 */
function ipaToArpabetClean(ipa) {
  const phonemes = [];
  for (const phoneme of ipaToArpabet(ipa)) {
    phonemes.push((0, import_phonemes2.stripStress)(phoneme));
  }
  if (phonemes.length === 0) {
    return null;
  }
  return phonemes;
}
|
|
471
|
+
|
|
472
|
+
// src/g2p.ts
|
|
473
|
+
var G2P_CONVERTERS = {
|
|
474
|
+
eo: esperantoG2P,
|
|
475
|
+
fi: finnishG2P,
|
|
476
|
+
ma: malayG2P,
|
|
477
|
+
sw: swahiliG2P
|
|
478
|
+
};
|
|
479
|
+
var IPA_VOWELS = new Set("aeiou\u0251\xE6\xF8y\u025B\u0254\u0259");
|
|
480
|
+
/**
 * Mark the first syllable as stressed by prefixing the primary stress mark (U+02C8).
 *
 * @param {string} ipa - IPA string.
 * @returns {string} The prefixed string; falsy input is returned unchanged.
 */
function addFirstSyllableStress(ipa) {
  if (!ipa) {
    return ipa;
  }
  return "\u02C8" + ipa;
}
|
|
483
|
+
/**
 * Insert a primary stress mark (U+02C8) before the syllable containing the
 * second-to-last vowel.
 *
 * Vowels are the characters in IPA_VOWELS. With zero or one vowel the mark is
 * simply prefixed (empty input is returned unchanged). Otherwise the mark is
 * moved left from the penultimate vowel across preceding non-vowels, stopping
 * at the previous vowel or at a length mark (U+02D0).
 *
 * @param {string} ipa - IPA string without stress marks.
 * @returns {string} The IPA string with one primary stress mark inserted.
 */
function addPenultimateStress(ipa) {
  // Index by UTF-16 code unit so these positions agree with the ipa[...] and
  // slice() calls below; code-point indices would drift after any astral character.
  const vowelPositions = [];
  for (let i = 0; i < ipa.length; i++) {
    if (IPA_VOWELS.has(ipa[i])) {
      vowelPositions.push(i);
    }
  }
  if (vowelPositions.length <= 1) {
    return ipa ? "\u02C8" + ipa : ipa;
  }
  let onset = vowelPositions.at(-2);
  while (onset > 0 && !IPA_VOWELS.has(ipa[onset - 1]) && ipa[onset - 1] !== "\u02D0") {
    onset--;
  }
  return ipa.slice(0, onset) + "\u02C8" + ipa.slice(onset);
}
|
|
500
|
+
/**
 * Transliterate `word` with an ordered list of [grapheme, phoneme] rules.
 *
 * The word is NFC-normalised and scanned left to right. At each position the
 * first rule whose grapheme matches is applied, so longer graphemes must be
 * listed before their prefixes. Characters matched by no rule are dropped.
 *
 * @param {string} word - Input spelling.
 * @param {Iterable<[string, string]>} rules - Ordered rewrite rules.
 * @returns {string} Concatenation of the phonemes of all applied rules.
 */
function applyRules(word, rules) {
  const text = word.normalize("NFC");

  // First rule (in list order) whose grapheme occurs at `pos`, or null.
  const firstRuleAt = (pos) => {
    for (const rule of rules) {
      if (text.startsWith(rule[0], pos)) {
        return rule;
      }
    }
    return null;
  };

  let output = "";
  let pos = 0;
  while (pos < text.length) {
    const rule = firstRuleAt(pos);
    if (rule === null) {
      // No rule covers this character: skip it silently.
      pos += 1;
      continue;
    }
    output += rule[1];
    pos += rule[0].length;
  }
  return output;
}
|
|
520
|
+
var FINNISH_RULES = [
|
|
521
|
+
// Digraphs (must precede single-char rules)
|
|
522
|
+
["nk", "\u014Bk"],
|
|
523
|
+
["ng", "\u014B\u02D0"],
|
|
524
|
+
// Long vowels (double letters → vowel + length mark)
|
|
525
|
+
["aa", "\u0251\u02D0"],
|
|
526
|
+
["ee", "e\u02D0"],
|
|
527
|
+
["ii", "i\u02D0"],
|
|
528
|
+
["oo", "o\u02D0"],
|
|
529
|
+
["uu", "u\u02D0"],
|
|
530
|
+
["yy", "y\u02D0"],
|
|
531
|
+
["\xE4\xE4", "\xE6\u02D0"],
|
|
532
|
+
["\xF6\xF6", "\xF8\u02D0"],
|
|
533
|
+
// Geminate consonants
|
|
534
|
+
["pp", "p\u02D0"],
|
|
535
|
+
["tt", "t\u02D0"],
|
|
536
|
+
["kk", "k\u02D0"],
|
|
537
|
+
["mm", "m\u02D0"],
|
|
538
|
+
["nn", "n\u02D0"],
|
|
539
|
+
["ll", "l\u02D0"],
|
|
540
|
+
["rr", "r\u02D0"],
|
|
541
|
+
["ss", "s\u02D0"],
|
|
542
|
+
// Single vowels
|
|
543
|
+
["a", "\u0251"],
|
|
544
|
+
["e", "e"],
|
|
545
|
+
["i", "i"],
|
|
546
|
+
["o", "o"],
|
|
547
|
+
["u", "u"],
|
|
548
|
+
["y", "y"],
|
|
549
|
+
["\xE4", "\xE6"],
|
|
550
|
+
["\xF6", "\xF8"],
|
|
551
|
+
// Single consonants
|
|
552
|
+
["b", "b"],
|
|
553
|
+
["d", "d"],
|
|
554
|
+
["f", "f"],
|
|
555
|
+
["g", "\u0261"],
|
|
556
|
+
["h", "h"],
|
|
557
|
+
["j", "j"],
|
|
558
|
+
["k", "k"],
|
|
559
|
+
["l", "l"],
|
|
560
|
+
["m", "m"],
|
|
561
|
+
["n", "n"],
|
|
562
|
+
["p", "p"],
|
|
563
|
+
["r", "r"],
|
|
564
|
+
["s", "s"],
|
|
565
|
+
["t", "t"],
|
|
566
|
+
["v", "\u028B"],
|
|
567
|
+
["w", "\u028B"],
|
|
568
|
+
["z", "ts"]
|
|
569
|
+
];
|
|
570
|
+
/**
 * Finnish grapheme-to-phoneme conversion: apply FINNISH_RULES, then put
 * primary stress at the very start of the word.
 *
 * @param {string} word - Finnish spelling.
 * @returns {string} IPA string.
 */
function finnishG2P(word) {
  const ipa = applyRules(word, FINNISH_RULES);
  return addFirstSyllableStress(ipa);
}
|
|
573
|
+
var ESPERANTO_RULES = [
|
|
574
|
+
// Special Esperanto characters (must precede base letters)
|
|
575
|
+
["\u0109", "t\u0283"],
|
|
576
|
+
["\u011D", "d\u0292"],
|
|
577
|
+
["\u0125", "x"],
|
|
578
|
+
["\u0135", "\u0292"],
|
|
579
|
+
["\u015D", "\u0283"],
|
|
580
|
+
["\u016D", "w"],
|
|
581
|
+
["c", "ts"],
|
|
582
|
+
// Vowels
|
|
583
|
+
["a", "a"],
|
|
584
|
+
["e", "e"],
|
|
585
|
+
["i", "i"],
|
|
586
|
+
["o", "o"],
|
|
587
|
+
["u", "u"],
|
|
588
|
+
// Consonants
|
|
589
|
+
["b", "b"],
|
|
590
|
+
["d", "d"],
|
|
591
|
+
["f", "f"],
|
|
592
|
+
["g", "\u0261"],
|
|
593
|
+
["h", "h"],
|
|
594
|
+
["j", "j"],
|
|
595
|
+
["k", "k"],
|
|
596
|
+
["l", "l"],
|
|
597
|
+
["m", "m"],
|
|
598
|
+
["n", "n"],
|
|
599
|
+
["p", "p"],
|
|
600
|
+
["r", "r"],
|
|
601
|
+
["s", "s"],
|
|
602
|
+
["t", "t"],
|
|
603
|
+
["v", "v"],
|
|
604
|
+
["z", "z"]
|
|
605
|
+
];
|
|
606
|
+
/**
 * Esperanto grapheme-to-phoneme conversion: apply ESPERANTO_RULES, then
 * stress the penultimate vowel's syllable.
 *
 * @param {string} word - Esperanto spelling.
 * @returns {string} IPA string.
 */
function esperantoG2P(word) {
  const ipa = applyRules(word, ESPERANTO_RULES);
  return addPenultimateStress(ipa);
}
|
|
609
|
+
var SWAHILI_RULES = [
|
|
610
|
+
// Trigraph (must precede digraph 'ng')
|
|
611
|
+
["ng'", "\u014B"],
|
|
612
|
+
// Digraphs
|
|
613
|
+
["ch", "t\u0283"],
|
|
614
|
+
["dh", "\xF0"],
|
|
615
|
+
["gh", "\u0263"],
|
|
616
|
+
["ng", "\u014B\u0261"],
|
|
617
|
+
["nj", "nd\u0292"],
|
|
618
|
+
["ny", "\u0272"],
|
|
619
|
+
["sh", "\u0283"],
|
|
620
|
+
["th", "\u03B8"],
|
|
621
|
+
// Vowels
|
|
622
|
+
["a", "a"],
|
|
623
|
+
["e", "\u025B"],
|
|
624
|
+
["i", "i"],
|
|
625
|
+
["o", "\u0254"],
|
|
626
|
+
["u", "u"],
|
|
627
|
+
// Consonants
|
|
628
|
+
["b", "b"],
|
|
629
|
+
["d", "d"],
|
|
630
|
+
["f", "f"],
|
|
631
|
+
["g", "\u0261"],
|
|
632
|
+
["h", "h"],
|
|
633
|
+
["j", "d\u0292"],
|
|
634
|
+
["k", "k"],
|
|
635
|
+
["l", "l"],
|
|
636
|
+
["m", "m"],
|
|
637
|
+
["n", "n"],
|
|
638
|
+
["p", "p"],
|
|
639
|
+
["r", "\u027E"],
|
|
640
|
+
["s", "s"],
|
|
641
|
+
["t", "t"],
|
|
642
|
+
["v", "v"],
|
|
643
|
+
["w", "w"],
|
|
644
|
+
["y", "j"],
|
|
645
|
+
["z", "z"]
|
|
646
|
+
];
|
|
647
|
+
/**
 * Swahili grapheme-to-phoneme conversion: apply SWAHILI_RULES, then stress
 * the penultimate vowel's syllable.
 *
 * @param {string} word - Swahili spelling.
 * @returns {string} IPA string.
 */
function swahiliG2P(word) {
  const ipa = applyRules(word, SWAHILI_RULES);
  return addPenultimateStress(ipa);
}
|
|
650
|
+
var MALAY_RULES = [
|
|
651
|
+
// Digraphs
|
|
652
|
+
["gh", "\u0263"],
|
|
653
|
+
["kh", "x"],
|
|
654
|
+
["ng", "\u014B"],
|
|
655
|
+
["ny", "\u0272"],
|
|
656
|
+
["sy", "\u0283"],
|
|
657
|
+
// Vowels
|
|
658
|
+
["a", "a"],
|
|
659
|
+
["e", "\u0259"],
|
|
660
|
+
["i", "i"],
|
|
661
|
+
["o", "o"],
|
|
662
|
+
["u", "u"],
|
|
663
|
+
// Consonants
|
|
664
|
+
["b", "b"],
|
|
665
|
+
["c", "t\u0283"],
|
|
666
|
+
["d", "d"],
|
|
667
|
+
["f", "f"],
|
|
668
|
+
["g", "\u0261"],
|
|
669
|
+
["h", "h"],
|
|
670
|
+
["j", "d\u0292"],
|
|
671
|
+
["k", "k"],
|
|
672
|
+
["l", "l"],
|
|
673
|
+
["m", "m"],
|
|
674
|
+
["n", "n"],
|
|
675
|
+
["p", "p"],
|
|
676
|
+
["r", "\u027E"],
|
|
677
|
+
["s", "s"],
|
|
678
|
+
["t", "t"],
|
|
679
|
+
["v", "v"],
|
|
680
|
+
["w", "w"],
|
|
681
|
+
["y", "j"],
|
|
682
|
+
["z", "z"]
|
|
683
|
+
];
|
|
684
|
+
/**
 * Insert a primary stress mark (U+02C8) using the Malay rule implemented here:
 * stress the penultimate vowel unless that vowel is schwa (U+0259), in which
 * case stress the final vowel.
 *
 * Vowels are the characters in IPA_VOWELS. With zero or one vowel the mark is
 * simply prefixed (empty input is returned unchanged). The mark is placed
 * before the consonants leading into the stressed vowel, stopping at the
 * previous vowel or at a length mark (U+02D0).
 *
 * @param {string} ipa - IPA string without stress marks.
 * @returns {string} The IPA string with one primary stress mark inserted.
 */
function addMalayStress(ipa) {
  // Index by UTF-16 code unit so these positions agree with the ipa[...] and
  // slice() calls below; code-point indices would drift after any astral character.
  const vowelPositions = [];
  for (let i = 0; i < ipa.length; i++) {
    if (IPA_VOWELS.has(ipa[i])) {
      vowelPositions.push(i);
    }
  }
  if (vowelPositions.length <= 1) {
    return ipa ? "\u02C8" + ipa : ipa;
  }
  const penultPos = vowelPositions.at(-2);
  const stressPos = ipa[penultPos] === "\u0259" ? vowelPositions.at(-1) : penultPos;
  let onset = stressPos;
  while (onset > 0 && !IPA_VOWELS.has(ipa[onset - 1]) && ipa[onset - 1] !== "\u02D0") {
    onset--;
  }
  return ipa.slice(0, onset) + "\u02C8" + ipa.slice(onset);
}
|
|
702
|
+
/**
 * Malay grapheme-to-phoneme conversion: apply MALAY_RULES, then place stress
 * with `addMalayStress`.
 *
 * @param {string} word - Malay spelling.
 * @returns {string} IPA string.
 */
function malayG2P(word) {
  const ipa = applyRules(word, MALAY_RULES);
  return addMalayStress(ipa);
}
|
|
705
|
+
|
|
706
|
+
// src/lemmatizers.ts
|
|
707
|
+
var LEMMATIZERS = {
|
|
708
|
+
eo: lemmatizeEo,
|
|
709
|
+
fa: lemmatizeFa,
|
|
710
|
+
fi: lemmatizeFi,
|
|
711
|
+
ma: lemmatizeMa,
|
|
712
|
+
nb: lemmatizeNb,
|
|
713
|
+
ro: lemmatizeRo,
|
|
714
|
+
sv: lemmatizeSv,
|
|
715
|
+
sw: lemmatizeSw
|
|
716
|
+
};
|
|
717
|
+
/**
 * Return the dictionary entry for the first candidate that has one.
 *
 * Empty candidates and candidates whose entry is falsy (e.g. "") are skipped.
 * Only the dictionary's own properties are consulted, so candidates that
 * happen to equal an inherited member name ("constructor", "toString", ...)
 * do not produce a bogus hit.
 *
 * @param {Object<string, string>} dict - Word → IPA dictionary.
 * @param {...string} candidates - Words to try, in priority order.
 * @returns {string | undefined} The first truthy entry, or `undefined`.
 */
function tryLookup(dict, ...candidates) {
  for (const candidate of candidates) {
    if (!candidate || !Object.hasOwn(dict, candidate)) {
      continue;
    }
    const entry = dict[candidate];
    if (entry) {
      return entry;
    }
  }
  return undefined;
}
|
|
725
|
+
var SV_SUFFIXES = [
|
|
726
|
+
// 4+ char suffixes
|
|
727
|
+
["erna", ["", "e"]],
|
|
728
|
+
["orna", ["", "a"]],
|
|
729
|
+
["ande", ["", "a"]],
|
|
730
|
+
["ende", ["", "a"]],
|
|
731
|
+
["aste", [""]],
|
|
732
|
+
// 3 char
|
|
733
|
+
["ade", ["", "a"]],
|
|
734
|
+
["igt", ["ig"]],
|
|
735
|
+
// 2 char
|
|
736
|
+
["en", [""]],
|
|
737
|
+
["et", ["", "e"]],
|
|
738
|
+
["an", ["", "a"]],
|
|
739
|
+
["ar", [""]],
|
|
740
|
+
["er", ["", "a"]],
|
|
741
|
+
["de", ["", "a"]],
|
|
742
|
+
["te", ["", "a"]],
|
|
743
|
+
// 1 char
|
|
744
|
+
["a", [""]],
|
|
745
|
+
["t", ["", "a"]],
|
|
746
|
+
["s", [""]],
|
|
747
|
+
["r", ["", "a"]],
|
|
748
|
+
["n", [""]]
|
|
749
|
+
];
|
|
750
|
+
/**
 * Swedish fallback lookup: strip an inflectional suffix and look the
 * resulting stem up in the dictionary.
 *
 * For each entry of SV_SUFFIXES (in list order) that the word ends with, the
 * suffix is replaced by each listed replacement and the first dictionary hit
 * is returned. If nothing matches and the word ends in "s" (and is longer
 * than two characters), that "s" is removed and the whole procedure repeats
 * on the shorter word.
 *
 * @param {Object<string, string>} dict - Word → IPA dictionary.
 * @param {string} word - Word form that had no direct dictionary entry.
 * @returns {string | undefined} IPA of the matched base form, if any.
 */
function lemmatizeSv(dict, word) {
  let current = word;
  for (;;) {
    for (const [suffix, replacements] of SV_SUFFIXES) {
      if (current.length <= suffix.length || !current.endsWith(suffix)) {
        continue;
      }
      const stem = current.slice(0, -suffix.length);
      const found = tryLookup(dict, ...replacements.map((r) => stem + r));
      if (found) {
        return found;
      }
    }
    if (!current.endsWith("s") || current.length <= 2) {
      return undefined;
    }
    // Peel one trailing "s" and retry with the shorter form.
    const inner = current.slice(0, -1);
    const direct = tryLookup(dict, inner);
    if (direct) {
      return direct;
    }
    current = inner;
  }
}
|
|
770
|
+
var RO_SUFFIXES = [
|
|
771
|
+
// 4+ char
|
|
772
|
+
["ului", [""]],
|
|
773
|
+
["ilor", [""]],
|
|
774
|
+
["e\u0219te", ["", "i"]],
|
|
775
|
+
// 3 char
|
|
776
|
+
["ele", ["", "\u0103"]],
|
|
777
|
+
["uri", [""]],
|
|
778
|
+
["eau", ["", "i", "ea"]],
|
|
779
|
+
// 2 char
|
|
780
|
+
["ul", [""]],
|
|
781
|
+
["ii", ["", "ie", "iu"]],
|
|
782
|
+
["ea", ["", "e"]],
|
|
783
|
+
["\u021Bi", ["t"]],
|
|
784
|
+
["c\u0103", ["c", "ca"]],
|
|
785
|
+
// 1 char
|
|
786
|
+
["a", ["", "\u0103"]],
|
|
787
|
+
["e", ["", "\u0103"]],
|
|
788
|
+
["i", ["", "e"]]
|
|
789
|
+
];
|
|
790
|
+
var RO_PREFIX_RESTORE = [
|
|
791
|
+
["n", "\xEEn"],
|
|
792
|
+
["l", "\xEEl"],
|
|
793
|
+
["m", "m\u0103"]
|
|
794
|
+
];
|
|
795
|
+
/**
 * Romanian fallback lookup, tried in three stages:
 *
 *  1. Suffix stripping: for each entry of RO_SUFFIXES the word ends with,
 *     swap the suffix for each listed replacement and return the first hit.
 *  2. Prefix restoration: for each entry of RO_PREFIX_RESTORE the word starts
 *     with, replace the prefix by its restored form and look that up.
 *  3. Prepend "î" (U+00EE) to the whole word and look that up.
 *
 * @param {Object<string, string>} dict - Word → IPA dictionary.
 * @param {string} word - Word form that had no direct dictionary entry.
 * @returns {string | undefined} IPA of the matched form, if any.
 */
function lemmatizeRo(dict, word) {
  for (const [suffix, replacements] of RO_SUFFIXES) {
    if (word.length <= suffix.length || !word.endsWith(suffix)) {
      continue;
    }
    const stem = word.slice(0, -suffix.length);
    const found = tryLookup(dict, ...replacements.map((r) => stem + r));
    if (found) {
      return found;
    }
  }
  for (const [prefix, restored] of RO_PREFIX_RESTORE) {
    if (!word.startsWith(prefix)) {
      continue;
    }
    const found = tryLookup(dict, restored + word.slice(prefix.length));
    if (found) {
      return found;
    }
  }
  return tryLookup(dict, "\xEE" + word);
}
|
|
820
|
+
// Verb prefix strings that lemmatizeSw strips from the front of a word
// (presumably Swahili subject/tense/object marker combinations, per the "Sw"
// naming). Order matters: lemmatizeSw walks this list front to back and
// returns on the first dictionary hit.
var SW_VERB_PREFIXES = [
  // 5-6 characters
  "hatuku", "hawaku", "haiku", "hatua", "hatui",
  // 4-character group (the 3-character "ame" / "ana" / "ata" sit here too)
  "wali", "tuli", "nili", "aali",
  "wame", "tume", "nime", "ame",
  "wana", "tuna", "nina", "ana",
  "wata", "tuta", "nita", "ata",
  "yame", "yata", "yana", "yali",
  "kime", "kita", "kina", "kili",
  "lime", "lita", "lina", "lili",
  "vime", "vita", "vina", "vili",
  "zime", "zita", "zina", "zili",
  "haku", "hatu", "hani", "hawa",
  // 3 characters
  "ali", "uli", "ume", "una", "uta", "tua", "tui", "wai", "wal", "iku",
  "ina", "hue", "huj", "hui", "yat", "yam", "yan", "kum", "kui", "kua",
  // 2 characters
  "wa", "tu", "ni", "li", "ki", "vi", "zi", "ya", "ku", "hu"
];
|
|
901
|
+
// Derivational verb endings used by lemmatizeSw, each paired with the
// replacement endings that are substituted before the dictionary is retried.
//
// Every suffix is listed exactly once. "ika" used to appear twice (first with
// ["a"], later with ["a", "ea"]), which only repeated the "a" lookup: no other
// suffix in this table can match a word ending in "ika", so the single merged
// entry probes the same candidates in the same order.
var SW_DERIV_SUFFIXES = [
  ["ika", ["a", "ea"]],
  ["isha", ["a"]],
  ["ana", ["a"]],
  ["wa", ["a"]],
  ["ia", ["a"]]
];
|
|
909
|
+
/**
 * Fallback dictionary lookup for an inflected or derived word (presumably
 * Esperanto, going by the "Eo" name and the endings handled).
 *
 * Steps, in order, returning the first truthy dictionary value:
 *   1. strip a trailing "n", then a trailing "j", checking the dictionary
 *      after each strip;
 *   2. look up what is left;
 *   3. walk an ordered list of suffix rules, replacing the suffix with each
 *      candidate ending and handing the candidates to tryLookup;
 *   4. strip one of a few prefixes and retry on the remainder (directly, then
 *      recursively).
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word that was not found as-is.
 * @returns {*} the matching dictionary value, or undefined when nothing matches.
 */
function lemmatizeEo(dict, word) {
  // tryLookup is defined elsewhere in this file; it receives the dictionary
  // followed by the candidate keys in priority order.
  const verbLike = ["i", "o", ""];
  const nounLike = ["o", "i", ""];
  const qualityLike = ["o", "a", ""];
  const suffixRules = [
    // Finite verb endings and the bare "u" ending
    ["as", verbLike],
    ["is", verbLike],
    ["os", verbLike],
    ["us", verbLike],
    ["u", verbLike],
    // Participle endings
    ["anta", verbLike],
    ["inta", verbLike],
    ["onta", verbLike],
    ["ata", verbLike],
    ["ita", verbLike],
    ["ota", verbLike],
    ["ante", verbLike],
    ["inte", verbLike],
    ["onte", verbLike],
    // Final "e"
    ["e", ["o", "a", "i", ""]],
    // Derivational suffixes
    ["isto", nounLike],
    ["ejo", nounLike],
    ["ilo", nounLike],
    ["eco", qualityLike],
    ["ado", nounLike],
    ["igo", qualityLike],
    ["i\u011Do", qualityLike]
  ];

  // Peel "n" first, then "j"; each peel needs more than two characters and is
  // followed by an immediate dictionary check on the shortened form.
  let base = word;
  for (const marker of ["n", "j"]) {
    if (base.length > 2 && base.endsWith(marker)) {
      base = base.slice(0, -1);
      if (dict[base]) {
        return dict[base];
      }
    }
  }
  if (dict[base]) {
    return dict[base];
  }

  // Every rule shares one guard: at least two characters must remain once the
  // suffix is removed.
  for (const [suffix, endings] of suffixRules) {
    if (base.length <= suffix.length + 1 || !base.endsWith(suffix)) {
      continue;
    }
    const stem = base.slice(0, -suffix.length);
    const found = tryLookup(dict, ...endings.map((ending) => stem + ending));
    if (found) {
      return found;
    }
  }

  // Prefix stripping: try the remainder directly, then lemmatize it again.
  for (const prefix of ["mal", "ek", "re", "ne", "sen"]) {
    if (base.length <= prefix.length + 1 || !base.startsWith(prefix)) {
      continue;
    }
    const remainder = base.slice(prefix.length);
    if (dict[remainder]) {
      return dict[remainder];
    }
    const nested = lemmatizeEo(dict, remainder);
    if (nested) {
      return nested;
    }
  }
  return void 0;
}
|
|
1001
|
+
/**
 * Fallback dictionary lookup that strips verb prefixes and swaps derivational
 * suffixes (presumably for Swahili, per the "Sw" naming).
 *
 * For every prefix in SW_VERB_PREFIXES that the word starts with (leaving at
 * least two characters), it tries, in order: the remainder, "ku" + remainder,
 * and each suffix-swapped form of the remainder (plain, then with "ku").
 * If no prefix yields a hit, the suffix swaps are tried on the whole word,
 * this time without the "ku" variant.
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word that was not found as-is.
 * @returns {*} the first truthy dictionary value found, else undefined.
 */
function lemmatizeSw(dict, word) {
  // Swap each matching derivational suffix of `form` for its replacements.
  // `withKu` additionally probes the "ku"-prefixed candidate after the plain one.
  const probeDerived = (form, withKu) => {
    for (const [ending, swaps] of SW_DERIV_SUFFIXES) {
      if (form.length <= ending.length || !form.endsWith(ending)) {
        continue;
      }
      const root = form.slice(0, -ending.length);
      for (const swap of swaps) {
        const key = root + swap;
        if (dict[key]) {
          return dict[key];
        }
        if (withKu && dict["ku" + key]) {
          return dict["ku" + key];
        }
      }
    }
    return void 0;
  };

  for (const marker of SW_VERB_PREFIXES) {
    if (word.length <= marker.length + 1 || !word.startsWith(marker)) {
      continue;
    }
    const rest = word.slice(marker.length);
    if (dict[rest]) {
      return dict[rest];
    }
    const kuRest = "ku" + rest;
    if (dict[kuRest]) {
      return dict[kuRest];
    }
    const derived = probeDerived(rest, true);
    if (derived) {
      return derived;
    }
  }
  return probeDerived(word, false);
}
|
|
1041
|
+
// Nominal (and a few verbal) endings tried by lemmatizeFi. Each entry is
// [suffix, replacements]: the suffix is removed from the word and every
// replacement is appended to the stem in turn. Entries are tried top to bottom.
var FI_SUFFIXES = [
  // Possessive + case combinations
  ["ssani", ["", "nen"]], ["ss\xE4ni", ["", "nen"]],
  ["llani", ["", "nen"]], ["ll\xE4ni", ["", "nen"]],
  ["stani", ["", "nen"]], ["st\xE4ni", ["", "nen"]],
  ["ssaan", ["", "nen"]], ["ss\xE4\xE4n", ["", "nen"]],
  // Plural case endings (4+ chars)
  ["issa", ["", "a"]], ["iss\xE4", ["", "\xE4"]],
  ["illa", ["", "a"]], ["ill\xE4", ["", "\xE4"]],
  ["ista", ["", "a"]], ["ist\xE4", ["", "\xE4"]],
  ["ihin", ["", "i"]],
  ["ojen", ["o"]], ["ujen", ["u"]], ["yjen", ["y"]], ["iden", ["i"]],
  ["jen", [""]],
  // Inessive -ssa/-ssä
  ["ssa", ["", "s"]], ["ss\xE4", ["", "s"]],
  // Elative -sta/-stä
  ["sta", ["", "s"]], ["st\xE4", ["", "s"]],
  // Adessive -lla/-llä
  ["lla", ["", "a"]], ["ll\xE4", ["", "\xE4"]],
  // Ablative -lta/-ltä
  ["lta", ["", "a"]], ["lt\xE4", ["", "\xE4"]],
  // Allative -lle
  ["lle", ["", "i"]],
  // Essive -na/-nä
  ["na", ["", "nen"]], ["n\xE4", ["", "nen"]],
  // Translative -ksi
  ["ksi", ["", "si"]],
  // Possessive -ni, -si, -nsa/-nsä, -mme, -nne
  ["nsa", [""]], ["ns\xE4", [""]], ["mme", [""]], ["nne", [""]],
  ["ni", ["", "n"]], ["si", ["", "s"]],
  // Partitive -a/-ä, -ta/-tä, -tta/-ttä
  ["tta", [""]], ["tt\xE4", [""]],
  ["ta", ["", "nen"]], ["t\xE4", ["", "nen"]],
  // Genitive -n, plural -t
  ["en", ["", "i"]],
  ["ot", ["o"]], ["ut", ["u"]], ["yt", ["y"]], ["\xE4t", ["\xE4"]], ["at", ["a"]],
  ["et", ["e", "i"]],
  // Verb past -i
  ["oi", ["o", "oa"]], ["ui", ["u", "ua"]],
  // General fallbacks
  ["a", [""]], ["\xE4", [""]], ["n", [""]], ["t", [""]]
];
|
|
1112
|
+
// Verb endings tried by lemmatizeFi before the nominal FI_SUFFIXES table.
// Each entry is [suffix, replacements]; the suffix is cut off and every
// replacement is appended to the stem in turn. Entries are tried top to bottom.
var FI_VERB_SUFFIXES = [
  // Archaic Kalevala -(tt)elevi/-(tt)avi patterns
  ["ttelevi", ["tella", "della"]], ["televi", ["tella", "della"]],
  ["ttavi", ["ttaa", "t\xE4\xE4"]], ["ttevi", ["tt\xE4\xE4", "ttaa"]],
  ["elevi", ["ella", "ell\xE4"]], ["alevi", ["alla", "all\xE4"]],
  ["evi", ["", "a", "\xE4"]], ["avi", ["", "a", "aa"]],
  ["ovi", ["", "o", "oa"]], ["uvi", ["", "u", "ua"]],
  // Past participle -nut/-nyt, -neet
  ["neet", ["", "a", "\xE4"]], ["nut", ["", "a", "da"]], ["nyt", ["", "\xE4", "d\xE4"]],
  // Present participle -va/-vä
  ["va", ["", "a"]], ["v\xE4", ["", "\xE4"]],
  // Past tense 3rd person
  ["tui", ["tua", "ty\xE4"]], ["lui", ["la", "l\xE4"]],
  // Conditional
  ["isi", ["", "a", "\xE4"]],
  // Agent noun -ja/-jä
  ["ja", ["", "a"]], ["j\xE4", ["", "\xE4"]]
];
|
|
1140
|
+
/**
 * Rewrites the final two-letter consonant cluster of a stem according to a
 * fixed table (nt->nn, lt->ll, rt->rr, nk->ng, mp->mm, lk->l, rk->r, hk->h).
 *
 * @param {string} stem - stem to transform.
 * @returns {string} the rewritten stem, or `stem` itself when its last two
 *   characters are not one of the listed clusters.
 */
function applyFiGradation(stem) {
  // The clusters are all exactly two characters and mutually exclusive, so a
  // single keyed lookup on the tail is equivalent to testing them one by one.
  const clusterRewrites = new Map([
    ["nt", "nn"],
    ["lt", "ll"],
    ["rt", "rr"],
    ["nk", "ng"],
    ["mp", "mm"],
    ["lk", "l"],
    ["rk", "r"],
    ["hk", "h"]
  ]);
  const tail = stem.slice(-2);
  if (!clusterRewrites.has(tail)) {
    return stem;
  }
  return stem.slice(0, -2) + clusterRewrites.get(tail);
}
|
|
1167
|
+
/**
 * Inverse-direction counterpart of applyFiGradation for the doubled clusters:
 * rewrites a final nn->nt, ll->lt, rr->rt, ng->nk or mm->mp.
 *
 * @param {string} stem - stem to transform.
 * @returns {string} the rewritten stem, or `stem` itself when its last two
 *   characters are not one of the listed clusters.
 */
function applyFiStrengthening(stem) {
  // All keys are two characters long and distinct, so one lookup on the tail
  // replaces the chain of endsWith checks.
  const clusterRewrites = new Map([
    ["nn", "nt"],
    ["ll", "lt"],
    ["rr", "rt"],
    ["ng", "nk"],
    ["mm", "mp"]
  ]);
  const tail = stem.slice(-2);
  const rewritten = clusterRewrites.get(tail);
  return rewritten === undefined ? stem : stem.slice(0, -2) + rewritten;
}
|
|
1185
|
+
/**
 * Fallback dictionary lookup for an inflected word, driven by the
 * FI_VERB_SUFFIXES and FI_SUFFIXES tables.
 *
 * Three passes, returning the first truthy result:
 *   1. verb suffixes: cut the suffix, try each replacement;
 *   2. nominal suffixes: same, then retry with the stem passed through
 *      applyFiGradation and, after that, applyFiStrengthening (each only when
 *      it actually changes the stem);
 *   3. possessive endings: cut the ending, look the rest up directly, then
 *      run the nominal suffix table on it (no stem rewriting in this pass).
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word that was not found as-is.
 * @returns {*} the matching dictionary value, or undefined.
 */
function lemmatizeFi(dict, word) {
  // tryLookup is defined elsewhere in this file; it gets the dictionary and
  // the candidate keys in priority order.
  const probe = (stem, endings) => tryLookup(dict, ...endings.map((ending) => stem + ending));

  // Returns the stem left after removing `suffix`, or null when the suffix
  // does not apply. The guard leaves at least two characters of stem, so
  // callers never need a separate minimum-length check.
  const cut = (form, suffix) => {
    if (form.length > suffix.length + 1 && form.endsWith(suffix)) {
      return form.slice(0, -suffix.length);
    }
    return null;
  };

  // Pass 1: verb endings.
  for (const [suffix, endings] of FI_VERB_SUFFIXES) {
    const stem = cut(word, suffix);
    if (stem === null) {
      continue;
    }
    const hit = probe(stem, endings);
    if (hit) {
      return hit;
    }
  }

  // Pass 2: nominal endings, then the two stem rewrites in fixed order.
  for (const [suffix, endings] of FI_SUFFIXES) {
    const stem = cut(word, suffix);
    if (stem === null) {
      continue;
    }
    const plain = probe(stem, endings);
    if (plain) {
      return plain;
    }
    for (const rewrite of [applyFiGradation, applyFiStrengthening]) {
      const altered = rewrite(stem);
      if (altered === stem) {
        continue;
      }
      const hit = probe(altered, endings);
      if (hit) {
        return hit;
      }
    }
  }

  // Pass 3: possessive endings (these require three remaining characters).
  for (const possessive of ["ni", "si", "nsa", "ns\xE4", "mme", "nne"]) {
    if (!word.endsWith(possessive) || word.length <= possessive.length + 2) {
      continue;
    }
    const inner = word.slice(0, -possessive.length);
    if (dict[inner]) {
      return dict[inner];
    }
    for (const [suffix, endings] of FI_SUFFIXES) {
      const stem = cut(inner, suffix);
      if (stem === null) {
        continue;
      }
      const hit = probe(stem, endings);
      if (hit) {
        return hit;
      }
    }
  }
  return void 0;
}
|
|
1241
|
+
/**
 * Produces modern-spelling candidates for an older spelling (presumably
 * Dano-Norwegian forms being mapped to Bokmål, per the "Nb" naming).
 *
 * Variants, in the order they are returned:
 *   1. every "aa" replaced by "å" (when the word contains "aa");
 *   2. every "Aa" replaced by "Å" (when the word contains "Aa");
 *   3. both replacements applied together, when the word contains both
 *      digraphs and the result differs from 1 and 2. Without this, a word such
 *      as "Aagaard" only ever yielded the half-converted "Aagård" and
 *      "Ågaard", never "Ågård";
 *   4. a whole-word respelling for "af", "efter" and "imod".
 *
 * @param {string} word - the spelling to modernize.
 * @returns {string[]} candidate spellings; empty when no rule applies.
 */
function modernizeNb(word) {
  const variants = [];
  const hasLowerDigraph = word.includes("aa");
  const hasUpperDigraph = word.includes("Aa");
  if (hasLowerDigraph) {
    variants.push(word.replaceAll("aa", "\xE5"));
  }
  if (hasUpperDigraph) {
    variants.push(word.replaceAll("Aa", "\xC5"));
  }
  if (hasLowerDigraph && hasUpperDigraph) {
    // Appended after the single-digraph variants so their priority is kept.
    const both = word.replaceAll("Aa", "\xC5").replaceAll("aa", "\xE5");
    if (!variants.includes(both)) {
      variants.push(both);
    }
  }
  // A Map (not a plain object) so that words like "constructor" cannot pick up
  // inherited properties.
  const respellings = new Map([
    ["af", "av"],
    ["efter", "etter"],
    ["imod", "imot"]
  ]);
  const respelled = respellings.get(word);
  if (respelled !== undefined) {
    variants.push(respelled);
  }
  return variants;
}
|
|
1260
|
+
// Endings tried by lemmatizeNb. Each entry is [suffix, replacements]: the
// suffix is removed and every replacement is appended to the stem in turn.
// Entries are tried top to bottom.
var NB_SUFFIXES = [
  // 4+ char
  ["erne", ["", "e"]], ["enes", ["", "e"]], ["ande", ["", "e"]],
  ["ende", ["", "e"]], ["else", ["", "e"]],
  // Definite plural
  ["ene", ["", "e"]], ["ane", ["", "e"]],
  // 2 char definite singular
  ["en", ["", "e"]], ["et", ["", "e"]],
  // Past tense / participle
  ["te", ["", "e"]], ["de", ["", "e"]], ["dde", [""]],
  // Indefinite plural
  ["er", ["", "e"]],
  // Comparative / superlative
  ["ere", [""]], ["est", [""]], ["este", [""]],
  // Present tense
  ["ar", ["", "e"]],
  // General
  ["t", ["", "e"]], ["a", ["", "e"]], ["s", [""]],
  ["e", [""]], ["n", ["", "e"]], ["r", ["", "e"]]
];
|
|
1293
|
+
/**
 * Fallback dictionary lookup that combines spelling modernization
 * (modernizeNb) with suffix stripping (NB_SUFFIXES).
 *
 * Order of attempts, returning the first truthy dictionary value:
 *   1. each modernized spelling of the whole word, exact case then lowercase;
 *   2. for each applicable suffix: stem + replacement via tryLookup;
 *   3. for each applicable suffix: each modernized spelling of
 *      stem + replacement, exact case then lowercase.
 *
 * Step 3 now uses the same exact-then-lowercase lookup as step 1. Before, the
 * lowercase retry was only done for the whole word, so a capitalized old
 * spelling carrying a suffix could not reach a lowercase entry that the same
 * word without the suffix would have found.
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word that was not found as-is.
 * @returns {*} the matching dictionary value, or undefined.
 */
function lemmatizeNb(dict, word) {
  // Try every modernized spelling of `form`: as produced, then lowercased.
  const lookupModernized = (form) => {
    for (const variant of modernizeNb(form)) {
      const hit = dict[variant] || dict[variant.toLowerCase()];
      if (hit) {
        return hit;
      }
    }
    return void 0;
  };

  const wholeWord = lookupModernized(word);
  if (wholeWord) {
    return wholeWord;
  }

  // Suffixes that apply to this word; at least two characters of stem must
  // remain. Both passes below walk this list in table order.
  const applicable = NB_SUFFIXES.filter(
    ([suffix]) => word.length > suffix.length + 1 && word.endsWith(suffix)
  );

  // Plain suffix stripping first (tryLookup is defined elsewhere in this file).
  for (const [suffix, replacements] of applicable) {
    const stem = word.slice(0, -suffix.length);
    const ipa = tryLookup(dict, ...replacements.map((r) => stem + r));
    if (ipa) {
      return ipa;
    }
  }

  // Then suffix stripping followed by modernization of each candidate.
  for (const [suffix, replacements] of applicable) {
    const stem = word.slice(0, -suffix.length);
    for (const r of replacements) {
      const hit = lookupModernized(stem + r);
      if (hit) {
        return hit;
      }
    }
  }
  return void 0;
}
|
|
1329
|
+
// Suffixes that lemmatizeMa strips from the end of a word, in the order tried
// (presumably Malay/Indonesian clitics and derivational endings, per the "Ma"
// naming).
var MA_SUFFIXES = [
  "nya",
  "mu",
  "ku",
  "kan",
  "an",
  "lah",
  "kah",
  "i"
];
// Prefixes that tryMaPrefixStrip removes, in the order tried. The second
// element lists initial consonants to put back in front of the remainder when
// the bare remainder is not in the dictionary; "" entries are skipped by
// tryMaPrefixStrip because the bare remainder is always checked first.
var MA_PREFIXES = [
  ["memper", [""]], ["member", [""]], ["menge", [""]],
  ["meny", ["s", "c"]],
  ["meng", ["k", "g", "h", ""]],
  ["mem", ["p", "b", "f", ""]],
  ["men", ["t", "d", "c", "j", ""]],
  ["me", [""]],
  ["diper", [""]],
  ["ber", [""]], ["per", [""]], ["ter", [""]],
  ["di", [""]], ["ke", [""]], ["se", [""]], ["ku", [""]]
];
|
|
1348
|
+
/**
 * Fallback dictionary lookup that strips one suffix from MA_SUFFIXES and/or
 * one prefix via tryMaPrefixStrip.
 *
 * For every suffix the word ends with (leaving at least three characters),
 * the stem is looked up directly and then handed to tryMaPrefixStrip. If no
 * suffix produces a hit, prefix stripping is tried on the whole word.
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word that was not found as-is.
 * @returns {*} the matching dictionary value, or undefined.
 */
function lemmatizeMa(dict, word) {
  for (const ending of MA_SUFFIXES) {
    const leavesEnoughStem = word.length > ending.length + 2;
    if (!leavesEnoughStem || !word.endsWith(ending)) {
      continue;
    }
    const base = word.slice(0, -ending.length);
    // Direct hit on the stem wins; otherwise try removing a prefix as well.
    const hit = dict[base] || tryMaPrefixStrip(dict, base);
    if (hit) {
      return hit;
    }
  }
  return tryMaPrefixStrip(dict, word);
}
|
|
1363
|
+
/**
 * Removes a prefix listed in MA_PREFIXES and looks the remainder up.
 *
 * For each prefix the word starts with (leaving at least two characters), the
 * candidates are the bare remainder followed by the remainder with each
 * non-empty restoration consonant put back in front. The first candidate with
 * a truthy dictionary value wins; otherwise the next prefix is tried.
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word (or stem) to strip a prefix from.
 * @returns {*} the matching dictionary value, or undefined.
 */
function tryMaPrefixStrip(dict, word) {
  for (const [affix, initials] of MA_PREFIXES) {
    if (!word.startsWith(affix) || word.length <= affix.length + 1) {
      continue;
    }
    const rest = word.slice(affix.length);
    // Empty-string restorations are dropped: they equal the bare remainder,
    // which is already the first candidate.
    const candidates = [rest, ...initials.filter(Boolean).map((initial) => initial + rest)];
    const matched = candidates.find((key) => dict[key]);
    if (matched !== undefined) {
      return dict[matched];
    }
  }
  return void 0;
}
|
|
1382
|
+
// Zero-width non-joiner (U+200C), used below to split a word into parts.
var ZWNJ = "\u200C";
/**
 * Fallback dictionary lookup for words written with a zero-width non-joiner
 * and/or carrying a common suffix (presumably Persian, per the "Fa" naming).
 *
 * When the word contains ZWNJ, the following are tried in order:
 *   1. if it has exactly two parts and the first is one of the two verb
 *      prefixes listed below: the second part, then the second part with a
 *      personal ending removed;
 *   2. each part on its own, left to right;
 *   3. the parts joined without ZWNJ.
 * After that (and for words without ZWNJ) each listed suffix is stripped from
 * the whole word and the stem looked up.
 *
 * Step 1 used to run after step 2. Its direct lookup of the second part could
 * then never succeed (step 2 had already returned for that key), and a
 * dictionary entry for the bare prefix took priority over the verb itself.
 * Results differ from the old order only when the prefix is itself a
 * dictionary key and the verb (or its stem) is also present.
 *
 * @param {Object} dict - word -> IPA map; entries are read with `dict[key]`.
 * @param {string} word - the word that was not found as-is.
 * @returns {*} the matching dictionary value, or undefined.
 */
function lemmatizeFa(dict, word) {
  if (word.includes(ZWNJ)) {
    const parts = word.split(ZWNJ);
    const hasVerbPrefix =
      parts.length === 2 && (parts[0] === "\u0645\u06CC" || parts[0] === "\u0646\u0645\u06CC");
    if (hasVerbPrefix) {
      const verb = parts[1];
      if (dict[verb]) {
        return dict[verb];
      }
      // Personal endings; every ending is tried, so a shorter ending listed
      // before a longer one does not block the longer one.
      for (const ending of ["\u0646\u062F", "\u0645", "\u06CC", "\u062F", "\u06CC\u0645", "\u06CC\u062F"]) {
        if (verb.endsWith(ending) && verb.length > ending.length) {
          const stem = verb.slice(0, -ending.length);
          if (dict[stem]) {
            return dict[stem];
          }
        }
      }
    }
    // Generic fallback: any single part that is itself a dictionary word.
    for (const part of parts) {
      if (dict[part]) {
        return dict[part];
      }
    }
    const joined = parts.join("");
    if (dict[joined]) {
      return dict[joined];
    }
  }
  for (const suffix of [
    "\u0647\u0627\u06CC\u06CC",
    "\u0647\u0627\u06CC",
    "\u0647\u0627",
    "\u0627\u06CC",
    "\u0627\u0646",
    "\u0627\u062A",
    "\u06CC\u0646",
    "\u062A\u0631",
    "\u062A\u0631\u06CC\u0646",
    "\u0634",
    "\u0645",
    "\u062A"
  ]) {
    // At least two characters of stem must remain.
    if (word.endsWith(suffix) && word.length > suffix.length + 1) {
      const stem = word.slice(0, -suffix.length);
      if (dict[stem]) {
        return dict[stem];
      }
    }
  }
  return void 0;
}
|
|
1433
|
+
|
|
1434
|
+
// src/overrides/ar.ts
|
|
1435
|
+
var ar = {
|
|
1436
|
+
\u0623\u062D\u062F\u0627: "/\u0294a\u02C8\u0127adan/",
|
|
1437
|
+
// anyone
|
|
1438
|
+
\u0623\u062D\u062F\u0647\u0645\u0627: "/\u0294a\u0127adu\u02C8huma\u02D0/",
|
|
1439
|
+
// one of the two of them
|
|
1440
|
+
\u0623\u062D\u0631\u0627\u0631\u0627: "/\u0294a\u0127\u02C8ra\u02D0ran/",
|
|
1441
|
+
\u0623\u0633\u062E\u064A\u0644\u064A\u0648\u0633: "/\u0294asxi\u02D0\u02C8li\u02D0ju\u02D0s/",
|
|
1442
|
+
// Aeschylus
|
|
1443
|
+
\u0623\u0645\u0647\u0627\u062A: "/um\u02D0a\u02C8ha\u02D0t/",
|
|
1444
|
+
\u0623\u0646\u0633\u064A\u062A: "/\u0294un\u02C8si\u02D0t/",
|
|
1445
|
+
\u0623\u0646\u0639\u0645\u062A: "/\u0294an\u02C8\u0295amta/",
|
|
1446
|
+
\u0623\u0648\u0631\u0641\u0644\u064A\u0633: "/\u0294ur\u02C8fali\u02D0s/",
|
|
1447
|
+
// Orphalese (Gibran)
|
|
1448
|
+
\u0623\u064A\u062F\u064A\u0647\u0645: "/\u0294aj\u02C8di\u02D0him/",
|
|
1449
|
+
\u0627\u0633\u062A\u064A\u0642\u0638\u062A: "/istaj\u02C8qa\xF0\u02E4tu/",
|
|
1450
|
+
\u0627\u0633\u0645\u0647: "/\u02C8ismuhu/",
|
|
1451
|
+
// his name
|
|
1452
|
+
\u0627\u0639\u062A\u0632\u0627\u0644\u0647: "/i\u0295ti\u02C8za\u02D0lihi/",
|
|
1453
|
+
// his seclusion
|
|
1454
|
+
\u0627\u0644\u0623\u0642\u0627\u0648\u0627\u0644: "/al\u0294aqa\u02D0\u02C8wi\u02D0l/",
|
|
1455
|
+
// the sayings/talk
|
|
1456
|
+
\u0627\u0644\u0623\u0644\u064A\u0641\u0629: "/al\u0294a\u02C8li\u02D0fah/",
|
|
1457
|
+
\u0627\u0644\u0623\u0648\u0644\u0649: "/al\u02C8\u0294u\u02D0la\u02D0/",
|
|
1458
|
+
// the first (fem.)
|
|
1459
|
+
\u0627\u0644\u062D\u064A\u0627\u0629: "/al\u0127a\u02C8ja\u02D0h/",
|
|
1460
|
+
\u0627\u0644\u062E\u0644\u064A\u0641\u0629: "/alxa\u02C8li\u02D0fah/",
|
|
1461
|
+
\u0627\u0644\u062E\u0644\u064A\u0642\u0629: "/alxa\u02C8li\u02D0qah/",
|
|
1462
|
+
// creation/nature
|
|
1463
|
+
\u0627\u0644\u0630\u0643\u0631\u064A\u0627\u062A: "/a\xF0\u02D0ikra\u02C8ja\u02D0t/",
|
|
1464
|
+
\u0627\u0644\u0633\u0645\u0627\u0648\u0627\u062A: "/as\u02D0ama\u02D0\u02C8wa\u02D0t/",
|
|
1465
|
+
\u0627\u0644\u0636\u0627\u0644\u064A\u0646: "/ad\u02E4\u02D0a\u02D0\u02C8li\u02D0n/",
|
|
1466
|
+
\u0627\u0644\u0639\u0627\u0644\u0645\u064A\u0646: "/al\u0295a\u02D0la\u02C8mi\u02D0n/",
|
|
1467
|
+
\u0627\u0644\u0639\u0634\u0631\u0629: "/al\u02C8\u0295a\u0283ara/",
|
|
1468
|
+
// the ten
|
|
1469
|
+
\u0627\u0644\u063A\u0631\u0628\u0629: "/al\u02C8\u0263urbah/",
|
|
1470
|
+
\u0627\u0644\u063A\u0632\u0627\u0629: "/al\u0263u\u02C8za\u02D0h/",
|
|
1471
|
+
\u0627\u0644\u0643\u0631\u0627\u0645\u0629: "/alka\u02C8ra\u02D0mah/",
|
|
1472
|
+
\u0627\u0644\u0644\u0648\u0649: "/al\u02C8liwa\u02D0/",
|
|
1473
|
+
\u0627\u0644\u0645\u0635\u0637\u0641\u0649: "/almus\u02C8t\u02E4afa\u02D0/",
|
|
1474
|
+
\u0627\u0644\u0646\u062F\u0648\u0627\u062A: "/annada\u02C8wa\u02D0t/",
|
|
1475
|
+
// the seminars/forums
|
|
1476
|
+
\u0627\u0645\u0631\u0623\u0629: "/im\u02C8ra\u0294ah/",
|
|
1477
|
+
\u0627\u0647\u062F\u0646\u0627: "/ih\u02C8dina\u02D0/",
|
|
1478
|
+
\u0628\u0623\u0648\u0642\u0627\u0641\u0647: "/bi\u0294aw\u02C8qa\u02D0fihi/",
|
|
1479
|
+
// with his endowments
|
|
1480
|
+
\u0628\u0625\u0630\u0646\u0647: "/bi\u0294i\xF0\u02C8nihi/",
|
|
1481
|
+
\u0628\u0628\u0631\u0642\u0629: "/bi\u02C8barqah/",
|
|
1482
|
+
// in Barqa (place)
|
|
1483
|
+
\u0628\u062C\u0632\u0627\u0626\u0631: "/bid\u0292a\u02C8za\u02D0\u0294ir/",
|
|
1484
|
+
// with islands
|
|
1485
|
+
\u0628\u062F\u0627\u064A\u0629: "/bi\u02C8da\u02D0jah/",
|
|
1486
|
+
\u0628\u0630\u064A: "/bi\u02C8\xF0i\u02D0/",
|
|
1487
|
+
// of importance
|
|
1488
|
+
\u0628\u0634\u064A\u0621: "/bi\u02C8\u0283aj\u0294/",
|
|
1489
|
+
\u0628\u0639\u0636\u0627: "/\u02C8ba\u0295d\u02E4an/",
|
|
1490
|
+
\u0628\u0639\u0636\u0647\u0645: "/ba\u0295\u02C8d\u02E4uhum/",
|
|
1491
|
+
\u0628\u0639\u064A\u0646\u0647: "/bi\u02C8\u0295ajnihi/",
|
|
1492
|
+
\u0628\u0639\u064A\u0646\u064A\u0647\u0627: "/bi\u0295aj\u02C8najha\u02D0/",
|
|
1493
|
+
// Egyptian National Anthem — بلادي بلادي
|
|
1494
|
+
\u0628\u0644\u0627\u062F\u064A: "/bi\u02C8la\u02D0di\u02D0/",
|
|
1495
|
+
// my country
|
|
1496
|
+
\u0628\u064A\u062F\u0628\u0627: "/bajda\u02C8ba\u02D0/",
|
|
1497
|
+
// Bidpai
|
|
1498
|
+
\u062A\u0623\u062E\u0630\u0647: "/ta\u0294\u02C8xu\xF0uhu/",
|
|
1499
|
+
\u062A\u0633\u0642\u0646\u064A: "/tas\u02C8qini\u02D0/",
|
|
1500
|
+
\u062A\u0639\u0644\u0645\u062A: "/ta\u0295al\u02D0\u02C8amtu/",
|
|
1501
|
+
\u062A\u0642\u0641\u0646: "/taqif\u02C8na/",
|
|
1502
|
+
\u062A\u0642\u0644\u0628\u062A: "/taqal\u02D0a\u02C8bat/",
|
|
1503
|
+
// it turned/fluctuated
|
|
1504
|
+
\u062A\u0643\u0644\u0645\u064A: "/takal\u02D0a\u02C8mi\u02D0/",
|
|
1505
|
+
// speak! (fem.)
|
|
1506
|
+
\u062B\u0647\u0645\u062F: "/\u02C8\u03B8ahmad/",
|
|
1507
|
+
// Thahmad (place name)
|
|
1508
|
+
\u062C\u0644\u0633\u062A: "/d\u0292a\u02C8lasat/",
|
|
1509
|
+
\u062D\u0627\u0631\u062A\u0646\u0627: "/\u0127a\u02D0\u02C8ratna\u02D0/",
|
|
1510
|
+
\u062D\u0641\u0638\u0647\u0645\u0627: "/\u0127if\u02C8\xF0\u02E4ahuma\u02D0/",
|
|
1511
|
+
\u062D\u0643\u0627\u064A\u0629: "/\u0127i\u02C8ka\u02D0jah/",
|
|
1512
|
+
\u062D\u064A\u0627\u062A\u0647: "/\u0127a\u02C8ja\u02D0tahu/",
|
|
1513
|
+
\u062E\u0641\u064A\u0641\u0627: "/xa\u02C8fi\u02D0fan/",
|
|
1514
|
+
\u062E\u0644\u0627\u0644\u0647\u0627: "/xi\u02C8la\u02D0laha\u02D0/",
|
|
1515
|
+
\u062E\u0644\u0641\u0647\u0645: "/xal\u02C8fahum/",
|
|
1516
|
+
\u062E\u0645\u0631\u0627: "/\u02C8xamran/",
|
|
1517
|
+
\u062F\u0628\u0634\u0644\u064A\u0645: "/dab\u0283a\u02C8li\u02D0m/",
|
|
1518
|
+
// King Dabshelim
|
|
1519
|
+
\u0631\u0627\u0626\u062D\u0629: "/\u02C8ra\u02D0\u0294i\u0127ah/",
|
|
1520
|
+
\u0631\u0627\u0641\u0639\u0627: "/\u02C8ra\u02D0fi\u0295an/",
|
|
1521
|
+
\u0631\u0633\u0645\u0647\u0627: "/ras\u02C8muha\u02D0/",
|
|
1522
|
+
\u0631\u064A\u0627\u062D\u0643: "/ri\u02C8ja\u02D0\u0127aka/",
|
|
1523
|
+
\u0632\u0639\u0645\u0648\u0627: "/za\u02C8\u0295amu\u02D0/",
|
|
1524
|
+
\u0632\u0645\u0627\u0646\u0647: "/za\u02C8ma\u02D0nihi/",
|
|
1525
|
+
\u0633\u0627\u062F\u062A\u064A: "/sa\u02D0\u02C8dati\u02D0/",
|
|
1526
|
+
\u0633\u0627\u0633\u0627\u0646: "/sa\u02D0\u02C8sa\u02D0n/",
|
|
1527
|
+
// Sasanian dynasty
|
|
1528
|
+
\u0633\u0628\u0639\u0629: "/\u02C8sab\u0295ah/",
|
|
1529
|
+
\u0633\u062A\u062D\u0645\u0644\u0647: "/sata\u0127\u02C8miluhu/",
|
|
1530
|
+
\u0633\u0641\u064A\u0646\u062A\u0647: "/sa\u02C8fi\u02D0natuhu/",
|
|
1531
|
+
\u0633\u0643\u0631\u0629: "/\u02C8sakrah/",
|
|
1532
|
+
\u0633\u0646\u0629: "/\u02C8sanah/",
|
|
1533
|
+
\u0633\u0646\u064A\u0646: "/si\u02C8ni\u02D0n/",
|
|
1534
|
+
\u0634\u0647\u0631\u064A\u0627\u0631: "/\u0283ahri\u02C8ja\u02D0r/",
|
|
1535
|
+
// King Shahryar (1001 Nights)
|
|
1536
|
+
\u0634\u064A\u0626\u0627: "/\u02C8\u0283aj\u0294an/",
|
|
1537
|
+
\u0635\u0628\u0627\u062D\u0627: "/s\u02E4a\u02C8ba\u02D0\u0127an/",
|
|
1538
|
+
// morning
|
|
1539
|
+
\u0635\u062D\u0628\u064A: "/\u02C8s\u02E4a\u0127bi\u02D0/",
|
|
1540
|
+
// my companions
|
|
1541
|
+
\u0635\u063A\u0627\u0631\u0647\u0627: "/s\u02E4i\u02C8\u0263a\u02D0ruha\u02D0/",
|
|
1542
|
+
\u0637\u0627\u0644\u0639\u0627: "/\u02C8t\u02E4a\u02D0li\u0295an/",
|
|
1543
|
+
\u0637\u0648\u064A\u0644\u0629: "/t\u02E4a\u02C8wi\u02D0lah/",
|
|
1544
|
+
// Ibn Khaldun — المقدمة
|
|
1545
|
+
\u0638\u0627\u0647\u0631\u0647: "/\xF0\u02E4a\u02D0\u02C8hirihi/",
|
|
1546
|
+
// in its outward appearance
|
|
1547
|
+
\u0639\u0627\u0645\u0627: "/\u02C8\u0295a\u02D0man/",
|
|
1548
|
+
\u0639\u0628\u0644\u0629: "/\u02C8\u0295ablah/",
|
|
1549
|
+
// Abla (name)
|
|
1550
|
+
\u0639\u062C\u064A\u0628\u0629: "/\u0295a\u02C8d\u0292i\u02D0bah/",
|
|
1551
|
+
\u0639\u062F\u062A: "/\u02C8\u0295udtu/",
|
|
1552
|
+
\u0639\u0631\u0641\u062A: "/\u0295a\u02C8rafta/",
|
|
1553
|
+
// you knew
|
|
1554
|
+
\u0639\u0635\u0631\u0647: "/\u02C8\u0295as\u02E4rihi/",
|
|
1555
|
+
\u0639\u0644\u0645\u0647: "/\u02C8\u0295ilmihi/",
|
|
1556
|
+
\u0639\u0644\u064A\u0643: "/\u0295a\u02C8lajka/",
|
|
1557
|
+
\u0639\u0644\u064A\u0647\u0645: "/\u0295a\u02C8lajhim/",
|
|
1558
|
+
\u0639\u0646\u062F\u0647: "/\u02C8\u0295indahu/",
|
|
1559
|
+
\u0639\u0646\u0647\u0627: "/\u0295an\u02C8ha\u02D0/",
|
|
1560
|
+
// about it
|
|
1561
|
+
\u063A\u0627\u064A\u062A\u064A: "/\u0263a\u02D0\u02C8jati\u02D0/",
|
|
1562
|
+
// my goal
|
|
1563
|
+
\u063A\u064A\u0628\u0629: "/\u02C8\u0263ajbah/",
|
|
1564
|
+
\u0641\u0625\u0646: "/fa\u02C8\u0294in/",
|
|
1565
|
+
\u0641\u0627\u0633\u0642\u0646\u064A: "/fas\u02C8qini\u02D0/",
|
|
1566
|
+
\u0641\u0627\u063A\u062A\u0646\u0645\u0647\u0627: "/fa\u0263\u02C8tanimha\u02D0/",
|
|
1567
|
+
\u0641\u0627\u0644\u0645\u0642\u0631\u0627\u0629: "/fal\u02C8miqra\u02D0h/",
|
|
1568
|
+
\u0641\u062C\u0631\u0627: "/\u02C8fad\u0292ran/",
|
|
1569
|
+
\u0641\u062D\u0648\u0645\u0644: "/fa\u02C8\u0127awmal/",
|
|
1570
|
+
\u0641\u0645\u0627: "/fa\u02C8ma\u02D0/",
|
|
1571
|
+
\u0641\u064A\u0647: "/fi\u02D0hi/",
|
|
1572
|
+
\u0641\u064A\u0647\u0627: "/\u02C8fi\u02D0ha\u02D0/",
|
|
1573
|
+
\u0642\u0627\u0644\u062A: "/\u02C8qa\u02D0lat/",
|
|
1574
|
+
\u0642\u0635\u0629: "/\u02C8qis\u02E4\u02D0ah/",
|
|
1575
|
+
\u0642\u0644\u062A: "/\u02C8qultu/",
|
|
1576
|
+
// I said
|
|
1577
|
+
\u0643\u062A\u0627\u0628\u0627\u062A: "/kita\u02D0\u02C8ba\u02D0t/",
|
|
1578
|
+
\u0643\u0631\u0633\u064A\u0647: "/kur\u02C8sij\u02D0uhu/",
|
|
1579
|
+
\u0644\u062E\u0648\u0644\u0629: "/lixaw\u02C8latah/",
|
|
1580
|
+
// for Khawla (name)
|
|
1581
|
+
// Tarafa ibn al-Abd — المعلقة
|
|
1582
|
+
\u0644\u0643\u0650: "/\u02C8laki/",
|
|
1583
|
+
// to you (fem.)
|
|
1584
|
+
\u0644\u0646\u064A\u0644\u0643: "/li\u02C8najlika/",
|
|
1585
|
+
// for your Nile
|
|
1586
|
+
\u0644\u0648\u0647\u0644\u0629: "/li\u02C8wahlah/",
|
|
1587
|
+
\u0645\u0628\u0646\u064A\u0629: "/mab\u02C8nij\u02D0ah/",
|
|
1588
|
+
// Antara ibn Shaddad — المعلقة
|
|
1589
|
+
\u0645\u062A\u0631\u062F\u0645: "/muta\u02C8rad\u02D0im/",
|
|
1590
|
+
// worn out/in ruins
|
|
1591
|
+
\u0645\u062A\u0642\u0637\u0639\u0627: "/mutaqat\u02E4\u02C8t\u02E4i\u0295an/",
|
|
1592
|
+
\u0645\u062F\u064A\u0646\u0629: "/ma\u02C8di\u02D0nah/",
|
|
1593
|
+
\u0645\u0637\u064A\u0647\u0645: "/mat\u02E4aj\u02D0a\u02C8hum/",
|
|
1594
|
+
// their riding beasts
|
|
1595
|
+
// Abu al-Ala al-Ma'arri — غير مجد في ملتي
|
|
1596
|
+
\u0645\u0644\u062A\u064A: "/mil\u02D0a\u02C8ti\u02D0/",
|
|
1597
|
+
// my religion/creed
|
|
1598
|
+
\u0645\u0647\u064A\u0627\u0631: "/mah\u02C8ja\u02D0r/",
|
|
1599
|
+
// Mahyar (Adonis)
|
|
1600
|
+
\u0645\u064A\u0644\u0627\u062F\u0647: "/mi\u02D0\u02C8la\u02D0dihi/",
|
|
1601
|
+
\u0646\u0633\u062C\u062A\u0647\u0627: "/nasa\u02C8d\u0292atha\u02D0/",
|
|
1602
|
+
\u0646\u0648\u0645\u0627: "/\u02C8nawman/",
|
|
1603
|
+
\u0647\u0627\u062F\u0645\u0627: "/\u02C8ha\u02D0diman/",
|
|
1604
|
+
\u0647\u0648\u0630\u0627: "/ha\u02D0\u02C8\xF0a\u02D0/",
|
|
1605
|
+
\u0648\u0623\u0639\u0648\u0627\u0646: "/wa\u0294a\u0295\u02C8wa\u02D0n/",
|
|
1606
|
+
// and followers
|
|
1607
|
+
\u0648\u0625\u0646\u0645\u0627: "/wa\u0294in\u02D0a\u02C8ma\u02D0/",
|
|
1608
|
+
\u0648\u0625\u064A\u0627\u0643: "/wa\u0294ij\u02C8ja\u02D0ka/",
|
|
1609
|
+
\u0648\u0627\u0633\u0644\u0645\u064A: "/wasla\u02C8mi\u02D0/",
|
|
1610
|
+
// and farewell/be safe (fem.)
|
|
1611
|
+
\u0648\u0627\u0639\u062A\u0642\u0627\u062F\u064A: "/wa\u0294i\u0295tiqa\u02D0\u02C8di\u02D0/",
|
|
1612
|
+
// and my belief
|
|
1613
|
+
\u0648\u0627\u0644\u0633\u0646\u0629: "/was\u02D0\u02C8anah/",
|
|
1614
|
+
\u0648\u0627\u0644\u0642\u0627\u0644: "/wal\u02C8qa\u02D0l/",
|
|
1615
|
+
// and the talk
|
|
1616
|
+
\u0648\u0627\u0644\u0645\u062D\u0628\u0629: "/walma\u02C8\u0127ab\u02D0ah/",
|
|
1617
|
+
\u0648\u0628\u0634\u0631\u0648\u0637\u0647: "/wabi\u0283u\u02C8ru\u02D0tihi/",
|
|
1618
|
+
// and his conditions
|
|
1619
|
+
\u0648\u062D\u0643\u0627\u064A\u0627\u062A: "/wa\u0127ika\u02D0\u02C8ja\u02D0t/",
|
|
1620
|
+
\u0648\u0634\u0628\u064A\u0647: "/wa\u0283a\u02C8bi\u02D0h/",
|
|
1621
|
+
// and similar to
|
|
1622
|
+
\u0648\u0634\u0645\u0623\u0644: "/wa\u0283a\u02C8ma\u0294l/",
|
|
1623
|
+
\u0648\u0636\u0639\u0647: "/\u02C8wad\u02E4\u0295ihi/",
|
|
1624
|
+
\u0648\u0639\u0644\u0649: "/wa\u02C8\u0295ala\u02D0/",
|
|
1625
|
+
// and upon
|
|
1626
|
+
\u0648\u0639\u0644\u064A\u0647\u0645: "/wa\u0295a\u02C8lajhim/",
|
|
1627
|
+
\u0648\u063A\u0627\u0628: "/wa\u02C8\u0263a\u02D0b/",
|
|
1628
|
+
\u0648\u0641\u0624\u0627\u062F\u064A: "/wafu\u02C8\u0294a\u02D0di\u02D0/",
|
|
1629
|
+
// and my heart
|
|
1630
|
+
\u0648\u0641\u0637\u0646\u0629: "/wa\u02C8fit\u02E4nah/",
|
|
1631
|
+
\u0648\u0642\u0648\u0641\u0627: "/wu\u02C8qu\u02D0fan/",
|
|
1632
|
+
// standing
|
|
1633
|
+
\u0648\u0645\u0627: "/wa\u02C8ma\u02D0/",
|
|
1634
|
+
\u0648\u0645\u062A\u0633\u0627\u0648\u064A\u0646: "/wamutasa\u02D0\u02C8wi\u02D0n/",
|
|
1635
|
+
\u0648\u0645\u0646\u0632\u0644: "/wa\u02C8manzil/",
|
|
1636
|
+
\u0648\u0647\u0628\u0648\u0627: "/wu\u02C8hibu\u02D0/",
|
|
1637
|
+
\u064A\u0624\u0648\u062F\u0647: "/ja\u0294u\u02D0\u02C8duhu/",
|
|
1638
|
+
\u064A\u062A\u062E\u0637\u0649: "/jata\u02C8xat\u02E4\u02D0a\u02D0/",
|
|
1639
|
+
\u064A\u062D\u064A\u0637\u0648\u0646: "/ju\u0127i\u02D0\u02C8t\u02E4u\u02D0n/",
|
|
1640
|
+
\u064A\u0636\u0639\u0647: "/jad\u02E4a\u02C8\u0295uhu/",
|
|
1641
|
+
\u064A\u0639\u0627\u0645\u0644\u0648\u0627: "/ju\u02C8\u0295a\u02D0milu\u02D0/",
|
|
1642
|
+
\u064A\u0642\u0648\u0644\u0648\u0646: "/jaqu\u02D0\u02C8lu\u02D0n/",
|
|
1643
|
+
// they say
|
|
1644
|
+
\u064A\u0647\u062A\u0645\u0648\u0627: "/jahtam\u02C8mu\u02D0/"
|
|
1645
|
+
// they cared
|
|
1646
|
+
};
|
|
1647
|
+
|
|
1648
|
+
// src/overrides/de.ts
|
|
1649
|
+
/**
 * German pronunciation overrides (from src/overrides/de.ts).
 *
 * Maps a word, spelled exactly as the key is written, to a slash-delimited
 * IPA transcription. Trailing comments gloss the entry on the same line.
 *
 * NOTE(review): key casing is mixed (Aires, Buenos, New, York are
 * capitalised); how the lookup normalises case is not visible here — confirm
 * that these keys are reachable.
 */
var de = {
  abendsonnenschein: "/\u02C8a\u02D0b\u0259nt\u02CCz\u0254n\u0259n\u0283a\u026A\u032Fn/",
  // Foreign city names in German texts
  Aires: "/\u02C8a\u026A\u032F\u0281\u025Bs/",
  andrer: "/\u02C8and\u0281\u0250/",
  balde: "/\u02C8bald\u0259/",
  bewu\u00DFtsein: "/b\u0259\u02C8v\u028Ast\u02CCza\u026A\u032Fn/",
  bl\u00FCtenschimmer: "/\u02C8bly\u02D0t\u0259n\u02CC\u0283\u026Am\u0250/",
  brahmanensohn: "/\u02C8b\u0281a\u02D0ma\u02D0n\u0259n\u02CCzo\u02D0n/",
  Buenos: "/\u02C8bue\u02D0n\u0254s/",
  "c\u2019est": "/s\u025B/", // French in Mann's Buddenbrooks
  ch\u00E8re: "/\u0283\u025B\u02D0\u0281/", // French in Mann's Buddenbrooks
  da\u00DF: "/das/",
  "davos-platz": "/\u02C8da\u02D0v\u0254s\u02CCplats/", // Swiss resort area
  demoiselle: "/d\u0259mwa\u02C8z\u025Bl/", // French in Mann's Buddenbrooks
  d\u00FCwel: "/\u02C8dy\u02D0v\u0259l/", // Low German: devil
  fa\u00DFt: "/fast/",
  feuertrunken: "/\u02C8f\u0254\u026A\u032F\u0250\u02CCt\u0281\u028A\u014Bk\u0259n/",
  flu\u00DFufers: "/\u02C8fl\u028As\u02CC\u0294u\u02D0f\u0250s/",
  fr\u00FChlingsnachmittag: "/\u02C8f\u0281y\u02D0l\u026A\u014Bs\u02CCna\u02D0xm\u026Ata\u02D0k/",
  gefahrdrohende: "/\u0261\u0259\u02C8fa\u02D0\u0250\u032F\u02CCd\u0281o\u02D0\u0259nd\u0259/",
  gek\u00FC\u00DFt: "/\u0261\u0259\u02C8k\u028Fst/",
  g\u00F6tterfunken: "/\u02C8\u0261\u0153t\u0250\u02CCf\u028A\u014Bk\u0259n/",
  govinda: "/\u0261o\u02C8v\u026Anda/", // Sanskrit name
  graub\u00FCndischen: "/\u0261\u0281a\u028A\u032F\u02C8b\u028Fnd\u026A\u0283\u0259n/",
  guizot: "/\u0261i\u02C8zo\u02D0/", // French name
  h\u00E4tt: "/h\u025Bt/", // contraction of hätte
  "ich\u2019s": "/\u026A\xE7s/",
  k\u00FC\u00DFnacht: "/\u02C8k\u028Fsnaxt/",
  macheath: "/m\u0259\u02C8ki\u02D0\u03B8/", // English name
  metternich: "/\u02C8m\u025Bt\u0250n\u026A\xE7/",
  m\u00FC\u00DFt: "/m\u028Fst/",
  mu\u00DFte: "/\u02C8m\u028Ast\u0259/",
  New: "/nju\u02D0/",
  ook: "/o\u02D0k/", // Low German: also
  "prinz-regentenstra\xDFe": "/\u02C8p\u0281\u026Ants\u0281e\u02CC\u0261\u025Bnt\u0259n\u02CC\u0283t\u0281a\u02D0s\u0259/",
  question: "/k\u025Bs\u02C8tj\u0254\u0303/", // French in Mann's Buddenbrooks
  salwaldes: "/\u02C8zal\u02CCvald\u0259s/", // Sal forest
  samsa: "/\u02C8zamza/", // Kafka character
  schriee: "/\u02C8\u0283\u0281i\u02D0\u0259/",
  siddhartha: "/z\u026A\u02C8da\u02D0\u0281ta/",
  sternklar: "/\u02C8\u0283t\u025B\u0281nkla\u02D0\u0250\u032F/",
  tr\u00E8s: "/t\u0281\u025B/", // French in Mann's Buddenbrooks
  verliess: "/f\u025B\u0250\u032F\u02C8li\u02D0s/",
  York: "/j\u0254\u0281k/",
  zarathustra: "/tsa\u0281a\u02C8t\u028Ast\u0281a/"
};
|
|
1710
|
+
|
|
1711
|
+
// src/overrides/eo.ts
|
|
1712
|
+
/**
 * Esperanto pronunciation overrides (from src/overrides/eo.ts).
 *
 * Intentionally empty: G2P handles all Esperanto words with correct
 * penultimate stress. Add an entry here only for a word whose IPA dictionary
 * entry is incorrect and needs correction.
 */
var eo = {};
|
|
1717
|
+
|
|
1718
|
+
// src/overrides/es.ts
|
|
1719
|
+
/**
 * Spanish pronunciation overrides (from src/overrides/es.ts).
 *
 * Maps a word to a slash-delimited IPA transcription. Several entries are
 * foreign words or names, as noted in the trailing gloss on each line.
 */
var es = {
  arts: "/a\u0281/", // French "Arts" in Cortázar (Pont des Arts)
  aureliano: "/aw\u027Ee\u02C8ljano/",
  beatriz: "/bea\u02C8t\u027Eis/",
  buend\u00EDa: "/bwen\u02C8dia/",
  ca\u00F1abrava: "/ka\u0272a\u02C8\u03B2\u027Ea\u03B2a/",
  conti: "/\u02C8konti/", // Italian name in Cortázar
  fierro: "/\u02C8fjero/", // orthographic "rr" is the trill /r/, not the tap /ɾ/
  macondo: "/ma\u02C8kondo/",
  pont: "/p\u0254\u0303/", // French in Cortázar
  porfirio: "/po\u027E\u02C8fi\u027Ejo/",
  quai: "/ke/", // French in Cortázar
  qued\u00F3se: "/ke\u02C8\xF0ose/",
  rue: "/\u0281y/", // French in Cortázar
  seine: "/s\u025Bn/", // French river in Cortázar
  s\u00E9pase: "/\u02C8sepase/",
  urbino: "/u\u027E\u02C8\u03B2ino/",
  vacilante: "/basi\u02C8lante/",
  viterbo: "/bi\u02C8te\u027E\u03B2o/"
};
|
|
1745
|
+
|
|
1746
|
+
// src/overrides/fa.ts
|
|
1747
|
+
/**
 * Persian pronunciation overrides (from src/overrides/fa.ts).
 *
 * Maps a word (keys are written as ASCII-escaped Persian/Arabic script, some
 * containing a zero-width non-joiner, U+200C) to a slash-delimited IPA
 * transcription. Trailing comments gloss the entry on the same line;
 * `// --- Title ---` lines name a source text.
 *
 * NOTE(review): the `--- Title ---` markers are interleaved with
 * alphabetically ordered keys, so each one presumably belongs only to the
 * entry that follows it — confirm against the original source file.
 *
 * Transcription conventions visible in this table: ر is written ɾ, غ is
 * written ɣ, ق is written ɢ and گ is written ɡ.
 */
var fa = {
  \u0622\u0628\u0650: "/\u0252\u02D0be/",
  \u0622\u0631\u0627\u06CC: "/\u0252\u02D0\u027E\u0252\u02D0j/",
  \u0622\u0641\u0631\u06CC\u0646: "/\u0252\u02D0f\xE6\u027Ei\u02D0n/", // creator
  \u0622\u0645\u0648\u06CC: "/\u0252\u02D0mu\u02D0j/",
  \u0622\u0648\u0627\u0631\u0647: "/\u0252\u02D0v\u0252\u02D0\u027Ee/", // wandering, exiled
  \u0622\u0648\u0627\u0632\u0647: "/\u0252\u02D0v\u0252\u02D0ze/", // fame, renown
  \u0622\u06CC\u064E\u062F: "/\u0252\u02D0j\xE6d/",
  // --- Forough Farrokhzad, Another Birth ---
  \u0622\u06CC\u0647: "/\u0252\u02D0je/", // verse (of scripture)
  \u0627\u0628\u062F\u06CC: "/\xE6b\xE6di\u02D0/", // eternal
  \u0627\u0628\u0648\u0627\u0644\u0641\u0636\u0644: "/\xE6bolf\xE6zl/",
  \u0627\u062C\u062A\u0645\u0627\u0639\u06CC: "/ed\u0361\u0292tem\u0252\u02D0\u0294i\u02D0/",
  \u0627\u062F\u0631: "/\xE6de\u027E/", // pour! (Arabic imperative)
  \u0627\u0633\u0627\u0633: "/\xE6s\u0252\u02D0s/",
  // --- Iranian Constitution ---
  \u0627\u0633\u0627\u0633\u06CC: "/\xE6s\u0252\u02D0si\u02D0/",
  \u0627\u0635\u0648\u0644: "/osu\u02D0l/",
  \u0627\u0641\u062A\u0627\u062F: "/oft\u0252\u02D0d/", // fell, befell
  // --- UDHR Article 1 ---
  \u0627\u0641\u0631\u0627\u062F: "/\xE6f\u027E\u0252\u02D0d/", // individuals
  \u0627\u0642\u062A\u0635\u0627\u062F\u06CC: "/e\u0262tes\u0252\u02D0di\u02D0/",
  // --- Hafez, Divan (Ghazal 1) ---
  \u0627\u0644\u0627: "/\xE6l\u0252\u02D0/", // O! behold
  \u0627\u0644\u0633\u0627\u0642\u06CC: "/\xE6ss\u0252\u02D0\u0262i\u02D0/", // the cupbearer (Arabic definite)
  \u0627\u0644\u0644\u0651\u0647: "/\xE6ll\u0252\u02D0h/",
  \u0627\u0646\u062F\u0631\u0634: "/\xE6nd\xE6\u027E\xE6\u0283/", // within it
  \u0627\u0646\u0632\u0648\u0627: "/enzev\u0252\u02D0/", // seclusion
  \u0627\u0646\u0639\u06A9\u0627\u0633: "/en\u0294ek\u0252\u02D0s/",
  // --- Sepehri ---
  \u0627\u0647\u0644: "/\xE6hl/",
  \u0627\u06CC: "/ej/", // O! (vocative)
  \u0627\u06CC\u0647\u0627: "/\xE6jjoh\u0252\u02D0/", // O! (Arabic vocative)
  \u0628\u0627\u062F\u0647\u0627\u06CC: "/b\u0252\u02D0dh\u0252\u02D0je/", // winds (with ezafeh)
  "\u0628\u0628\u0631\u06CC\u062F\u0647\u200C\u0627\u0646\u062F": "/bob\u027Ei\u02D0de\xE6nd/", // they have cut
  // --- Baba Taher ---
  \u0628\u0628\u0646\u062F\u0645: "/beb\xE6nd\xE6m/",
  \u0628\u062A\u0627\u0628\u062F: "/bet\u0252\u02D0b\xE6d/", // will shine
  \u0628\u062C\u0627\u0646: "/bed\u0361\u0292\u0252\u02D0n/",
  // --- Attar, Conference of the Birds ---
  \u0628\u062E\u0634\u06CC\u062F: "/b\xE6x\u0283i\u02D0d/",
  \u0628\u062F\u0627\u0631: "/bed\u0252\u02D0\u027E/",
  \u0628\u062F\u0627\u0634\u062A: "/bed\u0252\u02D0\u0283t/",
  \u0628\u0631\u062A\u0631: "/b\xE6\u027Et\xE6\u027E/", // higher, superior
  \u0628\u0631\u062F: "/bo\u027Ed/", // will carry (subjunctive)
  \u0628\u0633\u062A\u0646\u062F: "/b\xE6st\xE6nd/",
  \u0628\u0634\u0627\u06CC\u062F: "/be\u0283\u0252\u02D0j\xE6d/",
  \u0628\u0634\u0631: "/b\xE6\u0283\xE6\u027E/", // humankind
  // --- Rumi, Masnavi (Song of the Reed) ---
  \u0628\u0634\u0646\u0648: "/be\u0283no/", // listen!
  \u0628\u0634\u0648\u06CC\u0645: "/be\u0283u\u02D0j\xE6m/",
  \u0628\u06AF\u0631\u062F\u0645: "/be\u0261\xE6\u027Ed\xE6m/",
  \u0628\u06AF\u0634\u0627\u06CC: "/be\u0261o\u0283\u0252\u02D0j/",
  \u0628\u06AF\u0648\u06CC\u0645: "/be\u0261u\u02D0j\xE6m/", // let me say
  \u0628\u0646\u0627\u0632\u0645: "/ben\u0252\u02D0z\xE6m/",
  // --- Saadi, Bustan ---
  \u0628\u0646\u0627\u0645: "/ben\u0252\u02D0m/", // in the name of
  \u0628\u0646\u0634\u0633\u062A\u0647: "/bene\u0283\xE6ste/", // has sat
  \u0628\u0646\u0645\u0627\u06CC: "/ben\xE6m\u0252\u02D0j/",
  // --- Rudaki ---
  \u0628\u0648\u06CC\u0650: "/bu\u02D0je/",
  \u0628\u06CC\u0647\u0642\u06CC: "/bejh\xE6\u0262i\u02D0/",
  \u067E\u064E\u0631\u0646\u06CC\u0627\u0646: "/p\xE6\u027Eni\u02D0\u0252\u02D0n/",
  // --- Sanai ---
  \u067E\u0631\u0648\u0631: "/p\xE6\u027Ev\xE6\u027E/",
  \u067E\u0633\u062A\u06CC: "/p\xE6sti\u02D0/",
  "\u067E\u0648\u0632\u0634\u200C\u067E\u0630\u06CC\u0631": "/pu\u02D0ze\u0283p\xE6zi\u02D0\u027E/", // excuse-accepting
  \u062A\u0627\u0631\u06CC\u06A9\u06CC\u0633\u062A: "/t\u0252\u02D0\u027Ei\u02D0ki\u02D0st/", // is a darkness (contraction)
  \u062A\u062D\u0642\u06CC\u0642: "/t\xE6h\u0262i\u02D0\u0262/",
  \u062A\u0648\u0641\u06CC\u0642: "/to\u028Afi\u02D0\u0262/",
  "\u062C\u062F\u0627\u06CC\u06CC\u200C\u0647\u0627": "/d\u0361\u0292od\u0252\u02D0ji\u02D0h\u0252\u02D0/", // separations
  \u062C\u0644: "/d\u0361\u0292\xE6ll/", // majesty
  \u062C\u0648\u0627\u0628\u0650: "/d\u0361\u0292\xE6v\u0252\u02D0be/",
  \u062C\u0648\u06CC\u0650: "/d\u0361\u0292u\u02D0je/",
  \u062D\u0627\u0641\u0638: "/h\u0252\u02D0fez/",
  \u062D\u0627\u0644: "/h\u0252\u02D0l/", // state, condition
  \u062D\u0627\u0644\u06CC: "/h\u0252\u02D0li\u02D0/", // pleasant (archaic: now)
  \u062D\u0642\u0648\u0642: "/ho\u0262u\u02D0\u0262/", // rights
  \u062D\u06A9\u06CC\u0645: "/h\xE6ki\u02D0m/", // wise
  \u062D\u06CC\u0627\u062A: "/h\xE6j\u0252\u02D0t/", // life
  \u062D\u06CC\u062B\u06CC\u062A: "/hejsi\u02D0j\xE6t/", // dignity
  // --- Nezami, Khosrow o Shirin ---
  \u062E\u062F\u0627\u0648\u0646\u062F\u0627: "/xod\u0252\u02D0v\xE6nd\u0252\u02D0/",
  // --- Saadi, Golestan ---
  \u062E\u062F\u0627\u06CC: "/xod\u0252\u02D0j/", // God (with ezafeh)
  \u062E\u0631\u062F\u0628\u062E\u0634: "/xe\u027E\xE6db\xE6x\u0283/",
  \u062E\u0637\u0627\u0628\u062E\u0634: "/x\xE6t\u0252\u02D0b\xE6x\u0283/", // sin-forgiving
  \u062E\u0650\u0646\u06AF\u0650: "/xen\u0261e/",
  \u062E\u0648\u0634\u0627: "/xo\u0283\u0252\u02D0/",
  \u062E\u0648\u0634\u062E\u0648\u0627\u0646: "/xo\u0283x\u0252\u02D0n/", // sweet-singing
  // --- Behbahani ---
  \u062E\u0648\u06CC\u0634: "/xi\u02D0\u0283/",
  \u062E\u06CC\u0632\u0631\u0627\u0646: "/xi\u02D0z\u027E\u0252\u02D0n/", // bamboo
  \u062F\u0627\u0631\u0627\u06CC: "/d\u0252\u02D0\u027E\u0252\u02D0je/", // possessing
  \u062F\u0627\u0646\u0646\u062F: "/d\u0252\u02D0n\xE6nd/", // they know
  \u062F\u0631\u0650: "/d\xE6\u027Ee/",
  "\u062F\u064F\u0631\u0634\u062A\u06CC\u200C\u0647\u0627\u06CC": "/do\u027Eo\u0283ti\u02D0h\u0252\u02D0je/",
  "\u062F\u0631\u06CC\u0686\u0647\u200C\u06CC": "/d\xE6\u027Ei\u02D0t\u0361\u0283eje/",
  \u062F\u0633\u062A\u06AF\u06CC\u0631: "/d\xE6st\u0261i\u02D0\u027E/", // helper
  \u062F\u0644\u06CC: "/deli/",
  \u062F\u0648\u0633\u062A\u0627\u0646\u06CC: "/du\u02D0st\u0252\u02D0ni\u02D0/",
  \u062F\u06CC\u062F\u0627\u0631: "/di\u02D0d\u0252\u02D0\u027E/",
  \u0630\u0648\u0642\u06CC: "/zo\u028A\u0262i\u02D0/",
  \u0631\u0627: "/\u027E\u0252\u02D0/", // object marker
  \u0631\u0627\u0632\u0642: "/\u027E\u0252\u02D0ze\u0262/",
  "\u0631\u0633\u062A\u0646\u200C\u0647\u0627\u06CC": "/\u027E\xE6st\xE6nh\u0252\u02D0je/", // growings (with ezafeh)
  \u0631\u0636\u06CC: "/\u027E\xE6zi\u02D0/",
  \u0631\u0641\u062A\u0627\u0631: "/\u027E\xE6ft\u0252\u02D0\u027E/", // behavior
  \u0631\u0647: "/\u027E\xE6h/",
  \u0631\u0647\u0650: "/\u027E\xE6he/",
  \u0631\u0647\u0646\u0645\u0627\u06CC: "/\u027E\xE6hn\xE6m\u0252\u02D0j/", // guide
  \u0631\u0648\u062D\u06CC\u0647: "/\u027Eu\u02D0hi\u02D0je/", // spirit, morale
  \u0631\u0648\u0632\u06AF\u0627\u0631\u0645: "/\u027Eu\u02D0ze\u0261\u0252\u02D0\u027E\xE6m/",
  "\u0631\u0648\u0632\u06CC\u200C\u062F\u0647": "/\u027Eu\u02D0zi\u02D0deh/", // sustenance-giver
  \u0631\u06CC\u06AF\u0650: "/\u027Ei\u02D0\u0261e/",
  \u0632\u0627\u06CC\u06CC\u062F\u0647: "/z\u0252\u02D0ji\u02D0de/", // born
  \u0632\u0628\u0627\u0646\u06CC: "/z\xE6b\u0252\u02D0ni\u02D0/",
  // --- Hedayat, The Blind Owl ---
  "\u0632\u062E\u0645\u200C\u0647\u0627\u06CC\u06CC": "/z\xE6xmh\u0252\u02D0ji\u02D0/", // wounds (indefinite)
  \u0632\u06CC: "/zi\u02D0/",
  \u0632\u06CC\u0652: "/zi\u02D0/",
  "\u0633\u0627\u062D\u0644\u200C\u0647\u0627": "/s\u0252\u02D0helh\u0252\u02D0/", // shores
  \u0633\u0628\u06A9\u0628\u0627\u0631\u0627\u0646: "/s\xE6bokb\u0252\u02D0\u027E\u0252\u02D0n/", // light-laden ones
  \u0633\u0628\u06A9\u062A\u06AF\u06CC\u0646: "/sobokte\u0261i\u02D0n/",
  \u0633\u062D\u0631\u06AF\u0627\u0647: "/s\xE6h\xE6\u027E\u0261\u0252\u02D0h/", // dawn
  // --- Nezami, Layli o Majnun ---
  // غ is ɣ (U+0263) and ر is ɾ, matching every other entry in this table.
  \u0633\u0631\u0622\u063A\u0627\u0632: "/s\xE6\u027E\u0252\u02D0\u0263\u0252\u02D0z/",
  \u0633\u0631\u0627\u06CC\u062F: "/so\u027E\u0252\u02D0j\xE6d/",
  \u0633\u0644\u0627\u0645\u06CC: "/s\xE6l\u0252\u02D0mi\u02D0/",
  // --- Khayyam, Rubaiyat ---
  \u0633\u0648\u062F\u0627: "/so\u028Ad\u0252\u02D0/", // passion, melancholy
  \u0633\u06CC\u0627\u0633\u06CC: "/sij\u0252\u02D0si\u02D0/",
  \u0634\u0627\u062F\u0645\u0627\u0646: "/\u0283\u0252\u02D0dm\u0252\u02D0n/",
  \u0634\u0631\u062D: "/\u0283\xE6\u027Eh/", // explanation
  \u0634\u0631\u062D\u0647: "/\u0283\xE6\u027Ehe/", // explanation (with ezafeh)
  \u0634\u06A9\u0627\u06CC\u062A: "/\u0283ek\u0252\u02D0j\xE6t/", // complaint
  "\u0634\u06A9\u0641\u062A\u0646\u200C\u0647\u0627": "/\u0283ekoft\xE6nh\u0252\u02D0/", // blossomings
  \u0635\u0644\u062A\u0650: "/sele\u02D0te/",
  \u0635\u0646\u0639: "/son\u0294/",
  \u0636\u0648\u0627\u0628\u0637: "/z\xE6v\u0252\u02D0bet/",
  \u0637\u0627\u0639\u062A\u0634: "/t\u0252\u02D0\u0294\xE6t\xE6\u0283/", // his obedience
  \u0639\u0632: "/\u0294\xE6zz/", // glory
  \u0639\u0645\u0631: "/om\u027E/",
  \u0639\u0646\u0647: "/\xE6nho/",
  \u063A\u0627\u0632\u06CC: "/\u0263\u0252\u02D0zi\u02D0/",
  \u063A\u0627\u06CC\u062A: "/\u0263\u0252\u02D0j\xE6t/",
  \u063A\u0632\u0644: "/\u0263\xE6z\xE6l/",
  \u063A\u0632\u0646\u0648\u06CC: "/\u0263\xE6zn\xE6vi\u02D0/",
  \u063A\u0632\u0646\u06CC: "/\u0263\xE6zni\u02D0/",
  \u0641\u0631\u0627\u0642: "/fe\u027E\u0252\u02D0\u0262/", // separation
  \u0641\u0631\u062F: "/f\xE6\u027Ed/", // alone, single
  \u0641\u0631\u0647\u0646\u06AF\u06CC: "/f\xE6\u027Eh\xE6n\u0261i\u02D0/",
  \u0641\u0631\u0648: "/fo\u027Eu\u02D0/", // down
  \u0641\u0644\u06A9: "/f\xE6l\xE6k/",
  \u0642\u062F\u06A9: "/\u0262\xE6d\xE6k/",
  \u0642\u0631\u0628\u062A: "/\u0262o\u027Eb\xE6t/", // nearness (to God)
  "\u0642\u0635\u06CC\u062F\u0647\u200C\u0627\u06CC": "/\u0262\xE6si\u02D0de\xE6j/",
  // --- Nima Yushij, The Phoenix ---
  \u0642\u0642\u0646\u0648\u0633: "/\u0262o\u0262nu\u02D0s/", // phoenix
  \u06A9\u0622\u0641\u0631\u06CC\u0646\u062A: "/k\u0252\u02D0f\xE6\u027Ei\u02D0n\xE6t/",
  \u06A9\u0627\u0631\u06AF\u0634\u0627\u06CC: "/k\u0252\u02D0\u027E\u0261o\u0283\u0252\u02D0j/",
  \u06A9\u0627\u0633\u0627: "/k\xE6\u0294s\u0252\u02D0/", // a cup (Arabic accusative, Persianized)
  \u06A9\u0627\u0634\u0627\u0646\u0645: "/k\u0252\u02D0\u0283\u0252\u02D0n\xE6m/",
  \u06A9\u0627\u0648: "/k\u0252\u02D0w/",
  \u06A9\u0631\u06CC\u0645: "/k\xE6\u027Ei\u02D0m/", // generous
  // --- Ferdowsi, Shahnameh ---
  \u06A9\u0632: "/k\xE6z/", // contraction of که از (that from)
  \u06A9\u0634\u06CC\u062F\u0645: "/ke\u0283i\u02D0d\xE6m/", // I drew (a sigh)
  \u06A9\u0644\u0627\u0645: "/k\xE6l\u0252\u02D0m/",
  \u06A9\u0646\u0627\u0646: "/kon\u0252\u02D0n/", // doing (present participle suffix)
  \u06A9\u0646\u0645: "/kon\xE6m/",
  \u06A9\u0646\u0646\u062F: "/kon\xE6nd/", // they do (subjunctive)
  \u06AF\u0631\u062F\u0627\u0628\u06CC: "/\u0261e\u027Ed\u0252\u02D0bi\u02D0/", // a whirlpool (indefinite)
  // --- Akhavan-Sales ---
  \u06AF\u0641\u062A: "/\u0261oft/",
  \u06AF\u064F\u0644: "/\u0261ol/",
  \u06AF\u0648\u06CC\u062F: "/\u0261u\u02D0j\xE6d/",
  // --- Beyhaqi ---
  \u06AF\u0648\u06CC\u0646\u062F\u0647\u0654: "/\u0261u\u02D0j\xE6ndeje/",
  \u0644\u063A\u0632\u0627\u0646: "/l\xE6\u0263z\u0252\u02D0n/",
  \u0645\u0627\u062F\u0631\u06CC: "/m\u0252\u02D0d\xE6\u027Ei\u02D0/",
  \u0645\u0627\u0646\u062F\u0647: "/m\u0252\u02D0nde/", // remaining
  \u0645\u0627\u0647\u062A\u0627\u0628: "/m\u0252\u02D0ht\u0252\u02D0b/", // moonlight
  \u0645\u0628\u06CC\u0646: "/mob\xE6jjen/",
  \u0645\u062D\u0645\u0648\u062F: "/m\xE6hmu\u02D0d/",
  \u0645\u062F\u0647: "/m\xE6deh/",
  \u0645\u0632\u06CC\u062F: "/m\xE6zi\u02D0d/", // increase
  \u0645\u0641\u0631\u062D: "/mof\xE6\u027E\u027Eeh/", // gladdening
  \u0645\u0645\u062F: "/momedd/", // sustainer
  \u0645\u0648\u062C\u0628: "/mo\u028Ad\u0361\u0292eb/", // cause of
  \u0645\u0648\u0644\u06CC\u0627\u0646: "/mu\u02D0li\u02D0\u0252\u02D0n/",
  \u0645\u0648\u0646\u0633: "/mu\u02D0nes/",
  \u0645\u0648\u0647\u0628\u062A: "/mo\u028Aheb\xE6t/", // gift, endowment
  "\u0645\u06CC\u200C\u0622\u06CC\u062F": "/mi\u02D0\u0252\u02D0j\xE6d/", // comes (up)
  "\u0645\u06CC\u200C\u062A\u0631\u0627\u0634\u062F": "/mi\u02D0t\xE6\u027E\u0252\u02D0\u0283\xE6d/", // scrapes, carves
  "\u0645\u06CC\u200C\u062E\u0648\u0631\u062F": "/mi\u02D0xo\u027E\xE6d/", // eats, erodes
  "\u0645\u06CC\u200C\u0631\u0648\u062F": "/mi\u02D0\u027E\xE6v\xE6d/", // goes (down)
  "\u0645\u06CC\u200C\u0634\u0648\u0646\u062F": "/mi\u02D0\u0283\xE6v\xE6nd/", // they become (passive)
  "\u0645\u06CC\u200C\u06A9\u0646\u062F": "/mi\u02D0kon\xE6d/", // does, is doing
  "\u0646\u0627\u067E\u0633\u0646\u062F\u200C\u0645": "/n\u0252\u02D0p\xE6s\xE6nd\xE6m/",
  \u0646\u0627\u062E\u0648\u0628: "/n\u0252\u02D0xu\u02D0b/",
  \u0646\u0627\u0635\u0631: "/n\u0252\u02D0se\u027E/",
  "\u0646\u0627\u0644\u06CC\u062F\u0647\u200C\u0627\u0646\u062F": "/n\u0252\u02D0li\u02D0de\xE6nd/", // they have lamented
  \u0646\u0627\u0646\u06CC: "/n\u0252\u02D0ni\u02D0/",
  \u0646\u0627\u0648\u0644\u0647\u0627: "/n\u0252\u02D0velh\u0252\u02D0/", // pass it around
  \u0646\u062A\u0648\u0627\u0646\u062F: "/n\xE6t\xE6v\u0252\u02D0n\xE6d/",
  \u0646\u0634\u0627\u0637\u0650: "/ne\u0283\u0252\u02D0te/",
  \u0646\u0638\u0627\u0645\u06CC: "/nez\u0252\u02D0mi\u02D0/",
  \u0646\u0638\u0631\u0628\u0627\u0632\u06CC\u0627: "/n\xE6z\xE6\u027Eb\u0252\u02D0zi\u02D0\u0252\u02D0/",
  \u0646\u0639\u0645\u062A: "/ne\u0294m\xE6t/", // blessing
  \u0646\u0641\u0633\u06CC: "/n\xE6f\xE6si\u02D0/", // a breath (indefinite)
  \u0646\u0641\u06CC\u0631\u0645: "/n\xE6fi\u02D0\u027E\xE6m/", // my wailing
  \u0646\u06AF\u0627\u0647\u0650: "/ne\u0261\u0252\u02D0he/",
  \u0646\u06AF\u0630\u0631\u062F: "/n\xE6\u0261oz\xE6\u027E\xE6d/", // does not pass
  \u0646\u06AF\u0647: "/ne\u0261\xE6h/",
  \u0646\u0645\u0648\u062F: "/nemu\u02D0d/", // appeared, showed
  "\u0646\u0645\u06CC\u200C\u0634\u0648\u062F": "/n\xE6mi\u02D0\u0283\xE6v\xE6d/", // is not possible
  // --- Shared classical forms ---
  \u0646\u0647\u0627\u062F: "/neh\u0252\u02D0d/",
  \u0646\u0647\u0627\u062F\u0647\u0627\u06CC: "/n\xE6h\u0252\u02D0dh\u0252\u02D0je/",
  \u0646\u0648\u0634: "/nu\u02D0\u0283/", // drink!
  \u0646\u06CC\u0627\u0628\u062F: "/n\xE6j\u0252\u02D0b\xE6d/", // will not find
  \u0646\u06CC\u0627\u0631\u062F: "/n\xE6j\u0252\u02D0\u027E\xE6d/",
  \u0646\u06CC\u0633\u062A\u0627\u0646: "/nejest\u0252\u02D0n/", // reed bed
  // --- Shamlou ---
  \u0646\u06CC\u0633\u062A\u06CC: "/ni\u02D0sti\u02D0/",
  \u0647\u0627\u06CC\u0644: "/h\u0252\u02D0jel/", // terrifying
  \u0647\u0633\u062A\u0646\u062F: "/h\xE6st\xE6nd/", // they are
  \u0647\u064E\u0645\u06CC: "/h\xE6mi\u02D0/",
  \u0647\u0648\u0634\u06CC: "/hu\u02D0\u0283i\u02D0/",
  \u0648\u062C\u062F\u0627\u0646: "/ved\u0361\u0292d\u0252\u02D0n/", // conscience
  \u0648\u062F\u06CC\u0639\u062A: "/v\xE6di\u02D0\xE6t/",
  \u0648\u0632\u0634: "/v\xE6ze\u0283/", // blowing
  \u0648\u06CC: "/v\xE6j/",
  \u06CC\u0627\u062F\u0650: "/j\u0252\u02D0de/",
  \u06CC\u0627\u0631\u0650: "/j\u0252\u02D0\u027Ee/",
  \u06CC\u0627\u0641\u062A: "/j\u0252\u02D0ft/",
  \u06CC\u06A9\u062F\u06CC\u06AF\u0631: "/jekdi\u02D0\u0261\xE6\u027E/" // one another
};
|
|
2086
|
+
|
|
2087
|
+
// src/overrides/fi.ts
|
|
2088
|
+
/**
 * Finnish pronunciation overrides (from src/overrides/fi.ts).
 *
 * Intentionally empty: G2P handles all Finnish words correctly. Add an entry
 * here only for a word where G2P produces an incorrect result.
 */
var fi = {};
|
|
2092
|
+
|
|
2093
|
+
// src/overrides/fr.ts
|
|
2094
|
+
/**
 * French pronunciation overrides (from src/overrides/fr.ts).
 *
 * Maps a word to a slash-delimited IPA transcription. Trailing comments note
 * what the entry is (place or character name, source author) or why the
 * dictionary entry is being overridden.
 */
var fr = {
  conflans: "/k\u0254\u0303fl\u0251\u0303/", // place name
  est: "/\u025B/", // verb "is" — st is silent (dict has /ɛst/)
  jolies: "/\u0292\u0254li/",
  luit: "/l\u0265i/",
  marchiennes: "/ma\u0281\u0283j\u025Bn/", // place name (Zola)
  "m\xE9taphysico-th\xE9ologo-cosmolonigologie": "/metafizikoteol\u0254\u0261\u0254k\u0254sm\u0254l\u0254ni\u0261\u0254l\u0254\u0292i/", // Voltaire
  montsou: "/m\u0254\u0303su/", // fictional town (Zola)
  morgion: "/m\u0254\u0281\u0292j\u0254\u0303/", // place name (Dumas)
  myriel: "/mi\u0281j\u025Bl/", // character name (Hugo)
  "neuve-sainte-genevi\xE8ve": "/n\u0153vs\u025B\u0303t\u0292\u0259nvj\u025Bv/", // Paris street (Balzac)
  nicole: "/nik\u0254l/",
  pangloss: "/p\u0251\u0303\u0261l\u0254s/", // Voltaire character
  rainur\u00E9e: "/\u0281\u025Bny\u0281e/",
  rion: "/\u0281j\u0254\u0303/", // place name (Dumas)
  "saint-marcel": "/s\u025B\u0303ma\u0281s\u025Bl/", // Paris quarter (Dumas)
  "thunder-ten-tronckh": "/t\u0254n\u025B\u0281t\u025Bnt\u0281\u0254nk/", // Voltaire
  trieste: "/t\u0281ij\u025Bst/", // city name (Dumas)
  vauquer: "/voke/", // character name (Balzac)
  y: "/i/" // pronoun "there" — dict has letter name /igʁɛk/
};
|
|
2130
|
+
|
|
2131
|
+
// src/overrides/is.ts
|
|
2132
|
+
/**
 * Icelandic pronunciation overrides (from src/overrides/is.ts).
 *
 * Maps a word, spelled exactly as the key is written, to a slash-delimited
 * IPA transcription. Trailing comments gloss the entry on the same line.
 *
 * Transcription convention visible in this table: orthographic b/d/g are
 * written as voiceless /p t k/, with aspiration marked separately (ʰ).
 *
 * NOTE(review): many keys are capitalised proper names; how the lookup
 * normalises case is not visible here — confirm that they are reachable.
 */
var is = {
  \u00E1sir: "/\u02C8au\u02D0s\u026Ar/",
  \u00E1synjur: "/\u02C8au\u02D0s\u026Anj\u028Fr/",
  ballir: "/\u02C8patl\u026Ar/",
  beytils: "/\u02C8pei\u02D0t\u02B0\u026Als/",
  Bj\u00E1lfa: "/\u02C8pjaulva/",
  Bjarnar: "/\u02C8pjartnar/",
  bl\u00F3t: "/plou\u02D0t\u02B0/", // sacrifice/ritual
  b\u00F3klaus: "/\u02C8pouk\u02B0l\u0153ys/",
  br\u00E9fberi: "/\u02C8prj\u025B\u02D0vp\u025Br\u026A/", // letter carrier
  burluf\u00F3ts: "/\u02C8p\u028Frtl\u028Ffouts/",
  efla\u00F0i: "/\u02C8\u025Bpla\xF0\u026A/", // past tense: strengthened/performed
  ek: "/\u025Bk\u02B0/",
  fannhv\u00EDtir: "/\u02C8fan\u02D0kvit\u02B0\u026Ar/",
  farandi: "/\u02C8farant\u026A/",
  fars\u00E6lda: "/\u02C8farsailta/",
  fira: "/\u02C8f\u026Ara/",
  flatnefur: "/\u02C8flatn\u025Bv\u028Fr/",
  fl\u00FD\u00F0u: "/\u02C8fli\u02D0\xF0\u028F/",
  fornaldar: "/\u02C8f\u0254rtnaltar/",
  fornrit: "/\u02C8f\u0254rtnr\u026At\u02B0/",
  galdursmenn: "/\u02C8kalt\u028Frsm\u025Bn\u02D0/",
  gautr: "/\u02C8k\u0153y\u02D0tr/",
  // New saga/literary overrides
  gengr: "/k\u025B\u014Bkr/",
  go\u00F0or\u00F0sma\u00F0ur: "/\u02C8k\u0254\u02D0\xF0\u0254r\xF0sma\u02D0\xF0\u028Fr/",
  h\u00E6ngs: "/\u02C8hai\u014Bs/",
  hags\u00E6lda: "/\u02C8haksailta/",
  H\u00E1lfdanarsonar: "/\u02C8haulf\u02CCtanar\u02CCs\u0254nar/",
  h\u00E1lftr\u00F6lls: "/\u02C8haulftr\u0153tls/",
  Hallbjarnar: "/\u02C8hatlpjartnar/",
  Hallfre\u00F0s: "/\u02C8hatlfr\u025B\xF0s/", // genitive of Hallfreður
  Hallfre\u00F0ur: "/\u02C8hatlfr\u025B\xF0\u028Fr/", // proper name
  h\u00E1rfagra: "/\u02C8haur\u02CCfa\u0263ra/", // fair-haired (epithet)
  h\u00E1rfagri: "/\u02C8haur\u02CCfa\u0263r\u026A/",
  hersir: "/\u02C8h\u025Brs\u026Ar/",
  hersis: "/\u02C8h\u025Brs\u026As/",
  herskarar: "/\u02C8h\u025Brsk\u02B0arar/",
  // The key has no nominative -r, so the transcription ends in -tal (cf. Raumsdal).
  Hrafnkelsdal: "/\u02C8r\u0325apnk\u02B0\u025Blstal/", // place name
  Hreggvi\u00F0sson: "/\u02C8r\u0325\u025Bk\u02D0v\u026A\xF0s\u02D0\u0254n/",
  hr\u00EDmhv\u00EDta: "/\u02C8r\u0325imkvit\u02B0a/",
  Ing\u00F3lfr: "/\u02C8\u026A\u014Bkoulfr/",
  ins: "/\u026Ans/",
  \u00CDvarssonar: "/\u02C8ivars\u02D0\u0254nar/",
  j\u00F6klanna: "/\u02C8j\u0153k\u02B0lan\u02D0a/",
  J\u00F3rsalalands: "/\u02C8jourtsalalants/",
  Ketils: "/\u02C8k\u02B0\u025Bt\u02B0\u026Als/",
  kindir: "/\u02C8k\u02B0\u026Ant\u026Ar/",
  K\u00F3lumkilli: "/\u02C8k\u02B0ou\u02D0l\u028Fmk\u02B0\u026Atl\u026A/",
  konungd\u00E6mi: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014B\u02CCtai\u02D0m\u026A/",
  // Orthographic d is written /t/ here, as in konungdæmi and the rest of the table.
  konungd\u00F3mur: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014B\u02CCtou\u02D0m\u028Fr/",
  konungr: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014Bkr/",
  konungseigninni: "/\u02C8k\u02B0\u0254\u02D0n\u028F\u014Bkseikn\u026An\u02D0\u026A/",
  kotb\u00F3ndi: "/\u02C8k\u02B0\u0254t\u02B0pount\u026A/",
  kunnigt: "/\u02C8k\u02B0\u028Fn\u02D0\u026Ak\u02B0t\u02B0/",
  kvonga\u00F0ur: "/\u02C8k\u02B0v\u0254\u014Bka\xF0\u028Fr/",
  kynst\u00F3r: "/\u02C8k\u02B0\u026Anstou\u02D0r/",
  ma\u00F0r: "/ma\u02D0\xF0r/",
  magra: "/\u02C8ma\u0263ra/",
  mannd\u00E1\u00F0in: "/\u02C8man\u02D0tau\u02D0\xF0\u026An/",
  mannf\u00F3lkit: "/\u02C8man\u02D0fouk\u02B0\u026At\u02B0/",
  mj\u00F6k: "/mj\u0153k\u02B0/",
  m\u00F6gu: "/\u02C8m\u0153\u02D0k\u028F/",
  n\u00F3rr\u00E6nn: "/\u02C8nou\u02D0rrai\u02D0n\u02D0/",
  N\u00F6rvasundum: "/\u02C8n\u0153rvas\u028Fnt\u028Fm/",
  Norvegi: "/\u02C8n\u0254rv\u025Bj\u026A/",
  n\u00FDu: "/\u02C8ni\u02D0\u028F/",
  \u00F3arga: "/\u02C8ou\u02D0arka/",
  \u00D6nundur: "/\u02C8\u0153\u02D0n\u028Fnt\u028Fr/",
  or: "/\u0254r/",
  \u00F3r: "/ou\u02D0r/",
  \u00F6xn: "/\u02C8\u0153ksn/",
  pl\u00F3gsland: "/\u02C8p\u02B0lou\u02D0kslant/",
  r\u00E1\u00F0u: "/\u02C8rau\u02D0\xF0\u028F/",
  r\u00E6kir: "/\u02C8rai\u02D0k\u02B0\u026Ar/",
  Raumar\u00EDki: "/\u02C8r\u0153y\u02D0mar\u026A\u02D0k\u02B0\u026A/",
  Raumsd\u00E6lafylki: "/\u02C8r\u0153ymstailaf\u026Alk\u02B0\u026A/",
  Raumsdal: "/\u02C8r\u0153ymstal/",
  raumur: "/\u02C8r\u0153y\u02D0m\u028Fr/",
  Rein: "/rei\u02D0n/",
  sannliga: "/\u02C8san\u02D0l\u026A\u0263a/",
  Sk\u00ED\u00F0ason: "/\u02C8ski\u02D0\xF0as\u0254n/",
  s\u00F6\u00F0ul: "/\u02C8s\u0153\u02D0\xF0\u028Fl/",
  s\u00F3lkerfum: "/\u02C8soulk\u02B0\u025Brv\u028Fm/",
  tindar: "/\u02C8t\u02B0\u026Antar/",
  \u00FAtsj\u00E1num: "/\u02C8u\u02D0t\u02B0sjau\u02D0n\u028Fm/",
  v\u00E1gskorin: "/\u02C8vau\u0263sk\u0254r\u026An/",
  Valf\u00F6\u00F0r: "/\u02C8valv\u0153\xF0r/",
  v\u00EDkverskur: "/\u02C8vi\u02D0kv\u025Brsk\u028Fr/",
  Yngveldi: "/\u02C8\u026A\u014Bkv\u025Blt\u026A/",
  \u00FEat: "/\u03B8at\u02B0/"
};
|
|
2231
|
+
|
|
2232
|
+
// src/overrides/ja.ts
|
|
2233
|
+
/**
 * Japanese pronunciation overrides (bundled from src/overrides/ja.ts).
 *
 * Maps a written form (hiragana, katakana or kanji spelling) to its IPA
 * transcription. Every value is wrapped in slashes, e.g. "/ato/".
 * Trailing comments give an English gloss for the entry on the same line.
 * NOTE(review): how this table is consulted is not visible here - presumably
 * it takes precedence over the general lookup; confirm against the consumer.
 */
var ja = {
  \u3042\u3051\u307C\u306E: "/akebono/",
  \u3042\u3063\u305F: "/at\u02D0a/",
  \u3042\u3068: "/ato/",
  \u3042\u307E\u305F: "/amata/",
  \u3042\u3089\u306C: "/a\u027Ean\u026F/",
  \u3042\u308A: "/a\u027Ei/",
  \u3042\u308B: "/a\u027E\u026F/",
  \u3042\u308B\u304F: "/a\u027E\u026Fk\u026F/",
  \u3044\u3046: "/i\u026F/",
  \u30A4\u30AE\u30EA\u30B9: "/i\u0261i\u027Eis\u026F/", // England
  \u3044\u305F: "/ita/",
  \u3044\u305F\u3046: "/ita\u026F/", // archaic: greatly
  // New sample overrides
  \u3044\u3064\u3082: "/its\u026Fmo/",
  \u3044\u3065\u308C: "/id\u026F\u027Ee/", // archaic: which
  \u3044\u308B: "/i\u027E\u026F/",
  \u3046\u307F: "/\u026Fmi/",
  \u3046\u3089: "/\u026F\u027Ea/",
  \u304A\u308B: "/o\u027E\u026F/",
  \u304B\u305F: "/kata/",
  \u304B\u305F\u3061: "/katat\u0255i/",
  \u304B\u3064\u3050: "/kats\u026F\u0261\u026F/",
  \u304B\u306E: "/kano/",
  \u304F: "/k\u026F/", // archaic verb stem
  \u3054: "/\u0261o/",
  \u3053\u3053: "/koko/",
  \u3054\u3056\u3044\u307E\u3059: "/\u0261ozaimas\u026F/",
  \u3053\u306E: "/kono/",
  \u3055\u3056\u308C: "/saza\u027Ee/",
  \u3055\u3073\u3057\u3044: "/sabi\u0255i\u02D0/",
  \u3055\u3089: "/sa\u027Ea/", // archaic: furthermore
  \u3057\u3088\u3046: "/\u0255ijo\u02D0/",
  \u3059\u3050\u308C\u308B: "/s\u026F\u0261\u026F\u027Ee\u027E\u026F/",
  \u3059\u3053\u3057: "/s\u026Fko\u0255i/",
  \u3059\u308B: "/s\u026F\u027E\u026F/",
  \u305D\u3053: "/soko/",
  \u305D\u306E: "/sono/",
  \u3060\u3044\u3076: "/daib\u026F/",
  \u3060\u3051: "/dake/",
  \u305F\u3060: "/tada/",
  \u3060\u3064: "/dats\u026F/", // archaic suffix: -ish
  \u305F\u306A\u3073\u304F: "/tanabik\u026F/",
  \u3064\u304F: "/ts\u026Fk\u026F/",
  \u3064\u308C\u308B: "/ts\u026F\u027Ee\u027E\u026F/",
  \u3069\u3053: "/doko/",
  \u3068\u3053\u308D: "/toko\u027Eo/",
  \u3068\u3089\u3048\u308B: "/to\u027Eae\u027E\u026F/",
  \u306A\u304A: "/nao/",
  \u306A\u304B: "/naka/",
  \u306A\u304F: "/nak\u026F/",
  \u306A\u3063\u305F: "/nat\u02D0a/",
  \u306A\u3073\u304F: "/nabik\u026F/",
  \u306E\u3051: "/noke/",
  \u3070\u304B\u308A: "/baka\u027Ei/",
  \u306F\u305F: "/hata/",
  \u30CF\u30F3\u30D6\u30EB\u30AF: "/hamb\u026F\u027E\u026Fk\u026F/",
  \u3075\u3046: "/\u0278\u026F\u02D0/",
  \u3075\u3061: "/\u0278\u026Ft\u0255i/",
  \u307B\u304B: "/hoka/",
  \u307B\u3069: "/hodo/",
  \u307B\u3093\u3068\u3046: "/honto\u02D0/",
  \u307E\u305F: "/mata/",
  \u307E\u3060: "/mada/",
  \u307E\u3067: "/made/",
  \u307F\u306A\u3055\u3093: "/minasa\u0274/",
  \u307F\u3093\u306A: "/min\u02D0a/",
  \u3080\u3059: "/m\u026Fs\u026F/",
  \u30E1\u30ED\u30B9: "/me\u027Eos\u026F/",
  \u3084\u3046\u3084\u3046: "/ja\u026Fja\u026F/", // archaic: gradually
  \u3084\u307F: "/jami/",
  \u3084\u3080: "/jam\u026F/",
  \u3084\u3093\u3054\u3068\u306A\u3057: "/ja\u014B\u0261otona\u0255i/", // archaic: noble
  \u3088\u304F: "/jok\u026F/",
  \u3088\u307B\u3069: "/johodo/",
  \u308B: "/\u027E\u026F/", // classical auxiliary
  \u308F\u304B\u308B: "/waka\u027E\u026F/",
  \u4E00\u8336: "/it\u02D0\u0255a/", // Issa (poet)
  \u4E09\u5341\u4E03: "/sa\u0274d\u0291\u026F\u02D0\u0255it\u0255i/",
  \u5019\u3046: "/so\u02D0\u027Eo\u02D0/", // archaic polite auxiliary
  \u547C\u3093\u3067: "/jo\u0274de/",
  \u6B62\u307E\u3063\u305F: "/tomat\u02D0a/",
  \u75E9: "/jase/", // thin
  \u767D\u304F: "/\u0255i\u027Eok\u026F/",
  \u77E5\u3063\u3066: "/\u0255it\u02D0e/",
  \u7A4D\u307F: "/ts\u026Fmi/",
  \u7F85\u751F\u9580: "/\u027Ea\u0255o\u02D0mo\u0274/", // Rashomon
  \u821E\u9DB4: "/maiz\u026F\u027E\u026F/", // Maizuru (place)
  \u91D1\u95A3: "/ki\u0274kak\u026F/", // Golden Pavilion
  \u9759\u304B\u3055: "/\u0255iz\u026Fkasa/"
};
|
|
2339
|
+
|
|
2340
|
+
// src/overrides/km.ts
|
|
2341
|
+
/**
 * Khmer pronunciation overrides (bundled from src/overrides/km.ts).
 *
 * Maps a Khmer spelling to its IPA transcription, wrapped in slashes.
 * Trailing comments give an English gloss for the entry on the same line.
 * The "--- ... ---" banners name the sample text an entry was added for;
 * NOTE(review): keys appear to be in code-point order, so a banner seems to
 * mark only the entry directly below it rather than a contiguous section.
 */
var km = {
  // --- UDHR Article 1 ---
  \u1780\u17C6\u178E\u17BE\u178F: "/k\u0251mna\u0259t/", // birth
  // --- Reamker (Ramayana) ---
  \u1780\u17D2\u178A\u17B8: "/kd\u0259y/", // matter, affair
  \u1780\u17D2\u179A\u17C1\u179C: "/kreew/", // furious
  \u1780\u17D2\u179A\u17C1\u179C\u1780\u17D2\u179A\u17C4\u1792: "/kreew krout/", // furiously angry
  \u1780\u17D2\u179A\u17C4\u1792: "/krout/", // anger
  \u1780\u17D2\u179F: "/ks/", // consonant cluster (browser splits ក្សត្រី)
  \u1780\u17D2\u179F\u178F\u17D2\u179A: "/ksaat/", // king
  // --- Nokor Reach (National Anthem) ---
  \u1780\u17D2\u179F\u178F\u17D2\u179A\u17B6: "/ksaatraa/", // king (literary form)
  \u1780\u17D2\u179F\u178F\u17D2\u179A\u17B8: "/ksaatr\u0259y/", // queen
  \u1781\u17D2\u1798\u17B8: "/km\u0259y/", // ogre (literary)
  \u1781\u17D2\u179C\u17BE\u1780: "/kwa\u0259k/", // to stir, disturb
  \u1782\u17B6\u1794\u17CB: "/koap/", // beloved
  // --- Tum Teav ---
  \u1782\u17BC: "/kuu/", // partner
  \u1782\u17BC\u1782\u17B6\u1794\u17CB: "/kuu koap/", // partner, beloved
  \u1783\u17D2\u179B\u17B6\u178F: "/kliet/", // separated
  \u1785\u179A: "/c\u0251\u0251/", // to walk, go
  \u1785\u17D2\u1793\u17C1\u17C7: "/cneh/", // this (literary)
  \u1787\u17D0\u1799: "/cey/", // victory
  \u1787\u17D0\u1799\u1798\u1784\u17D2\u1782\u179B: "/cey m\u0254\u014Bk\u0254l/", // victory, auspicious
  // --- Proverbs ---
  \u1787\u17B6\u1780\u17CB: "/ceak/", // certain, sure
  \u1787\u17B6\u179B\u17C6\u178A\u17B6\u1794\u17CB: "/cie l\u0254m\u0257aap/", // successively
  // --- Preah Chinawong ---
  \u1787\u17B7\u1793: "/c\u0268n/", // Jin (proper name element)
  // Legacy key: its last code point is U+0E2A (THAI CHARACTER SO SUA), not
  // Khmer U+179F. Kept so any input that carries the same typo still matches.
  \u1787\u17B7\u1793\u179C\u1784\u17D2\u0E2A: "/c\u0268n w\u0254\u014B/", // Chinawong (proper name)
  // Correctly spelled all-Khmer form of the same name.
  \u1787\u17B7\u1793\u179C\u1784\u17D2\u179F: "/c\u0268n w\u0254\u014B/", // Chinawong (proper name)
  \u1787\u17BC: "/cuu/", // proper (literary)
  \u1789\u17B6\u1780\u17CB: "/\u0272eak/", // to startle, twitch
  \u1789\u17B6\u1780\u17CB\u1785\u17B7\u1789\u17D2\u1785\u17BE\u1798: "/\u0272eak c\u0259\u0272ca\u0259m/", // to raise eyebrows
  \u178A\u17C2\u1793: "/daen/", // territory
  \u178E\u17B6\u1799: "/naay/", // to yearn
  \u178E\u17B6\u1799\u1785\u17B7\u178F\u17D2\u178F: "/naay c\u0259t/", // heart yearns
  \u178F\u1794: "/t\u0251p/", // to reply
  \u1790\u17D2\u1780\u17B6\u1793: "/tkaan/", // magnificent (literary)
  \u1790\u17D2\u1780\u17BE\u1784: "/tk\u0259\u0259\u014B/", // to glorify
  \u1790\u17D2\u1780\u17BE\u1784\u1790\u17D2\u1780\u17B6\u1793: "/tk\u0259\u0259\u014B tkaan/", // glorious, magnificent
  \u1790\u17D2\u1793\u17BC\u179A: "/tnoo/", // dignity, nobility
  \u1790\u17D2\u1793\u17C2: "/tnae/", // aspect (partial segmentation of ថ្នែក)
  \u1790\u17D2\u1793\u17C2\u1780: "/tnaek/", // aspect, class
  \u1790\u17D2\u179C\u17B6\u178F\u17CB: "/twat/", // harshly
  \u1791\u178F: "/t\u0254t/", // to look, behold (royal)
  \u1791\u17B6\u179C: "/tiew/", // Teav (proper name)
  \u1791\u17BB\u1780\u17D2\u1781: "/tuk/", // suffering (dukkha)
  \u1791\u17BB\u1780\u17D2\u1781\u179C\u17C1\u1791\u1793\u17B6: "/tukweet\u0251\u0251nie/", // suffering
  \u1791\u17BC\u179B: "/tuul/", // to inform (royal register)
  \u1791\u17C1\u1796\u17D2\u178F\u17B6: "/teep\u0257aa/", // devas, celestial beings
  \u1793\u179A\u1794\u178F\u17B8: "/n\u0254r\u0254pa\u0257\u0259y/", // sovereign, king
  \u1793\u17B6\u179C: "/niew/", // Nav (proper name)
  \u1793\u17B7\u1798\u17CC\u179B: "/n\u0268mm\u0254l/", // pure, immaculate
  \u1794\u17C6\u1795\u17D2\u179B\u17B6\u1789: "/b\u0251mp\u02B0lie\u0272/", // to destroy
  \u1794\u17C6\u1795\u17D2\u179B\u17B7\u1785: "/b\u0251mp\u02B0l\u0259c/", // to demolish
  \u1794\u1796\u17B7\u178F\u17D2\u179A: "/b\u0251p\u0268t/", // lord, sir (polite address)
  \u1794\u17BB\u178F\u17D2\u179A: "/\u0253ot/", // son
  // --- Chbab Srey (Code for Women) ---
  \u1794\u17D2\u179A\u178A\u17C5: "/pr\u0251\u0257aw/", // to advise, counsel
  \u1794\u17D2\u179A\u178E\u17B8: "/pr\u0251n\u0259y/", // loving, affectionate
  \u1794\u17D2\u179A\u17B6\u1787\u17D2\u1789: "/praac/", // wisdom
  \u1794\u17D2\u179A\u17B6\u179F\u17B6\u1791: "/praasaat/", // temple, palace
  \u1796\u17B7\u179A\u17C4\u1792: "/piroot/", // anger
  \u1796\u17BB\u17C6: "/pum/", // not (literary)
  \u1796\u17BB\u17C6\u1787\u17BC: "/pum cuu/", // not proper
  \u1796\u17D2\u1799\u17B6\u1794\u17B6\u1791: "/pjiebaat/", // malice
  \u1796\u17D2\u179A\u17C7\u1798\u17A0\u17C1\u179F\u17B8: "/preah m\u0254hees\u0259y/", // queen consort (with ព្រះ)
  \u1796\u17D2\u179A\u17C7\u179A\u17B6\u1787\u1794\u17BB\u178F\u17D2\u179A: "/preah riec \u0253ot/", // prince (with ព្រះ)
  \u1796\u17D2\u179A\u17C7\u179A\u17B6\u1787\u17B6: "/preah riecie/", // the king
  \u1797\u17D0\u1780\u17D2\u179A\u17D2\u178F: "/p\u02B0eak/", // face (royal register)
  \u1797\u17B6\u178F\u179A: "/p\u02B0iet\u0251\u0251/", // brother (standalone; final រ silent)
  \u1797\u17B6\u178F\u179A\u1797\u17B6\u1796: "/p\u02B0iet\u0251\u0251r\u0251p\u02B0iep/", // brotherhood
  \u1797\u17D2\u1789\u17B6: "/p\u0272ie/", // to awaken
  \u1798\u1784\u17D2\u1782\u179B: "/m\u0254\u014Bk\u0254l/", // auspicious
  \u1798\u17A0\u17B6: "/m\u0254haa/", // great (prefix)
  \u1798\u17A0\u17B6\u1780\u17D2\u179F\u178F\u17D2\u179A: "/m\u0254haa ksaat/", // great king, maharaja
  \u1798\u17A0\u17B6\u1787\u17B6\u178F\u17B7: "/m\u0254haa ciet/", // great nation
  \u1798\u17A0\u17C1\u179F\u17B8: "/m\u0254hees\u0259y/", // queen consort
  \u179A\u17B6\u1787: "/riec/", // royal
  \u179A\u17B6\u1787\u1794\u17BB\u178F\u17D2\u179A: "/riec \u0253ot/", // prince
  \u179A\u17B6\u1787\u17B6: "/riecie/", // king
  \u179A\u17BB\u1784: "/ru\u014B/", // to shine
  \u179A\u17BB\u1784\u179A\u17BF\u1784: "/ru\u014B r\u0268\u0259\u014B/", // brilliant, prosperous
  \u179B\u17C6\u178A\u17B6\u1794\u17CB: "/l\u0254m\u0257aap/", // successively, in order
  \u179C\u1784\u17D2\u179F: "/w\u0254\u014B/", // dynasty, lineage
  \u179C\u17B7\u1785\u17B6\u179A\u178E\u1789\u17D2\u1789\u17B6\u178E: "/wicaar\u0251\u0272\u0272aan/", // discernment
  \u179C\u17C1\u1791\u1793\u17B6: "/weet\u0251\u0251nie/", // pain (vedana)
  \u179F\u178F\u17B7: "/sa\u0294te\u0294/", // consciousness, mindfulness
  \u179F\u178F\u17B7\u179F\u1798\u17D2\u1794\u1787\u1789\u17D2\u1789\u17C8: "/sa\u0294te\u0294 sampa\u0294c\u028A\u0259\u0272\u0272ea\u0294/", // conscience
  \u179F\u1798\u17D2\u1794\u1787\u1789\u17D2\u1789\u17C8: "/sampa\u0294c\u028A\u0259\u0272\u0272ea\u0294/", // awareness
  \u179F\u17B6\u1791\u179A: "/saat\u0254\u0254/", // to welcome
  \u179F\u17B7\u179A\u17B8: "/ser\u0259y/", // glory (from Pali)
  \u179F\u17BD: "/su\u0259/", // (first syllable of សួស្តី)
  \u179F\u17BD\u179F\u17D2\u178F\u17B8: "/su\u0259sd\u0259y/", // greeting, well-being
  \u179F\u17C1\u1785\u1780\u17D2\u178A\u17B8: "/sac kd\u0259y/", // matter, affair
  \u179F\u17C1\u1785\u1780\u17D2\u178A\u17B8\u1790\u17D2\u179B\u17C3\u1790\u17D2\u1793\u17BC\u179A: "/sac kd\u0259y tlay tnoo/", // dignity
  \u179F\u17D2\u178A\u17C1\u1785: "/sdac/", // king
  \u179F\u17D2\u178F\u17B6\u1794\u17CB: "/sdaap/", // to listen
  \u179F\u17D2\u178F\u17B8: "/sd\u0259y/", // (second syllable of សួស្តី)
  \u179F\u17D2\u1791\u17BB\u17C7: "/stuh/", // to rush
  \u179F\u17D2\u1793\u17C6: "/sn\u0251m/", // concubine
  \u17A0\u178F\u17D2\u1790\u17B6: "/hat\u0251\u0251t\u02B0aa/", // hand (literary)
  // --- Constitution Preamble ---
  \u17A2\u1784\u17D2\u1782\u179A: "/\u0294\u0251\u014Bk\u0254\u0254/", // Angkor
  \u17A2\u179A: "/\u0294\u0251\u0251/", // to rejoice
  \u17A2\u179F\u17D2\u1785\u17B6\u179A\u17D2\u1799: "/\u0294\u0251scaa/", // wonderful, marvelous
  \u17A2\u17B6\u179B: "/\u0294aal/", // to rush, boast
  \u17A2\u17BD\u178F: "/\u0294u\u0259t/", // to boast
  \u17B1\u17D2\u1799: "/\u0294aoy/" // to give, let
};
|
|
2551
|
+
|
|
2552
|
+
// src/overrides/ko.ts
|
|
2553
|
+
/**
 * Korean pronunciation overrides (bundled from src/overrides/ko.ts).
 *
 * Maps a Hangul spelling to its IPA transcription, wrapped in slashes, with
 * "." between syllables. Trailing comments give an English gloss for the
 * entry on the same line.
 */
var ko = {
  \uAC10\uB098\uBB34: "/kam.na.mu/",
  \uAC1C\uCC9C: "/k\u025B.t\u0255\u02B0\u028Cn/",
  \uACBD\uC131: "/kj\u028C\u014B.s\u028C\u014B/", // old name for Seoul
  // New sample overrides
  \uACE0\uB2EC\uD504\uB2E4: "/ko.dal.p\u02B0\u026F.da/",
  \uACE0\uC774: "/ko.i/",
  \uAD11\uC74C: "/kwa\u014B.\u026Fm/",
  \uAE08\uBE5B: "/k\u026Fm.pit\u031A/",
  \uAE30\uB098\uAE30\uB2E4: "/ki.na.\u0261i.da/", // very long
  \uAE38\uB3D9: "/kil.do\u014B/", // Hong Gildong (character name)
  \uAF79\uACFC\uB9AC: "/k\u0348w\u025B\u014B.gwa.\u027Ei/", // kkwaenggwari (percussion instrument)
  \uB04A\uC784: "/k\u0348\u026Fn.im/",
  \uB098\uD0C0\uC0E4: "/na.t\u02B0a.\u0255a/", // Natasha
  \uB17C\uAC00: "/non.\u0261a/",
  \uB2C8\uAE4C: "/ni.k\u0348a/", // because (suffix)
  \uB3C4\uC6B0\uB2E4: "/to.u.da/", // to help
  \uB3D9\uC9D3\uB2EC: "/to\u014B.d\u0291it\u031A.t\u0348al/",
  \uB9E4\uC5B4\uB2EC\uB9AC\uB2E4: "/m\u025B.\u028C.dal.li.da/",
  \uBB3C\uB4E4\uB2E4: "/mul.d\u026Fl.da/",
  \uBC31\uB450\uC0B0: "/p\u025Bk\u031A.t\u0348u.san/", // Mt. Baekdu
  \uBC94\uD558\uB2E4: "/p\u028Cm.ha.da/",
  \uC0B0\uBAA8\uD241\uC774: "/san.mo.t\u02B0u\u014B.i/",
  \uC18C\uC90F\uC9D1: "/so.d\u0291ut\u031A.t\u0348\u0255ip\u031A/",
  \uC544\uB77C\uB9AC\uC694: "/a.\u027Ea.\u027Ei.jo/", // arirang refrain
  \uC544\uC2DC\uB2E4: "/a.\u0255i.da/", // to know (honorific)
  \uC57D\uC0B0: "/jak\u031A.s\u0348an/", // Yaksan (place)
  \uC5B4\uB450: "/\u028C.du/",
  \uC5B4\uB860: "/\u028C.\u027Eon/", // archaic: elder
  \uC5ED\uACB9\uB2E4: "/j\u028Ck\u031A.kj\u028Cp\u031A.t\u0348a/",
  \uC601\uBCC0: "/j\u028C\u014B.bj\u028Cn/", // Yeongbyeon (place)
  \uC624\uB3D9\uB098\uBB34: "/o.do\u014B.na.mu/",
  \uC624\uB3D9\uC78E: "/o.do\u014B.ip\u031A/",
  \uC624\uC2DC\uB2E4: "/o.\u0255i.da/", // honorific: to come
  \uC654\uB2E4: "/wat\u031A.t\u0348a/",
  \uC6B0\uB7EC\uB974\uB2E4: "/u.\u027E\u028C.\u027E\u026F.da/",
  \uC6D0\uD1B5\uD558\uB2E4: "/w\u028Cn.t\u02B0o\u014B.ha.da/",
  \uC774\uB77C: "/i.\u027Ea/", // copula ending
  \uC774\uC5B4\uB4E0: "/i.\u028C.d\u026Fn/", // archaic conditional
  \uC774\uC5D0: "/i.e/",
  // Final jieut + hieut merges to aspirated chieut (pronounced "i-chi-da"),
  // so the onset is the affricate t\u0255\u02B0 used elsewhere in this table,
  // not a plain aspirated t.
  \uC78A\uD788\uB2E4: "/i.t\u0255\u02B0i.da/",
  \uC78E\uC0C8: "/ip\u031A.s\u0348\u025B/",
  \uC7A5\uB0A0: "/t\u0255a\u014B.nal/",
  \uC7A5\uD130: "/t\u0255a\u014B.t\u02B0\u028C/",
  \uC810\uC21C\uC774: "/t\u0255\u028Cm.su.ni/", // character name (diminutive)
  \uC9C0\uB9AC\uB2E4: "/t\u0255i.\u027Ei.da/",
  \uC9C0\uC904\uB300\uB2E4: "/t\u0255i.d\u0291ul.d\u025B.da/",
  \uCC44\uC2DD: "/t\u0255\u02B0\u025B.\u0255ik\u031A/",
  \uCCAD\uCC9C: "/t\u0255\u02B0\u028C\u014B.t\u0255\u02B0\u028Cn/",
  \uCD5C: "/t\u0255\u02B0we/", // surname Choi
  \uCD98\uD48D: "/t\u0255\u02B0un.p\u02B0u\u014B/",
  \uD0C0\uC791\uB9C8\uB2F9: "/t\u02B0a.d\u0291ak\u031A.ma.da\u014B/", // threshing ground
  \uD2F0\uB04C: "/t\u02B0i.k\u0348\u026Fl/",
  \uD53C\uC5B4\uC624\uB974\uB2E4: "/p\u02B0i.\u028C.o.\u027E\u026F.da/",
  \uD55C\uD14C: "/han.t\u02B0e/",
  \uD574\uC124\uD53C: "/h\u025B.s\u028Cl.p\u02B0i/", // poetic: at sunset
  \uD5E4\uB2E4: "/he.da/", // archaic: to count
  \uD718\uB2EC\uB9AC\uB2E4: "/hwi.dal.li.da/",
  \uD718\uB3CC\uB2E4: "/hwi.dol.da/"
};
|
|
2634
|
+
|
|
2635
|
+
// src/overrides/ma.ts
|
|
2636
|
+
/**
 * Pronunciation overrides bundled from src/overrides/ma.ts.
 *
 * Maps a Latin-script spelling (keys are case-sensitive) to an IPA string.
 * NOTE(review): the entries look like Malay words and place names, and the
 * values are bare IPA without the surrounding slashes that the ja/km/ko
 * tables use - confirm the consumer accepts both forms.
 */
var ma = {
  Abdul: "abdul",
  bebas: "bebas",
  boleh: "boleh",
  gemawan: "\u0261\u0259mawan",
  Hamid: "hamid",
  ibni: "ibni",
  Jebat: "d\u0292\u0259bat",
  Johor: "d\u0292oho\u027E",
  kebebasan: "k\u0259bebasan",
  Lekir: "l\u0259ki\u027E",
  Lekiu: "l\u0259kiw",
  Malaysia: "malejsia",
  merdeka: "m\u0259\u027Edeka",
  mereka: "m\u0259reka",
  Oleh: "oleh",
  samarata: "samarata",
  Sarawak: "sa\u027Eawak",
  seekor: "s\u0259eko\u027E",
  seksaan: "seksa\u0294an",
  Selangor: "s\u0259la\u014Bo\u027E",
  selendang: "s\u0259l\u0259nda\u014B",
  Singapura: "si\u014B\u0261apu\u027Ea",
  Terengganu: "t\u0259\u027Ee\u014B\u0261anu",
  Tuhan: "tuhan"
};
|
|
2662
|
+
|
|
2663
|
+
// src/overrides/nb.ts
|
|
2664
|
+
/**
 * Norwegian Bokmal pronunciation overrides (bundled from src/overrides/nb.ts).
 *
 * Maps a spelling (keys are case-sensitive, e.g. both "intet" and "Intet"
 * are listed) to an IPA string.
 * NOTE(review): values are bare IPA without the surrounding slashes that the
 * ja/km/ko tables use - confirm the consumer accepts both forms.
 */
var nb = {
  // Old orthography (Riksmål/Danish) and common words
  al: "\u0251\u02D0l",
  \u00E5rsalderen: "o\u02D0\u027Es\u0251l\u02D0d\u0259\u027E\u0259n",
  arv: "\u0251\u027Ev",
  behold: "b\u0259h\u0254l",
  Bj\u00F8rgulfson: "bj\xF8\u02D0\u027E\u0261\u0289lfs\u0254n",
  Blik: "bl\u026Ak",
  Bliv: "bli\u02D0",
  b\u00F8r: "b\xF8\u02D0\u027E",
  B\u00F8rnene: "b\xF8\u02D0\u0273\u0259n\u0259",
  brorskapets: "b\u027Eu\u02D0\u027Esk\u0251\u02D0p\u0259ts",
  demokratiet: "d\u025Bmok\u027E\u0251ti\u02D0\u0259",
  dig: "d\u0251j",
  Dyb: "dy\u02D0p",
  Eftermiddag: "\u025Bft\u025B\u027Em\u026Ad\u0251\u02D0\u0261",
  ej: "\u0251j",
  fandens: "f\u0251n\u02D0\u0259ns",
  f\u00E5tt: "f\u0254t",
  fiender: "fi\u02D0\u025Bnd\u0259\u027E",
  Fjeldbygden: "fj\u025Blb\u028F\u0261d\u0259n",
  fordrukken: "f\u0254\u027Ed\u027E\u0289k\u02D0\u0259n",
  fornuft: "f\u0254\u027En\u0289ft",
  frem: "f\u027E\u025Bm",
  Fremmedkarl: "f\u027E\u025Bm\u02D0\u0259k\u0251\u027El",
  frostblaa: "f\u027E\u0254stblo\u02D0",
  frygt: "f\u027E\u028Fkt",
  gamlingen: "\u0261\u0251ml\u026A\u014B\u0259n",
  Gem: "j\u025Bm",
  gikk: "j\u026Ak",
  Gjesling: "j\u025Bsl\u026A\u014B",
  gjorde: "ju\u02D0\u027E\u0259",
  Glands: "\u0261l\u0251ns",
  gnistred: "\u0261n\u026Ast\u027E\u0259d",
  gr\u00E5spr\u00E6ngt: "\u0261\u027Eo\u02D0sp\u027E\u025B\u014Bt",
  Grunnlov: "\u0261\u027E\u0289n\u02D0lo\u02D0v",
  Guld: "\u0261\u0289l",
  Gyldenlak: "j\u028Fl\u02D0\u0259nl\u0251k",
  Gyldentop: "j\u028Fl\u02D0\u0259nt\u0254p",
  ham: "h\u0251m",
  Hej: "h\u0251j",
  h\u00F8r: "h\xF8\u02D0\u027E",
  Horisonten: "h\u0254\u027E\u026As\u0254nt\u0259n",
  humanistiske: "h\u0289m\u0251n\u026Ast\u026Ask\u0259",
  husbond: "h\u0289\u02D0sb\u0254n",
  Hvad: "v\u0251",
  hvem: "v\u025Bm",
  hverandre: "v\u025B\u027E\u0251nd\u027E\u0259",
  hverken: "v\u025B\u027Ek\u0259n",
  Hvor: "vu\u02D0\u027E",
  hvoraf: "vu\u02D0\u027E\u0251v",
  Hvorfor: "v\u0254\u027Ef\u0254\u027E",
  Hvormeget: "vu\u02D0\u027Em\u0251j\u02D0\u0259",
  iaften: "i\u0251ft\u0259n",
  Idet: "i\u02D0de\u02D0t",
  ihob: "iho\u02D0b",
  imellem: "im\u025Bl\u02D0\u0259m",
  imod: "i\u02D0mu\u02D0t",
  inn: "\u026An",
  intet: "\u026Ant\u0259",
  Intet: "\u026Ant\u0259",
  Ivar: "i\u02D0v\u0251\u027E",
  Jammer: "j\u0251m\u025B\u027E",
  Jensen: "j\u025Bns\u0259n",
  jordegods: "ju\u02D0\u027E\u0259\u0261\u0254ts",
  juletr\u00E6et: "j\u0289\u02D0l\u0259t\u027E\u025B\u02D0\u0259",
  just: "j\u0289st",
  kan: "k\u0251n",
  kanske: "k\u0251n\u0283\u0259",
  Kj\u00F8ttmeisene: "\xE7\xF8tm\xE6\u026As\u0259n\u0259",
  kold: "k\u0254l",
  kom: "k\u0254m",
  Kongeriket: "k\u0254\u014B\u0259\u027Ei\u02D0k\u0259",
  Kringsatt: "k\u027E\u026A\u014Bs\u0251t",
  Kristiania: "k\u027E\u026Ast\u026A\u0251\u02D0n\u026A\u0251",
  kristne: "k\u027E\u026Astn\u0259",
  Landets: "l\u0251n\u0259ts",
  Lavrans: "l\u0251\u02D0v\u027E\u0251ns",
  lektor: "l\u025Bkt\u0254\u027E",
  lig: "li\u02D0\u0261",
  m\u00E5: "mo\u02D0",
  maa: "mo\u02D0",
  Mandemagt: "m\u0251n\u02D0\u0259m\u0251kt",
  menneskerettighetene: "m\u025Bn\u02D0\u0259sk\u0259\u027E\u025Bt\u02D0\u026A\u0261he\u02D0t\u0259n\u0259",
  menneskerettigheter: "m\u025Bn\u02D0\u0259sk\u0259\u027E\u025Bt\u02D0\u026A\u0261he\u02D0t\u0259\u027E",
  menneskeverd: "m\u025Bn\u02D0\u0259sk\u0259ve\u02D0\u027Ed",
  mig: "m\u0251j",
  mod: "mo\u02D0d",
  monarkisk: "m\u0254n\u0251\u027Ek\u026Ask",
  Muld: "m\u0289l",
  Nej: "n\u0251j",
  noget: "no\u02D0\u0259",
  n\u00F8gne: "n\xF8jn\u0259",
  Norge: "n\u0254\u027E\u0261\u0259",
  nu: "n\u0289\u02D0",
  nysn\u00F8en: "ny\u02D0sn\xF8\u02D0\u0259n",
  \u00F8jne: "\u0254jn\u0259",
  op: "\u0254p",
  Peer: "pe\u02D0\u027E",
  Plads: "pl\u0251s",
  proppenerer: "p\u027E\u0254p\u0259ne\u02D0\u027E\u0259\u027E",
  Puslinger: "p\u0289\u02D0sl\u026A\u014B\u0259\u027E",
  raaber: "\u027Eo\u02D0b\u0259\u027E",
  R\u00E6kke: "\u027E\u025Bk\u0259",
  r\u00E6kker: "\u027E\u025Bk\u02D0\u0259\u027E",
  Ragnfrid: "\u027E\u0251\u014Bnf\u027Ei\u02D0d",
  regjeringsform: "\u027Eeje\u02D0\u027E\u026A\u014Bsf\u0254\u027Em",
  rettsstaten: "\u027E\u025Bt\u02D0s\u02D0t\u0251\u02D0t\u0259n",
  Rosentinter: "\u027Eu\u02D0s\u0259nt\u026Ant\u025B\u027E",
  rummeligt: "\u027E\u0289m\u02D0\u0259l\u026A\u0261t",
  Ryg: "\u027E\u028F\u0261",
  saganatt: "s\u0251\u02D0\u0261\u0251n\u0251t",
  samvittighet: "s\u0251mv\u026At\u02D0\u026A\u0261he\u02D0t",
  sidste: "s\u026Ast\u0259",
  siger: "si\u02D0\u0259\u027E",
  skabt: "sk\u0251pt",
  skal: "sk\u0251l",
  Skar: "sk\u0251\u02D0\u027E",
  skj\u00E6ms: "\u0283\u025Bms",
  Skodden: "sk\u0254d\u02D0\u0259n",
  Skydsskiftet: "\u0283\u028Ftssk\u026Aft\u0259",
  slig: "sli\u02D0\u0261",
  smaa: "smo\u02D0",
  Snees: "sne\u02D0s",
  Sneskavler: "sne\u02D0sk\u0251\u02D0vl\u025B\u027E",
  S\u00F8rby: "s\xF8\u02D0\u027Eby\u02D0",
  sp\u00F8r: "sp\xF8\u02D0\u027E",
  steget: "ste\u02D0\u0261\u0259",
  stundom: "st\u0289nd\u0254m",
  stygt: "st\u028Fkt",
  Sundbu: "s\u0289nb\u0289\u02D0",
  Syd: "sy\u02D0d",
  tabt: "t\u0251pt",
  Tant: "t\u0251nt",
  Terje: "t\u025B\u027Ej\u0259",
  T\u00F8v: "t\xF8\u02D0v",
  Tvi: "tvi\u02D0",
  tykkner: "t\u028Fk\u02D0n\u0259\u027E",
  uavhendelig: "\u0289\u02D0\u0251vh\u025Bnd\u0259l\u026A\u0261",
  udekket: "\u0289\u02D0d\u025Bk\u02D0\u0259",
  // Four syllables (u-de-le-lig): the "-lig" ending keeps its "l\u026A",
  // matching the "-d\u0259l\u026A\u0261" ending of "uavhendelig" above.
  udelelig: "\u0289\u02D0de\u02D0l\u0259l\u026A\u0261",
  uden: "\u0289\u02D0d\u0259n",
  underjordisk: "\u0289n\u02D0\u0259\u027Eju\u02D0\u027Ed\u026Ask",
  v\u00E6rbitt: "v\xE6\u02D0\u027Eb\u026At",
  vejr: "ve\u02D0\u027E",
  Verdigrunnlaget: "ve\u02D0\u027Ed\u026A\u0261\u027E\u0289n\u02D0l\u0251\u02D0\u0261\u0259",
  Vigen: "vi\u02D0\u0261\u0259n",
  vil: "v\u026Al",
  Vindvet: "v\u026Andv\u0259",
  visst: "v\u026Ast",
  Vorherres: "vo\u02D0\u027Eh\u025B\u027E\u0259s",
  yderste: "y\u02D0d\u0259\u027Est\u0259"
};
|
|
2817
|
+
|
|
2818
|
+
// src/overrides/nl.ts
|
|
2819
|
+
var nl = {
|
|
2820
|
+
aandelen: "/\u02C8a\u02D0nd\u0259l\u0259n/",
|
|
2821
|
+
achterhaalt: "/\u02C8\u0251xt\u0259rha\u02D0lt/",
|
|
2822
|
+
ademde: "/\u02C8a\u02D0d\u0259md\u0259/",
|
|
2823
|
+
// past tense: breathed
|
|
2824
|
+
alchemie: "/\u0251lx\u0259\u02C8mi/",
|
|
2825
|
+
allen: "/\u02C8\u0251l\u0259n/",
|
|
2826
|
+
alsoo: "/\u0251l\u02C8so\u02D0/",
|
|
2827
|
+
// archaic: thus
|
|
2828
|
+
Anton: "/\u02C8\u0251nt\u0254n/",
|
|
2829
|
+
// proper name
|
|
2830
|
+
balkons: "/b\u0251l\u02C8k\u0254ns/",
|
|
2831
|
+
bange: "/\u02C8b\u0251\u014B\u0259/",
|
|
2832
|
+
benaeuwde: "/b\u0259\u02C8na\u02D0ud\u0259/",
|
|
2833
|
+
// archaic: oppressed
|
|
2834
|
+
// New sample overrides
|
|
2835
|
+
benoemd: "/b\u0259\u02C8nu\u02D0mt/",
|
|
2836
|
+
bevonden: "/b\u0259\u02C8v\u0254nd\u0259n/",
|
|
2837
|
+
bleeke: "/\u02C8ble\u02D0k\u0259/",
|
|
2838
|
+
// archaic: pale
|
|
2839
|
+
bloedroze: "/\u02C8blud\u02CCro\u02D0z\u0259/",
|
|
2840
|
+
boomen: "/\u02C8bo\u02D0m\u0259n/",
|
|
2841
|
+
// archaic: bomen (trees)
|
|
2842
|
+
braamstoelen: "/\u02C8bra\u02D0m\u02CCstu\u02D0l\u0259n/",
|
|
2843
|
+
// blackberry bushes
|
|
2844
|
+
bureautje: "/by\u02C8ro\u02D0tj\u0259/",
|
|
2845
|
+
burgery: "/b\u028Fr\u0263\u0259\u02C8r\u025Bi/",
|
|
2846
|
+
// archaic: burgerij
|
|
2847
|
+
dagelix: "/\u02C8da\u02D0\u0263\u0259l\u026Aks/",
|
|
2848
|
+
// archaic: dagelijks
|
|
2849
|
+
dagschemer: "/\u02C8d\u0251\u0263\u02CCsxe\u02D0m\u0259r/",
|
|
2850
|
+
dengenen: "/d\u025Bn\u02C8\u0263e\u02D0n\u0259n/",
|
|
2851
|
+
// archaic: those
|
|
2852
|
+
dese: "/\u02C8de\u02D0z\u0259/",
|
|
2853
|
+
// archaic: deze
|
|
2854
|
+
deselve: "/d\u0259\u02C8z\u025Blv\u0259/",
|
|
2855
|
+
// archaic: dezelfde
|
|
2856
|
+
dien: "/di\u02D0n/",
|
|
2857
|
+
// archaic dative: that
|
|
2858
|
+
draaide: "/\u02C8dra\u02D0id\u0259/",
|
|
2859
|
+
duitsen: "/\u02C8d\u0153yts\u0259n/",
|
|
2860
|
+
// archaic: German
|
|
2861
|
+
eenvouds: "/\u02C8e\u02D0nv\u0251uts/",
|
|
2862
|
+
// archaic: simplicity
|
|
2863
|
+
eert: "/e\u02D0rt/",
|
|
2864
|
+
egters: "/\u02C8\u025B\u0263t\u0259rs/",
|
|
2865
|
+
// proper name
|
|
2866
|
+
engelen: "/\u02C8\u025B\u014B\u0259l\u0259n/",
|
|
2867
|
+
erbarremt: "/\u025Br\u02C8b\u0251r\u0259mt/",
|
|
2868
|
+
// archaic: have mercy
|
|
2869
|
+
erembodegem: "/\u02C8e\u02D0r\u0259m\u02CCbo\u02D0d\u025B\u0263\u0259m/",
|
|
2870
|
+
erkers: "/\u02C8\u025Brk\u0259rs/",
|
|
2871
|
+
flauwe: "/\u02C8fl\u0251u\u0259/",
|
|
2872
|
+
frits: "/fr\u026Ats/",
|
|
2873
|
+
gekend: "/\u0263\u0259\u02C8k\u025Bnt/",
|
|
2874
|
+
// past participle: known
|
|
2875
|
+
gekomen: "/\u0263\u0259\u02C8ko\u02D0m\u0259n/",
|
|
2876
|
+
gekund: "/\u0263\u0259\u02C8k\u028Fnt/",
|
|
2877
|
+
gemeenschappelijks: "/\u0263\u0259\u02C8me\u02D0nsx\u0251p\u0259l\u0259ks/",
|
|
2878
|
+
// archaic genitive
|
|
2879
|
+
geschrey: "/\u0263\u0259\u02C8sxr\u025Bi/",
|
|
2880
|
+
// archaic: outcry
|
|
2881
|
+
gestelt: "/\u0263\u0259\u02C8st\u025Blt/",
|
|
2882
|
+
// archaic: placed
|
|
2883
|
+
geverfd: "/\u0263\u0259\u02C8v\u025Brft/",
|
|
2884
|
+
// painted, dyed
|
|
2885
|
+
gewone: "/\u0263\u0259\u02C8wo\u02D0n\u0259/",
|
|
2886
|
+
ghewelt: "/\u0263\u0259\u02C8w\u025Blt/",
|
|
2887
|
+
// archaic: violence
|
|
2888
|
+
godt: "/\u0263\u0254t/",
|
|
2889
|
+
// archaic: God
|
|
2890
|
+
goedkope: "/\u02C8\u0263utk\u02B0o\u02D0p\u0259/",
|
|
2891
|
+
graaft: "/\u0263ra\u02D0ft/",
|
|
2892
|
+
groeiden: "/\u02C8\u0263ru\u02D0id\u0259n/",
|
|
2893
|
+
grooten: "/\u02C8\u0263ro\u02D0t\u0259n/",
|
|
2894
|
+
// archaic: great
|
|
2895
|
+
Haarlem: "/\u02C8ha\u02D0rl\u0259m/",
|
|
2896
|
+
// city name
|
|
2897
|
+
had: "/h\u0251t/",
|
|
2898
|
+
hadden: "/\u02C8h\u0251d\u0259n/",
|
|
2899
|
+
hare: "/\u02C8ha\u02D0r\u0259/",
|
|
2900
|
+
// archaic: her
|
|
2901
|
+
hele: "/\u02C8he\u02D0l\u0259/",
|
|
2902
|
+
hemelsche: "/\u02C8he\u02D0m\u0259lsx\u0259/",
|
|
2903
|
+
// archaic: heavenly
|
|
2904
|
+
henri: "/\u0251\u0303\u02C8ri/",
|
|
2905
|
+
// French name
|
|
2906
|
+
herinner: "/h\u025B\u02C8r\u026An\u0259r/",
|
|
2907
|
+
hispanje: "/h\u026As\u02C8p\u0251\u0272\u0259/",
|
|
2908
|
+
// archaic: Spain
|
|
2909
|
+
hooft: "/ho\u02D0ft/",
|
|
2910
|
+
// archaic: hoofd
|
|
2911
|
+
hooren: "/\u02C8ho\u02D0r\u0259n/",
|
|
2912
|
+
// archaic: horen
|
|
2913
|
+
immense: "/\u026A\u02C8m\u025Bns\u0259/",
|
|
2914
|
+
inni: "/\u02C8\u026Ani/",
|
|
2915
|
+
// proper name (Mulisch)
|
|
2916
|
+
kapellekensbaan: "/ka\u02C8p\u025Bl\u0259k\u0259ns\u02CCba\u02D0n/",
|
|
2917
|
+
kennelick: "/\u02C8k\u025Bn\u0259l\u0259k/",
|
|
2918
|
+
// archaic: evident
|
|
2919
|
+
kraanwagentje: "/\u02C8kra\u02D0n\u02CCwa\u02D0\u0263\u0259ntj\u0259/",
|
|
2920
|
+
laatsten: "/\u02C8la\u02D0tst\u0259n/",
|
|
2921
|
+
lande: "/\u02C8l\u0251nd\u0259/",
|
|
2922
|
+
// archaic dative: land
|
|
2923
|
+
lauriergracht: "/l\u0251u\u02C8ri\u02D0r\u0263r\u0251xt/",
|
|
2924
|
+
lesen: "/\u02C8le\u02D0z\u0259n/",
|
|
2925
|
+
// archaic: lezen
|
|
2926
|
+
lesten: "/\u02C8l\u025Bst\u0259n/",
|
|
2927
|
+
// archaic: last
|
|
2928
|
+
louterende: "/\u02C8l\u0251ut\u0259r\u025Bnd\u0259/",
|
|
2929
|
+
mooiste: "/\u02C8mo\u02D0ist\u0259/",
|
|
2930
|
+
// superlative: prettiest
|
|
2931
|
+
my: "/m\u025Bi/",
|
|
2932
|
+
// archaic: mij
|
|
2933
|
+
nassouwe: "/n\u0251\u02C8s\u0251u\u0259/",
|
|
2934
|
+
// archaic: Nassau
|
|
2935
|
+
niemandsbos: "/\u02C8ni\u02D0m\u0251nts\u02CCb\u0254s/",
|
|
2936
|
+
// nobody's forest
|
|
2937
|
+
oeroeg: "/\u02C8uru\u0263/",
|
|
2938
|
+
// proper name (Haasse)
|
|
2939
|
+
ondersaten: "/\u02C8\u0254nd\u0259r\u02CCza\u02D0t\u0259n/",
|
|
2940
|
+
// archaic: subjects
|
|
2941
|
+
ontwaakte: "/\u0254nt\u02C8wa\u02D0kt\u0259/",
|
|
2942
|
+
onverveerd: "/\u02CC\u0254nv\u0259r\u02C8ve\u02D0rt/",
|
|
2943
|
+
opgerezen: "/\u02C8\u0254p\u0263\u0259\u02CCre\u02D0z\u0259n/",
|
|
2944
|
+
osewoudt: "/\u02C8o\u02D0z\u0259\u02CCw\u0251ut/",
|
|
2945
|
+
// proper name (Hermans)
|
|
2946
|
+
ouders: "/\u02C8\u0251ud\u0259rs/",
|
|
2947
|
+
philips: "/\u02C8fil\u026Aps/",
|
|
2948
|
+
plachten: "/\u02C8pl\u0251xt\u0259n/",
|
|
2949
|
+
// archaic: used to
|
|
2950
|
+
pleegde: "/\u02C8ple\u02D0\u0263d\u0259/",
|
|
2951
|
+
po\u00EBtische: "/po\u02C8e\u02D0t\u026Asx\u0259/",
|
|
2952
|
+
prince: "/\u02C8pr\u026Ans\u0259/",
|
|
2953
|
+
// archaic: prins
|
|
2954
|
+
prinse: "/\u02C8pr\u026Ans\u0259/",
|
|
2955
|
+
// archaic variant
|
|
2956
|
+
reed: "/re\u02D0t/",
|
|
2957
|
+
romans: "/ro\u02C8m\u0251ns/",
|
|
2958
|
+
saluyt: "/sa\u02D0\u02C8l\u0153yt/",
|
|
2959
|
+
// archaic: greeting
|
|
2960
|
+
sarphatistraat: "/s\u0251r\u02C8fa\u02D0tistra\u02D0t/",
|
|
2961
|
+
// street name (Nescio)
|
|
2962
|
+
scherpste: "/\u02C8sx\u025Brpst\u0259/",
|
|
2963
|
+
schilderskade: "/\u02C8sx\u026Ald\u0259rs\u02CCka\u02D0d\u0259/",
|
|
2964
|
+
// street name
|
|
2965
|
+
Seynaeve: "/\u02C8s\u025Bina\u02D0v\u0259/",
|
|
2966
|
+
// Belgian proper name
|
|
2967
|
+
sien: "/si\u02D0n/",
|
|
2968
|
+
// archaic: zien
|
|
2969
|
+
smalle: "/\u02C8sm\u0251l\u0259/",
|
|
2970
|
+
// inflected: narrow
|
|
2971
|
+
spoorwegzate: "/\u02C8spo\u02D0r\u02CCw\u025Bxza\u02D0t\u0259/",
|
|
2972
|
+
// archaic: railway junction
|
|
2973
|
+
Steenwijk: "/\u02C8ste\u02D0n\u028B\u025Bik/",
|
|
2974
|
+
// place name
|
|
2975
|
+
stonden: "/\u02C8st\u0254nd\u0259n/",
|
|
2976
|
+
stopte: "/\u02C8st\u0254pt\u0259/",
|
|
2977
|
+
tamarindeboomen: "/ta\u02D0ma\u02D0\u02C8r\u026And\u0259\u02CCbo\u02D0m\u0259n/",
|
|
2978
|
+
// archaic: tamarind trees
|
|
2979
|
+
tegenwoordighe: "/\u02C8te\u02D0\u0263\u0259n\u02CCwo\u02D0rd\u0259\u0263\u0259/",
|
|
2980
|
+
// archaic
|
|
2981
|
+
terechtgekomen: "/t\u0259\u02C8r\u025Bxt\u0263\u0259\u02CCko\u02D0m\u0259n/",
|
|
2982
|
+
termurenlaan: "/t\u025Br\u02C8my\u02D0r\u0259n\u02CCla\u02D0n/",
|
|
2983
|
+
terugdenk: "/t\u0259\u02C8r\u028Fxd\u025B\u014Bk/",
|
|
2984
|
+
// compound: think back
|
|
2985
|
+
torentjes: "/\u02C8to\u02D0r\u0259ntj\u0259s/",
|
|
2986
|
+
toverplaatjes: "/\u02C8to\u02D0v\u0259r\u02CCpla\u02D0tj\u0259s/",
|
|
2987
|
+
// magic pictures
|
|
2988
|
+
tracht: "/tr\u0251xt/",
|
|
2989
|
+
// verb: try
|
|
2990
|
+
tragische: "/\u02C8tra\u02D0\u0263\u026Asx\u0259/",
|
|
2991
|
+
tusschen: "/\u02C8t\u028Fsx\u0259n/",
|
|
2992
|
+
// archaic: tussen (between)
|
|
2993
|
+
velden: "/\u02C8v\u025Bld\u0259n/",
|
|
2994
|
+
vermoordde: "/v\u0259r\u02C8mo\u02D0rd\u0259/",
|
|
2995
|
+
verschijnt: "/v\u0259r\u02C8sx\u025Bint/",
|
|
2996
|
+
// appears
|
|
2997
|
+
vlamde: "/\u02C8vl\u0251md\u0259/",
|
|
2998
|
+
vloog: "/vlo\u02D0x/",
|
|
2999
|
+
volcx: "/v\u0254lks/",
|
|
3000
|
+
// archaic: volks
|
|
3001
|
+
volle: "/\u02C8v\u0254l\u0259/",
|
|
3002
|
+
voorschoten: "/\u02C8vo\u02D0rsxo\u02D0t\u0259n/",
|
|
3003
|
+
// place name
|
|
3004
|
+
voorzomermiddag: "/\u02C8vo\u02D0r\u02CCzo\u02D0m\u0259r\u02CCm\u026Ad\u0251x/",
|
|
3005
|
+
vroege: "/\u02C8vru\u0263\u0259/",
|
|
3006
|
+
warme: "/\u02C8v\u0251rm\u0259/",
|
|
3007
|
+
weerd: "/we\u02D0rt/",
|
|
3008
|
+
// archaic: waard
|
|
3009
|
+
weggegaan: "/\u02C8v\u025Bx\u0263\u0259\u02CC\u0263a\u02D0n/",
|
|
3010
|
+
werd: "/w\u025Brt/",
|
|
3011
|
+
werkmenschen: "/\u02C8w\u025Brk\u02CCm\u025Bnsx\u0259n/",
|
|
3012
|
+
// archaic: working people
|
|
3013
|
+
"west-java": "/\u02CCw\u025Bst\u02C8ja\u02D0va/",
|
|
3014
|
+
wintrop: "/\u02C8v\u026Antr\u0254p/",
|
|
3015
|
+
// proper name (Mulisch)
|
|
3016
|
+
wonderlijker: "/\u02C8\u028B\u0254nd\u0259rl\u025Bik\u0259r/",
|
|
3017
|
+
// comparative: more wonderful
|
|
3018
|
+
woonde: "/\u02C8wo\u02D0nd\u0259/",
|
|
3019
|
+
wordt: "/w\u0254rt/",
|
|
3020
|
+
yegelick: "/\u02C8je\u02D0\u0263\u0259l\u026Ak/",
|
|
3021
|
+
// archaic: iedereen
|
|
3022
|
+
zakkende: "/\u02C8z\u0251k\u0259nd\u0259/",
|
|
3023
|
+
// sinking
|
|
3024
|
+
zekere: "/\u02C8ze\u02D0k\u0259r\u0259/",
|
|
3025
|
+
// a certain
|
|
3026
|
+
zijne: "/\u02C8z\u025Bin\u0259/",
|
|
3027
|
+
// archaic: zijn
|
|
3028
|
+
zocht: "/z\u0254xt/",
|
|
3029
|
+
zuivere: "/\u02C8z\u0153yv\u0259r\u0259/",
|
|
3030
|
+
// pure (inflected)
|
|
3031
|
+
zulke: "/\u02C8z\u028Flk\u0259/"
|
|
3032
|
+
};
|
|
3033
|
+
|
|
3034
|
+
// src/overrides/or.ts
|
|
3035
|
+
// Odia (or) pronunciation overrides: maps a word in Odia script to a bare IPA
// transcription (unlike the Latin-script tables in this file, the values here
// carry no surrounding slashes).
// NOTE(review): presumably consulted ahead of the generic IPA lookup - confirm
// against the caller, which is not visible from this table.
//
// Transcription conventions used throughout this table:
//   - voiceless aspirates take U+02B0 (t̪ʰ, ʈʰ, pʰ, t͡ʃʰ, kʰ);
//   - voiced (breathy) aspirates take U+02B1 (ɡʱ, d̪ʱ, bʱ);
//   - affricates are written with the tie bar U+0361 (t͡ʃ, d͡ʒ).
// The section comments name the literary source the surrounding words were
// collected from.
var or_ = {
  // Bande Utkala Janani (Odia anthem)
  \u0B05\u0B19\u0B4D\u0B17\u0B47: "\u0254\u014B\u0261e",
  // Jagannath Das — Odia Bhagabata
  \u0B05\u0B28\u0B3E\u0B26\u0B3F: "\u0254na\u02D0d\u032Ai",
  \u0B05\u0B2A\u0B4D\u0B30\u0B2E\u0B3F\u0B24: "\u0254p\u027E\u0254mit\u032A\u0254",
  \u0B05\u0B30\u0B4D\u0B25\u0B47: "\u0254\u027Et\u032A\u02B0e",
  \u0B05\u0B36\u0B47\u0B37: "\u0254\u0283e\u0282\u0254",
  \u0B06\u0B17\u0B41\u0B01: "a\u02D0\u0261\u0169",
  \u0B06\u0B24\u0B4D\u0B2E\u0B40\u0B5F: "a\u02D0t\u032Ami\u02D0j\u0254",
  \u0B06\u0B30\u0B24: "a\u02D0\u027E\u0254t\u032A\u0254",
  // Gangadhar Meher — Tapaswinee
  \u0B06\u0B39\u0B4D\u0B32\u0B3E\u0B26: "a\u02D0hl\u032Aa\u02D0d\u032A\u0254",
  \u0B07\u0B28\u0B4D\u0B26\u0B4D\u0B30\u0B28\u0B40\u0B33: "ind\u032A\u027E\u0254ni\u02D0l\u032A\u0254",
  \u0B09\u0B24\u0B4D\u0B15\u0B33\u0B2D\u0B41\u0B2C\u0B28\u0B47: "ut\u032Ak\u0254l\u032A\u0254b\u02B1ub\u0254ne",
  \u0B09\u0B24\u0B4D\u0B15\u0B33\u0B30: "ut\u032Ak\u0254l\u032A\u0254\u027E\u0254",
  // Upendra Bhanja — Baidehisha Bilasa
  \u0B09\u0B26\u0B4D\u0B2D\u0B2C: "ud\u032Ab\u02B1\u0254b\u0254",
  \u0B0F\u0B23\u0B47: "e\u0273e",
  \u0B14\u0B30\u0B38\u0B41: "\u0254w\u027E\u0254su",
  // Rangabati (folk song)
  \u0B15\u0B28\u0B15\u0B32\u0B24\u0B3E: "k\u0254n\u0254k\u0254l\u032A\u0254t\u032Aa\u02D0",
  \u0B15\u0B33: "k\u0254l\u032A\u0254",
  \u0B15\u0B39\u0B32\u0B4B: "k\u0254h\u0254l\u032Ao\u02D0",
  \u0B15\u0B3E\u0B24\u0B30: "ka\u02D0t\u032A\u0254\u027E\u0254",
  // Fakir Mohan Senapati — Chha Mana Atha Guntha
  \u0B15\u0B3E\u0B30\u0B2C\u0B3E\u0B30: "ka\u02D0\u027E\u0254ba\u02D0\u027E\u0254",
  \u0B15\u0B3E\u0B30\u0B3E\u0B26\u0B23\u0B4D\u0B21\u0B47: "ka\u02D0\u027Ea\u02D0d\u032A\u0254\u0273\u0256e",
  \u0B15\u0B3F\u0B2E\u0B4D\u0B2A\u0B3E: "kimpa\u02D0",
  \u0B15\u0B43\u0B37\u0B4D\u0B23\u0B2A\u0B3E\u0B26: "k\u027Eu\u0282\u0273\u0254pa\u02D0d\u032A\u0254",
  \u0B15\u0B47\u0B2C\u0B3E: "keba\u02D0",
  \u0B15\u0B47\u0B36\u0B3E: "ke\u0283a\u02D0",
  \u0B15\u0B4B\u0B36: "ko\u02D0\u0283\u0254",
  // Madhusudan Das — Utkala Santana
  \u0B17\u0B19\u0B4D\u0B17\u0B3E: "\u0261\u0254\u014B\u0261a\u02D0",
  \u0B17\u0B33\u0B41\u0B1B\u0B3F: "\u0261\u0254l\u032Aut\u0361\u0283\u02B0i",
  \u0B17\u0B40\u0B24\u0B28\u0B3E\u0B26: "\u0261i\u02D0t\u032A\u0254na\u02D0d\u032A\u0254",
  \u0B17\u0B41\u0B23\u0B3E\u0B33\u0B5F: "\u0261u\u0273a\u02D0l\u032A\u0254j\u0254",
  \u0B17\u0B41\u0B01\u0B25\u0B3E: "\u0261\u0169t\u032A\u02B0a\u02D0",
  \u0B17\u0B4B: "\u0261o\u02D0",
  \u0B17\u0B4B\u0B26\u0B3E\u0B2C\u0B30\u0B40: "\u0261o\u02D0d\u032Aa\u02D0b\u0254\u027Ei\u02D0",
  \u0B18\u0B1F\u0B47: "\u0261\u02B1\u0254\u0288e",
  \u0B18\u0B41\u0B2E\u0B41\u0B38\u0B30\u0B05\u0B27\u0B3F\u0B2A: "\u0261\u02B1umus\u0254\u027E\u0254d\u032A\u02B1ip\u0254",
  \u0B1A\u0B33\u0B47: "t\u0361\u0283\u0254l\u032Ae",
  \u0B1A\u0B33\u0B47\u0B28\u0B3E\u0B39\u0B3F\u0B01: "t\u0361\u0283\u0254l\u032Aena\u02D0h\u0129",
  \u0B1A\u0B3E\u0B30\u0B41: "t\u0361\u0283a\u02D0\u027Eu",
  \u0B1A\u0B3E\u0B32\u0B3F\u0B2F\u0B3E\u0B06\u0B28\u0B4D\u0B24\u0B41: "t\u0361\u0283a\u02D0l\u032Aija\u02D0a\u02D0nt\u032Au",
  \u0B1C\u0B17\u0B28\u0B4D\u0B28\u0B3E\u0B25\u0B47: "d\u0361\u0292\u0254\u0261\u0254nna\u02D0t\u032A\u02B0e",
  \u0B1C\u0B28\u0B4D\u0B2E\u0B15\u0B3E\u0B33\u0B30\u0B41: "d\u0361\u0292\u0254nm\u0254ka\u02D0l\u032A\u0254\u027Eu",
  \u0B1C\u0B2E\u0B3F\u0B26\u0B3E\u0B30: "d\u0361\u0292\u0254mid\u032Aa\u02D0\u027E\u0254",
  \u0B1C\u0B3F\u0B24: "d\u0361\u0292it\u032A\u0254",
  \u0B1C\u0B4D\u0B5F\u0B4B\u0B24\u0B3F\u0B30\u0B4D\u0B2E\u0B5F\u0B40: "d\u0361\u0292jo\u02D0t\u032Ai\u027Em\u0254ji\u02D0",
  \u0B24\u0B26\u0B4D\u0B27\u0B24: "t\u032A\u0254d\u032Ad\u032A\u02B1\u0254t\u032A\u0254",
  \u0B24\u0B28\u0B41\u0B15\u0B3E\u0B28\u0B4D\u0B24\u0B3F: "t\u032A\u0254nuka\u02D0nt\u032Ai",
  \u0B24\u0B30\u0B19\u0B4D\u0B17\u0B47: "t\u032A\u0254\u027E\u0254\u014B\u0261e",
  \u0B24\u0B30\u0B3F\u0B32\u0B47: "t\u032A\u0254\u027Eil\u032Ae",
  \u0B24\u0B3E\u0B17\u0B47: "t\u032Aa\u02D0\u0261e",
  \u0B24\u0B41\u0B39\u0B3F: "t\u032Auhi",
  \u0B24\u0B4B\u0B39\u0B30: "t\u032Ao\u02D0h\u0254\u027E\u0254",
  // Fakir Mohan Senapati — Odia Kahiki Daridra
  \u0B26\u0B30\u0B3F\u0B26\u0B4D\u0B30\u0B24\u0B3E: "d\u032A\u0254\u027Eid\u032A\u027E\u0254t\u032Aa\u02D0",
  \u0B26\u0B40\u0B30\u0B4D\u0B18\u0B3F\u0B15\u0B3E: "d\u032Ai\u02D0\u027E\u0261\u02B1ika\u02D0",
  \u0B26\u0B40\u0B30\u0B4D\u0B18\u0B47: "d\u032Ai\u02D0\u027E\u0261\u02B1e",
  // kha (U+0B16) is a voiceless aspirate, so it takes U+02B0 like the other
  // voiceless aspirates in this table, not the breathy-voice mark U+02B1.
  \u0B26\u0B41\u0B03\u0B16: "d\u032Auk\u02B0\u0254",
  \u0B26\u0B41\u0B30\u0B4D\u0B17\u0B24\u0B3F\u0B30: "d\u032Au\u027E\u0261\u0254t\u032Ai\u027E\u0254",
  \u0B26\u0B47\u0B36\u0B2C\u0B3E\u0B38\u0B40: "d\u032Ae\u0283\u0254ba\u02D0si\u02D0",
  \u0B26\u0B47\u0B36\u0B2E\u0B3E\u0B1F\u0B3F\u0B30\u0B47: "d\u032Ae\u0283\u0254ma\u02D0\u0288i\u027Ee",
  \u0B26\u0B4D\u0B5F\u0B41\u0B24\u0B3F: "d\u032Ajut\u032Ai",
  \u0B27\u0B28\u0B1E\u0B4D\u0B1C\u0B5F: "d\u032A\u02B1\u0254n\u0254\u0272d\u0361\u0292\u0254j\u0254",
  \u0B27\u0B3E\u0B28\u0B30: "d\u032A\u02B1a\u02D0n\u0254\u027E\u0254",
  \u0B28: "n\u0254",
  \u0B28\u0B17\u0B26: "n\u0254\u0261\u0254d\u032A\u0254",
  \u0B28\u0B28\u0B4D\u0B26\u0B28: "n\u0254nd\u032A\u0254n\u0254",
  \u0B28\u0B2E\u0B07\u0B01: "n\u0254m\u0254\u0129",
  \u0B28\u0B30\u0B4D\u0B15\u0B47: "n\u0254\u027Eke",
  \u0B28\u0B3E\u0B30\u0B40\u0B19\u0B4D\u0B15: "na\u02D0\u027Ei\u02D0\u014Bk\u0254",
  \u0B28\u0B3F\u0B30\u0B28\u0B4D\u0B24\u0B30\u0B47: "ni\u027E\u0254nt\u032A\u0254\u027Ee",
  \u0B28\u0B3F\u0B39\u0B3F\u0B24: "nihit\u032A\u0254",
  \u0B28\u0B40\u0B33\u0B3E\u0B2E\u0B4D\u0B2C\u0B41: "ni\u02D0l\u032Aa\u02D0mbu",
  \u0B28\u0B43\u0B2A: "n\u027Eup\u0254",
  \u0B2A\u0B21\u0B3C\u0B3F\u0B25\u0B3E\u0B09: "p\u0254\u0256\u032Ait\u032A\u02B0a\u02D0u",
  \u0B2A\u0B26\u0B41: "p\u0254d\u032Au",
  \u0B2A\u0B26\u0B47: "p\u0254d\u032Ae",
  \u0B2A\u0B5F\u0B30\u0B47: "p\u0254j\u0254\u027Ee",
  \u0B2A\u0B30\u0B3E: "p\u0254\u027Ea\u02D0",
  \u0B2A\u0B42\u0B30\u0B4D\u0B2C\u0B2A\u0B41\u0B30\u0B41\u0B37: "pu\u02D0\u027Eb\u0254pu\u027Eu\u0282\u0254",
  \u0B2A\u0B4B\u0B37\u0B23: "po\u02D0\u0282\u0254\u0273\u0254",
  \u0B2A\u0B4D\u0B30\u0B1C\u0B4D\u0B1E\u0B3E: "p\u027E\u0254d\u0361\u0292\u0272a\u02D0",
  \u0B2A\u0B4D\u0B30\u0B2C\u0B28\u0B4D\u0B27\u0B47: "p\u027E\u0254b\u0254nd\u032A\u02B1e",
  \u0B2A\u0B4D\u0B30\u0B3E\u0B23\u0B40\u0B2E\u0B3E\u0B28\u0B19\u0B4D\u0B15: "p\u027Ea\u02D0\u0273i\u02D0ma\u02D0n\u0254\u014Bk\u0254",
  \u0B2C\u0B28\u0B2D\u0B42\u0B2E\u0B3F: "b\u0254n\u0254b\u02B1u\u02D0mi",
  \u0B2C\u0B28\u0B4D\u0B26\u0B28: "b\u0254nd\u032A\u0254n\u0254",
  \u0B2C\u0B28\u0B4D\u0B26\u0B47: "b\u0254nd\u032Ae",
  \u0B2C\u0B30\u0B39\u0B3F\u0B2C\u0B02\u0B36\u0B47: "b\u0254\u027E\u0254hib\u0254\u014B\u0283e",
  \u0B2C\u0B30\u0B4D\u0B23\u0B4D\u0B23\u0B07: "b\u0254\u027E\u0273\u0273\u0254i",
  \u0B2C\u0B3F\u0B1A\u0B3E\u0B30\u0B47: "bit\u0361\u0283a\u02D0\u027Ee",
  \u0B2C\u0B3F\u0B28\u0B41: "binu",
  \u0B2C\u0B3F\u0B2C\u0B47\u0B15: "bibek",
  \u0B2C\u0B3F\u0B33\u0B3E\u0B38: "bil\u032Aa\u02D0s\u0254",
  \u0B2C\u0B3F\u0B36\u0B3F\u0B37\u0B4D\u0B1F\u0B47: "bi\u0283i\u0282\u0288e",
  \u0B2C\u0B3F\u0B39\u0B19\u0B4D\u0B17\u0B47: "bih\u0254\u014B\u0261e",
  \u0B2C\u0B47\u0B28\u0B3F: "beni",
  \u0B2C\u0B47\u0B36\u0B3E: "be\u0283a\u02D0",
  \u0B2C\u0B4B\u0B32\u0B3E\u0B09: "bo\u02D0l\u032Aa\u02D0u",
  \u0B2C\u0B4D\u0B5F\u0B25\u0B3E: "bj\u0254t\u032A\u02B0a\u02D0",
  \u0B2C\u0B4D\u0B30\u0B1C: "b\u027E\u0254d\u0361\u0292\u0254",
  \u0B2D\u0B23\u0B4D\u0B21\u0B3E\u0B30: "b\u02B1\u0254\u0273\u0256a\u02D0\u027E\u0254",
  \u0B2D\u0B3E\u0B24\u0B43\u0B2D\u0B3E\u0B2C: "b\u02B1a\u02D0t\u032A\u027Eub\u02B1a\u02D0b\u0254",
  \u0B2D\u0B3E\u0B37\u0B2E\u0B5F\u0B40: "b\u02B1a\u02D0\u0282\u0254m\u0254ji\u02D0",
  \u0B2D\u0B40\u0B30\u0B41: "b\u02B1i\u02D0\u027Eu",
  \u0B2D\u0B42\u0B27\u0B30\u0B2E\u0B3E\u0B33\u0B3E: "b\u02B1u\u02D0d\u032A\u02B1\u0254\u027E\u0254ma\u02D0l\u032Aa\u02D0",
  \u0B2D\u0B47\u0B26\u0B3F: "b\u02B1ed\u032Ai",
  \u0B2E\u0B15\u0B30\u0B28\u0B4D\u0B26: "m\u0254k\u0254\u027E\u0254nd\u032A\u0254",
  \u0B2E\u0B19\u0B4D\u0B17\u0B30\u0B3E\u0B1C: "m\u0254\u014B\u0261\u0254\u027Ea\u02D0d\u0361\u0292\u0254",
  \u0B2E\u0B28\u0B30\u0B47: "m\u0254n\u0254\u027Ee",
  \u0B2E\u0B28\u0B4B\u0B39\u0B30: "m\u0254no\u02D0h\u0254\u027E\u0254",
  \u0B2E\u0B2B\u0B38\u0B32\u0B30: "m\u0254p\u02B0\u0254s\u0254l\u032A\u0254\u027E\u0254",
  \u0B2E\u0B30\u0B3E\u0B33\u0B2E\u0B3E\u0B33\u0B3F\u0B28\u0B40: "m\u0254\u027Ea\u02D0l\u032A\u0254ma\u02D0l\u032Aini\u02D0",
  // Affricate written with the tie bar (U+0361), as in every other entry.
  \u0B2E\u0B30\u0B4D\u0B2F\u0B4D\u0B5F\u0B3E\u0B26\u0B3E: "m\u0254\u027Ed\u0361\u0292ja\u02D0d\u032Aa\u02D0",
  \u0B2E\u0B39\u0B3E\u0B1C\u0B28: "m\u0254ha\u02D0d\u0361\u0292\u0254n\u0254",
  \u0B2E\u0B39\u0B3E\u0B1C\u0B28\u0B40: "m\u0254ha\u02D0d\u0361\u0292\u0254ni\u02D0",
  \u0B2E\u0B3E\u0B32\u0B47: "ma\u02D0l\u032Ae",
  \u0B2E\u0B3F\u0B36\u0B41: "mi\u0283u",
  // kha (U+0B16): voiceless aspirate, U+02B0 (see the note on the earlier kha entry).
  \u0B2E\u0B41\u0B16\u0B30\u0B3F\u0B24: "muk\u02B0\u0254\u027Eit\u032A\u0254",
  \u0B30\u0B19\u0B4D\u0B17\u0B2C\u0B24\u0B40: "\u027E\u0254\u014B\u0261\u0254b\u0254t\u032Ai\u02D0",
  \u0B30\u0B1C\u0B3E: "\u027E\u0254d\u0361\u0292a\u02D0",
  \u0B30\u0B3E\u0B1C\u0B3F\u0B24: "\u027Ea\u02D0d\u0361\u0292it\u032A\u0254",
  \u0B30\u0B3E\u0B28\u0B3F: "\u027Ea\u02D0ni",
  \u0B30\u0B4B\u0B26\u0B28: "\u027Eo\u02D0d\u032A\u0254n\u0254",
  \u0B36\u0B41\u0B23\u0B3E\u0B2F\u0B3E\u0B0F: "\u0283u\u0273a\u02D0ja\u02D0e",
  \u0B36\u0B41\u0B26\u0B4D\u0B27: "\u0283ud\u032Ad\u032A\u02B1\u0254",
  \u0B36\u0B41\u0B2D: "\u0283ub\u02B1\u0254",
  \u0B36\u0B41\u0B2D\u0B4D\u0B30: "\u0283ub\u02B1\u027E\u0254",
  \u0B36\u0B4B\u0B2D\u0B3E\u0B30: "\u0283ob\u02B1a\u02D0\u027E\u0254",
  \u0B38\u0B2A\u0B28: "s\u0254p\u0254n\u0254",
  \u0B38\u0B39\u0B41: "s\u0254hu",
  \u0B38\u0B3E\u0B1C\u0B47: "sa\u02D0d\u0361\u0292e",
  \u0B38\u0B41\u0B24\u0B3E: "sut\u032Aa\u02D0",
  \u0B38\u0B41\u0B2A\u0B4D\u0B30\u0B15\u0B3E\u0B36: "sup\u027E\u0254ka\u02D0\u0283\u0254",
  \u0B38\u0B47\u0B2E\u0B3E\u0B28\u0B19\u0B4D\u0B15\u0B20\u0B3E\u0B30\u0B47: "sema\u02D0n\u0254\u014Bk\u0254\u0288\u02B0a\u02D0\u027Ee",
  \u0B38\u0B4B\u0B26\u0B30: "so\u02D0d\u032A\u0254\u027E\u0254",
  \u0B38\u0B4D\u0B2B\u0B41\u0B30\u0B4D\u0B24\u0B4D\u0B24\u0B3F: "sp\u02B0u\u027Et\u032At\u032Ai",
  "\u0B38\u0B4D\u0B71\u0B3E\u0B27\u0B40\u0B28": "swa\u02D0d\u032A\u02B1i\u02D0n\u0254",
  \u0B39\u0B3E\u0B38\u0B2E\u0B5F\u0B40: "ha\u02D0s\u0254m\u0254ji\u02D0",
  \u0B39\u0B3F\u0B24\u0B47: "hit\u032Ae",
  \u0B39\u0B43\u0B26\u0B5F\u0B30\u0B47: "h\u027Eud\u032A\u0254j\u0254\u027Ee",
  \u0B39\u0B43\u0B26\u0B47: "h\u027Eud\u032Ae",
  \u0B39\u0B47\u0B32: "hel\u032A\u0254",
  \u0B39\u0B47\u0B32\u0B3E\u0B24: "hel\u032Aa\u02D0t\u032A\u0254"
};
|
|
3185
|
+
|
|
3186
|
+
// src/overrides/pt.ts
|
|
3187
|
+
// Portuguese (pt) pronunciation overrides: maps a word (keys are
// case-sensitive - both "Fita" and "fita" are present) to a slash-delimited
// IPA transcription.
// NOTE(review): presumably consulted ahead of the generic IPA lookup - confirm
// against the caller, which is not visible from this table.
//
// Convention: the primary-stress mark U+02C8 is placed before the onset of the
// stressed syllable (e.g. fragorosas -> fɾaɡoˈɾozas).
var pt = {
  a: "/a/",
  agora: "/a\u02C8\u0261o\u027Ea/",
  ainda: "/a\u02C8\u0129da/",
  alto: "/\u02C8awtu/",
  alvejei: "/awve\u02C8\u0292ej/",
  antes: "/\u02C8\xE3t\u0283is/",
  // New sample overrides
  Antigamente: "/\xE3t\u0283i\u0261a\u02C8m\u1EBDt\u0283i/",
  aparecesse: "/apa\u027Ee\u02C8sesi/",
  assembleia: "/as\u1EBD\u02C8bleja/",
  autom\u00F3veis: "/awto\u02C8m\u0254vejs/",
  autor: "/aw\u02C8to\u027E/",
  aventura: "/av\u1EBD\u02C8tu\u027Ea/",
  bater: "/ba\u02C8tex/",
  beija: "/\u02C8bej\u0292a/",
  bem: "/b\u1EBDj/",
  calmo: "/\u02C8kawmu/",
  cama: "/\u02C8k\xE3ma/",
  certa: "/\u02C8s\u025B\u027Eta/",
  chamam: "/\u02C8\u0283\xE3m\xE3w/",
  como: "/\u02C8komu/",
  contraparente: "/k\xF5t\u027Eapa\u02C8\u027E\u1EBDt\u0283i/",
  costume: "/kos\u02C8t\u0169mi/",
  cotovelo: "/koto\u02C8velu/",
  // Stress mark sits before the onset "z" of the stressed syllable "zar".
  cruzarmos: "/k\u027Eu\u02C8za\u027Emus/",
  d: "/de/", // abbreviation for Dom
  de: "/d\u0292i/",
  dei: "/dej/",
  dele: "/\u02C8deli/",
  dentro: "/\u02C8d\u1EBDt\u027Eu/",
  deu: "/dew/",
  dia: "/\u02C8d\u0292ia/",
  direita: "/d\u0292i\u02C8\u027Eejta/",
  direito: "/d\u0292i\u02C8\u027Eejtu/",
  disco: "/\u02C8d\u0292isku/",
  disposto: "/d\u0292is\u02C8postu/",
  disse: "/\u02C8d\u0292isi/",
  dois: "/dojs/",
  \u00E9: "/\u025B/",
  emprenhou: "/\u1EBDp\u027Ee\u02C8\u0272ow/",
  encante: "/\u1EBD\u02C8k\xE3t\u0283i/",
  entre: "/\u02C8\u1EBDt\u027Ei/",
  escolherei: "/isko\u028Ee\u02C8\u027Eej/",
  esf\u00EDngico: "/es\u02C8f\u0129\u0292iku/",
  eterna: "/e\u02C8t\u025B\u027Ena/",
  eu: "/ew/",
  Europa: "/ew\u02C8\u027E\u0254pa/",
  face: "/\u02C8fasi/",
  faixas: "/\u02C8faj\u0283as/",
  falsa: "/\u02C8fawsa/",
  faz: "/fas/",
  fim: "/f\u0129/",
  Fita: "/\u02C8fita/",
  fita: "/\u02C8fita/",
  fitando: "/fi\u02C8t\xE3du/",
  flameja: "/fla\u02C8me\u0292a/",
  focinhando: "/fosi\u02C8\u0272\xE3du/",
  foi: "/foj/",
  fossem: "/\u02C8fos\u1EBDj/",
  fosses: "/\u02C8fosis/",
  fragorosas: "/f\u027Ea\u0261o\u02C8\u027Eozas/",
  francisco: "/f\u027E\xE3\u02C8sisku/",
  frente: "/\u02C8f\u027E\u1EBDt\u0283i/",
  f\u00FAlgidos: "/\u02C8fuw\u0292idus/",
  gente: "/\u02C8\u0292\u1EBDt\u0283i/",
  girassol: "/\u0292i\u027Ea\u02C8s\u0254w/",
  gosto: "/\u02C8\u0261ostu/",
  grandes: "/\u02C8\u0261\u027E\xE3d\u0292is/",
  havia: "/a\u02C8via/",
  hei: "/ej/",
  hesitei: "/ezi\u02C8tej/",
  homens: "/\u02C8\xF5m\u1EBDjs/",
  houve: "/\u02C8ovi/",
  individuais: "/\u0129d\u0292ividu\u02C8ajs/",
  instante: "/\u0129s\u02C8t\xE3t\u0283i/",
  instituir: "/\u0129st\u0283itu\u02C8i\u027E/",
  isso: "/\u02C8isu/",
  jos\u00E9: "/\u0292o\u02C8z\u025B/",
  liberdade: "/libe\u027E\u02C8dad\u0292i/",
  lisboa: "/liz\u02C8boa/",
  louca: "/\u02C8loka/",
  louvor: "/lo\u02C8vo\u027E/",
  lua: "/\u02C8lua/",
  maior: "/ma\u02C8jo\u027E/",
  mais: "/majs/",
  mal: "/maw/",
  margens: "/\u02C8ma\u027E\u0292\u1EBDjs/",
  maus: "/maws/",
  mesti\u00E7ara: "/mesti\u02C8sa\u027Ea/",
  m\u00E9todo: "/\u02C8m\u025Btudu/",
  mil: "/miw/",
  mim: "/m\u0129/",
  morder: "/mo\u027E\u02C8dex/",
  morte: "/\u02C8m\u0254\u027Et\u0283i/",
  morto: "/\u02C8mo\u027Etu/",
  mostra: "/\u02C8m\u0254st\u027Ea/",
  mulher: "/mu\u02C8\u028E\u025Bx/",
  nacional: "/nasio\u02C8naw/",
  nascimento: "/nasi\u02C8m\u1EBDtu/",
  nele: "/\u02C8neli/",
  nem: "/n\u1EBDj/",
  nesse: "/\u02C8nesi/",
  noite: "/\u02C8nojt\u0283i/",
  nonada: "/no\u02C8nada/", // nothing (Guimarães Rosa)
  nosso: "/\u02C8nosu/",
  novo: "/\u02C8novu/",
  num: "/n\u0169/",
  o: "/u/",
  \u00F3: "/\u0254/",
  ondas: "/\u02C8\xF5das/",
  onde: "/\u02C8\xF5d\u0292i/",
  ou: "/ow/",
  ousadia: "/oza\u02C8d\u0292ia/",
  outono: "/ow\u02C8tonu/",
  outra: "/\u02C8owt\u027Ea/",
  outro: "/\u02C8owt\u027Eu/",
  para: "/\u02C8pa\u027Ea/",
  parte: "/\u02C8pa\u027Et\u0283i/",
  passadeira: "/pasa\u02C8dej\u027Ea/",
  pode: "/\u02C8p\u0254d\u0292i/",
  portugal: "/po\u027Etu\u02C8\u0261aw/",
  posso: "/\u02C8posu/",
  posta: "/\u02C8p\u0254sta/",
  povo: "/\u02C8povu/",
  praia: "/\u02C8p\u027Eaja/",
  prometia: "/p\u027Eome\u02C8t\u0283ia/",
  quanto: "/\u02C8kw\xE3tu/",
  quarto: "/\u02C8kwa\u027Etu/",
  quer: "/k\u025B\u027E/",
  querer: "/ke\u02C8\u027Eex/",
  ramalhete: "/xama\u02C8\u028Eet\u0283i/", // mansion name (Eça de Queirós)
  real: "/xe\u02C8aw/",
  rebentavam: "/xeb\u1EBD\u02C8tav\xE3w/",
  rei: "/xej/",
  reino: "/\u02C8xejnu/",
  remota: "/xe\u02C8m\u0254ta/",
  retumbante: "/xet\u0169\u02C8b\xE3t\u0283i/",
  rir: "/xi\u027E/",
  riso: "/\u02C8xizu/",
  rosto: "/\u02C8xostu/",
  rua: "/\u02C8xua/",
  s: "/s\xE3w/", // abbreviation for São
  saber: "/sa\u02C8bex/",
  sal: "/saw/",
  sei: "/sej/",
  sem: "/s\u1EBDj/",
  sempre: "/\u02C8s\u1EBDp\u027Ei/",
  senhor: "/se\u02C8\u0272o\u027E/",
  sequer: "/se\u02C8k\u025B\u027E/",
  ser: "/\u02C8sex/",
  sim: "/s\u0129/",
  simples: "/\u02C8s\u0129plis/",
  Sob: "/s\u0254b/",
  sociais: "/sosi\u02C8ajs/",
  sol: "/s\u0254w/",
  sua: "/\u02C8sua/",
  sublimaram: "/subli\u02C8ma\u027E\xE3w/",
  tanto: "/\u02C8t\xE3tu/",
  taprobana: "/tap\u027Eo\u02C8b\xE3na/", // ancient Sri Lanka
  te: "/t\u0283i/",
  tempo: "/\u02C8t\u1EBDpu/",
  ter: "/tex/",
  tive: "/\u02C8t\u0283ivi/",
  "toldam-lhe": "/\u02C8towd\xE3w\u028Ei/",
  trapiche: "/t\u027Ea\u02C8pi\u0283i/",
  triste: "/\u02C8t\u027Eist\u0283i/",
  um: "/\u0169/",
  uma: "/\u02C8\u0169ma/",
  universo: "/uni\u02C8v\u025B\u027Esu/",
  uso: "/\u02C8uzu/",
  vale: "/\u02C8vali/",
  verde: "/\u02C8ve\u027Ed\u0292i/",
  verdes: "/\u02C8ve\u027Ed\u0292is/",
  vez: "/ves/",
  viagem: "/vi\u02C8a\u0292\u1EBDj/",
  "viv\xEA-lo": "/vi\u02C8velu/",
  "vou-me": "/\u02C8vomi/",
  zelo: "/\u02C8zelu/",
  zomba: "/\u02C8z\xF5ba/"
};
|
|
3373
|
+
|
|
3374
|
+
// src/overrides/ro.ts
|
|
3375
|
+
// Romanian (ro) pronunciation overrides: maps a word (keys are case-sensitive;
// elided/hyphenated forms are quoted) to a slash-delimited IPA transcription.
// NOTE(review): presumably consulted ahead of the generic IPA lookup - confirm
// against the caller, which is not visible from this table.
//
// Conventions: the voiced velar stop is the IPA letter U+0261, never ASCII "g";
// word-final orthographic "-ă" is /ə/ while final "-a" is /a/.
var ro = {
  // New sample overrides
  admirabil\u0103: "/admira\u02C8bil\u0259/",
  // IPA U+0261 for the velar stop, matching the other entries in this table.
  alearg\u0103: "/a\u02C8le\u032Far\u0261\u0259/",
  alergi: "/a\u02C8lerd\u0292\u02B2/",
  Armadia: "/ar\u02C8madja/",
  b\u0103ie\u0219a\u0219: "/b\u0259je\u02C8\u0283a\u0283/",
  "ca-ntr-un": "/kan\u02C8trun/",
  C\u00E2rlibaba: "/k\u0268rli\u02C8baba/",
  ciob\u0103nei: "/t\u0283ob\u0259\u02C8nej/",
  ciople\u0219te: "/t\u0283o\u02C8ple\u0283te/",
  cirezilor: "/t\u0283i\u02C8rezilor/",
  cl\u0103di: "/kl\u0259\u02C8d\u02B2i/",
  codrul: "/\u02C8kodrul/",
  crengi: "/krend\u0292\u02B2/",
  "croie\u0219te-\u021Bi": "/kro\u02C8je\u0283tet\u0283i/",
  curat\u0103: "/ku\u02C8rat\u0259/",
  deshame: "/des\u02C8hame/",
  desprind: "/des\u02C8prind/",
  "De\u0219teapt\u0103-te": "/de\u0283\u02C8te\u032Fapt\u0259te/",
  fecioara: "/fet\u0283o\u02C8ara/",
  ferestrelor: "/fe\u02C8restrelor/",
  furtunoas\u0103: "/furtu\u02C8no\u032Fas\u0259/",
  gem\u00E2nd: "/d\u0292e\u02C8m\u0268nd/",
  grupurile: "/\u02C8\u0261rupurile/",
  homer: "/\u02C8homer/",
  humule\u0219tii: "/humu\u02C8le\u0283ti.i/",
  \u00EEncepusem: "/\u0268nt\u0283e\u02C8pusem/",
  \u00EEntinsese: "/\u0268ntin\u02C8sese/",
  \u00EEntov\u0103r\u0103\u0219ind: "/\u0268ntov\u0259r\u0259\u02C8\u0283ind/",
  intrat\u0103: "/in\u02C8trat\u0259/",
  // The key ends in orthographic "-a", so the final vowel is /a/, not /ə/.
  Jidovi\u021Ba: "/\u0292ido\u02C8vitsa/",
  "l-a": "/la/",
  "le-nt\xE2lnesc": "/lent\u0268l\u02C8nesk/",
  luasem: "/lu\u02C8asem/",
  lustruie\u0219te: "/lustru\u02C8je\u0283te/",
  "m\xE2n\u0103-n": "/\u02C8m\u0268n\u0259n/",
  "m\xE2ndr\u0103-n": "/\u02C8m\u0268ndr\u0259n/",
  morminte: "/mor\u02C8minte/",
  muceg\u0103it\u0103: "/mut\u0283e\u0261\u0259\u02C8it\u0259/",
  na\u021Bional\u0103: "/natsio\u02C8nal\u0259/",
  Obloanele: "/oblo\u032Fa\u02C8nele/",
  "ochiu-adormit": "/\u02C8okju ador\u02C8mit/",
  paraschi: "/para\u02C8ski/",
  p\u0103rin\u021Bilor: "/p\u0259\u02C8rintsilor/",
  "pe-ntinsele": "/pentin\u02C8sele/",
  Prahovei: "/pra\u02C8hovej/",
  r\u0103sar: "/r\u0259\u02C8sar/",
  r\u0103zuie\u0219te: "/r\u0259zu\u02C8je\u0283te/",
  r\u0103zvr\u0103tit\u0103: "/r\u0259zvr\u0259\u02C8tit\u0259/",
  sc\u00E2r\u021B\u00E2iau: "/sk\u0268rts\u0268\u02C8jaw/",
  sc\u00E2r\u021B\u00E2ie: "/sk\u0268r\u02C8ts\u0268je/",
  scoal\u0103: "/\u02C8sko\u032Fal\u0259/",
  scrumit: "/skru\u02C8mit/",
  scuipat: "/skui\u02C8pat/",
  "se-nchine": "/sen\u02C8kine/",
  sfin\u021Bi: "/sfints\u02B2/",
  sicriele: "/si\u02C8krijele/",
  socoate: "/so\u02C8ko\u032Fate/",
  Some\u0219ul: "/\u02C8some\u0283ul/",
  \u0219oseaua: "/\u0283o\u02C8se\u032Fawa/",
  stam: "/stam/",
  strivesc: "/stri\u02C8vesk/",
  sublocotenent: "/sublokote\u02C8nent/",
  t\u0103ind: "/t\u0259\u02C8ind/",
  tainele: "/\u02C8tajnele/",
  "te-ad\xE2ncir\u0103": "/tead\u0268n\u02C8t\u0283ir\u0259/",
  "toate-s": "/\u02C8to\u032Fates/",
  tr\u0103snesc: "/tr\u0259s\u02C8nesk/",
  ucid: "/u\u02C8t\u0283id/",
  uneltele: "/u\u02C8neltele/",
  v\u0103ii: "/\u02C8v\u0259ji/",
  v\u00E2nturat\u0103: "/v\u0268ntu\u02C8rat\u0259/",
  ve\u0219tm\u00E2nt: "/ve\u0283t\u02C8m\u0268nt/",
  "vijelia-ngrozitoare": "/vi\u02C8\u0292eli.a \u014B\u0261rozi\u02C8to\u032Fare/",
  "Vino-n": "/vi\u02C8non/"
};
|
|
3452
|
+
|
|
3453
|
+
// src/overrides/sv.ts
|
|
3454
|
+
// Swedish word-level IPA overrides. Keys are matched by exact spelling:
// lookupIpa tries the word as written and then its lowercase form, so the
// capitalized keys (Birck, Ingmarssönerna, Martin) only match that casing.
// Values are slash-delimited IPA strings.
var sv = {
  \u00E4dlaste: "/\u02C8\u025B\u02D0dlast\u025B/",
  \u00E4ngder: "/\u02C8\u025B\u014B\u02D0d\u025Br/",
  // New entries for updated samples
  anl\u00E4nt: "/\u02C8an\u02D0l\u025Bnt/",
  arbetsrum: "/\u02C8ar\u02D0bets\u02CCr\u0275m/",
  arvedel: "/\u02C8ar\u02D0ve\u02CCde\u02D0l/",
  atenare: "/at\u025B\u02C8n\u0251\u02D0r\u025B/",
  \u00E5ttiotv\u00E5: "/\u02C8\u0254t\u02D0\u026A\u0254\u02CCtvo\u02D0/",
  \u00E5ttitalet: "/\u02C8\u0254t\u02D0\u026A\u02CCt\u0251\u02D0l\u025Bt/",
  barnkullarnas: "/\u02C8b\u0251\u02D0\u0273\u02CCk\u0275l\u02D0arnas/",
  // New sample overrides
  Birck: "/b\u026Ark/",
  b\u00F6nderna: "/\u02C8b\u0153nd\u025B\u0273a/",
  brynja: "/\u02C8bryn\u02D0ja/",
  bryt: "/bry\u02D0t/",
  d\u00E4rn\u00E4st: "/\u02C8d\xE6\u02D0\u0273\u025Bst/",
  dimmors: "/\u02C8d\u026Am\u02D0\u0254\u0282/",
  eget: "/\u02C8e\u02D0\u0261\u025Bt/",
  f\u00E5gels\u00E5ng: "/\u02C8fo\u02D0\u0261\u025Bl\u02CCs\u0254\u014B\u02D0/",
  fj\u00E4llh\u00F6ga: "/\u02C8fj\u025Bl\u02D0\u02CCh\xF8\u02D0\u0261a/",
  fjol\u00E5rets: "/\u02C8fju\u02D0l\u02CCo\u02D0r\u025Bts/",
  fornstora: "/\u02C8f\u0254\u02D0\u0273\u02CCstu\u02D0ra/",
  f\u00F6rt\u00E4tas: "/f\u0153r\u02C8t\u025B\u02D0tas/",
  fredagskv\u00E4ll: "/\u02C8fre\u02D0da\u0261s\u02CCkv\u025Bl\u02D0/",
  gamla: "/\u02C8\u0261am\u02D0la/",
  gl\u00E4djerika: "/\u02C8\u0261l\u025Bd\u02D0j\u025B\u02CCri\u02D0ka/",
  haga: "/\u02C8h\u0251\u02D0\u0261a/", // place name
  halvm\u00F6rkret: "/\u02C8halv\u02CCm\u0153rkr\u025Bt/",
  "hedeby-\xF6n": "/\u02C8he\u02D0d\u025Bby\u02D0\u02CC\xF8\u02D0n/", // place name
  henrik: "/\u02C8h\u025Bnr\u026Ak/", // proper name
  h\u00F6llo: "/\u02C8h\xF8l\u02D0\u0254/",
  improvisationer: "/\u026Ampr\u0254v\u026Asa\u02C8\u0267u\u02D0n\u025Br/",
  Ingmarss\u00F6nerna: "/\u02C8\u026A\u014B\u02D0mar\u02CCs\xF8\u02D0n\u025B\u0273a/",
  j\u00E4ttestaden: "/\u02C8j\u025Bt\u02D0\u025B\u02CCst\u0251\u02D0d\u025Bn/",
  kriminalkommissarie: "/kr\u026Am\u026A\u02C8n\u0251\u02D0lk\u0254m\u026A\u02CCs\u0251\u02D0r\u026A\u025B/",
  kristina: "/kr\u026A\u02C8sti\u02D0na/", // proper name
  kv\u00E4llsm\u00F6rkret: "/\u02C8kv\u025Bl\u02D0s\u02CCm\u0153rkr\u025Bt/",
  l\u00E5ngstrump: "/\u02C8l\u0254\u014B\u02D0\u02CCstr\u0275mp/",
  linh\u00E5rig: "/\u02C8li\u02D0n\u02CCho\u02D0r\u026A\u0261/",
  ljusl\u00E5gor: "/\u02C8j\u0289\u02D0s\u02CClo\u02D0\u0261\u0254r/",
  l\u00F6vsamlingar: "/\u02C8l\xF8\u02D0v\u02CCsam\u02D0l\u026A\u014B\u02D0ar/",
  lyfte: "/\u02C8l\u028Fft\u025B/",
  lyftes: "/\u02C8l\u028Fft\u025Bs/",
  m\u00E4nniskor: "/\u02C8m\u025Bn\u02D0\u026A\u02CC\u0267\u0254r/",
  m\u00E4nniskors: "/\u02C8m\u025Bn\u02D0\u026A\u02CC\u0267\u0254\u0282/",
  Martin: "/\u02C8mar\u02D0t\u026An/",
  m\u00E4ssfall: "/\u02C8m\u025Bs\u02D0\u02CCfal\u02D0/",
  mosebacke: "/\u02C8mu\u02D0s\u025B\u02CCbak\u02D0\u025B/",
  new: "/nju\u02D0/",
  nilsson: "/\u02C8n\u026Al\u02D0s\u0254n/", // proper name
  nordamerika: "/\u02C8nu\u02D0\u0256a\u02CCme\u02D0r\u026Aka/",
  numret: "/\u02C8n\u0275m\u02D0r\u025Bt/",
  nytt: "/n\u028Ft\u02D0/",
  odygd: "/\u02C8u\u02D0\u02CCdy\u0261d/",
  ofruktsamt: "/\u02C8u\u02D0\u02CCfr\u0275ktsamt/",
  \u00F6mt\u00E5ligare: "/\u02C8\xF8m\u02D0\u02CCto\u02D0l\u026A\u0261ar\u025B/",
  \u00F6ppnad: "/\u02C8\u0153p\u02D0nad/",
  orimligheter: "/u\u02D0\u02C8ri\u02D0ml\u026A\u0261\u02CChe\u02D0t\u025Br/",
  oskar: "/\u02C8\u0254skar/", // proper name
  page: "/\u02C8p\u0251\u02D0\u0267\u0259/",
  p\u00E4rontr\u00E4d: "/\u02C8p\xE6\u02D0r\u0254n\u02CCtr\u025B\u02D0d/",
  paulun: "/pa\u02C8l\u0289\u02D0n/",
  presentpappret: "/pr\u025B\u02C8s\u025Bnt\u02CCpap\u02D0r\u025Bt/",
  proportionerad: "/pr\u0254p\u0254\u0282\u0254\u02C8ne\u02D0rad/",
  roligast: "/\u02C8ru\u02D0l\u026A\u0261ast/",
  r\u00F6tm\u00E5nadshetta: "/\u02C8r\xF8\u02D0t\u02CCmo\u02D0nads\u02CCh\u025Bt\u02D0a/",
  runor: "/\u02C8r\u0289\u02D0n\u0254r/",
  saffransblommorna: "/\u02C8saf\u02D0rans\u02CCbl\u0254m\u02D0\u0254\u0273a/",
  scharlakanssol: "/\u02C8\u0267\u0251\u02D0rlakans\u02CCsu\u02D0l/",
  si: "/si\u02D0/",
  siljan: "/\u02C8s\u026Al\u02D0jan/",
  silverne: "/\u02C8s\u026Alv\u025B\u0273\u025B/",
  sk\u00E4ms: "/\u0267\u025Bm\u02D0s/",
  sn\u00F6dropparna: "/\u02C8sn\xF8\u02D0\u02CCdr\u0254p\u02D0a\u0273a/",
  sommarmorgon: "/\u02C8s\u0254mar\u02CCm\u0254r\u0261\u0254n/",
  spiralgalax: "/sp\u026A\u02C8r\u0251\u02D0l\u0261a\u02CClaks/",
  t\u00E4r: "/t\u025B\u02D0r/",
  telefonsamtal: "/t\u025Bl\u025B\u02C8fo\u02D0n\u02CCsam\u02CCt\u0251\u02D0l/",
  tillaga: "/t\u026Al\u02C8l\u0251\u02D0\u0261a/",
  tjugosex: "/\u02C8\u0255\u0289\u02D0\u0261\u0254\u02CCs\u025Bks/",
  upp\u00E5: "/\u02C8\u0275p\u02D0o\u02D0/",
  uppgr\u00E4vda: "/\u02C8\u0275p\u02D0\u02CC\u0261r\u025B\u02D0vda/",
  utflyttade: "/\u02C8\u0289\u02D0t\u02CCfl\u028Ft\u02D0ad\u025B/",
  utsiktspunkt: "/\u02C8\u0289\u02D0t\u02CCs\u026Akts\u02CCp\u0275\u014Bkt/",
  v\u00E4lvuxen: "/\u02C8v\u025B\u02D0l\u02CCv\u0275ks\u025Bn/",
  vanger: "/\u02C8va\u014B\u02D0\u025Br/", // surname
  verklighetsgrund: "/\u02C8v\u025B\u02D0kl\u026A\u0261\u02CChe\u02D0ts\u02CC\u0261r\u0275nd/",
  vilka: "/\u02C8v\u026Al\u02D0ka/",
  vingad: "/\u02C8v\u026A\u014B\u02D0ad/",
  voro: "/\u02C8vu\u02D0r\u0254/",
  york: "/j\u0254rk/",
  zephyrens: "/s\u025B\u02C8fy\u02D0r\u025Bns/"
};
|
|
3555
|
+
|
|
3556
|
+
// src/overrides/sw.ts
|
|
3557
|
+
// Swahili word-level IPA overrides (slash-delimited IPA strings).
// `Victoria` is keyed with a capital, so it only matches that exact spelling:
// lookupIpa tries the word as written and then its lowercase form.
var sw = {
  // G2P handles most Swahili words. These overrides cover edge cases
  // that G2P can't handle (Arabic loanwords, foreign names).
  // Arabic loanword with "qi" — G2P doesn't have a rule for 'q'
  sadiqi: "/sa\u02C8di\u02D0ki/",
  ulithiqi: "/uli\u02C8\u03B8iki/",
  // Arabic "kh" = /x/ (voiceless velar fricative), not /kh/
  usiikhini: "/usii\u02C8xini/",
  // Foreign name — 'c' alone is not a Swahili phoneme
  Victoria: "/vikt\u0254\u02C8\u027Eia/"
};
|
|
3568
|
+
|
|
3569
|
+
// src/overrides/vi.ts
|
|
3570
|
+
// Vietnamese word-level IPA overrides (slash-delimited IPA strings).
// Each value ends in tone letters (U+02E7, U+02E5) that mark the syllable's tone.
var vi = {
  chinh: "/t\u0255\u026A\u014B\u02E7/",
  li\u00EAu: "/li\u0259w\u02E7/",
  ng\u00E1c: "/\u014Bak\u02E7\u02E5/",
  tru\u00E2n: "/\u0288w\u0259n\u02E7/"
};
|
|
3576
|
+
|
|
3577
|
+
// src/foreign.ts
|
|
3578
|
+
// Matches a "/" at the very start or very end of a string. Global so that
// replaceAll() can strip both delimiters of a "/.../" IPA transcription.
var IPA_SLASH_RE = /^\/|\/$/g;
// Splits on whitespace runs; the capture group keeps each run in the result
// so the original spacing can be re-emitted.
var WHITESPACE_SPLIT_RE = /(\s+)/;
// True for a string consisting solely of whitespace.
var WHITESPACE_RE = /^\s+$/;
// First / last character is neither a Unicode letter (\p{L}) nor a mark (\p{M}).
// Used to peel punctuation (and digits) off the edges of a token.
var LEADING_NON_LETTER_RE = /^[^\p{L}\p{M}]/u;
var TRAILING_NON_LETTER_RE = /[^\p{L}\p{M}]$/u;
// Zero-width split points immediately before and after every "'" or "-",
// e.g. "a-b" -> ["a", "-", "b"].
var CONTRACTION_SPLIT_RE = /(?<=['-])|(?=['-])/;
|
|
3584
|
+
// Shared word segmenter for Khmer, or `undefined` on runtimes that do not
// provide `Intl.Segmenter`.
var khmerSegmenter = typeof Intl !== "undefined" && typeof Intl.Segmenter === "function" ? new Intl.Segmenter("km", { granularity: "word" }) : void 0;

/**
 * Inserts ASCII spaces between adjacent Khmer words so that the
 * whitespace-based tokenizer can see word boundaries.
 *
 * Zero-width spaces (U+200B) are always converted to regular spaces, because
 * `\s` does not match U+200B and the tokenizer would otherwise never split on
 * them. Previously this normalization was skipped when `Intl.Segmenter` was
 * unavailable, so the fallback path behaved differently from the main path.
 *
 * @param {string} text - Raw input text.
 * @returns {string} The text with U+200B replaced by spaces and, when a
 *   segmenter is available, a space inserted between every pair of directly
 *   adjacent word-like segments.
 */
function segmentKhmerText(text) {
  const normalized = text.replaceAll("\u200B", " ");
  if (khmerSegmenter === void 0) {
    // No segmenter: explicit ZWSP boundaries are still honoured.
    return normalized;
  }
  const segments = [...khmerSegmenter.segment(normalized)];
  const pieces = [];
  segments.forEach((seg, index) => {
    pieces.push(seg.segment);
    // Only separate two words that touch; punctuation and existing
    // whitespace segments are not word-like and are left untouched.
    if (seg.isWordLike === true && segments[index + 1]?.isWordLike === true) {
      pieces.push(" ");
    }
  });
  return pieces.join("");
}
|
|
3602
|
+
// Supported source languages as { code, label } pairs, listed alphabetically
// by label. The codes are the same keys used by IPA_WORD_OVERRIDES.
// NOTE(review): Malay uses the code "ma" here (ISO 639-1 would be "ms");
// IPA_WORD_OVERRIDES uses the same "ma" key, so the two must change together.
var LANGUAGES = [
  { code: "ar", label: "Arabic" },
  { code: "yue", label: "Cantonese" },
  { code: "nl", label: "Dutch" },
  { code: "eo", label: "Esperanto" },
  { code: "fi", label: "Finnish" },
  { code: "fr", label: "French" },
  { code: "de", label: "German" },
  { code: "is", label: "Icelandic" },
  { code: "ja", label: "Japanese" },
  { code: "km", label: "Khmer" },
  { code: "ko", label: "Korean" },
  { code: "ma", label: "Malay" },
  { code: "zh", label: "Mandarin" },
  { code: "nb", label: "Norwegian" },
  { code: "or", label: "Odia" },
  { code: "fa", label: "Persian" },
  { code: "pt", label: "Portuguese" },
  { code: "ro", label: "Romanian" },
  { code: "es", label: "Spanish" },
  { code: "sw", label: "Swahili" },
  { code: "sv", label: "Swedish" },
  { code: "vi", label: "Vietnamese" }
];
|
|
3626
|
+
// Per-language tables of word -> IPA overrides, keyed by language code.
// getIpaOverride reads from this map. "yue" and "zh" appear in LANGUAGES but
// have no table here. The Odia table is bound to the local name `or_`.
var IPA_WORD_OVERRIDES = {
  ar,
  de,
  eo,
  es,
  fa,
  fi,
  fr,
  is,
  ja,
  km,
  ko,
  ma,
  nb,
  nl,
  or: or_,
  pt,
  ro,
  sv,
  sw,
  vi
};
|
|
3648
|
+
/**
 * Converts an IPA transcription to Ingglish spelling.
 *
 * @param {string} ipa - IPA string, optionally wrapped in "/" delimiters and
 *   optionally containing "." syllable separators.
 * @returns The result of `arpabetToIngglish` for the converted phonemes.
 */
function ipaToIngglish(ipa) {
  const { arpabetToIngglish } = import_phonemes3;
  // Drop the surrounding slashes first, then every syllable dot.
  const withoutSlashes = ipa.replaceAll(IPA_SLASH_RE, "");
  const bareIpa = withoutSlashes.replaceAll(".", "");
  return arpabetToIngglish(ipaToArpabet(bareIpa));
}
|
|
3653
|
+
/**
 * Finds the IPA transcription for a single word.
 *
 * Lookup order (first truthy hit wins):
 *   1. word-level override, for the word as written, then lowercased;
 *   2. dictionary entry for the word as written, lowercased, title-cased,
 *      or lowercased with diacritics stripped;
 *   3. if the word contains "ß": the same word spelled with "ss";
 *   4. if the word contains "'": the same word with U+2019 apostrophes;
 *   5. the language's lemmatizer, if one is registered;
 *   6. Khmer compound decomposition (lang "km" only);
 *   7. the language's grapheme-to-phoneme converter, if one is registered.
 *
 * Only own properties of `entries` are consulted, so words that collide with
 * `Object.prototype` members ("constructor", "toString", ...) are not
 * mistaken for dictionary hits.
 *
 * @param {{ entries: Record<string, string>, lang: string }} dict
 * @param {string} word - The word to look up (no surrounding punctuation).
 * @returns {string | undefined} The IPA string, or `undefined` if nothing matched.
 */
function lookupIpa(dict, word) {
  const { entries, lang } = dict;
  const lower = word.toLowerCase();

  const override = getIpaOverride(lang, word) ?? getIpaOverride(lang, lower);
  if (override) {
    return override;
  }

  // Own-property read: never fall through to Object.prototype.
  const entryFor = (key) => Object.hasOwn(entries, key) ? entries[key] : void 0;

  const title = lower.charAt(0).toUpperCase() + lower.slice(1);
  const stripped = (0, import_normalize.stripDiacritics)(lower);
  const direct = entryFor(word) ?? entryFor(lower) ?? entryFor(title) ?? entryFor(stripped);
  if (direct) {
    return direct;
  }

  if (lower.includes("\xDF")) {
    const ssLower = lower.replaceAll("\xDF", "ss");
    const ssTitle = ssLower.charAt(0).toUpperCase() + ssLower.slice(1);
    const ssResult = entryFor(ssLower) ?? entryFor(ssTitle);
    // Only stop here on a hit; a miss must still reach the fallbacks below
    // (previously this branch returned unconditionally).
    if (ssResult) {
      return ssResult;
    }
  }

  if (word.includes("'")) {
    const curly = word.replaceAll("'", "\u2019");
    const curlyResult = entryFor(curly) ?? entryFor(curly.toLowerCase());
    if (curlyResult) {
      return curlyResult;
    }
  }

  if (Object.hasOwn(LEMMATIZERS, lang)) {
    const lemmaResult = LEMMATIZERS[lang](entries, lower);
    if (lemmaResult) {
      return lemmaResult;
    }
  }

  if (lang === "km") {
    const compound = lookupKhmerCompound(entries, word);
    if (compound !== void 0) {
      return compound;
    }
  }

  if (Object.hasOwn(G2P_CONVERTERS, lang)) {
    return G2P_CONVERTERS[lang](lower);
  }
  return void 0;
}
|
|
3695
|
+
// Caches owned by lookupKhmerCompound. Both start out undefined and are
// filled in lazily on demand.
// khmerMergedDict: the Khmer dictionary entries merged with the Khmer word overrides.
var khmerMergedDict;
// khmerDictKeys: the keys of khmerMergedDict, sorted longest first.
var khmerDictKeys;
|
|
3697
|
+
/**
 * Ensures a phoneme sequence carries a stress mark.
 *
 * If any vowel already has a stress value, the input array is returned as is.
 * Otherwise a copy is returned in which "1" is appended to the last vowel
 * (or an unchanged copy when there is no vowel at all).
 *
 * @param {string[]} arpabet - ARPAbet phonemes.
 * @returns {string[]} The original array, or a stressed copy.
 */
function applyDefaultStress(arpabet) {
  const { isVowel, getStress } = import_phonemes3;
  let lastVowelIndex = -1;
  for (let idx = 0; idx < arpabet.length; idx++) {
    const phoneme = arpabet[idx];
    if (!isVowel(phoneme)) {
      continue;
    }
    if (getStress(phoneme) !== null) {
      // Already stressed somewhere: nothing to add.
      return arpabet;
    }
    lastVowelIndex = idx;
  }
  const stressed = arpabet.slice();
  if (lastVowelIndex !== -1) {
    stressed[lastVowelIndex] = stressed[lastVowelIndex] + "1";
  }
  return stressed;
}
|
|
3711
|
+
/**
 * Splits `remaining` into a sequence of dictionary keys and returns the IPA
 * values of those keys, appended to `acc`.
 *
 * Keys are tried in the order given by `keys` (callers pass them longest
 * first) with backtracking, so the result is the first complete
 * decomposition in that preference order.
 *
 * Positions that are known not to lead to a full decomposition are memoized,
 * which keeps the search polynomial instead of exponential on inputs that
 * cannot be decomposed. Empty keys are ignored; they consume no input and
 * would otherwise recurse without end.
 *
 * @param {Record<string, string>} dict - Key -> IPA map.
 * @param {string[]} keys - Candidate keys in preference order.
 * @param {string} remaining - Text still to be decomposed.
 * @param {string[]} acc - IPA values collected so far (not mutated).
 * @returns {string[] | null} `acc` itself when `remaining` is empty, a new
 *   array of IPA values on success, or `null` if no decomposition exists.
 */
function decomposeKhmer(dict, keys, remaining, acc) {
  if (remaining.length === 0) {
    return acc;
  }
  // Offsets from which the rest of the text cannot be decomposed.
  const deadEnds = new Set();
  const collected = [...acc];

  const search = (offset) => {
    if (offset === remaining.length) {
      return true;
    }
    if (deadEnds.has(offset)) {
      return false;
    }
    for (const key of keys) {
      if (key.length === 0) {
        continue;
      }
      const ipa = dict[key];
      if (ipa === void 0 || !remaining.startsWith(key, offset)) {
        continue;
      }
      collected.push(ipa);
      if (search(offset + key.length)) {
        return true;
      }
      collected.pop();
    }
    deadEnds.add(offset);
    return false;
  };

  return search(0) ? collected : null;
}
|
|
3726
|
+
/**
 * Returns the word-level IPA override for `word` in language `lang`.
 *
 * Only own properties are consulted at both levels. A plain property read
 * would also return inherited `Object.prototype` members: looking up the word
 * "constructor" would yield a function, which callers then treat as an IPA
 * string.
 *
 * @param {string} lang - Language code (a key of IPA_WORD_OVERRIDES).
 * @param {string} word - Exact spelling to look up.
 * @returns {string | undefined} The override, or `undefined` if there is none.
 */
function getIpaOverride(lang, word) {
  if (!Object.hasOwn(IPA_WORD_OVERRIDES, lang)) {
    return void 0;
  }
  const table = IPA_WORD_OVERRIDES[lang];
  if (table === void 0 || table === null) {
    return void 0;
  }
  return Object.hasOwn(table, word) ? table[word] : void 0;
}
|
|
3729
|
+
/**
 * Renders an IPA transcription in the requested output format.
 *
 * The IPA is stripped of its "/" delimiters and "." separators, converted to
 * ARPAbet (using the language's IPA overrides when `lang` is given), passed
 * through applyDefaultStress, and formatted with R-coloring disabled.
 *
 * @param {string} ipa - IPA string, optionally wrapped in "/".
 * @param format - Output format identifier passed to `arpabetToFormat`.
 * @param {string} [lang] - Language code selecting IPA_LANGUAGE_OVERRIDES.
 * @returns The formatted text produced by `arpabetToFormat`.
 */
function ipaToFormat(ipa, format, lang) {
  const bareIpa = ipa.replaceAll(IPA_SLASH_RE, "").replaceAll(".", "");
  let languageOverrides;
  if (lang) {
    languageOverrides = IPA_LANGUAGE_OVERRIDES[lang];
  }
  const phonemes = ipaToArpabet(bareIpa, languageOverrides);
  const stressedPhonemes = applyDefaultStress(phonemes);
  const { arpabetToFormat } = import_phonemes3;
  return arpabetToFormat(stressedPhonemes, format, { disableRColoring: true });
}
|
|
3735
|
+
// The `entries` object the Khmer caches were last built from.
var khmerCompoundCacheSource;

/**
 * Transcribes a Khmer compound by decomposing it into known dictionary words.
 *
 * The dictionary entries are merged with the Khmer word overrides (overrides
 * win) and the merged keys are sorted longest first; both are cached in
 * `khmerMergedDict` / `khmerDictKeys`. The cache is rebuilt whenever a
 * different `entries` object is passed. Previously it was built once from the
 * first dictionary seen and silently reused for every later one.
 *
 * @param {Record<string, string>} entries - Khmer dictionary (word -> IPA).
 * @param {string} word - The compound to decompose.
 * @returns {string | undefined} The parts' IPA (without "/" delimiters)
 *   joined by spaces, or `undefined` when the word cannot be split into at
 *   least two known parts.
 */
function lookupKhmerCompound(entries, word) {
  const cacheIsStale = khmerMergedDict === void 0 || khmerDictKeys === void 0 || khmerCompoundCacheSource !== entries;
  if (cacheIsStale) {
    // Spreading `undefined` is a no-op, so a missing km table is fine.
    khmerMergedDict = { ...entries, ...IPA_WORD_OVERRIDES.km };
    // Object.keys returns a fresh array, so sorting in place is safe.
    khmerDictKeys = Object.keys(khmerMergedDict).sort((a, b) => b.length - a.length);
    khmerCompoundCacheSource = entries;
  }
  const parts = decomposeKhmer(khmerMergedDict, khmerDictKeys, word, []);
  if (parts === null || parts.length < 2) {
    return void 0;
  }
  return parts.map((p) => p.replaceAll(IPA_SLASH_RE, "")).join(" ");
}
|
|
3752
|
+
// Prefix (U+FFFD REPLACEMENT CHARACTER) placed in front of words that could
// not be translated.
var NOT_FOUND_MARKER = "\uFFFD";
// True when a string ends in sentence-final punctuation (".", "!", "?" or the
// CJK full stop "。").
// NOTE(review): "!" and "?" appear twice in the class as rendered here; the
// second pair may originally have been full-width forms — confirm against the
// TypeScript source.
var SENTENCE_END_RE = /[.!?。!?]$/;
|
|
3754
|
+
/**
 * Translates foreign-language text and returns it as a single string.
 *
 * Word tokens that were not fully matched are emitted as NOT_FOUND_MARKER
 * followed by their original text; every other token contributes its
 * translated text.
 *
 * @param {string} text - Source text.
 * @param dict - Dictionary object passed through to translateForeignWithMapping.
 * @param [format="ingglish"] - Output format identifier.
 * @returns {string} The translated text.
 */
function translateForeign(text, dict, format = "ingglish") {
  const pieces = [];
  for (const token of translateForeignWithMapping(text, dict, format)) {
    const isUnresolvedWord = !token.matched && token.isWord;
    if (isUnresolvedWord) {
      pieces.push(NOT_FOUND_MARKER + token.original);
    } else {
      pieces.push(token.translated);
    }
  }
  return pieces.join("");
}
|
|
3758
|
+
/**
 * Translates foreign-language text token by token, keeping the mapping
 * between each original segment and its translation.
 *
 * The text is split on whitespace (the whitespace runs are kept as tokens).
 * Each non-whitespace segment has leading and trailing non-letter characters
 * peeled off, and the remaining core is looked up with lookupIpa. If the
 * whole core is unknown, it is split around "'" and "-" and the parts are
 * looked up individually.
 *
 * @param {string} text - Source text.
 * @param dict - Dictionary object; `dict.lang` selects language-specific
 *   handling and the whole object is passed to lookupIpa.
 * @param [format="ingglish"] - Output format identifier.
 * @returns {Array<{isWord: boolean, matched: boolean, original: string, translated: string}>}
 *   One token per whitespace run and per segment, in input order.
 */
function translateForeignWithMapping(text, dict, format = "ingglish") {
  const { lang } = dict;
  // Carried across iterations: true until the first word, then true again
  // after any word whose trailing punctuation ends a sentence.
  let atSentenceStart = true;
  // Khmer is pre-segmented so that words become whitespace-separated.
  const processed = lang === "km" ? segmentKhmerText(text) : text;
  const tokens = [];
  for (const segment of (0, import_normalize.normalizeApostrophes)(processed).split(WHITESPACE_SPLIT_RE)) {
    // Whitespace runs pass through unchanged.
    if (WHITESPACE_RE.test(segment)) {
      tokens.push({ isWord: false, matched: true, original: segment, translated: segment });
      continue;
    }
    // split() yields empty strings at the edges when text starts/ends with whitespace.
    if (!segment) {
      continue;
    }
    // Peel non-letter characters off both ends, one UTF-16 unit at a time;
    // they are re-attached verbatim around the translated core.
    const leading = [];
    const trailing = [];
    let core = segment;
    while (core.length > 0 && LEADING_NON_LETTER_RE.test(core)) {
      leading.push(core[0]);
      core = core.slice(1);
    }
    while (core.length > 0 && TRAILING_NON_LETTER_RE.test(core)) {
      trailing.unshift(core.at(-1));
      core = core.slice(0, -1);
    }
    // Nothing but punctuation/digits: not a word. Note that this path does
    // not update atSentenceStart.
    if (!core) {
      tokens.push({ isWord: false, matched: true, original: segment, translated: segment });
      continue;
    }
    let casePattern = (0, import_normalize.detectCasePattern)(core);
    const preservesCase = (0, import_phonemes3.getFormatPreservesCase)(format);
    // A word in a caseless script is detected as "lower"; at the start of a
    // sentence its translation is capitalized instead.
    if (atSentenceStart && preservesCase && casePattern === "lower" && isCaselessWord(core)) {
      casePattern = "capitalized";
    }
    atSentenceStart = SENTENCE_END_RE.test(trailing.join(""));
    const leadStr = leading.join("");
    const trailStr = trailing.join("");
    // 1) Whole-word lookup.
    const ipa = lookupIpa(dict, core);
    if (ipa) {
      const translated = ipaToFormat(ipa, format, lang);
      const cased = preservesCase ? (0, import_normalize.applyCasePattern)(translated, casePattern) : translated;
      tokens.push({
        isWord: true,
        matched: true,
        original: segment,
        translated: leadStr + cased + trailStr
      });
      continue;
    }
    // 2) Split around apostrophes and hyphens; the separators themselves
    //    are elements of `parts`.
    const parts = core.split(CONTRACTION_SPLIT_RE);
    if (parts.length > 1) {
      // IPA per part (undefined for separators and for unknown parts).
      const partIpas = parts.map((part, i) => {
        if (part === "'" || part === "-") {
          return;
        }
        let ipa2;
        // A part followed by an apostrophe is first tried with the
        // apostrophe attached, then on its own.
        if (parts[i + 1] === "'") {
          ipa2 = lookupIpa(dict, part + "'");
        }
        ipa2 ?? (ipa2 = lookupIpa(dict, part));
        return ipa2;
      });
      const allFound = parts.every(
        (part, i) => part === "'" || part === "-" || partIpas[i] !== void 0
      );
      if (allFound) {
        // Parts joined by apostrophes are concatenated into one group and
        // formatted together; a hyphen starts a new group and is re-emitted
        // as "-" between the formatted groups.
        const groups = [[]];
        for (const [i, part_] of parts.entries()) {
          const part = part_;
          if (part === "'") {
            continue;
          }
          if (part === "-") {
            groups.push([]);
            continue;
          }
          const ipa2 = partIpas[i];
          groups.at(-1).push(ipa2.replaceAll(IPA_SLASH_RE, "").replaceAll(".", ""));
        }
        const translated2 = groups.map((ipas) => ipaToFormat(ipas.join(""), format, lang)).join("-");
        const cased = preservesCase ? (0, import_normalize.applyCasePattern)(translated2, casePattern) : translated2;
        tokens.push({
          isWord: true,
          matched: true,
          original: segment,
          translated: leadStr + cased + trailStr
        });
        continue;
      }
      // 3) Partial match: translate the parts that were found, mark the rest.
      //    The first non-separator part inherits the segment's case pattern
      //    (including any sentence-start capitalization); later parts use
      //    their own.
      let isFirstPart = true;
      const translated = parts.map((part, i) => {
        if (part === "'" || part === "-") {
          return part;
        }
        const partCase = isFirstPart ? casePattern : (0, import_normalize.detectCasePattern)(part);
        isFirstPart = false;
        const partIpa = partIpas[i];
        if (partIpa) {
          const partTranslated = ipaToFormat(partIpa, format, lang);
          return preservesCase ? (0, import_normalize.applyCasePattern)(partTranslated, partCase) : partTranslated;
        }
        return NOT_FOUND_MARKER + part;
      });
      // If at least one real part was translated, emit the mixed text with
      // the markers removed but flag the token as unmatched.
      if (translated.some(
        (t, i) => parts[i] !== "'" && parts[i] !== "-" && !t.startsWith(NOT_FOUND_MARKER)
      )) {
        const translatedText = translated.map((t) => t.replaceAll(NOT_FOUND_MARKER, "")).join("");
        tokens.push({
          isWord: true,
          matched: false,
          original: segment,
          translated: leadStr + translatedText + trailStr
        });
        continue;
      }
    }
    // 4) Nothing matched: pass the segment through untranslated.
    tokens.push({ isWord: true, matched: false, original: segment, translated: segment });
  }
  return tokens;
}
|
|
3877
|
+
/**
 * Reports whether a word starts with a character that has no upper/lower
 * case distinction (for example Khmer, CJK or Arabic script).
 *
 * The first full code point is examined rather than the first UTF-16 unit.
 * With `word[0]`, a cased letter outside the Basic Multilingual Plane was
 * seen as a lone surrogate and wrongly reported as caseless.
 *
 * @param {string} word - The word to inspect.
 * @returns {boolean} `true` if the first character is caseless; `false` for
 *   cased characters and for the empty string.
 */
function isCaselessWord(word) {
  const firstCodePoint = word.codePointAt(0);
  if (firstCodePoint === void 0) {
    return false;
  }
  const ch = String.fromCodePoint(firstCodePoint);
  return ch.toUpperCase() === ch.toLowerCase();
}
|
|
3881
|
+
|
|
3882
|
+
// src/index.ts
|
|
3883
|
+
/**
 * Registers the "ipa" output format with the phonemes package.
 *
 * The format converts ARPAbet via arpabetToIPARaw, is flagged as Latin
 * script, joins with a single space, is labelled "IPA", and does not
 * preserve the source word's letter case.
 */
function registerIPA() {
  const ipaFormat = {
    forward: arpabetToIPARaw,
    isLatinScript: true,
    joinSeparator: " ",
    label: "IPA",
    preservesCase: false
  };
  const { registerFormat } = import_phonemes4;
  registerFormat("ipa", ipaFormat);
}
|
|
3892
|
+
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never runs; the object
// literal exists only as static text listing the export names.
// Keep the list in sync with the names passed to __export(index_exports, ...).
0 && (module.exports = {
  LANGUAGES,
  NOT_FOUND_MARKER,
  arpabetPhonemeToIPA,
  arpabetToIPARaw,
  ipaToArpabetClean,
  ipaToIngglish,
  lookupIpa,
  registerIPA,
  segmentKhmerText,
  translateForeign,
  translateForeignWithMapping
});
|