@ingglish/g2p 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1362 @@
1
+ // src/g2p-rules.ts
2
+ import { arpabetToFormat } from "@ingglish/phonemes";
3
+
4
+ // src/stress.ts
5
+ import { stripStress } from "@ingglish/phonemes";
6
// Vowel bases that applyStressPrediction rewrites to "AH0" when their
// syllable is not the stressed one.
const REDUCIBLE_VOWELS = /* @__PURE__ */ new Set(["AE"]);

// Vowel bases that receive stress digit "2" (instead of "0") when unstressed.
// "AE" is listed here too, but the reducible check runs first, so in practice
// it is reduced rather than given secondary stress.
const SECONDARY_STRESS_VOWELS = /* @__PURE__ */ new Set(["AA", "AE", "AO", "AW", "AY", "EH", "EY", "OY", "UH"]);

// Word endings that put the stress on the final syllable.
const STRESS_ATTRACTING_SUFFIXES = [
  "eer",
  "ese",
  "ette",
  "esque",
  "ique",
  "oon",
  "ade",
  "aire",
  "esce",
  "ee",
];

// Word endings that fix the stress a given number of syllables from the end
// (`stressFromEnd`: 2 = penultimate, 3 = antepenultimate).
//
// The list is scanned in order and the first matching ending wins, so an
// ending that is itself the tail of a longer entry must come AFTER that
// longer entry.
const PRE_STRESS_SUFFIXES = [
  // Must precede "ian": every "-cian" word also ends in "-ian", and listing
  // "cian" later would make it unreachable (musician, technician, ...).
  { stressFromEnd: 2, suffix: "cian" },
  // Antepenultimate (3 from end)
  { stressFromEnd: 3, suffix: "ium" },
  { stressFromEnd: 3, suffix: "ian" },
  { stressFromEnd: 3, suffix: "ia" },
  { stressFromEnd: 3, suffix: "io" },
  { stressFromEnd: 3, suffix: "ical" },
  { stressFromEnd: 3, suffix: "ious" },
  { stressFromEnd: 3, suffix: "eous" },
  { stressFromEnd: 3, suffix: "uous" },
  { stressFromEnd: 3, suffix: "ular" },
  { stressFromEnd: 3, suffix: "ophy" },
  { stressFromEnd: 3, suffix: "osis" },
  { stressFromEnd: 3, suffix: "itis" },
  { stressFromEnd: 3, suffix: "athy" },
  { stressFromEnd: 3, suffix: "ity" },
  { stressFromEnd: 3, suffix: "ety" },
  { stressFromEnd: 3, suffix: "ial" },
  { stressFromEnd: 3, suffix: "ual" },
  { stressFromEnd: 3, suffix: "ify" },
  { stressFromEnd: 3, suffix: "ogy" },
  { stressFromEnd: 3, suffix: "omy" },
  { stressFromEnd: 3, suffix: "ony" },
  { stressFromEnd: 3, suffix: "ory" },
  // Italian/Polish name endings and other common patterns. Mostly penultimate,
  // but each entry carries its own stressFromEnd. "ative" stays ahead of "ive"
  // and "ics" ahead of "ic" for the same first-match reason as above.
  { stressFromEnd: 3, suffix: "iano" },
  { stressFromEnd: 2, suffix: "ella" },
  { stressFromEnd: 2, suffix: "elli" },
  { stressFromEnd: 2, suffix: "ello" },
  { stressFromEnd: 2, suffix: "etta" },
  { stressFromEnd: 2, suffix: "etti" },
  { stressFromEnd: 2, suffix: "owski" },
  { stressFromEnd: 2, suffix: "ewski" },
  { stressFromEnd: 2, suffix: "tion" },
  { stressFromEnd: 2, suffix: "sion" },
  { stressFromEnd: 3, suffix: "ative" },
  { stressFromEnd: 3, suffix: "ally" },
  { stressFromEnd: 2, suffix: "ics" },
  { stressFromEnd: 2, suffix: "ic" },
  { stressFromEnd: 3, suffix: "ate" },
  { stressFromEnd: 2, suffix: "ive" },
  { stressFromEnd: 3, suffix: "ible" },
  { stressFromEnd: 2, suffix: "ment" },
];

// Word beginnings that push the stress onto the second syllable. A prefix
// only applies when the whole word is at least `minLength` characters long.
const UNSTRESSED_PREFIXES = [
  // Tier 1: reliable
  { minLength: 7, prefix: "dis" },
  { minLength: 7, prefix: "mis" },
  { minLength: 4, prefix: "be" },
  { minLength: 4, prefix: "de" },
  { minLength: 5, prefix: "re" },
  // Tier 2: mostly reliable — check longer prefixes first
  { minLength: 7, prefix: "under" },
  { minLength: 7, prefix: "inter" },
  { minLength: 6, prefix: "over" },
  { minLength: 4, prefix: "un" },
  { minLength: 4, prefix: "ex" },
  { minLength: 7, prefix: "sur" },
  { minLength: 5, prefix: "sub" },
  { minLength: 6, prefix: "per" },
  { minLength: 6, prefix: "con" },
  { minLength: 6, prefix: "pro" },
  // Tier 3: Latinate a- prefixes (abandon, absorb, accept, advance, etc.)
  { minLength: 6, prefix: "ab" },
  { minLength: 6, prefix: "ac" },
  { minLength: 6, prefix: "ad" },
  { minLength: 6, prefix: "af" },
  { minLength: 6, prefix: "ap" },
  { minLength: 6, prefix: "as" },
  { minLength: 6, prefix: "at" },
];
92
/**
 * Re-assigns stress digits on an ARPABET phoneme list using the stress
 * position predicted from the spelling.
 *
 * A phoneme is treated as a vowel when its last character is "0", "1" or "2".
 * Words with at most one vowel are returned untouched (the same array
 * instance). Otherwise a copy is returned in which:
 *   - "AH0" vowels are never modified;
 *   - the chosen syllable gets stress digit "1";
 *   - every other vowel becomes "AH0" (if reducible), base + "2" (if it takes
 *     secondary stress) or base + "0".
 *
 * @param {string} word - Spelling of the word, passed to predictStressSyllable.
 * @param {string[]} phonemes - ARPABET phonemes for the word.
 * @returns {string[]} Phonemes with the predicted stress pattern applied.
 */
function applyStressPrediction(word, phonemes) {
  const DIGIT_ZERO = 48;
  const DIGIT_TWO = 50;

  // Indices (into `phonemes`) of every vowel, in order of appearance.
  const vowelSlots = [];
  for (let index = 0; index < phonemes.length; index += 1) {
    const symbol = phonemes[index];
    const tail = symbol.charCodeAt(symbol.length - 1);
    if (tail >= DIGIT_ZERO && tail <= DIGIT_TWO) {
      vowelSlots.push(index);
    }
  }

  if (vowelSlots.length <= 1) {
    return phonemes;
  }

  const output = Array.from(phonemes);
  const isSchwa = (syllable) => output[vowelSlots[syllable]] === "AH0";

  let stressed = predictStressSyllable(word, vowelSlots.length);

  // A schwa cannot carry the stress: fall back to the nearest earlier
  // non-schwa vowel, and only if there is none, to the nearest later one.
  if (vowelSlots[stressed] !== undefined && isSchwa(stressed)) {
    let fallback = -1;
    for (let j = stressed - 1; j >= 0 && fallback === -1; j -= 1) {
      if (!isSchwa(j)) {
        fallback = j;
      }
    }
    for (let j = stressed + 1; j < vowelSlots.length && fallback === -1; j += 1) {
      if (!isSchwa(j)) {
        fallback = j;
      }
    }
    if (fallback !== -1) {
      stressed = fallback;
    }
  }

  vowelSlots.forEach((slot, syllable) => {
    const current = output[slot];
    const bare = stripStress(current);
    if (current === "AH0") {
      return;
    }
    if (syllable === stressed) {
      output[slot] = bare + "1";
      return;
    }
    if (REDUCIBLE_VOWELS.has(bare)) {
      output[slot] = "AH0";
      return;
    }
    output[slot] = bare + (SECONDARY_STRESS_VOWELS.has(bare) ? "2" : "0");
  });

  return output;
}
146
/**
 * Predicts the zero-based index of the primary-stressed syllable from a
 * word's spelling.
 *
 * Rules are tried in this order:
 *   1. A stress-attracting ending -> last syllable.
 *   2. A pre-stress ending -> `stressFromEnd` syllables from the end, clamped
 *      to 0. When several endings match, the LONGEST one wins, so a specific
 *      ending such as "cian" is not shadowed by a shorter one such as "ian"
 *      regardless of table order.
 *   3. An unstressed prefix (word long enough for that prefix) -> second
 *      syllable, or the only syllable when there is just one.
 *   4. Otherwise -> first syllable.
 *
 * @param {string} word - Spelling of the word (any letter case).
 * @param {number} syllableCount - Number of syllables (vowel phonemes).
 * @returns {number} Zero-based index of the syllable predicted to be stressed.
 */
function predictStressSyllable(word, syllableCount) {
  const lower = word.toLowerCase();

  if (STRESS_ATTRACTING_SUFFIXES.some((suffix) => lower.endsWith(suffix))) {
    return syllableCount - 1;
  }

  // Two different endings of the same length cannot both match, so "longest"
  // is unambiguous.
  let best = null;
  for (const entry of PRE_STRESS_SUFFIXES) {
    if (lower.endsWith(entry.suffix) && (best === null || entry.suffix.length > best.suffix.length)) {
      best = entry;
    }
  }
  if (best !== null) {
    return Math.max(0, syllableCount - best.stressFromEnd);
  }

  // The previous implementation also re-tested the word with a trailing
  // "-ing"/"-ings" removed. That candidate is a shorter prefix of `lower`, so
  // it could never satisfy a prefix rule that `lower` itself had failed; the
  // check was dead code and is dropped without changing any result.
  // NOTE(review): if the intent was to test ONLY the stripped form (so that
  // e.g. "reading" is not treated as re- + "ading"), that would be a
  // behaviour change and needs a decision from the rule author.
  const hasUnstressedPrefix = UNSTRESSED_PREFIXES.some(
    ({ minLength, prefix }) => lower.length >= minLength && lower.startsWith(prefix),
  );
  return hasUnstressedPrefix ? Math.min(1, syllableCount - 1) : 0;
}
173
+
174
+ // src/g2p-rules.ts
175
+ var NRL_RULES = {
176
+ A: [
177
+ "[ALLO] =/AA L OW/",
178
+ "[ACCI] =/AA CH IY/",
179
+ " [AYE] =/AY/",
180
+ " [AGO] =/AH G OW/",
181
+ " [AND] =/AH N D/",
182
+ " [ARE] =/AA R/",
183
+ " [A]BO=/AH/",
184
+ "F[A]VO=/EY/",
185
+ "TR[A]VE=/AE/",
186
+ "F[A]TH=/AA/",
187
+ "[ASTE] =/EY S T/",
188
+ "[ASE] =/EY S/",
189
+ "[A] =/AX/",
190
+ " [AR]OU=/ER/",
191
+ " [AR]EN =/AA R/",
192
+ "EP[AR]AT=/ER/",
193
+ "[AR]#=/EH R/",
194
+ " [AR]O=/AX R/",
195
+ " ^[AS]#=/EY S/",
196
+ "[A]WA=/AX/",
197
+ "[AWL]=/AO L/",
198
+ "[AW]FU=/AA/",
199
+ "[AW]=/AO/",
200
+ " :[ANY]=/EH N IY/",
201
+ "[A]HE=/AH/",
202
+ "[A]^+#=/EY/",
203
+ "#:[ALLY]=/AX L IY/",
204
+ " [AL]#=/AX L/",
205
+ "[AGAIN]=/AX G EH N/",
206
+ "SS[AG]E=/AH JH/",
207
+ "#:[AG]E=/IH JH/",
208
+ " :[A]^+ =/EY/",
209
+ "[A]^%=/EY/",
210
+ "M[ARR]IE=/EH R/",
211
+ "[ARR]ES=/ER/",
212
+ "[ARR]IV=/ER/",
213
+ "[ARR]=/AE R/",
214
+ " :[AR] =/AA R/",
215
+ "ST[AR] =/AA R/",
216
+ "[AR] =/ER/",
217
+ "ST[AR]D=/ER/",
218
+ "[AR]=/AA R/",
219
+ "[AIR]=/EH R/",
220
+ "[AIGN]=/EY N/",
221
+ "RT[AI]NL=/AH/",
222
+ "[AI]=/EY/",
223
+ "[AY]=/EY/",
224
+ " L[AUGH]=/AE F/",
225
+ "[AUGH]=/AO/",
226
+ "[AUER]=/AW ER/",
227
+ "[AU]=/AO/",
228
+ "#:[AL] =/AX L/",
229
+ "#:[ALS] =/AX L Z/",
230
+ "[ALK]=/AO K/",
231
+ "[AL]B=/AE L/",
232
+ "[AL]C=/AE L/",
233
+ "H[ALF]=/AE F/",
234
+ "C[ALF]=/AE F/",
235
+ "[AL]F=/AE L/",
236
+ "[AL]G=/AE L/",
237
+ "[AL]P=/AE L/",
238
+ "[AL]V=/AE L/",
239
+ "[ALM] =/AA M/",
240
+ "[ALM]S =/AA M/",
241
+ " [AL]LE=/AE L/",
242
+ "SH[AL]L=/AE L/",
243
+ " [AL]LOT=/AH L/",
244
+ "[AL]^=/AO L/",
245
+ " :[ABLE]=/EY B AX L/",
246
+ "[ABLE]=/AX B AX L/",
247
+ "[ANG]+=/EY N JH/",
248
+ "#:[ANCE] =/AX N S/",
249
+ "#:[ANTS] =/AX N T S/",
250
+ "#:[ANT] =/AX N T/",
251
+ "[AA]=/AA/",
252
+ "[AE]=/EH/",
253
+ " [A]NO=/AH/",
254
+ "#:[AS] =/AH Z/",
255
+ "M[A]JO=/EY/",
256
+ "SW[A]LL=/AA/",
257
+ "SW[A]P=/AA/",
258
+ "SQU[A]=/AA/",
259
+ "QU[A]NT=/AA/",
260
+ "[A]=/AE/",
261
+ "#:[AN] =/AX N/"
262
+ ],
263
+ B: [
264
+ "[BEEN] =/B IH N/",
265
+ " [BRA] =/B R AA/",
266
+ " [BOW] =/B AW/",
267
+ " [BOTH] =/B OW TH/",
268
+ " [BE]NE=/B EH/",
269
+ " [BE]^#=/B IH/",
270
+ "[BEING]=/B IY IH NX/",
271
+ " [BUS]#=/B IH Z/",
272
+ "[BUIL]=/B IH L/",
273
+ "[BT]=/T/",
274
+ "#:[BERG] =/B ER G/",
275
+ "#:[BURG] =/B ER G/",
276
+ "#:[BURY] =/B EH R IY/",
277
+ "[BB]=/B/",
278
+ "M[B] =/ /",
279
+ "[B]=/B/"
280
+ ],
281
+ C: [
282
+ " [COS] =/K AO S/",
283
+ " [CHA]R=/CH AA/",
284
+ " [CH]^=/K/",
285
+ "^E[CH]=/K/",
286
+ "[CHEM]=/K EH M/",
287
+ "BA[CH] =/K/",
288
+ "TE[CH] =/K/",
289
+ "EI[CH] =/K/",
290
+ "LO[CH] =/K/",
291
+ "RO[CH] =/K/",
292
+ "MA[CH] =/K/",
293
+ "NE[CH] =/K/",
294
+ "NO[CH] =/K/",
295
+ "DO[CH] =/K/",
296
+ "ZE[CH] =/K/",
297
+ "I[CH]T=/K/",
298
+ "I[CH]EL=/K/",
299
+ "I[CH]OL=/K/",
300
+ "A[CH]EN=/K/",
301
+ "A[CH]TE=/K/",
302
+ "R[CH]IV=/K/",
303
+ "R[CH]IT=/K/",
304
+ "S[CH]KE=/K/",
305
+ "RI[CH]S=/K/",
306
+ "LI[CH] =/K/",
307
+ "EI[CH]ER=/K/",
308
+ "EI[CH]EN=/K/",
309
+ "EI[CH]MA=/K/",
310
+ "[CH]OR=/K/",
311
+ "#RI[CH] =/K/",
312
+ "TRI[CH] =/K/",
313
+ "BRI[CH] =/K/",
314
+ "MA[CH]ER =/K/",
315
+ "LA[CH]ER =/K/",
316
+ "RA[CH]ER =/K/",
317
+ "[CH]=/CH/",
318
+ " S[CI]#=/S AY/",
319
+ "[CIAL]=/SH AX L/",
320
+ "[CI]A=/SH/",
321
+ "[CI]O=/SH/",
322
+ "[CI]EN=/SH/",
323
+ "EX[C]IT=//",
324
+ "EX[C]E=//",
325
+ "M[C]E=/AH K/",
326
+ "[C]+=/S/",
327
+ "[CK]=/K/",
328
+ "[CZ]=/CH/",
329
+ "[CQU]=/K W/",
330
+ "I[CALLY] =/K L IY/",
331
+ "[CYCL]=/S AY K AH L/",
332
+ " [CON]^=/K AH N/",
333
+ "[COM]%=/K AH M/",
334
+ "[CCH]=/K/",
335
+ "M[CC]=/AH K/",
336
+ "[CC]+=/K S/",
337
+ "[CC]=/K/",
338
+ "M[C]G=/AH/",
339
+ "M[C]^=/AH K/",
340
+ "[C]=/K/"
341
+ ],
342
+ D: [
343
+ " [DOING]=/D UW IH NX/",
344
+ "#:[DED] =/D IH D/",
345
+ ".E[D] =/D/",
346
+ "#TE[D] =/IH D/",
347
+ "^TE[D] =/IH D/",
348
+ "#^:E[D] =/T/",
349
+ "[DIAG]=/D AY AH G/",
350
+ "[DIAL]=/D AY AH L/",
351
+ " [DE]^#=/D IH/",
352
+ "[DU]A=/JH UW/",
353
+ "[DG]+=/JH/",
354
+ "[DJ]=/JH/",
355
+ "#:[DDED] =/D IH D/",
356
+ "[DD]=/D/",
357
+ "[D]T =/ /",
358
+ "[D]=/D/"
359
+ ],
360
+ E: [
361
+ "[EAR]D=/ER/",
362
+ "H[EAR] =/IY R/",
363
+ "W[EAR]=/EH R/",
364
+ "B[EAR]=/EH R/",
365
+ "BR[EAK]F=/EH K/",
366
+ "H[EA]VY=/EH/",
367
+ "[EWSKI] =/EH F S K IY/",
368
+ "[ELLI] =/EH L IY/",
369
+ " [EVE] =/IY V/",
370
+ " [EAR] =/IY R/",
371
+ " [EVERY]^=/EH V R IY/",
372
+ " [EVE]N=/IY V/",
373
+ "[E]LECT=/IH/",
374
+ "[E]LIM=/IH/",
375
+ " [E]LE=/IH/",
376
+ " [E]NO=/IH/",
377
+ " [EX]#=/IH G Z/",
378
+ "#:[E] =/ /",
379
+ " :[E] =/IY/",
380
+ "#[ED] =/D/",
381
+ "D[ED] =/IH D/",
382
+ "#:[E]D =/ /",
383
+ "[EV]ER=/EH V/",
384
+ "[ER]%=/ER/",
385
+ "#:[EMENT]=/M AX N T/",
386
+ "#:[ENED] =/AX N D/",
387
+ "#:[ENING]=/AX N IH NX/",
388
+ "#:[ENESS] =/N AX S/",
389
+ "#:[EMAN] =/M AX N/",
390
+ "NT[ERR]UP=/ER/",
391
+ "[ERR]=/EH R/",
392
+ "XP[ER]IE=/IH R/",
393
+ "#:[ER]#=/ER/",
394
+ "S[ER]IO=/IH R/",
395
+ "[ER]#=/EH R/",
396
+ "[ER]=/ER/",
397
+ "@[EW]=/UW/",
398
+ "#:[E]W=/ /",
399
+ "[EW]=/Y UW/",
400
+ "G[E]OR=//",
401
+ "[E]O=/IY/",
402
+ "IC[ES] =/AH Z/",
403
+ "#:&[ES] =/IH Z/",
404
+ "#:[E]S =/ /",
405
+ "#:[ELY] =/L IY/",
406
+ "#:[EFUL] =/F AX L/",
407
+ "[EFUL]=/F UH L/",
408
+ "[EER]=/IH R/",
409
+ "[EE]=/IY/",
410
+ "[EARN]=/ER N/",
411
+ "[EAR]TH=/ER/",
412
+ "[EAR]L=/ER/",
413
+ "[EAR]CH=/ER/",
414
+ "[EAR]T=/ER/",
415
+ "[EAR] =/IH R/",
416
+ "[EAR]S =/IH R/",
417
+ "[EAD]=/EH D/",
418
+ "#:[EA] =/IY AX/",
419
+ "[EAUX]=/OW/",
420
+ "B[EAU]TI=/Y UW/",
421
+ "[EAU]=/OW/",
422
+ "[EA]LTH=/EH/",
423
+ "[EA]THER=/EH/",
424
+ "BR[EA]TH=/EH/",
425
+ "[EA]VEN=/EH/",
426
+ "ST[EA]K=/EY/",
427
+ "[EA]SU=/EH/",
428
+ "D[EA]TH=/EH/",
429
+ "BR[EA]K=/EY/",
430
+ "W[EA]PO=/EH/",
431
+ "[EA]=/IY/",
432
+ "[EIGN]=/EY N/",
433
+ "[EIGH]=/EY/",
434
+ "C[EI]=/IY/",
435
+ "W[EI]RD=/IH/",
436
+ "[EI]=/AY/",
437
+ "[EU]RO=/Y UW/",
438
+ "[EY]E=/AY/",
439
+ "[EY]=/IY/",
440
+ "[EUR] =/ER/",
441
+ "[EU]=/UW/",
442
+ "#:[ENCE] =/AX N S/",
443
+ "#:[ENTS] =/AX N T S/",
444
+ "#:[ENT] =/AX N T/",
445
+ "I[ELD]=/L D/",
446
+ "I[ELS] =/L Z/",
447
+ "I[EL] =/L/",
448
+ "#:[ENS] =/AX N Z/",
449
+ "GR[EN] =/EH N/",
450
+ "#:[EN] =/AX N/",
451
+ "OT[EL]=/EH L/",
452
+ "#:[EL] =/AX L/",
453
+ "#:[EST] =/AX S T/",
454
+ "[EX]TR=/EH K S/",
455
+ " [EX]^=/IH K S/",
456
+ "#:[ELS] =/AX L Z/",
457
+ "#:[ETS] =/AH T S/",
458
+ "#:[EMS] =/AX M Z/",
459
+ "#:[EM] =/AX M/",
460
+ "#:[ET] =/AH T/",
461
+ "P[E]TE=/IY/",
462
+ "D[E]FE=/IH/",
463
+ "D[E]FI=/IH/",
464
+ "R[E]CE=/IH/",
465
+ "D[E]CI=/IH/",
466
+ "R[E]SP=/IH/",
467
+ "R[E]PR=/IY/",
468
+ "S[E]CR=/IY/",
469
+ "S[E]CU=/IH/",
470
+ "R[E]TR=/IY/",
471
+ "D[E]PR=/IH/",
472
+ "D[E]STR=/IH/",
473
+ "D[E]PL=/IH/",
474
+ "D[E]CL=/IH/",
475
+ "D[E]PE=/IH/",
476
+ "D[E]SI=/IH/",
477
+ "D[E]TE=/IH/",
478
+ "R[E]SU=/IH/",
479
+ "R[E]GA=/IH/",
480
+ "[E]VOL=/IH/",
481
+ "CK[E]TT=/IH/",
482
+ "NN[E]LL=/AH/",
483
+ "SS[E]LL=/AH/",
484
+ "DD[E]LL=/AH/",
485
+ "PP[E]LL=/AH/",
486
+ "RR[E]LL=/AH/",
487
+ "TT[E]LL=/AH/",
488
+ "NK[E]TT=/IH/",
489
+ "FF[E]LL=/AH/",
490
+ "LL[E]TT=/IH/",
491
+ "RR[E]TT=/IH/",
492
+ "MM[E]LL=/AH/",
493
+ "NN[E]TT=/IH/",
494
+ "NN[E]SS=/IH/",
495
+ "MM[E]TT=/IH/",
496
+ "GG[E]TT=/IH/",
497
+ "LL[E]NG=/IH/",
498
+ "TL[E]ME=//",
499
+ "B[E]TW=/IH/",
500
+ "PR[E]TT=/IH/",
501
+ "PR[E]TE=/IY/",
502
+ "PR[E]PA=/IY/",
503
+ "PR[E]VE=/IY/",
504
+ "PR[E]CA=/IY/",
505
+ "PR[E]MI=/IY/",
506
+ "PR[E]SC=/IY/",
507
+ "PR[E]VI=/IY/",
508
+ "CR[E]TE=/IY/",
509
+ "PL[E]TE=/IY/",
510
+ "TL[E]ME=//",
511
+ "ST[E]VE=/IY/",
512
+ " [E]QU=/IH/",
513
+ " [E]FF=/IH/",
514
+ " [E]MI=/IH/",
515
+ "D[E]SC=/IH/",
516
+ "EL[E]BR=/AH/",
517
+ "EL[E]PH=/AH/",
518
+ "[E]NB=/AH/",
519
+ "[E]NFEL=/AH/",
520
+ "[E]NSTE=/AH/",
521
+ "SS[E]L=/AH/",
522
+ "CI[E]NT=/AH/",
523
+ "SS[E]NG=/AH/",
524
+ "TN[E]SS=/AH/",
525
+ "[ETTE] =/EH T/",
526
+ "[E]^E =/IY/",
527
+ "[E]=/EH/"
528
+ ],
529
+ F: [
530
+ "[FROM] =/F R AH M/",
531
+ " [FIX]=/F IH K S/",
532
+ "#:[FORD] =/F ER D/",
533
+ "#:[FULLY] =/F AX L IY/",
534
+ "#:[FUL] =/F AX L/",
535
+ "[FUL]=/F UH L/",
536
+ "[FF]=/F/",
537
+ "[F]=/F/"
538
+ ],
539
+ G: [
540
+ " [GIN] =/JH IH N/",
541
+ " [GHOST]=/G OW S T/",
542
+ " [GAS] =/G AE S/",
543
+ " [GN]=/N/",
544
+ "[G]EI=/G/",
545
+ "[GIV]=/G IH V/",
546
+ " [G]I^=/G/",
547
+ "[GEON]=/JH AX N/",
548
+ "[GE]T=/G EH/",
549
+ "SU[GGES]=/G JH EH S/",
550
+ "[GU]I=/G/",
551
+ "EXA[GG]ER=/JH/",
552
+ "[GG]=/G/",
553
+ " B#[G]=/G/",
554
+ "ER[G]ER=/G/",
555
+ "[G]ERN=/G/",
556
+ "L[G]ER=/G/",
557
+ "[G]ER =/G/",
558
+ "[G]ERS=/G/",
559
+ "[G]ERT=/G/",
560
+ "[G]ERD=/G/",
561
+ "[G]ERH=/G/",
562
+ "[G]EL=/G/",
563
+ "IE[G]EL=/G/",
564
+ "[G]+=/JH/",
565
+ "[GREAT]=/G R EY T/",
566
+ "#[GH]=/ /",
567
+ "[G]=/G/"
568
+ ],
569
+ H: [
570
+ " [HAVE] =/HH AE V/",
571
+ " [HAVING]=/HH AE V IH NG/",
572
+ "[HERE] =/HH IY R/",
573
+ "[HELLO]=/HH AH L OW/",
574
+ " [HONEY]=/HH AH N IY/",
575
+ " [HEIR]=/EH R/",
576
+ " [HON]OR=/AA N/",
577
+ " [HON]EST=/AA N/",
578
+ " [HMM]=/HH M/",
579
+ " [HOUR]=/AW ER/",
580
+ " [HEY] =/HH EY/",
581
+ " [HER] =/HH ER/",
582
+ " [HIM] =/HH IH M/",
583
+ " [HIS] =/HH IH Z/",
584
+ "[HOW]=/HH AW/",
585
+ "[H]#=/HH/",
586
+ "[H]=/ /"
587
+ ],
588
+ I: [
589
+ " [INTO] =/IH N T UW/",
590
+ "[ICCI] =/IY CH IY/",
591
+ "CH[ILD]RE=/IH L D/",
592
+ "LL[I]ON=/Y/",
593
+ "M[I]CR=/AY/",
594
+ "WH[I]TE=/AY/",
595
+ "[I]DEA=/AY/",
596
+ "[INI] =/IY N IY/",
597
+ "[INO] =/IY N OW/",
598
+ "[INA] =/IY N AH/",
599
+ "[ISA] =/IY S AH/",
600
+ "[IVA] =/IY V AH/",
601
+ "[IMA] =/IY M AH/",
602
+ "[ITO] =/IY T OW/",
603
+ "[ITA] =/IY T AH/",
604
+ "#:[IALLY]=/IY AX L IY/",
605
+ "#:[IAL]=/IY AX L/",
606
+ "#:[IATE]=/IY EY T/",
607
+ "[INDS] =/AY N D Z/",
608
+ "[IN]D =/AY N/",
609
+ "#:R[IED] =/IY D/",
610
+ "[IED] =/AY D/",
611
+ "FR[IEN]=/EH N/",
612
+ "[IE]F=/IY/",
613
+ "[IE]G=/IY/",
614
+ "[IE]V=/IY/",
615
+ "[IE]C=/IY/",
616
+ "[IE]W=/IY/",
617
+ "[IE]P=/IY/",
618
+ "[IE]B=/IY/",
619
+ "[IE]L=/IY/",
620
+ "[IE]^%=/IY/",
621
+ " :[I]%=/AY/",
622
+ "QU[I]ET=/AY/",
623
+ "[I]%=/IY/",
624
+ "[IE]=/IY/",
625
+ "[IAN] =/IY AX N/",
626
+ "[IUM] =/IY AX M/",
627
+ "[IA] =/IY AX/",
628
+ "[IO] =/IY OW/",
629
+ "[IFY]=/AX F AY/",
630
+ "[ITY] =/AX T IY/",
631
+ "[IBLE]=/AX B AX L/",
632
+ "[IOUS]=/IY AX S/",
633
+ " [I]DE=/AY/",
634
+ "L[I]KE=/AY/",
635
+ "W[I]DE=/AY/",
636
+ "W[I]SE=/AY/",
637
+ "T[I]RE=/AY/",
638
+ "R[I]PE=/AY/",
639
+ "PR[I]CE=/AY/",
640
+ "SP[I]TE=/AY/",
641
+ "FF[I]CU=/AH/",
642
+ "SP[IR]IT=/IH R/",
643
+ "[I]^EM=/AY/",
644
+ "OT[I]CE=/AH/",
645
+ "RV[I]CE=/AH/",
646
+ "EV[I]DE=/AH/",
647
+ "CT[I]CE=/AH/",
648
+ "ACR[I]F=/AH/",
649
+ "LT[I]PL=/AH/",
650
+ "DR[I]VE=/AY/",
651
+ "GU[I]LD=/IH/",
652
+ "UN[I]VE=/AH/",
653
+ "[I]^+:#=/IH/",
654
+ "[IRR]=/ER/",
655
+ "[IR]#=/AY R/",
656
+ "[IZ]%=/AY Z/",
657
+ "ES[I]DE=/AH/",
658
+ "[I]D%=/AY/",
659
+ "[I]T%=/AY/",
660
+ "T[I]VE=/IH/",
661
+ "S[I]VE=/IH/",
662
+ "AL[I]VE=/AY/",
663
+ "L[I]VI=/IH/",
664
+ "AM[I]LY=/AH/",
665
+ "FF[I]CER=/AH/",
666
+ "OM[I]NI=/AH/",
667
+ "PRES[I]DE=/AH/",
668
+ "UT[I]FU=/AH/",
669
+ "D[I]VER=/AY/",
670
+ "[I]^+=/AY/",
671
+ "[IR]=/ER/",
672
+ "[IGH]=/AY/",
673
+ "[ILD]=/AY L D/",
674
+ "[IGN] =/AY N/",
675
+ "[IGN]^=/AY N/",
676
+ "[IGN]%=/AY N/",
677
+ "[IQUE]=/IY K/",
678
+ "^[I] =/IY/",
679
+ "#:[ISMS] =/IH Z AX M Z/",
680
+ "#:[ISM] =/IH Z AX M/",
681
+ "RM[I]NA=/AH/",
682
+ "IM[I]NA=/AH/",
683
+ "OM[I]NA=/AH/",
684
+ "EM[I]NA=/AH/",
685
+ "ED[I]CA=/AH/",
686
+ "UN[I]CA=/AH/",
687
+ "AM[I]NA=/AH/",
688
+ "UM[I]NA=/AH/",
689
+ "OL[I]DA=/AH/",
690
+ "AN[I]MA=/AH/",
691
+ "IN[I]ST=/AH/",
692
+ "NT[I]MA=/AH/",
693
+ "IC[I]PA=/AH/",
694
+ "EC[I]FI=/AH/",
695
+ "IL[I]TA=/AH/",
696
+ "IM[I]TA=/AH/",
697
+ "IG[I]TA=/AH/",
698
+ "ON[I]TO=/AH/",
699
+ "F[I]NA=/AY/",
700
+ "ST[I]GA=/AH/",
701
+ "[I]BLY=/AH/",
702
+ "MP[I]ON=/IY/",
703
+ "[I]NIZE=/AH/",
704
+ "SS[I]BI=/AH/",
705
+ "[ITLE] =/AY T AH L/",
706
+ "[IDLE] =/AY D AH L/",
707
+ "[I]=/IH/"
708
+ ],
709
+ J: ["[J]=/JH/"],
710
+ K: [" [K]N=/ /", "[KK]=/K/", "[K]=/K/"],
711
+ L: [
712
+ " [LOG] =/L AO G/",
713
+ " [LAS] =/L AA S/",
714
+ "[LO]C#=/L OW/",
715
+ "L[L]=/ /",
716
+ "#:[LINE] =/L AY N/",
717
+ "#:[LESSLY] =/L AX S L IY/",
718
+ "UN[LESS]=/L EH S/",
719
+ "#:[LESS] =/L AX S/",
720
+ "[LING]=/L IH NX/",
721
+ "#^:[L]ER=/L/",
722
+ "#^:[L]EY=/L/",
723
+ "[LEAD]=/L IY D/",
724
+ "OB[L]EM=/L/",
725
+ "#^:[L]ET=/L/",
726
+ "#^:[L]%=/AX L/",
727
+ "[LDT] =/L T/",
728
+ "[LL]=/L/",
729
+ "EA[L]IZ=/AH L/",
730
+ "[L]=/L/"
731
+ ],
732
+ M: [
733
+ " [MONEY]=/M AH N IY/",
734
+ " [MON] =/M OW N/",
735
+ " [MRS]=/M IH S IH Z/",
736
+ " [MIX]=/M IH K S/",
737
+ "[MOV]=/M UW V/",
738
+ "[MBS] =/M Z/",
739
+ "[MB] =/M/",
740
+ " [MN]=/N/",
741
+ "[MN]ING=/M/",
742
+ "[MN]ED=/M/",
743
+ "[MN]S=/M/",
744
+ "[MN] =/M/",
745
+ "#:[MENTS] =/M AX N T S/",
746
+ "#:[MENT] =/M AX N T/",
747
+ "#:[MAN] =/M AX N/",
748
+ "#:[MEN] =/M AX N/",
749
+ "[MM]=/M/",
750
+ "[M]=/M/"
751
+ ],
752
+ N: [
753
+ " [NAH] =/N AA/",
754
+ " [NOW]=/N AW/",
755
+ "E[NG]+=/N JH/",
756
+ "[NG]ING=/NX/",
757
+ "[NG]ED=/NX/",
758
+ "LI[NG]ER=/NX/",
759
+ "NI[NG]ER=/NX/",
760
+ "DI[NG]ER=/NX/",
761
+ "RI[NG]ER=/NX/",
762
+ "SI[NG]ER=/NX/",
763
+ "TI[NG]ER=/NX/",
764
+ "ZI[NG]ER=/NX/",
765
+ "MI[NG]ER=/NX/",
766
+ "BI[NG]ER=/NX/",
767
+ "[NG]R=/NX G/",
768
+ "[NG]#=/NX G/",
769
+ "[NGL]%=/NX G AX L/",
770
+ "[NG]=/NX/",
771
+ "[NKC]=/NX K/",
772
+ "[NK]=/NX K/",
773
+ "#:[NESS] =/N AX S/",
774
+ "[NDT] =/N T/",
775
+ "[NN]=/N/",
776
+ "[N]CT=/NG/",
777
+ "[N]X=/NG/",
778
+ "[N]=/N/"
779
+ ],
780
+ O: [
781
+ " [ONCE]=/W AH N S/",
782
+ " [ONES] =/W AH N Z/",
783
+ "D[OE]S=/AH/",
784
+ "D[ONE] =/AH N/",
785
+ "G[ONE] =/AO N/",
786
+ "#[ONE] =/W AH N/",
787
+ "[OWSKI] =/AO F S K IY/",
788
+ " [O]CC=/AH/",
789
+ " [O]PP=/AH/",
790
+ " [ONLY]=/OW N L IY/",
791
+ " [ONE] =/W AH N/",
792
+ " [OUR]=/AW ER/",
793
+ "[OROUGH]=/ER OW/",
794
+ "#:[ORY] =/ER IY/",
795
+ "#:[OR] =/ER/",
796
+ "#:[ORS] =/ER Z/",
797
+ "[ORR]=/AO R/",
798
+ "[OR]=/AO R/",
799
+ "T[OWN]=/AW N/",
800
+ "D[OWN]=/AW N/",
801
+ "R[OWN]=/AW N/",
802
+ "G[OWN]=/AW N/",
803
+ "P[OW]=/AW/",
804
+ "C[OW]=/AW/",
805
+ "V[OW]=/AW/",
806
+ "F[OW]L=/AW/",
807
+ "FL[OW]ER=/AW/",
808
+ "T[OW]ER=/AW/",
809
+ "[OW]=/OW/",
810
+ "I[ONED] =/AX N D/",
811
+ "I[ONING]=/AX N IH NX/",
812
+ "I[ONER]=/AX N ER/",
813
+ "I[ONERS]=/AX N ER Z/",
814
+ "C[O]MP=/AH/",
815
+ "C[O]MM=/AH/",
816
+ "C[O]LL=/AH/",
817
+ "M[O]N%=/AH/",
818
+ "L[O]V=/AH/",
819
+ "N[O]TH=/AH/",
820
+ "T[O]GE=/AH/",
821
+ "W[O]ND=/AH/",
822
+ "PR[O]VE=/UW/",
823
+ "C[O]MF=/AH/",
824
+ "C[O]VE=/AH/",
825
+ "PR[O]TE=/AH/",
826
+ "PR[O]VI=/AH/",
827
+ "PR[O]PO=/AH/",
828
+ "PR[O]FE=/AH/",
829
+ "PR[O]DU=/AH/",
830
+ "PR[O]NO=/AH/",
831
+ "G[O]VE=/AH/",
832
+ "P[O]LI=/AH/",
833
+ "SH[O]VE=/AH/",
834
+ "AB[O]VE=/AH/",
835
+ "M[O]DE=/AA/",
836
+ " [O]BS=/AH/",
837
+ "D[O]G=/AO/",
838
+ "N[O]MI=/AA/",
839
+ " [O]PEN=/OW/",
840
+ " [O]PE=/AA/",
841
+ "M[O]NTH=/AH/",
842
+ "N[O]NE=/AH/",
843
+ "PR[O]PE=/AA/",
844
+ "IS[O]NE=/AH/",
845
+ "S[O]NE=/AH/",
846
+ "PR[O]CE=/AH/",
847
+ "[O]^%=/OW/",
848
+ "[O]^EN=/OW/",
849
+ "[O]^I#=/OW/",
850
+ "[OLK]=/OW K/",
851
+ "[OL]T=/OW L/",
852
+ "[OL]D=/OW L/",
853
+ "[OL]Z=/OW L/",
854
+ "[OL]S=/OW L/",
855
+ "[OL]B=/OW L/",
856
+ "[OL]M=/OW L/",
857
+ "[OL]N=/OW L/",
858
+ "[OL]P=/OW L/",
859
+ "[OUGHT]=/AO T/",
860
+ "DR[OUGH]T=/AW/",
861
+ "D[OUGH]=/OW/",
862
+ "B[OUGH] =/AW/",
863
+ "C[OUGH]=/AA F/",
864
+ "T[OUGH] =/AH F/",
865
+ "R[OUGH] =/AH F/",
866
+ "N[OUGH] =/AH F/",
867
+ "[OUGH] =/AW/",
868
+ "[OUGH]=/AH F/",
869
+ "[OUSE]=/AW S/",
870
+ "H[OU]S#=/AW/",
871
+ "[OUS]=/AX S/",
872
+ "#:[OUR] =/ER/",
873
+ "[OUR]=/AO R/",
874
+ "W[OULD] =/UH D/",
875
+ "C[OULD] =/UH D/",
876
+ "SH[OULD] =/UH D/",
877
+ "[OULD]=/OW L D/",
878
+ "^[OU]^L=/AH/",
879
+ "[OUP]=/UW P/",
880
+ "T[OU]CH=/AH/",
881
+ "Y[OU]NG=/AH/",
882
+ "[OU]=/AW/",
883
+ "[OY]=/OY/",
884
+ "[OING]=/OW IH NX/",
885
+ "[OI]=/OY/",
886
+ "[OOR]=/AO R/",
887
+ "[OOSE]=/UW S/",
888
+ "[OOK]=/UH K/",
889
+ "BL[OOD]=/AH D/",
890
+ "F[OOD]=/UW D/",
891
+ "FL[OOD]=/AH D/",
892
+ "M[OOD]=/UW D/",
893
+ "BR[OOD]=/UW D/",
894
+ "[OOD]=/UH D/",
895
+ "[OO]=/UW/",
896
+ "SH[OE]=/UW/",
897
+ "[OE]=/OW/",
898
+ "#:[OH] =/OW/",
899
+ "[O] =/OW/",
900
+ "[OAR]=/AO R/",
901
+ "[OA]=/OW/",
902
+ "[O]NG=/AO/",
903
+ "I[ON]=/AX N/",
904
+ "#:[ON] =/AX N/",
905
+ "#^[ON]=/AX N/",
906
+ "[OSIS]=/OW S IH S/",
907
+ "[O]ST =/OW/",
908
+ "[O]S =/OW/",
909
+ "[OFF]=/AO F/",
910
+ "[OTHER]=/AH DH ER/",
911
+ "#^:[OM]=/AH M/",
912
+ "[OLOGY]=/AA L AH JH IY/",
913
+ "T[O]DA=/AH/",
914
+ "W[O]MA=/UH/",
915
+ "[O]^A=/OW/",
916
+ "C[O]LO=/AH/",
917
+ "[O]^O=/OW/",
918
+ "[O]BI=/OW/",
919
+ "[O]TI=/OW/",
920
+ "[O]SI=/OW/",
921
+ "[O]GL=/OW/",
922
+ "[O]FI=/OW/",
923
+ "[O]KI=/OW/",
924
+ "P[O]ST=/OW/",
925
+ "CR[O]SS=/AO/",
926
+ "FR[O]NT=/AH/",
927
+ "#:[ONS] =/AH N Z/",
928
+ "#:[OL] =/AO L/",
929
+ "D[O]ZEN=/AH/",
930
+ "TR[O]DU=/AH/",
931
+ "M[O]NK=/AH/",
932
+ "W[O]MAN=/UH/",
933
+ "M[O]NGO=/AA/",
934
+ "W[O]LF=/UH/",
935
+ "L[O]FT=/AO/",
936
+ "CR[O]FT=/AO/",
937
+ "[O]BLIG=/AH/",
938
+ "D[O]MEST=/AH/",
939
+ "ST[O]ME=/AH/",
940
+ "[O]LOGI=/AH/",
941
+ "[O]NIZE=/AH/",
942
+ "[O]NOMI=/AH/",
943
+ "[O]CRAT=/AH/",
944
+ "[O]SCOPE=/AH/",
945
+ "[O]SCOPI=/AH/",
946
+ "[O]PHOB=/AH/",
947
+ "[O]LITH=/AH/",
948
+ "DR[O]GEN=/AH/",
949
+ "[OBLE] =/OW B AH L/",
950
+ "[O]=/AA/",
951
+ "#:[OT] =/AH T/"
952
+ ],
953
+ P: [
954
+ " [PF]=/F/",
955
+ " [PN]=/N/",
956
+ " [PS]=/S/",
957
+ " [PT]=/T/",
958
+ "[PH]=/F/",
959
+ "[PEOP]=/P IY P/",
960
+ "[POW]=/P AW/",
961
+ "[PUT] =/P UH T/",
962
+ "[PP]=/P/",
963
+ "[P]=/P/"
964
+ ],
965
+ Q: ["[QUAR]=/K W AO R/", "[QUA]L=/K W AA/", "[QUE] =/K/", "[QU]=/K W/", "[Q]=/K/"],
966
+ R: [
967
+ "[REALLY]=/R IH L IY/",
968
+ " [RAW] =/R AA/",
969
+ " [RE]ME=/R IH/",
970
+ " [RE]SP=/R IH/",
971
+ " [RE]QU=/R IH/",
972
+ " [RE]MA=/R IH/",
973
+ " [RE]TU=/R IH/",
974
+ " [RE]FU=/R IH/",
975
+ " [RE]GA=/R IH/",
976
+ " [RE]VE=/R IH/",
977
+ " [RE]GI=/R EH/",
978
+ " [RE]^#=/R IY/",
979
+ "[ROLL]=/R OW L/",
980
+ "[RH]=/R/",
981
+ "[RDT] =/R T/",
982
+ "[RR]=/R/",
983
+ "[R]=/R/"
984
+ ],
985
+ S: [
986
+ " [SURE] =/SH UH R/",
987
+ "[SORRY]=/S AA R IY/",
988
+ " [SWOR]D=/S AO R/",
989
+ " [SON] =/S AH N/",
990
+ "[SH]=/SH/",
991
+ "#[SION]=/ZH AX N/",
992
+ "#:[SIDE] =/S AY D/",
993
+ "[SCH]OO=/S K/",
994
+ "[SOME]=/S AH M/",
995
+ "#[SUR]#=/ZH ER/",
996
+ "[SUR]#=/SH ER/",
997
+ "#[SU]#=/ZH UW/",
998
+ "#[SSU]#=/SH UW/",
999
+ "#[SED] =/Z D/",
1000
+ "[S]IVE=/S/",
1001
+ "IU[S] =/S/",
1002
+ " RE[S]#=/S/",
1003
+ " DI[S]I=/S/",
1004
+ " DI[S]O=/S/",
1005
+ " DI[S]AR=/S/",
1006
+ " DI[S]T=/S/",
1007
+ " DI[S]AG=/S/",
1008
+ " DI[S]EM=/S/",
1009
+ " DI[S]AP=/S/",
1010
+ " DI[S]AF=/S/",
1011
+ "MY[S]EL=/S/",
1012
+ "I[S]O=/S/",
1013
+ "O[S]O=/S/",
1014
+ "E[S]A=/S/",
1015
+ "O[S]A=/S/",
1016
+ "A[S]I=/S/",
1017
+ "Y[S]O=/S/",
1018
+ "E[S]O=/S/",
1019
+ "U[S]O=/S/",
1020
+ "Y[S]E=/S/",
1021
+ "E[S]E=/S/",
1022
+ "A[S]A=/S/",
1023
+ "I[S]A=/S/",
1024
+ "U[S]A=/S/",
1025
+ "E[S]I=/S/",
1026
+ "O[S]I=/S/",
1027
+ "#[S]#=/Z/",
1028
+ "[SAID]=/S EH D/",
1029
+ "^[SION]=/SH AX N/",
1030
+ "[S]S=/ /",
1031
+ "SE[S] =/AH Z/",
1032
+ "GE[S] =/AH Z/",
1033
+ ".[S] =/Z/",
1034
+ "#:.E[S] =/Z/",
1035
+ "#^:##[S] =/Z/",
1036
+ "A[S] =/Z/",
1037
+ "O[S] =/Z/",
1038
+ "#^:#[S] =/S/",
1039
+ "U[S] =/S/",
1040
+ " :#[S] =/Z/",
1041
+ "AN[SWER]=/S ER/",
1042
+ "[SCH]=/SH/",
1043
+ "[S]C+=/ /",
1044
+ "[STLE]=/S AX L/",
1045
+ "#:[STEN]=/S AX N/",
1046
+ "#:[STEIN] =/S T AY N/",
1047
+ "#:[SON] =/S AX N/",
1048
+ "[SS]=/S/",
1049
+ "OB[S]ER=/Z/",
1050
+ "AB[S]OR=/Z/",
1051
+ "AB[S]OL=/Z/",
1052
+ "TRAN[S]#=/Z/",
1053
+ "[S]=/S/"
1054
+ ],
1055
+ T: [
1056
+ " [THY] =/DH AY/",
1057
+ " [THE] =/DH AX/",
1058
+ " [THIS] =/DH IH S/",
1059
+ " [THEY]=/DH EY/",
1060
+ " [THERE]=/DH EH R/",
1061
+ " [THEN]=/DH EH N/",
1062
+ " [THAN] =/DH AE N/",
1063
+ " [THEM] =/DH EH M/",
1064
+ " [TWO]=/T UW/",
1065
+ "[THAT] =/DH AE T/",
1066
+ "[THER]=/DH ER/",
1067
+ "[THEIR]=/DH EH R/",
1068
+ "[THESE] =/DH IY Z/",
1069
+ "[THROUGH]=/TH R UW/",
1070
+ "[THOSE]=/DH OW Z/",
1071
+ "[THOUGH] =/DH OW/",
1072
+ "[TH]=/TH/",
1073
+ "#:[TED] =/T IH D/",
1074
+ "S[TI]#N=/CH/",
1075
+ "[TI]O=/SH/",
1076
+ "[TIAL]=/SH AX L/",
1077
+ "[TI]A=/SH/",
1078
+ "[TIEN]=/SH AX N/",
1079
+ "[TUR]#=/CH ER/",
1080
+ "[TU]A=/CH UW/",
1081
+ "[TZ]=/T S/",
1082
+ "[TSCH]=/CH/",
1083
+ "#:[TOWN] =/T AW N/",
1084
+ "#:[TIME] =/T AY M/",
1085
+ "[TCH]=/CH/",
1086
+ "#:[TTED] =/T IH D/",
1087
+ "[TT]=/T/",
1088
+ "[T]=/T/"
1089
+ ],
1090
+ U: [
1091
+ "[UCCIO] =/UW CH IY OW/",
1092
+ "[UCCI] =/UW CH IY/",
1093
+ "[ULLO] =/UW L OW/",
1094
+ "[UZZI] =/UW T S IY/",
1095
+ "[ULLI] =/UW L IY/",
1096
+ "[USSO] =/UW S OW/",
1097
+ " [USE] =/Y UW S/",
1098
+ "IN[U]TE=/AH/",
1099
+ "TR[U]TH=/UW/",
1100
+ "P[U]LL=/UH/",
1101
+ "B[U]LL=/UH/",
1102
+ "P[U]SH=/UH/",
1103
+ " [UGH] =/AH G/",
1104
+ " [UN]IN=/AH N/",
1105
+ " [UN]I=/Y UW N/",
1106
+ " [UN]#=/AH N/",
1107
+ " [UN]^=/AH N/",
1108
+ "[UIT]=/UW T/",
1109
+ "[URR]=/ER/",
1110
+ "@[UR]#=/UH R/",
1111
+ "[UR]#=/Y UH R/",
1112
+ "[UR]=/ER/",
1113
+ "[U]^ =/AH/",
1114
+ "C[U]SH=/UH/",
1115
+ "B[U]SH=/UH/",
1116
+ "P[U]NI=/AH/",
1117
+ "[U]^^=/AH/",
1118
+ "[UY]=/AY/",
1119
+ " G[U]#=/ /",
1120
+ "G[U]%=/ /",
1121
+ "G[U]#=/W/",
1122
+ "#N[U]=/Y UW/",
1123
+ "[ULAT]=/Y AX L EY T/",
1124
+ "[ULOUS]=/Y AX L AH S/",
1125
+ "[ULUS]=/Y AX L AH S/",
1126
+ "[ULAR]=/Y AX L ER/",
1127
+ "ST[U]DY=/AH/",
1128
+ "@[U]=/UW/",
1129
+ "[U] =/UW/",
1130
+ "[U]=/Y UW/"
1131
+ ],
1132
+ V: [" [VON] =/V AO N/", "[VIEW]=/V Y UW/", "[VIOL]=/V AY AH L/", "[V]=/V/"],
1133
+ W: [
1134
+ " [WANTED]=/W AO N T IH D/",
1135
+ "[WATER]=/W AO T ER/",
1136
+ " [WON] =/W AH N/",
1137
+ "[WA]STE=/W EY/",
1138
+ "[WA]S=/W AA/",
1139
+ "[WA]T=/W AA/",
1140
+ "[WA]N=/W AA/",
1141
+ "[WA]M=/W AA/",
1142
+ "[WHERE]=/WH EH R/",
1143
+ "[WHAT]=/WH AH T/",
1144
+ "[WHOL]=/HH OW L/",
1145
+ "[WHO]=/HH UW/",
1146
+ "[WH]=/WH/",
1147
+ "#:[WARDS] =/W ER D Z/",
1148
+ "#:[WARD] =/W ER D/",
1149
+ "[WAR]=/W AO R/",
1150
+ "[WOR]^=/W ER/",
1151
+ " [WOW]=/W AW/",
1152
+ "[WR]=/R/",
1153
+ "[W]=/W/"
1154
+ ],
1155
+ X: [" [X]#=/Z/", "[X]=/K S/"],
1156
+ Y: [
1157
+ "[YOUR]=/Y AO R/",
1158
+ "[YEAH] =/Y AE/",
1159
+ " [YET] =/Y EH T/",
1160
+ " [YES] =/Y EH S/",
1161
+ "H[Y]DR=/AY/",
1162
+ "H[Y]PE=/AY/",
1163
+ "#^:[Y] =/IY/",
1164
+ "#^:[Y]I=/IY/",
1165
+ " :[Y] =/AY/",
1166
+ " [YOU] =/Y UW/",
1167
+ " [Y]#=/Y/",
1168
+ " :[Y]#=/AY/",
1169
+ " :[Y]^+:#=/IH/",
1170
+ " :[Y]^#=/AY/",
1171
+ "[Y] =/IY/",
1172
+ "AW[Y]ER=/Y/",
1173
+ "DB[Y]E=/AY/",
1174
+ "[Y]=/IH/"
1175
+ ],
1176
+ Z: [" [ZERO]=/Z IH R OW/", "[ZZ]=/Z/", "[Z]=/Z/"]
1177
+ };
1178
// Letters treated as vowels / consonants by the rule context classes.
const VOWELS = "AEIOUY";
const CONSONANTS = "BCDFGHJKLMNPQRSTVWXZ";

// Context meta-characters used in rule strings, mapped to the regular
// expression fragment each one expands to.
const CLASSES = {
  // one or more vowels
  "#": "[" + VOWELS + "]+",
  // one of the listed endings
  "%": "(?:ER|E|ES|ED|ING|ELY)",
  // one of the listed consonants / digraphs
  "&": "(?:S|C|G|Z|X|J|CH|SH)",
  // E, I or Y
  "+": "[EIY]",
  // one of B D V G J L M N R W Z
  ".": "[BDVGJLMNRWZ]",
  // zero or more consonants
  ":": "[" + CONSONANTS + "]*",
  // one of the listed consonants / digraphs
  "@": "(?:T|S|R|D|L|Z|N|J|TH|CH|SH)",
  // exactly one consonant
  "^": "[" + CONSONANTS + "]",
};

// The meta-characters themselves, for quick membership tests.
const SPECIAL_CHARS = new Set(Object.keys(CLASSES));

// Phoneme symbols that nrlToArpabet treats as vowels (they receive a stress digit).
const NRL_VOWELS = /* @__PURE__ */ new Set([
  "AA", "AE", "AH", "AO", "AW",
  "AY", "EH", "ER", "EY", "IH",
  "IY", "OW", "OY", "UH", "UW",
]);
1208
/**
 * Compiles one rule string of the form `LEFT[TARGET]RIGHT=/PHONEMES/` into
 * the record used by the matcher.
 *
 * @param {string} ruleStr - Rule text, e.g. `" [AND] =/AH N D/"`.
 * @returns {object|null} `null` when the text does not have the expected
 *   shape; otherwise an object with:
 *   - `target` / `targetLen`: the bracketed letters and their length;
 *   - `phonemes`: the output phonemes, each passed through nrlToArpabet;
 *   - `rightRe`: sticky regex for the target followed by the right context;
 *   - `leftRe`: regex anchored at the end for the left context, or `null`
 *     when the left context expands to nothing;
 *   - `fullMatch`: target + right context when the right context has no
 *     meta-characters (enables a plain string comparison), else `null`;
 *   - `leftLiteral`: the left context when it is non-empty and has no
 *     meta-characters, else `null`;
 *   - `ruleStr`: the original text.
 */
function compileRule(ruleStr) {
  const parts = /^([^[]*)\[([^\]]+)\]([^=]*)=\/(.*)\/$/.exec(ruleStr);
  if (parts === null) {
    return null;
  }
  const [, leftCtx, target, rightCtx, phonemeStr] = parts;

  const leftSource = expandContext(leftCtx);
  let leftRe = null;
  if (leftSource.length > 0) {
    leftRe = new RegExp(leftSource + "$");
  }

  const rightRe = new RegExp(escapeRegex(target) + expandContext(rightCtx), "y");

  // Whitespace-separated symbols; an empty or blank list yields no phonemes.
  const phonemes = [];
  for (const symbol of phonemeStr.trim().split(/\s+/)) {
    if (symbol.length > 0) {
      phonemes.push(nrlToArpabet(symbol));
    }
  }

  let fullMatch = null;
  if (isLiteralContext(rightCtx)) {
    fullMatch = target + rightCtx;
  }

  let leftLiteral = null;
  if (leftCtx.length > 0 && isLiteralContext(leftCtx)) {
    leftLiteral = leftCtx;
  }

  return {
    fullMatch,
    leftLiteral,
    leftRe,
    phonemes,
    rightRe,
    ruleStr,
    target,
    targetLen: target.length,
  };
}
1235
/**
 * Escapes every regular-expression metacharacter in `s` with a backslash so
 * the text can be embedded in a pattern and matched literally.
 *
 * @param {string} s - Text to escape.
 * @returns {string} The text with `. * + ? ^ $ { } ( ) | [ ] \` backslash-escaped.
 */
function escapeRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => "\\" + metaChar);
}
1238
/**
 * Turns a rule context into regular-expression source: each meta-character
 * is replaced by its class fragment, every other character is escaped so it
 * matches literally.
 *
 * @param {string} ctx - Left or right context text from a rule.
 * @returns {string} Regex source (empty for an empty context).
 */
function expandContext(ctx) {
  return Array.from(ctx, (symbol) => {
    if (SPECIAL_CHARS.has(symbol)) {
      return CLASSES[symbol];
    }
    return escapeRegex(symbol);
  }).join("");
}
1245
/**
 * Reports whether a rule context is plain text, i.e. contains none of the
 * context meta-characters.
 *
 * @param {string} ctx - Left or right context text from a rule.
 * @returns {boolean} `true` when no character of `ctx` is a meta-character
 *   (this includes the empty context).
 */
function isLiteralContext(ctx) {
  return !Array.from(ctx).some((symbol) => SPECIAL_CHARS.has(symbol));
}
1253
/**
 * Converts one phoneme symbol from the rule tables to its ARPABET form.
 *
 * "AX" becomes "AH0", "NX" becomes "NG" and "WH" becomes "W". Any symbol in
 * the vowel set gets stress digit "1" appended. Everything else is returned
 * unchanged.
 *
 * @param {string} phoneme - Symbol as written in a rule.
 * @returns {string} ARPABET phoneme.
 */
function nrlToArpabet(phoneme) {
  switch (phoneme) {
    case "AX":
      return "AH0";
    case "NX":
      return "NG";
    case "WH":
      return "W";
    default:
      return NRL_VOWELS.has(phoneme) ? phoneme + "1" : phoneme;
  }
}
1268
// Rule tables compiled once at module load: for every key of NRL_RULES, the
// list of successfully compiled rules in their original order. Rule strings
// that compileRule rejects (returns null for) are left out.
const COMPILED_RULES = Object.fromEntries(
  Object.entries(NRL_RULES).map(([letter, ruleStrings]) => [
    letter,
    ruleStrings
      .map((ruleStr) => compileRule(ruleStr))
      .filter((compiled) => compiled !== null),
  ]),
);
1279
/**
 * Converts a word to ARPABET phonemes using the letter-to-sound rules.
 * Same as wordToArpabetTraced, but returns only the phoneme list.
 *
 * @param {string} word - Word to convert.
 * @returns {string[]} ARPABET phonemes.
 */
function wordToArpabet(word) {
  const { phonemes } = wordToArpabetTraced(word);
  return phonemes;
}
1282
/**
 * Converts a word to ARPABET phonemes and reports which rule produced which
 * phonemes.
 *
 * The word is upper-cased and padded with one space on each side, then
 * scanned left to right. At each position the rules for the current
 * character are tried in order and the first one whose target, left context
 * and right context all match is applied; the scan then jumps past the
 * rule's target. Characters with no rule list, or with no matching rule, are
 * skipped. Finally the raw phonemes are passed through applyStressPrediction.
 *
 * @param {string} word - Word to convert.
 * @returns {{phonemes: string[], steps: Array<{letters: string, phonemes: string[], rule: string}>}}
 *   The final phonemes, plus one step per applied rule holding the consumed
 *   letters, that rule's slice of the final phonemes and the rule text.
 */
function wordToArpabetTraced(word) {
  const padded = " " + word.toUpperCase() + " ";
  const stop = padded.length - 1;
  const emitted = [];
  const fired = [];

  let cursor = 1;
  while (cursor < stop) {
    const candidates = COMPILED_RULES[padded[cursor]];
    let winner;

    if (candidates !== undefined) {
      // Text to the left of the cursor, sliced at most once per position.
      let leftText = null;
      const leftRegexMatches = (rule) => {
        if (leftText === null) {
          leftText = padded.slice(0, cursor);
        }
        return rule.leftRe.test(leftText);
      };

      winner = candidates.find((rule) => {
        if (rule.fullMatch !== null) {
          // Literal right context: plain string comparisons are enough.
          if (!padded.startsWith(rule.fullMatch, cursor)) {
            return false;
          }
          if (rule.leftRe === null) {
            return true;
          }
          return rule.leftLiteral === null
            ? leftRegexMatches(rule)
            : padded.endsWith(rule.leftLiteral, cursor);
        }

        // Cheap rejection before running any regex.
        if (rule.targetLen > 1 && !padded.startsWith(rule.target, cursor)) {
          return false;
        }
        // rightRe is sticky and shared between calls, so it must be re-aimed.
        rule.rightRe.lastIndex = cursor;
        if (rule.leftRe !== null && !leftRegexMatches(rule)) {
          return false;
        }
        return rule.rightRe.test(padded);
      });
    }

    if (winner === undefined) {
      cursor += 1;
      continue;
    }

    emitted.push(...winner.phonemes);
    fired.push({
      count: winner.phonemes.length,
      letters: winner.target,
      ruleStr: winner.ruleStr,
    });
    cursor += winner.targetLen;
  }

  const phonemes = applyStressPrediction(word, emitted);

  // Hand each step its share of the stress-adjusted phonemes, in order.
  let offset = 0;
  const steps = fired.map(({ count, letters, ruleStr }) => {
    const step = {
      letters,
      phonemes: phonemes.slice(offset, offset + count),
      rule: ruleStr,
    };
    offset += count;
    return step;
  });

  return { phonemes, steps };
}
1353
/**
 * Converts a word to a phonetic spelling: the word is first turned into
 * ARPABET phonemes, which are then rendered by arpabetToFormat.
 *
 * @param {string} word - Word to convert.
 * @param {string} [format="ingglish"] - Output format name, forwarded
 *   unchanged to arpabetToFormat.
 * @returns {*} Whatever arpabetToFormat returns for the requested format.
 */
function wordToPhonetic(word, format = "ingglish") {
  return arpabetToFormat(wordToArpabet(word), format);
}
1357
+ export {
1358
+ applyStressPrediction,
1359
+ wordToArpabet,
1360
+ wordToArpabetTraced,
1361
+ wordToPhonetic
1362
+ };