@ingglish/fallback 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,654 @@
1
+ "use strict";
2
// Bundler interop helpers: short aliases for the reflection primitives used below.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs one enumerable getter on `target` per enumerable key of `all`,
 * using the function stored under that key as the getter.
 */
const __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters that read through
 * to `from`. Keys already owned by `to`, and the key equal to `except`, are
 * skipped. Enumerability follows the source descriptor (enumerable when no
 * descriptor is found). `desc` is only used as scratch storage.
 *
 * @returns `to`, also when `from` is neither an object nor a function.
 */
const __copyProps = (to, from, except, desc) => {
  const isNonNullObject = from && typeof from === "object";
  if (!isNonNullObject && typeof from !== "function") {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable,
    });
  }
  return to;
};

/**
 * Builds a fresh object flagged with a non-enumerable `__esModule: true`
 * and exposes every own property of `mod` on it through getters.
 */
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
19
+
20
+ // src/index.ts
21
// Public API of the package. Each entry is a thunk, so the binding it names is
// only read when a consumer accesses the corresponding export property.
var index_exports = {};
const exportGetters = {
  KNOWN_INITIALISMS: () => KNOWN_INITIALISMS,
  LETTER_PHONEMES: () => LETTER_PHONEMES,
  diagnoseUnknown: () => diagnoseUnknown,
  isInitialism: () => isInitialism,
  matchBritish: () => matchBritish,
  parseInitialismWithSuffix: () => parseInitialismWithSuffix,
  translateAsAcronym: () => translateAsAcronym,
  translateUnknown: () => translateUnknown,
};
__export(index_exports, exportGetters);
// Publish a CommonJS view of the namespace (flagged with __esModule).
module.exports = __toCommonJS(index_exports);
33
+ var import_dictionary4 = require("@ingglish/dictionary");
34
+ var import_g2p = require("@ingglish/g2p");
35
+ var import_phonemes5 = require("@ingglish/phonemes");
36
+
37
+ // src/acronyms.ts
38
+ var import_phonemes = require("@ingglish/phonemes");
39
/**
 * Known initialisms/acronyms (lower-case key) mapped to the words they stand
 * for, one array element per word. Keys are kept in alphabetical order.
 *
 * Within this file only the key set is consumed (it seeds KNOWN_INITIALISMS);
 * the expansions themselves are reference data.
 *
 * Expansions are written as space-separated phrases and split into word
 * arrays below, so no individual word may contain a space.
 */
const INITIALISM_EXPANSIONS = Object.fromEntries(
  Object.entries({
    ac: "alternating current",
    ad: "anno domini",
    afk: "away from keyboard",
    ai: "artificial intelligence",
    aka: "also known as",
    am: "ante meridiem",
    api: "application programming interface",
    asap: "as soon as possible",
    atm: "automated teller machine",
    aws: "amazon web services",
    b2b: "business to business",
    b2c: "business to consumer",
    bc: "before christ",
    bdd: "behavior driven development",
    brb: "be right back",
    btw: "by the way",
    byob: "bring your own bottle",
    cd: "continuous deployment",
    cdn: "content delivery network",
    ceo: "chief executive officer",
    cfo: "chief financial officer",
    ci: "continuous integration",
    cia: "central intelligence agency",
    cio: "chief information officer",
    cli: "command line interface",
    cmo: "chief marketing officer",
    cms: "content management system",
    cnn: "convolutional neural network",
    coo: "chief operating officer",
    cors: "cross origin resource sharing",
    cpr: "cardiopulmonary resuscitation",
    cpu: "central processing unit",
    crm: "customer relationship management",
    crud: "create read update delete",
    cso: "chief security officer",
    css: "cascading style sheets",
    csv: "comma separated values",
    cto: "chief technology officer",
    dc: "direct current",
    ddos: "distributed denial of service",
    dea: "drug enforcement administration",
    diy: "do it yourself",
    dj: "disc jockey",
    dmv: "department of motor vehicles",
    dna: "deoxyribonucleic acid",
    dns: "domain name system",
    eod: "end of day",
    epa: "environmental protection agency",
    er: "emergency room",
    erp: "enterprise resource planning",
    eta: "estimated time of arrival",
    etl: "extract transform load",
    eu: "european union",
    faq: "frequently asked questions",
    fbi: "federal bureau of investigation",
    fda: "food and drug administration",
    ftp: "file transfer protocol",
    fyi: "for your information",
    gan: "generative adversarial network",
    gcp: "google cloud platform",
    gif: "graphics interchange format",
    gps: "global positioning system",
    gpt: "generative pre-trained transformer",
    gpu: "graphics processing unit",
    gui: "graphical user interface",
    hdd: "hard disk drive",
    hiv: "human immunodeficiency virus",
    hr: "human resources",
    html: "hypertext markup language",
    http: "hypertext transfer protocol",
    https: "hypertext transfer protocol secure",
    iaas: "infrastructure as a service",
    icu: "intensive care unit",
    id: "identification",
    ide: "integrated development environment",
    idk: "i don't know",
    imo: "in my opinion",
    io: "input output",
    ip: "internet protocol",
    iq: "intelligence quotient",
    irs: "internal revenue service",
    isp: "internet service provider",
    it: "information technology",
    jpeg: "joint photographic experts group",
    jpg: "joint photographic experts group",
    json: "javascript object notation",
    jwt: "json web token",
    kpi: "key performance indicator",
    lan: "local area network",
    lcd: "liquid crystal display",
    led: "light emitting diode",
    llm: "large language model",
    mc: "master of ceremonies",
    mfa: "multi factor authentication",
    ml: "machine learning",
    mp3: "moving picture experts group audio layer",
    mp4: "moving picture experts group",
    mph: "miles per hour",
    mri: "magnetic resonance imaging",
    mvp: "minimum viable product",
    // Previously omitted "and", unlike the `fda` entry which keeps it.
    nasa: "national aeronautics and space administration",
    nato: "north atlantic treaty organization",
    nda: "non disclosure agreement",
    nic: "network interface card",
    nlp: "natural language processing",
    nosql: "not only sql",
    nsa: "national security agency",
    nsfw: "not safe for work",
    nyc: "new york city",
    omg: "oh my god",
    ooo: "out of office",
    oop: "object oriented programming",
    orm: "object relational mapping",
    os: "operating system",
    otp: "one time password",
    paas: "platform as a service",
    pc: "personal computer",
    pdf: "portable document format",
    php: "hypertext preprocessor",
    pm: "post meridiem",
    png: "portable network graphics",
    pov: "point of view",
    pr: "public relations",
    pto: "paid time off",
    qa: "quality assurance",
    rag: "retrieval augmented generation",
    ram: "random access memory",
    rfp: "request for proposal",
    rip: "rest in peace",
    rna: "ribonucleic acid",
    rnn: "recurrent neural network",
    roi: "return on investment",
    rom: "read only memory",
    rpm: "revolutions per minute",
    rsvp: "please respond",
    rv: "recreational vehicle",
    saas: "software as a service",
    sdk: "software development kit",
    sftp: "secure file transfer protocol",
    sla: "service level agreement",
    sql: "structured query language",
    ssd: "solid state drive",
    ssh: "secure shell",
    ssl: "secure sockets layer",
    suv: "sport utility vehicle",
    svg: "scalable vector graphics",
    tba: "to be announced",
    tbd: "to be determined",
    tcp: "transmission control protocol",
    tdd: "test driven development",
    tldr: "too long didn't read",
    tls: "transport layer security",
    tv: "television",
    uat: "user acceptance testing",
    udp: "user datagram protocol",
    ui: "user interface",
    uk: "united kingdom",
    un: "united nations",
    uri: "uniform resource identifier",
    url: "uniform resource locator",
    us: "united states",
    usa: "united states of america",
    usb: "universal serial bus",
    uv: "ultraviolet",
    ux: "user experience",
    vp: "vice president",
    vpn: "virtual private network",
    wan: "wide area network",
    xml: "extensible markup language",
    xss: "cross site scripting",
  }).map(([initialism, phrase]) => [initialism, phrase.split(" ")])
);
226
// Words longer than this are rejected by isInitialism before the set lookup.
const MAX_INITIALISM_LENGTH = 5;

/**
 * Spoken name of each lower-case Latin letter as a list of phoneme symbols
 * (stress digits included), keyed by the letter.
 * Declared with `var` because it is exposed through a lazy export getter.
 */
var LETTER_PHONEMES = Object.fromEntries(
  [
    ["a", "EY1"],
    ["b", "B IY1"],
    ["c", "S IY1"],
    ["d", "D IY1"],
    ["e", "IY1"],
    ["f", "EH1 F"],
    ["g", "JH IY1"],
    ["h", "EY1 CH"],
    ["i", "AY1"],
    ["j", "JH EY1"],
    ["k", "K EY1"],
    ["l", "EH1 L"],
    ["m", "EH1 M"],
    ["n", "EH1 N"],
    ["o", "OW1"],
    ["p", "P IY1"],
    ["q", "K Y UW1"],
    ["r", "AA1 R"],
    ["s", "EH1 S"],
    ["t", "T IY1"],
    ["u", "Y UW1"],
    ["v", "V IY1"],
    ["w", "D AH1 B AH0 L Y UW0"],
    ["x", "EH1 K S"],
    ["y", "W AY1"],
    ["z", "Z IY1"],
  ].map(([letter, spoken]) => [letter, spoken.split(" ")])
);
255
// Lower-case keys of the expansion table, for constant-time membership checks.
// Declared with `var` because it is exposed through a lazy export getter.
var KNOWN_INITIALISMS = new Set(Object.keys(INITIALISM_EXPANSIONS));

/**
 * Reports whether `word` is a known initialism, ignoring letter case.
 *
 * @param {string} word - Candidate token.
 * @returns {boolean} `false` for anything longer than MAX_INITIALISM_LENGTH,
 *   otherwise whether the lower-cased word is in KNOWN_INITIALISMS.
 */
function isInitialism(word) {
  const isShortEnough = word.length <= MAX_INITIALISM_LENGTH;
  return isShortEnough && KNOWN_INITIALISMS.has(word.toLowerCase());
}
262
// Suffixes that may trail an initialism; tried in this order, matched case-sensitively.
const INITIALISM_SUFFIXES = ["'s", "s"];

/**
 * Splits a word such as "APIs" or "API's" into a known initialism and its suffix.
 *
 * NOTE(review): the full word is never checked first, so "https" parses as
 * base "http" + suffix "s" — confirm callers test isInitialism(word) beforehand.
 *
 * @param {string} word - Candidate token; the base keeps its original casing.
 * @returns {{ base: string, suffix: string } | null} The first suffix whose
 *   remainder passes isInitialism, or `null` when none does.
 */
function parseInitialismWithSuffix(word) {
  const stripSuffix = (candidate) => word.slice(0, -candidate.length);
  const suffix = INITIALISM_SUFFIXES.find(
    (candidate) =>
      word.length > candidate.length &&
      word.endsWith(candidate) &&
      isInitialism(stripSuffix(candidate))
  );
  if (suffix === undefined) {
    return null;
  }
  return { base: stripSuffix(suffix), suffix };
}
274
// Spoken names of the decimal digits, in the same phoneme notation as
// LETTER_PHONEMES. Needed because several listed initialisms contain digits
// (mp3, mp4, b2b, b2c) and would otherwise lose them when spelled out.
const DIGIT_PHONEMES = {
  0: ["Z", "IY1", "R", "OW0"],
  1: ["W", "AH1", "N"],
  2: ["T", "UW1"],
  3: ["TH", "R", "IY1"],
  4: ["F", "AO1", "R"],
  5: ["F", "AY1", "V"],
  6: ["S", "IH1", "K", "S"],
  7: ["S", "EH1", "V", "AH0", "N"],
  8: ["EY1", "T"],
  9: ["N", "AY1", "N"],
};

/**
 * Spells a word out character by character ("api" -> "ay pee eye") and
 * renders the resulting phoneme sequence in the requested output format.
 *
 * Letters are looked up case-insensitively in LETTER_PHONEMES and ASCII digits
 * in DIGIT_PHONEMES; any other character (punctuation, accented letters, ...)
 * contributes nothing.
 *
 * @param {string} word - Token to spell out.
 * @param {string} [format="ingglish"] - Output format, passed through to arpabetToFormat.
 * @returns Whatever arpabetToFormat produces for the collected phonemes.
 */
function translateAsAcronym(word, format = "ingglish") {
  const arpabet = [];
  for (const char of word.toLowerCase()) {
    const spoken = LETTER_PHONEMES[char] ?? DIGIT_PHONEMES[char];
    if (spoken !== undefined) {
      arpabet.push(...spoken);
    }
  }
  return (0, import_phonemes.arpabetToFormat)(arpabet, format);
}
284
+
285
+ // src/british.ts
286
+ var import_dictionary = require("@ingglish/dictionary");
287
+ var import_phonemes2 = require("@ingglish/phonemes");
288
/**
 * Spelling rewrites from British to American English.
 *
 * matchBritish tries the rules top to bottom against the lower-cased word and
 * keeps the first rewrite that changes the word and that the dictionary knows,
 * so a rule that produces a non-word is simply skipped. Patterns ending in `$`
 * are suffix rules; the unanchored ones rewrite the first occurrence anywhere
 * in the word.
 */
const BRITISH_TO_AMERICAN = [
  // -isation → -ization (organisation→organization)
  { pattern: /isation$/, replacement: "ization" },
  // -ise → -ize (realise→realize, organise→organize)
  { pattern: /ise$/, replacement: "ize" },
  // -our → -or (colour→color, favour→favor)
  { pattern: /our$/, replacement: "or" },
  // -oured → -ored (coloured→colored, favoured→favored)
  { pattern: /oured$/, replacement: "ored" },
  // -ouring → -oring (colouring→coloring)
  { pattern: /ouring$/, replacement: "oring" },
  // -ourable → -orable (favourable→favorable)
  { pattern: /ourable$/, replacement: "orable" },
  // -re → -er (centre→center, theatre→theater).
  // Only after a consonant letter, so ordinary vowel + "re" endings
  // (entire, cure, more) are never rewritten.
  { pattern: /([b-df-hj-np-tv-z])re$/, replacement: "$1er" },
  // -lled → -led (travelled→traveled, cancelled→canceled)
  { pattern: /lled$/, replacement: "led" },
  // -lling → -ling (travelling→traveling, cancelling→canceling)
  { pattern: /lling$/, replacement: "ling" },
  // -ller → -ler (traveller→traveler)
  { pattern: /ller$/, replacement: "ler" },
  // -ence → -ense (defence→defense, offence→offense)
  { pattern: /ence$/, replacement: "ense" },
  // -ogue → -og (catalogue→catalog, dialogue→dialog)
  { pattern: /ogue$/, replacement: "og" },
  // -ae- → -e- (anaesthetic→anesthetic, paediatric→pediatric)
  { pattern: /ae/, replacement: "e" },
  // -oe- → -e- (foetus→fetus, oestrogen→estrogen)
  { pattern: /oe/, replacement: "e" },
  // -ey → -y (curtsey→curtsy)
  { pattern: /ey$/, replacement: "y" },
  // grey → gray
  { pattern: /grey/, replacement: "gray" },
];
323
/**
 * Looks for an American respelling of `word` that the dictionary can pronounce.
 *
 * Each BRITISH_TO_AMERICAN rule is applied in order to the lower-cased word;
 * rules that leave the word unchanged, or whose result has no dictionary
 * pronunciation (falsy lookup result), are skipped.
 *
 * @param {string} word - Word to respell.
 * @returns {{ american: string, phonemes: * } | null} The first successful
 *   respelling with its looked-up pronunciation, or `null` when no rule helps.
 */
function matchBritish(word) {
  const lowered = word.toLowerCase();
  for (const rule of BRITISH_TO_AMERICAN) {
    // A non-matching pattern makes replace() return the input untouched.
    const american = lowered.replace(rule.pattern, rule.replacement);
    if (american === lowered) {
      continue;
    }
    const phonemes = (0, import_dictionary.lookupPronunciation)(american);
    if (phonemes) {
      return { american, phonemes };
    }
  }
  return null;
}
338
/**
 * Renders `word` through its American respelling, when matchBritish finds one.
 *
 * @param {string} word - Word to translate.
 * @param {string} [format="ingglish"] - Output format for arpabetToFormat.
 * @returns The formatted pronunciation, or `null` when there is no respelling.
 */
function translateAsBritish(word, format = "ingglish") {
  const respelled = matchBritish(word);
  return respelled === null
    ? null
    : (0, import_phonemes2.arpabetToFormat)(respelled.phonemes, format);
}
345
+
346
+ // src/compounds.ts
347
+ var import_dictionary2 = require("@ingglish/dictionary");
348
+ var import_phonemes3 = require("@ingglish/phonemes");
349
/**
 * Upper-cases the first UTF-16 unit of `str` and leaves the rest untouched.
 *
 * @param {string} str - Text to capitalize; returned as-is when empty.
 * @returns {string} The capitalized text.
 */
function capitalize(str) {
  if (str.length === 0) {
    return str;
  }
  const head = str.charAt(0);
  const tail = str.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
355
/**
 * True when `char` has distinct upper/lower-case forms and already equals its
 * upper-case form. Caseless input (digits, punctuation, "") yields `false`.
 */
function isUpperCase(char) {
  const upper = char.toUpperCase();
  const lower = char.toLowerCase();
  const hasCase = upper !== lower;
  return hasCase && char === upper;
}
358
// Limits for a single compound part: character length bounds and the minimum
// dictionary frequency a part must have to be accepted.
const MIN_PART_LENGTH = 3;
const MIN_PART_FREQUENCY = 500;
const MAX_PART_LENGTH = 15;

/**
 * Splits `word` into two or more dictionary words using dynamic programming.
 *
 * `best[k]` holds the preferred split of the first `k` characters (or stays
 * undefined when none exists). A candidate part must be MIN_PART_LENGTH to
 * MAX_PART_LENGTH characters long, have a pronunciation, and have a frequency
 * of at least MIN_PART_FREQUENCY. Splits with fewer parts win; ties go to the
 * higher summed frequency.
 *
 * @param {string} word - Word to split; callers in this file pass it lower-cased.
 * @returns {string[] | null} The parts in order, or `null` when no split into
 *   at least two parts exists.
 */
function dpDecompose(word) {
  const total = word.length;
  const best = new Array(total + 1).fill(undefined);
  best[0] = { parts: [], score: 0 };

  for (let end = MIN_PART_LENGTH; end <= total; end++) {
    const firstStart = Math.max(0, end - MAX_PART_LENGTH);
    const lastStart = end - MIN_PART_LENGTH;
    for (let start = firstStart; start <= lastStart; start++) {
      // The whole word as a single part is not a decomposition.
      const coversWholeWord = start === 0 && end === total;
      const before = best[start];
      if (coversWholeWord || before === undefined) {
        continue;
      }

      const piece = word.slice(start, end);
      if (!lookupWord(piece)) {
        continue;
      }
      const frequency = (0, import_dictionary2.getWordFrequency)(piece);
      if (frequency === undefined || frequency < MIN_PART_FREQUENCY) {
        continue;
      }

      const partCount = before.parts.length + 1;
      const score = before.score + frequency;
      const incumbent = best[end];
      const isBetter =
        incumbent === undefined ||
        partCount < incumbent.parts.length ||
        (partCount === incumbent.parts.length && score > incumbent.score);
      if (isBetter) {
        best[end] = { parts: [...before.parts, piece], score };
      }
    }
  }

  const full = best[total];
  const hasSplit = full !== undefined && full.parts.length >= 2;
  return hasSplit ? full.parts : null;
}
399
/**
 * Translates `word` as a compound of dictionary words ("sunflower" -> sun + flower).
 *
 * Words shorter than 6 characters are never decomposed. Each part is rendered
 * separately; when the format preserves case, a part is capitalized if the
 * matching position in the original word starts with an upper-case character.
 * The rendered parts are joined with the format's join separator.
 *
 * @param {string} word - Word to translate, in its original casing.
 * @param {string} [format="ingglish"] - Output format.
 * @returns {string | null} The joined translation, or `null` when the word
 *   cannot be decomposed or a part has no pronunciation.
 */
function translateAsCompound(word, format = "ingglish") {
  const lowered = word.toLowerCase();
  const parts = lowered.length < 6 ? null : dpDecompose(lowered);
  if (!parts) {
    return null;
  }

  const rendered = [];
  let offset = 0;
  for (const part of parts) {
    const phonemes = lookupWord(part);
    if (!phonemes) {
      return null;
    }
    const plain = (0, import_phonemes3.arpabetToFormat)(phonemes, format);
    // Same span in the caller's spelling, used to carry over capitalization.
    const originalPart = word.slice(offset, offset + part.length);
    offset += part.length;

    const startsUpper =
      (0, import_phonemes3.getFormatPreservesCase)(format) &&
      originalPart.length > 0 &&
      isUpperCase(originalPart[0]);
    rendered.push(startsUpper ? capitalize(plain) : plain);
  }
  return rendered.join((0, import_phonemes3.getFormatJoinSeparator)(format));
}
427
/** Thin wrapper: forwards `word` to the dictionary's lookupPronunciation and returns its result. */
function lookupWord(word) {
  const { lookupPronunciation } = import_dictionary2;
  return lookupPronunciation(word);
}
430
+
431
+ // src/stemming.ts
432
+ var import_dictionary3 = require("@ingglish/dictionary");
433
+ var import_phonemes4 = require("@ingglish/phonemes");
434
// Stress-free consonant symbols after which "-ed"/"-s" take their voiceless form (T / S).
const VOICELESS = /* @__PURE__ */ new Set("CH F HH K P S SH T TH".split(" "));
// Stress-free consonant symbols after which "-s" takes the extra-syllable form (IH0 Z).
const SIBILANTS = /* @__PURE__ */ new Set("CH JH S SH Z ZH".split(" "));
436
/**
 * Picks the pronunciation of an "-ed" ending from the stem's final phoneme.
 *
 * @param lastPhoneme - Final phoneme of the stem; its stress marker is stripped first.
 * @returns {string[]} ["IH0", "D"] after T or D, ["T"] after any other
 *   voiceless consonant, ["D"] otherwise.
 */
function selectEdPhonemes(lastPhoneme) {
  const bare = (0, import_phonemes4.stripStress)(lastPhoneme);
  const endsInTOrD = bare === "T" || bare === "D";
  if (endsInTOrD) {
    return ["IH0", "D"];
  }
  return VOICELESS.has(bare) ? ["T"] : ["D"];
}
446
/**
 * Picks the pronunciation of an "-s"/"-es" ending from the stem's final phoneme.
 *
 * @param lastPhoneme - Final phoneme of the stem; its stress marker is stripped first.
 * @returns {string[]} ["IH0", "Z"] after a sibilant, ["S"] after any other
 *   voiceless consonant, ["Z"] otherwise.
 */
function selectSPhonemes(lastPhoneme) {
  const bare = (0, import_phonemes4.stripStress)(lastPhoneme);
  // Sibilants are checked first: S, SH and CH are also in VOICELESS.
  if (SIBILANTS.has(bare)) {
    return ["IH0", "Z"];
  }
  return VOICELESS.has(bare) ? ["S"] : ["Z"];
}
456
// Suffixes for which getStemVariants also tries the spelling-adjusted stems
// (added "e", dropped last letter, doubled last letter).
const INFLECTIONAL_SUFFIXES = /* @__PURE__ */ new Set(
  "ed er es est ification ify ifying ing or s".split(" ")
);
468
/**
 * Suffixes recognised by matchStemming, in the order they are tried, each
 * paired with the phonemes appended to the stem's pronunciation.
 *
 * A `null` pronunciation marks an allomorphic suffix ("ed", "es", "s") whose
 * sound is chosen from the stem's final phoneme at match time.
 */
const SUFFIX_PHONEMES = [
  // Long "-ify" family first, ahead of the shorter verb endings.
  ["ification", "IH0 F IH0 K EY1 SH AH0 N"],
  ["ifying", "IH0 F AY1 IH0 NG"],
  ["ify", "IH0 F AY1"],
  // Verb suffixes
  ["ing", "IH0 NG"],
  ["ed", null], // T / D / IH0 D, selected dynamically
  ["es", null], // same choice as "s"
  ["s", null], // S / Z / IH0 Z, selected dynamically
  // Noun suffixes
  ["tion", "SH AH0 N"],
  ["sion", "ZH AH0 N"],
  ["ness", "N AH0 S"],
  ["ment", "M AH0 N T"],
  ["ity", "IH0 T IY0"],
  ["er", "ER0"],
  ["or", "ER0"],
  ["ist", "IH0 S T"],
  ["ism", "IH0 Z AH0 M"],
  // Adjective suffixes
  ["ly", "L IY0"],
  ["ful", "F AH0 L"],
  ["less", "L AH0 S"],
  ["able", "AH0 B AH0 L"],
  ["ible", "AH0 B AH0 L"],
  ["ous", "AH0 S"],
  ["ive", "IH0 V"],
  ["al", "AH0 L"],
  ["ic", "IH0 K"],
  // Comparative/superlative
  ["est", "AH0 S T"],
  // Additional suffixes
  ["ally", "AH0 L IY0"],
  ["ology", "AA1 L AH0 JH IY0"],
  ["ize", "AY1 Z"],
  ["ise", "AY1 Z"],
].map(([suffix, spoken]) => ({
  phonemes: spoken === null ? null : spoken.split(" "),
  suffix,
}));
509
/**
 * Prefixes recognised by matchStemming, in the order they are tried, each
 * paired with the phonemes placed in front of the stem's pronunciation.
 */
const PREFIX_PHONEMES = [
  ["un", "AH0 N"],
  ["re", "R IY0"],
  ["pre", "P R IY0"],
  ["dis", "D IH0 S"],
  ["mis", "M IH0 S"],
  ["over", "OW1 V ER0"],
  ["under", "AH1 N D ER0"],
  ["out", "AW1 T"],
  ["anti", "AE1 N T IY0"],
  ["super", "S UW1 P ER0"],
].map(([prefix, spoken]) => ({ phonemes: spoken.split(" "), prefix }));
521
/**
 * Tries to pronounce `word` as a known stem plus one affix.
 *
 * Suffixes are tried first, in SUFFIX_PHONEMES order: the suffix is removed
 * and every spelling variant of the remainder (see getStemVariants) is looked
 * up until one has a pronunciation. Prefixes from PREFIX_PHONEMES are tried
 * afterwards, with the remainder looked up as-is. In both passes at least
 * three characters must remain once the affix is removed.
 *
 * @param {string} word - Word to analyse; matched in lower case.
 * @returns {{ phonemes: Array, stem: string, suffix: string }
 *   | { phonemes: Array, prefix: string, stem: string } | null}
 *   The combined pronunciation with the stem and affix used, or `null`.
 */
function matchStemming(word) {
  const lowered = word.toLowerCase();

  for (const { phonemes: suffixArpabet, suffix } of SUFFIX_PHONEMES) {
    const tooShort = lowered.length <= suffix.length + 2;
    if (tooShort || !lowered.endsWith(suffix)) {
      continue;
    }
    const stem = lowered.slice(0, -suffix.length);
    for (const variant of getStemVariants(stem, suffix)) {
      const baseArpabet = (0, import_dictionary3.lookupPronunciation)(variant);
      if (!baseArpabet) {
        continue;
      }
      const ending = resolveSuffixPhonemes(suffix, suffixArpabet, baseArpabet);
      return { phonemes: [...baseArpabet, ...ending], stem: variant, suffix };
    }
  }

  for (const { phonemes: prefixArpabet, prefix } of PREFIX_PHONEMES) {
    const tooShort = lowered.length <= prefix.length + 2;
    if (tooShort || !lowered.startsWith(prefix)) {
      continue;
    }
    const stem = lowered.slice(prefix.length);
    const baseArpabet = (0, import_dictionary3.lookupPronunciation)(stem);
    if (baseArpabet) {
      return { phonemes: [...prefixArpabet, ...baseArpabet], prefix, stem };
    }
  }

  return null;
}
554
/**
 * Renders `word` via stem + affix analysis, when matchStemming succeeds.
 *
 * @param {string} word - Word to translate.
 * @param {string} [format="ingglish"] - Output format for arpabetToFormat.
 * @returns The formatted pronunciation, or `null` when no stem was found.
 */
function translateWithStemming(word, format = "ingglish") {
  const analysis = matchStemming(word);
  return analysis === null
    ? null
    : (0, import_phonemes4.arpabetToFormat)(analysis.phonemes, format);
}
561
/**
 * Lists the spellings to look up for a stem left over after removing `suffix`,
 * in lookup order. The list may contain duplicates.
 *
 * Order: the stem itself; for INFLECTIONAL_SUFFIXES only, stem + "e"
 * (hoping -> hope), stem minus its last letter (running -> run) and stem with
 * its last letter doubled; then, for stems ending in "i", the stem with that
 * "i" turned into "y" (carri -> carry); finally stem + "y".
 *
 * @param {string} stem - Word with the suffix already removed.
 * @param {string} suffix - The suffix that was removed.
 * @returns {string[]} Candidate stems.
 */
function getStemVariants(stem, suffix) {
  const inflected = INFLECTIONAL_SUFFIXES.has(suffix)
    ? [
        `${stem}e`,
        stem.length > 1 ? stem.slice(0, -1) : stem,
        stem.length > 0 ? stem + stem.at(-1) : stem,
      ]
    : [];
  const iToY = stem.endsWith("i") ? [`${stem.slice(0, -1)}y`] : [];
  return [stem, ...inflected, ...iToY, `${stem}y`];
}
579
/**
 * Returns the phonemes to append for `suffix`.
 *
 * A non-null `suffixArpabet` is returned unchanged. Otherwise the ending is
 * chosen from the last phoneme of `baseArpabet`: the "-ed" rule for "ed", the
 * "-s" rule for every other suffix.
 *
 * @param {string} suffix - Matched suffix.
 * @param {Array | null} suffixArpabet - Fixed pronunciation, or null when allomorphic.
 * @param {Array} baseArpabet - Pronunciation of the stem.
 * @returns {Array} Phonemes for the suffix.
 */
function resolveSuffixPhonemes(suffix, suffixArpabet, baseArpabet) {
  if (suffixArpabet !== null) {
    return suffixArpabet;
  }
  const pickEnding = suffix === "ed" ? selectEdPhonemes : selectSPhonemes;
  return pickEnding(baseArpabet.at(-1));
}
589
+
590
+ // src/index.ts
591
/**
 * Explains which fallback strategy translateUnknown would use for `word` and
 * returns the intermediate data behind it.
 *
 * Words containing the same character three times in a row, or containing
 * none of the letters a/e/i/o/u/y, are rejected up front with `null`.
 *
 * NOTE(review): that guard runs before strategy selection, so vowel-less
 * initialisms such as "html" yield `null` here even though translateUnknown
 * spells them out — confirm this is intended.
 *
 * @param {string} word - Word to diagnose.
 * @returns {object | null | undefined} An object tagged with `strategy`
 *   ("british", "compound", "custom", "g2p", "initialism" or "stemming") plus
 *   strategy-specific fields; `null` for rejected words; `undefined` if the
 *   strategy is none of the above.
 */
function diagnoseUnknown(word) {
  const looksLikeNoise = /(.)\1\1/.test(word) || !/[aeiouy]/i.test(word);
  if (looksLikeNoise) {
    return null;
  }

  const { strategy } = translateUnknownCore(word, "ingglish");

  if (strategy === "british") {
    const respelled = matchBritish(word);
    return {
      americanSpelling: respelled.american,
      phonemes: respelled.phonemes,
      strategy: "british",
    };
  }
  if (strategy === "compound") {
    return { parts: dpDecompose(word.toLowerCase()), strategy: "compound" };
  }
  if (strategy === "custom") {
    return {
      phonemes: (0, import_dictionary4.getCustomPronunciation)(word),
      strategy: "custom",
    };
  }
  if (strategy === "g2p") {
    return { strategy: "g2p", trace: (0, import_g2p.wordToArpabetTraced)(word) };
  }
  if (strategy === "initialism") {
    return { strategy: "initialism" };
  }
  if (strategy === "stemming") {
    const analysis = matchStemming(word);
    return {
      prefix: analysis.prefix,
      stem: analysis.stem,
      strategy: "stemming",
      suffix: analysis.suffix,
    };
  }
  return undefined;
}
619
/**
 * Translates a word that has no direct dictionary entry, using the fallback
 * cascade in translateUnknownCore, and returns only the translated text.
 *
 * @param {string} word - Word to translate.
 * @param {string} [format="ingglish"] - Output format.
 */
function translateUnknown(word, format = "ingglish") {
  const { translated } = translateUnknownCore(word, format);
  return translated;
}
622
/**
 * Runs the fallback cascade for a word and reports which strategy produced
 * the translation.
 *
 * Order: custom pronunciation, known initialism (spelled out letter by
 * letter), then British respelling, compound splitting and stemming — each of
 * those three is accepted only when it returns a non-null, non-empty result —
 * and finally grapheme-to-phoneme conversion, which always answers.
 *
 * @param {string} word - Word to translate.
 * @param {string} format - Output format.
 * @returns {{ strategy: string, translated: * }} Strategy name and its output.
 */
function translateUnknownCore(word, format) {
  const customPhonemes = (0, import_dictionary4.getCustomPronunciation)(word);
  if (customPhonemes !== undefined) {
    return {
      strategy: "custom",
      translated: (0, import_phonemes5.arpabetToFormat)(customPhonemes, format),
    };
  }
  if (isInitialism(word)) {
    return { strategy: "initialism", translated: translateAsAcronym(word, format) };
  }

  // Heuristic strategies, attempted lazily in priority order.
  const heuristics = [
    ["british", translateAsBritish],
    ["compound", translateAsCompound],
    ["stemming", translateWithStemming],
  ];
  for (const [strategy, attempt] of heuristics) {
    const translated = attempt(word, format);
    if (translated !== null && translated.length > 0) {
      return { strategy, translated };
    }
  }

  return { strategy: "g2p", translated: (0, import_g2p.wordToPhonetic)(word, format) };
}
644
+ // Annotate the CommonJS export names for ESM import in node: the `0 &&` guard short-circuits, so this assignment never runs and only repeats the export names as a plain object literal; the real exports are installed earlier via `module.exports = __toCommonJS(index_exports)`.
645
+ 0 && (module.exports = { // keep this list in sync with the getters registered on index_exports
646
+ KNOWN_INITIALISMS,
647
+ LETTER_PHONEMES,
648
+ diagnoseUnknown,
649
+ isInitialism,
650
+ matchBritish,
651
+ parseInitialismWithSuffix,
652
+ translateAsAcronym,
653
+ translateUnknown
654
+ });