npm - @ingglish/fallback - Versions diffs - 0.1.0 - Mend

@ingglish/fallback 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js ADDED Viewed

@@ -0,0 +1,626 @@
+// src/index.ts
+import { getCustomPronunciation } from "@ingglish/dictionary";
+import { wordToArpabetTraced, wordToPhonetic } from "@ingglish/g2p";
+import { arpabetToFormat as arpabetToFormat5 } from "@ingglish/phonemes";
+// src/acronyms.ts
+import { arpabetToFormat } from "@ingglish/phonemes";
+var INITIALISM_EXPANSIONS = {
+  ac: ["alternating", "current"],
+  ad: ["anno", "domini"],
+  afk: ["away", "from", "keyboard"],
+  // AI / ML
+  ai: ["artificial", "intelligence"],
+  aka: ["also", "known", "as"],
+  am: ["ante", "meridiem"],
+  api: ["application", "programming", "interface"],
+  asap: ["as", "soon", "as", "possible"],
+  atm: ["automated", "teller", "machine"],
+  aws: ["amazon", "web", "services"],
+  b2b: ["business", "to", "business"],
+  b2c: ["business", "to", "consumer"],
+  bc: ["before", "christ"],
+  bdd: ["behavior", "driven", "development"],
+  brb: ["be", "right", "back"],
+  btw: ["by", "the", "way"],
+  byob: ["bring", "your", "own", "bottle"],
+  cd: ["continuous", "deployment"],
+  cdn: ["content", "delivery", "network"],
+  // Business / titles
+  ceo: ["chief", "executive", "officer"],
+  cfo: ["chief", "financial", "officer"],
+  ci: ["continuous", "integration"],
+  cia: ["central", "intelligence", "agency"],
+  cio: ["chief", "information", "officer"],
+  cli: ["command", "line", "interface"],
+  cmo: ["chief", "marketing", "officer"],
+  cms: ["content", "management", "system"],
+  cnn: ["convolutional", "neural", "network"],
+  coo: ["chief", "operating", "officer"],
+  cors: ["cross", "origin", "resource", "sharing"],
+  cpr: ["cardiopulmonary", "resuscitation"],
+  cpu: ["central", "processing", "unit"],
+  crm: ["customer", "relationship", "management"],
+  crud: ["create", "read", "update", "delete"],
+  cso: ["chief", "security", "officer"],
+  css: ["cascading", "style", "sheets"],
+  csv: ["comma", "separated", "values"],
+  cto: ["chief", "technology", "officer"],
+  dc: ["direct", "current"],
+  ddos: ["distributed", "denial", "of", "service"],
+  dea: ["drug", "enforcement", "administration"],
+  diy: ["do", "it", "yourself"],
+  dj: ["disc", "jockey"],
+  dmv: ["department", "of", "motor", "vehicles"],
+  // Medical / science
+  dna: ["deoxyribonucleic", "acid"],
+  dns: ["domain", "name", "system"],
+  eod: ["end", "of", "day"],
+  epa: ["environmental", "protection", "agency"],
+  er: ["emergency", "room"],
+  erp: ["enterprise", "resource", "planning"],
+  eta: ["estimated", "time", "of", "arrival"],
+  etl: ["extract", "transform", "load"],
+  eu: ["european", "union"],
+  faq: ["frequently", "asked", "questions"],
+  // Government / organizations
+  fbi: ["federal", "bureau", "of", "investigation"],
+  fda: ["food", "and", "drug", "administration"],
+  ftp: ["file", "transfer", "protocol"],
+  fyi: ["for", "your", "information"],
+  gan: ["generative", "adversarial", "network"],
+  gcp: ["google", "cloud", "platform"],
+  gif: ["graphics", "interchange", "format"],
+  gps: ["global", "positioning", "system"],
+  gpt: ["generative", "pre-trained", "transformer"],
+  gpu: ["graphics", "processing", "unit"],
+  gui: ["graphical", "user", "interface"],
+  hdd: ["hard", "disk", "drive"],
+  hiv: ["human", "immunodeficiency", "virus"],
+  hr: ["human", "resources"],
+  html: ["hypertext", "markup", "language"],
+  http: ["hypertext", "transfer", "protocol"],
+  https: ["hypertext", "transfer", "protocol", "secure"],
+  iaas: ["infrastructure", "as", "a", "service"],
+  icu: ["intensive", "care", "unit"],
+  // General / common
+  id: ["identification"],
+  ide: ["integrated", "development", "environment"],
+  idk: ["i", "don't", "know"],
+  imo: ["in", "my", "opinion"],
+  io: ["input", "output"],
+  ip: ["internet", "protocol"],
+  iq: ["intelligence", "quotient"],
+  irs: ["internal", "revenue", "service"],
+  isp: ["internet", "service", "provider"],
+  it: ["information", "technology"],
+  jpeg: ["joint", "photographic", "experts", "group"],
+  // Media
+  jpg: ["joint", "photographic", "experts", "group"],
+  json: ["javascript", "object", "notation"],
+  jwt: ["json", "web", "token"],
+  kpi: ["key", "performance", "indicator"],
+  // Networking
+  lan: ["local", "area", "network"],
+  lcd: ["liquid", "crystal", "display"],
+  led: ["light", "emitting", "diode"],
+  llm: ["large", "language", "model"],
+  mc: ["master", "of", "ceremonies"],
+  // Security
+  mfa: ["multi", "factor", "authentication"],
+  ml: ["machine", "learning"],
+  mp3: ["moving", "picture", "experts", "group", "audio", "layer"],
+  mp4: ["moving", "picture", "experts", "group"],
+  mph: ["miles", "per", "hour"],
+  mri: ["magnetic", "resonance", "imaging"],
+  mvp: ["minimum", "viable", "product"],
+  nasa: ["national", "aeronautics", "space", "administration"],
+  // Acronyms pronounced as words (pass through unchanged like initialisms)
+  nato: ["north", "atlantic", "treaty", "organization"],
+  nda: ["non", "disclosure", "agreement"],
+  nic: ["network", "interface", "card"],
+  nlp: ["natural", "language", "processing"],
+  nosql: ["not", "only", "sql"],
+  nsa: ["national", "security", "agency"],
+  nsfw: ["not", "safe", "for", "work"],
+  nyc: ["new", "york", "city"],
+  omg: ["oh", "my", "god"],
+  ooo: ["out", "of", "office"],
+  // Development
+  oop: ["object", "oriented", "programming"],
+  orm: ["object", "relational", "mapping"],
+  os: ["operating", "system"],
+  otp: ["one", "time", "password"],
+  paas: ["platform", "as", "a", "service"],
+  pc: ["personal", "computer"],
+  pdf: ["portable", "document", "format"],
+  php: ["hypertext", "preprocessor"],
+  pm: ["post", "meridiem"],
+  png: ["portable", "network", "graphics"],
+  pov: ["point", "of", "view"],
+  pr: ["public", "relations"],
+  pto: ["paid", "time", "off"],
+  qa: ["quality", "assurance"],
+  rag: ["retrieval", "augmented", "generation"],
+  ram: ["random", "access", "memory"],
+  rfp: ["request", "for", "proposal"],
+  rip: ["rest", "in", "peace"],
+  rna: ["ribonucleic", "acid"],
+  rnn: ["recurrent", "neural", "network"],
+  roi: ["return", "on", "investment"],
+  rom: ["read", "only", "memory"],
+  rpm: ["revolutions", "per", "minute"],
+  rsvp: ["please", "respond"],
+  rv: ["recreational", "vehicle"],
+  // Cloud / services
+  saas: ["software", "as", "a", "service"],
+  sdk: ["software", "development", "kit"],
+  sftp: ["secure", "file", "transfer", "protocol"],
+  sla: ["service", "level", "agreement"],
+  // Database
+  sql: ["structured", "query", "language"],
+  ssd: ["solid", "state", "drive"],
+  ssh: ["secure", "shell"],
+  ssl: ["secure", "sockets", "layer"],
+  suv: ["sport", "utility", "vehicle"],
+  svg: ["scalable", "vector", "graphics"],
+  tba: ["to", "be", "announced"],
+  tbd: ["to", "be", "determined"],
+  tcp: ["transmission", "control", "protocol"],
+  tdd: ["test", "driven", "development"],
+  tldr: ["too", "long", "didn't", "read"],
+  tls: ["transport", "layer", "security"],
+  tv: ["television"],
+  uat: ["user", "acceptance", "testing"],
+  udp: ["user", "datagram", "protocol"],
+  // User interface / experience
+  ui: ["user", "interface"],
+  uk: ["united", "kingdom"],
+  un: ["united", "nations"],
+  uri: ["uniform", "resource", "identifier"],
+  // Tech / web
+  url: ["uniform", "resource", "locator"],
+  us: ["united", "states"],
+  usa: ["united", "states", "of", "america"],
+  // Hardware
+  usb: ["universal", "serial", "bus"],
+  uv: ["ultraviolet"],
+  ux: ["user", "experience"],
+  vp: ["vice", "president"],
+  vpn: ["virtual", "private", "network"],
+  wan: ["wide", "area", "network"],
+  xml: ["extensible", "markup", "language"],
+  xss: ["cross", "site", "scripting"]
+};
+var MAX_INITIALISM_LENGTH = 5;
+var LETTER_PHONEMES = {
+  a: ["EY1"],
+  b: ["B", "IY1"],
+  c: ["S", "IY1"],
+  d: ["D", "IY1"],
+  e: ["IY1"],
+  f: ["EH1", "F"],
+  g: ["JH", "IY1"],
+  h: ["EY1", "CH"],
+  i: ["AY1"],
+  j: ["JH", "EY1"],
+  k: ["K", "EY1"],
+  l: ["EH1", "L"],
+  m: ["EH1", "M"],
+  n: ["EH1", "N"],
+  o: ["OW1"],
+  p: ["P", "IY1"],
+  q: ["K", "Y", "UW1"],
+  r: ["AA1", "R"],
+  s: ["EH1", "S"],
+  t: ["T", "IY1"],
+  u: ["Y", "UW1"],
+  v: ["V", "IY1"],
+  w: ["D", "AH1", "B", "AH0", "L", "Y", "UW0"],
+  x: ["EH1", "K", "S"],
+  y: ["W", "AY1"],
+  z: ["Z", "IY1"]
+};
+var KNOWN_INITIALISMS = new Set(Object.keys(INITIALISM_EXPANSIONS));
+function isInitialism(word) {
+  if (word.length > MAX_INITIALISM_LENGTH) {
+    return false;
+  }
+  return KNOWN_INITIALISMS.has(word.toLowerCase());
+}
+var INITIALISM_SUFFIXES = ["'s", "s"];
+function parseInitialismWithSuffix(word) {
+  for (const suffix of INITIALISM_SUFFIXES) {
+    if (word.length > suffix.length && word.endsWith(suffix)) {
+      const base = word.slice(0, -suffix.length);
+      if (isInitialism(base)) {
+        return { base, suffix };
+      }
+    }
+  }
+  return null;
+}
+function translateAsAcronym(word, format = "ingglish") {
+  const arpabet = [];
+  for (const char of word.toLowerCase()) {
+    const letterArpabet = LETTER_PHONEMES[char];
+    if (letterArpabet !== void 0) {
+      arpabet.push(...letterArpabet);
+    }
+  }
+  return arpabetToFormat(arpabet, format);
+}
+// src/british.ts
+import { lookupPronunciation } from "@ingglish/dictionary";
+import { arpabetToFormat as arpabetToFormat2 } from "@ingglish/phonemes";
+var BRITISH_TO_AMERICAN = [
+  // -isation → -ization (must come before -ise)
+  { pattern: /isation$/, replacement: "ization" },
+  // -ise → -ize (realise→realize, organise→organize)
+  { pattern: /ise$/, replacement: "ize" },
+  // -our → -or (colour→color, favour→favor)
+  { pattern: /our$/, replacement: "or" },
+  // -oured → -ored (coloured→colored, favoured→favored)
+  { pattern: /oured$/, replacement: "ored" },
+  // -ouring → -oring (colouring→coloring)
+  { pattern: /ouring$/, replacement: "oring" },
+  // -ourable → -orable (favourable→favorable)
+  { pattern: /ourable$/, replacement: "orable" },
+  // -re → -er (centre→center, theatre→theater)
+  // Only after consonants to avoid matching normal -re words
+  { pattern: /([a-z])re$/, replacement: "$1er" },
+  // -lled → -led (travelled→traveled, cancelled→canceled)
+  { pattern: /lled$/, replacement: "led" },
+  // -lling → -ling (travelling→traveling, cancelling→canceling)
+  { pattern: /lling$/, replacement: "ling" },
+  // -ller → -ler (traveller→traveler)
+  { pattern: /ller$/, replacement: "ler" },
+  // -ence → -ense (defence→defense, offence→offense)
+  { pattern: /ence$/, replacement: "ense" },
+  // -ogue → -og (catalogue→catalog, dialogue→dialog)
+  { pattern: /ogue$/, replacement: "og" },
+  // -ae- → -e- (anaesthetic→anesthetic, paediatric→pediatric)
+  { pattern: /ae/, replacement: "e" },
+  // -oe- → -e- (foetus→fetus, oestrogen→estrogen)
+  { pattern: /oe/, replacement: "e" },
+  // -ey → -y (curtsey→curtsy)
+  { pattern: /ey$/, replacement: "y" },
+  // grey → gray
+  { pattern: /grey/, replacement: "gray" }
+];
+function matchBritish(word) {
+  const lower = word.toLowerCase();
+  for (const { pattern, replacement } of BRITISH_TO_AMERICAN) {
+    if (pattern.test(lower)) {
+      const american = lower.replace(pattern, replacement);
+      if (american !== lower) {
+        const phonemes = lookupPronunciation(american);
+        if (phonemes) {
+          return { american, phonemes };
+        }
+      }
+    }
+  }
+  return null;
+}
+function translateAsBritish(word, format = "ingglish") {
+  const match = matchBritish(word);
+  if (match === null) {
+    return null;
+  }
+  return arpabetToFormat2(match.phonemes, format);
+}
+// src/compounds.ts
+import { lookupPronunciation as lookupPronunciation2, getWordFrequency } from "@ingglish/dictionary";
+import {
+  arpabetToFormat as arpabetToFormat3,
+  getFormatJoinSeparator,
+  getFormatPreservesCase
+} from "@ingglish/phonemes";
+function capitalize(str) {
+  if (str.length === 0) {
+    return str;
+  }
+  return str.charAt(0).toUpperCase() + str.slice(1);
+}
+function isUpperCase(char) {
+  return char === char.toUpperCase() && char !== char.toLowerCase();
+}
+var MIN_PART_LENGTH = 3;
+var MIN_PART_FREQUENCY = 500;
+var MAX_PART_LENGTH = 15;
+function dpDecompose(word) {
+  const n = word.length;
+  const dp = Array.from({
+    length: n + 1
+  });
+  dp[0] = { parts: [], score: 0 };
+  for (let i = MIN_PART_LENGTH; i <= n; i++) {
+    for (let j = Math.max(0, i - MAX_PART_LENGTH); j <= i - MIN_PART_LENGTH; j++) {
+      if (j === 0 && i === n) {
+        continue;
+      }
+      const prev = dp[j];
+      if (prev === void 0) {
+        continue;
+      }
+      const chunk = word.slice(j, i);
+      const phonemes = lookupWord(chunk);
+      if (!phonemes) {
+        continue;
+      }
+      const freq = getWordFrequency(chunk);
+      if (freq === void 0 || freq < MIN_PART_FREQUENCY) {
+        continue;
+      }
+      const newScore = prev.score + freq;
+      const newParts = prev.parts.length + 1;
+      const current = dp[i];
+      if (current === void 0 || newParts < current.parts.length || newParts === current.parts.length && newScore > current.score) {
+        dp[i] = { parts: [...prev.parts, chunk], score: newScore };
+      }
+    }
+  }
+  const result = dp[n];
+  if (result === void 0 || result.parts.length < 2) {
+    return null;
+  }
+  return result.parts;
+}
+function translateAsCompound(word, format = "ingglish") {
+  const lowerWord = word.toLowerCase();
+  if (lowerWord.length < 6) {
+    return null;
+  }
+  const parts = dpDecompose(lowerWord);
+  if (!parts) {
+    return null;
+  }
+  const translations = [];
+  let pos = 0;
+  for (const part of parts) {
+    const phonemes = lookupWord(part);
+    if (!phonemes) {
+      return null;
+    }
+    let translated = arpabetToFormat3(phonemes, format);
+    if (getFormatPreservesCase(format)) {
+      const originalPart = word.slice(pos, pos + part.length);
+      if (originalPart.length > 0 && isUpperCase(originalPart[0])) {
+        translated = capitalize(translated);
+      }
+    }
+    translations.push(translated);
+    pos += part.length;
+  }
+  return translations.join(getFormatJoinSeparator(format));
+}
+function lookupWord(word) {
+  return lookupPronunciation2(word);
+}
+// src/stemming.ts
+import { lookupPronunciation as lookupPronunciation3 } from "@ingglish/dictionary";
+import { arpabetToFormat as arpabetToFormat4, stripStress } from "@ingglish/phonemes";
+var VOICELESS = /* @__PURE__ */ new Set(["CH", "F", "HH", "K", "P", "S", "SH", "T", "TH"]);
+var SIBILANTS = /* @__PURE__ */ new Set(["CH", "JH", "S", "SH", "Z", "ZH"]);
+function selectEdPhonemes(lastPhoneme) {
+  const base = stripStress(lastPhoneme);
+  if (base === "T" || base === "D") {
+    return ["IH0", "D"];
+  }
+  if (VOICELESS.has(base)) {
+    return ["T"];
+  }
+  return ["D"];
+}
+function selectSPhonemes(lastPhoneme) {
+  const base = stripStress(lastPhoneme);
+  if (SIBILANTS.has(base)) {
+    return ["IH0", "Z"];
+  }
+  if (VOICELESS.has(base)) {
+    return ["S"];
+  }
+  return ["Z"];
+}
+var INFLECTIONAL_SUFFIXES = /* @__PURE__ */ new Set([
+  "ed",
+  "er",
+  "es",
+  "est",
+  "ification",
+  "ify",
+  "ifying",
+  "ing",
+  "or",
+  "s"
+]);
+var SUFFIX_PHONEMES = [
+  // Long suffixes first (must come before shorter matches: -ification before -tion, -ifying before -ing)
+  { phonemes: ["IH0", "F", "IH0", "K", "EY1", "SH", "AH0", "N"], suffix: "ification" },
+  { phonemes: ["IH0", "F", "AY1", "IH0", "NG"], suffix: "ifying" },
+  { phonemes: ["IH0", "F", "AY1"], suffix: "ify" },
+  // Verb suffixes
+  { phonemes: ["IH0", "NG"], suffix: "ing" },
+  { phonemes: null, suffix: "ed" },
+  // allomorph: T/D/IH0 D (selected dynamically)
+  { phonemes: null, suffix: "es" },
+  // allomorph: same as -s (S/Z/IH0 Z based on stem)
+  { phonemes: null, suffix: "s" },
+  // allomorph: S/Z/IH0 Z (selected dynamically)
+  // Noun suffixes
+  { phonemes: ["SH", "AH0", "N"], suffix: "tion" },
+  { phonemes: ["ZH", "AH0", "N"], suffix: "sion" },
+  { phonemes: ["N", "AH0", "S"], suffix: "ness" },
+  { phonemes: ["M", "AH0", "N", "T"], suffix: "ment" },
+  { phonemes: ["IH0", "T", "IY0"], suffix: "ity" },
+  { phonemes: ["ER0"], suffix: "er" },
+  { phonemes: ["ER0"], suffix: "or" },
+  { phonemes: ["IH0", "S", "T"], suffix: "ist" },
+  { phonemes: ["IH0", "Z", "AH0", "M"], suffix: "ism" },
+  // Adjective suffixes
+  { phonemes: ["L", "IY0"], suffix: "ly" },
+  { phonemes: ["F", "AH0", "L"], suffix: "ful" },
+  { phonemes: ["L", "AH0", "S"], suffix: "less" },
+  { phonemes: ["AH0", "B", "AH0", "L"], suffix: "able" },
+  { phonemes: ["AH0", "B", "AH0", "L"], suffix: "ible" },
+  { phonemes: ["AH0", "S"], suffix: "ous" },
+  { phonemes: ["IH0", "V"], suffix: "ive" },
+  { phonemes: ["AH0", "L"], suffix: "al" },
+  { phonemes: ["IH0", "K"], suffix: "ic" },
+  // Comparative/superlative
+  { phonemes: ["AH0", "S", "T"], suffix: "est" },
+  // Additional suffixes
+  { phonemes: ["AH0", "L", "IY0"], suffix: "ally" },
+  { phonemes: ["AA1", "L", "AH0", "JH", "IY0"], suffix: "ology" },
+  { phonemes: ["AY1", "Z"], suffix: "ize" },
+  { phonemes: ["AY1", "Z"], suffix: "ise" }
+];
+var PREFIX_PHONEMES = [
+  { phonemes: ["AH0", "N"], prefix: "un" },
+  { phonemes: ["R", "IY0"], prefix: "re" },
+  { phonemes: ["P", "R", "IY0"], prefix: "pre" },
+  { phonemes: ["D", "IH0", "S"], prefix: "dis" },
+  { phonemes: ["M", "IH0", "S"], prefix: "mis" },
+  { phonemes: ["OW1", "V", "ER0"], prefix: "over" },
+  { phonemes: ["AH1", "N", "D", "ER0"], prefix: "under" },
+  { phonemes: ["AW1", "T"], prefix: "out" },
+  { phonemes: ["AE1", "N", "T", "IY0"], prefix: "anti" },
+  { phonemes: ["S", "UW1", "P", "ER0"], prefix: "super" }
+];
+function matchStemming(word) {
+  const lowerWord = word.toLowerCase();
+  for (const { phonemes: suffixArpabet, suffix } of SUFFIX_PHONEMES) {
+    if (lowerWord.endsWith(suffix) && lowerWord.length > suffix.length + 2) {
+      const stem = lowerWord.slice(0, -suffix.length);
+      for (const variant of getStemVariants(stem, suffix)) {
+        const baseArpabet = lookupPronunciation3(variant);
+        if (baseArpabet) {
+          const resolvedSuffix = resolveSuffixPhonemes(suffix, suffixArpabet, baseArpabet);
+          return {
+            phonemes: [...baseArpabet, ...resolvedSuffix],
+            stem: variant,
+            suffix
+          };
+        }
+      }
+    }
+  }
+  for (const { phonemes: prefixArpabet, prefix } of PREFIX_PHONEMES) {
+    if (lowerWord.startsWith(prefix) && lowerWord.length > prefix.length + 2) {
+      const stem = lowerWord.slice(prefix.length);
+      const baseArpabet = lookupPronunciation3(stem);
+      if (baseArpabet) {
+        return {
+          phonemes: [...prefixArpabet, ...baseArpabet],
+          prefix,
+          stem
+        };
+      }
+    }
+  }
+  return null;
+}
+function translateWithStemming(word, format = "ingglish") {
+  const match = matchStemming(word);
+  if (match === null) {
+    return null;
+  }
+  return arpabetToFormat4(match.phonemes, format);
+}
+function getStemVariants(stem, suffix) {
+  const variants = [stem];
+  if (INFLECTIONAL_SUFFIXES.has(suffix)) {
+    variants.push(
+      stem + "e",
+      // hoping -> hope
+      stem.length > 1 ? stem.slice(0, -1) : stem,
+      // running -> run (double consonant)
+      stem.length > 0 ? stem + stem.at(-1) : stem
+      // big -> bigg (for adding -er)
+    );
+  }
+  if (stem.endsWith("i")) {
+    variants.push(stem.slice(0, -1) + "y");
+  }
+  variants.push(stem + "y");
+  return variants;
+}
+function resolveSuffixPhonemes(suffix, suffixArpabet, baseArpabet) {
+  if (suffixArpabet !== null) {
+    return suffixArpabet;
+  }
+  const lastPhoneme = baseArpabet.at(-1);
+  if (suffix === "ed") {
+    return selectEdPhonemes(lastPhoneme);
+  }
+  return selectSPhonemes(lastPhoneme);
+}
+// src/index.ts
+function diagnoseUnknown(word) {
+  if (/(.)\1\1/.test(word) || !/[aeiouy]/i.test(word)) {
+    return null;
+  }
+  const { strategy } = translateUnknownCore(word, "ingglish");
+  switch (strategy) {
+    case "british": {
+      const m = matchBritish(word);
+      return { americanSpelling: m.american, phonemes: m.phonemes, strategy: "british" };
+    }
+    case "compound": {
+      return { parts: dpDecompose(word.toLowerCase()), strategy: "compound" };
+    }
+    case "custom": {
+      return { phonemes: getCustomPronunciation(word), strategy: "custom" };
+    }
+    case "g2p": {
+      return { strategy: "g2p", trace: wordToArpabetTraced(word) };
+    }
+    case "initialism": {
+      return { strategy: "initialism" };
+    }
+    case "stemming": {
+      const m = matchStemming(word);
+      return { prefix: m.prefix, stem: m.stem, strategy: "stemming", suffix: m.suffix };
+    }
+  }
+}
+function translateUnknown(word, format = "ingglish") {
+  return translateUnknownCore(word, format).translated;
+}
+function translateUnknownCore(word, format) {
+  const customPhonemes = getCustomPronunciation(word);
+  if (customPhonemes !== void 0) {
+    return { strategy: "custom", translated: arpabetToFormat5(customPhonemes, format) };
+  }
+  if (isInitialism(word)) {
+    return { strategy: "initialism", translated: translateAsAcronym(word, format) };
+  }
+  const britishResult = translateAsBritish(word, format);
+  if (britishResult !== null && britishResult.length > 0) {
+    return { strategy: "british", translated: britishResult };
+  }
+  const compoundResult = translateAsCompound(word, format);
+  if (compoundResult !== null && compoundResult.length > 0) {
+    return { strategy: "compound", translated: compoundResult };
+  }
+  const stemmedResult = translateWithStemming(word, format);
+  if (stemmedResult !== null && stemmedResult.length > 0) {
+    return { strategy: "stemming", translated: stemmedResult };
+  }
+  return { strategy: "g2p", translated: wordToPhonetic(word, format) };
+}
+export {
+  KNOWN_INITIALISMS,
+  LETTER_PHONEMES,
+  diagnoseUnknown,
+  isInitialism,
+  matchBritish,
+  parseInitialismWithSuffix,
+  translateAsAcronym,
+  translateUnknown
+};

package/package.json ADDED Viewed

@@ -0,0 +1,54 @@
+{
+  "name": "@ingglish/fallback",
+  "version": "0.1.0",
+  "description": "Unknown word translation strategies for Ingglish (G2P, stemming, compounds, etc.)",
+  "type": "module",
+  "main": "./dist/index.js",
+  "module": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "source": "./src/index.ts",
+      "import": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      },
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      }
+    }
+  },
+  "files": [
+    "dist"
+  ],
+  "sideEffects": false,
+  "engines": {
+    "node": ">=16"
+  },
+  "scripts": {
+    "build": "tsup",
+    "build:fast": "tsup src/index.ts --format esm",
+    "lint": "eslint --cache src",
+    "test": "vitest run --no-color",
+    "bench": "vitest bench --no-color",
+    "prepublishOnly": "npm run build"
+  },
+  "dependencies": {
+    "@ingglish/phonemes": "^0.1.0",
+    "@ingglish/ipa": "^0.1.0",
+    "@ingglish/dictionary": "^0.1.0",
+    "@ingglish/g2p": "^0.1.0"
+  },
+  "author": "Paul Tarjan",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ptarjan/ingglish.git",
+    "directory": "packages/fallback"
+  },
+  "homepage": "https://github.com/ptarjan/ingglish#readme",
+  "bugs": {
+    "url": "https://github.com/ptarjan/ingglish/issues"
+  }
+}