npm - @ingglish/phonemes - Versions diffs - 0.1.0 - Mend

@ingglish/phonemes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js ADDED Viewed

@@ -0,0 +1,649 @@
+// src/arpabet.ts
+// The 15 ARPAbet vowel symbols, written without a stress digit.
+// The comment after each entry gives example words for that vowel.
+var ARPABET_VOWELS = [
+  "AA",
+  // father, hot
+  "AE",
+  // cat, bat
+  "AH",
+  // but, cup (stressed) / schwa (unstressed)
+  "AO",
+  // thought, law
+  "AW",
+  // cow, how
+  "AY",
+  // my, time
+  "EH",
+  // bed, red
+  "ER",
+  // bird, her
+  "EY",
+  // say, day
+  "IH",
+  // bit, sit
+  "IY",
+  // bee, see
+  "OW",
+  // go, show
+  "OY",
+  // boy, toy
+  "UH",
+  // book, put
+  "UW"
+  // too, blue
+];
+// The 24 ARPAbet consonant symbols, grouped by manner of articulation.
+// A comment after an entry gives an example word for that consonant.
+var ARPABET_CONSONANTS = [
+  // Stops
+  "B",
+  "D",
+  "G",
+  "K",
+  "P",
+  "T",
+  // Fricatives
+  "DH",
+  // the, this
+  "F",
+  "HH",
+  // hat
+  "S",
+  "SH",
+  // ship
+  "TH",
+  // think
+  "V",
+  "Z",
+  "ZH",
+  // measure
+  // Affricates
+  "CH",
+  // chat
+  "JH",
+  // just
+  // Nasals
+  "M",
+  "N",
+  "NG",
+  // sing
+  // Liquids
+  "L",
+  "R",
+  // Glides
+  "W",
+  "Y"
+];
+// Set views of the two symbol lists, for membership tests (isVowel uses VOWELS_SET).
+var VOWELS_SET = new Set(ARPABET_VOWELS);
+var CONSONANTS_SET = new Set(ARPABET_CONSONANTS);
+// Matches a stress digit (0, 1 or 2) at the very end of a phoneme string.
+var STRESS_MARKER_REGEX = /[012]$/;
+/**
+ * Reads the stress digit at the end of an ARPAbet phoneme.
+ *
+ * @param {string} phoneme - Phoneme symbol such as "AH0" or "K".
+ * @returns {number | null} 0, 1 or 2 when the last character is that digit, otherwise null.
+ */
+function getStress(phoneme) {
+  const code = phoneme.codePointAt(phoneme.length - 1);
+  // 48..50 are the code points of "0".."2". For an empty string `code` is
+  // undefined, which fails both comparisons.
+  const endsWithStressDigit = code >= 48 && code <= 50;
+  return endsWithStressDigit ? code - 48 : null;
+}
+/**
+ * Tells whether a phoneme is one of the ARPAbet vowels, ignoring any stress digit.
+ *
+ * @param {string} phoneme - Phoneme symbol, with or without a trailing stress digit.
+ * @returns {boolean} True when the stress-free symbol is in VOWELS_SET.
+ */
+function isVowel(phoneme) {
+  return VOWELS_SET.has(stripStress(phoneme));
+}
+/**
+ * Removes a trailing stress digit (0, 1 or 2) from a phoneme.
+ *
+ * @param {string} phoneme - Phoneme symbol such as "AH0" or "K".
+ * @returns {string} The symbol without its last character when that character is
+ *   "0", "1" or "2"; otherwise the input unchanged.
+ */
+function stripStress(phoneme) {
+  const code = phoneme.codePointAt(phoneme.length - 1);
+  // 48..50 are the code points of "0".."2".
+  const hasStressDigit = code >= 48 && code <= 50;
+  return hasStressDigit ? phoneme.slice(0, -1) : phoneme;
+}
+// src/phonotactics.ts
+// Consonant sequences accepted by isValidOnset, each written as ARPAbet
+// symbols joined by single spaces (the key format isValidOnset builds).
+// The entries are kept in roughly alphabetical order, so these categories are
+// interleaved rather than grouped:
+//   - single consonants (every consonant except NG)
+//   - consonant + liquid (L, R)
+//   - consonant + glide (W, Y)
+//   - S + consonant
+//   - S + stop + liquid/glide (three consonants)
+var VALID_ONSETS = /* @__PURE__ */ new Set([
+  "B",
+  "B L",
+  "B R",
+  "B Y",
+  "CH",
+  "D",
+  "DH",
+  "D R",
+  "D W",
+  "D Y",
+  "F",
+  "F L",
+  "F R",
+  "F Y",
+  "G",
+  "G L",
+  "G R",
+  "G W",
+  "G Y",
+  "HH",
+  "HH W",
+  "HH Y",
+  "JH",
+  "K",
+  "K L",
+  "K R",
+  "K W",
+  "K Y",
+  "L",
+  "L Y",
+  "M",
+  "M Y",
+  "N",
+  "N Y",
+  "P",
+  "P L",
+  "P R",
+  "P Y",
+  "R",
+  "S",
+  "SH",
+  "SH R",
+  "S K",
+  "S K R",
+  "S K W",
+  "S K Y",
+  "S L",
+  "S M",
+  "S N",
+  "S P",
+  "S P L",
+  "S P R",
+  "S P Y",
+  "S T",
+  "S T R",
+  "S T Y",
+  "S W",
+  "S Y",
+  "T",
+  "TH",
+  "TH R",
+  "TH W",
+  "T R",
+  "T W",
+  "T Y",
+  "V",
+  "V Y",
+  "W",
+  "Y",
+  "Z",
+  "ZH"
+]);
+/**
+ * Finds where the onset of the following syllable begins inside a run of
+ * consonants.
+ *
+ * Suffixes are tried from longest to shortest, and the first one accepted by
+ * isValidOnset wins, so the following syllable gets the largest valid onset.
+ *
+ * @param {string[]} consonants - Stress-free consonant symbols, in order.
+ * @returns {number} Index into `consonants` where the onset starts. A result
+ *   equal to `consonants.length` means the onset is empty and the whole run
+ *   belongs to the preceding syllable.
+ */
+function findOnsetStart(consonants) {
+  for (let start = 0; start < consonants.length; start++) {
+    if (isValidOnset(consonants.slice(start))) {
+      return start;
+    }
+  }
+  // No suffix is a valid onset (for example the run ends in NG, which is not
+  // in the onset table). Return an empty onset instead of forcing the last
+  // consonant to open the next syllable. For an empty run this is still 0.
+  return consonants.length;
+}
+/**
+ * Checks a consonant sequence against the onset table.
+ *
+ * @param {string[]} consonants - Consonant symbols, in order.
+ * @returns {boolean} True for an empty sequence, or when the symbols joined by
+ *   single spaces are a member of VALID_ONSETS.
+ */
+function isValidOnset(consonants) {
+  return consonants.length === 0 || VALID_ONSETS.has(consonants.join(" "));
+}
+// src/format-registry.ts
+// Format name -> merged handler object, written by registerFormat.
+var registry = /* @__PURE__ */ new Map();
+// Format name -> resolved isLatinScript flag, written by registerFormat.
+var isLatinScriptCache = /* @__PURE__ */ new Map();
+// Format name -> resolved preservesCase flag, written by registerFormat.
+var preservesCaseCache = /* @__PURE__ */ new Map();
+/** Returns the handler registered under `name`, or undefined when there is none. */
+function getFormatHandler(name) {
+  const handler = registry.get(name);
+  return handler;
+}
+/** Returns the cached isLatinScript flag for `name`; true when nothing is cached. */
+function getFormatIsLatinScript(name) {
+  const cached = isLatinScriptCache.get(name);
+  return cached == null ? true : cached;
+}
+/** Returns the handler's joinSeparator, or "" when the handler or the field is missing. */
+function getFormatJoinSeparator(name) {
+  const handler = registry.get(name);
+  if (handler == null) {
+    return "";
+  }
+  const separator = handler.joinSeparator;
+  return separator == null ? "" : separator;
+}
+/** Returns the handler's label, falling back to the format name itself. */
+function getFormatLabel(name) {
+  const handler = registry.get(name);
+  if (handler == null) {
+    return name;
+  }
+  const label = handler.label;
+  return label == null ? name : label;
+}
+/** Returns nativeLabel, then label, then the format name, whichever is set first. */
+function getFormatNativeLabel(name) {
+  const handler = registry.get(name);
+  if (handler == null) {
+    return name;
+  }
+  const nativeLabel = handler.nativeLabel;
+  if (nativeLabel != null) {
+    return nativeLabel;
+  }
+  const label = handler.label;
+  return label == null ? name : label;
+}
+/** Returns the cached preservesCase flag for `name`; true when nothing is cached. */
+function getFormatPreservesCase(name) {
+  const cached = preservesCaseCache.get(name);
+  return cached == null ? true : cached;
+}
+/**
+ * Registers a format handler, or updates one that is already registered.
+ *
+ * The new handler's fields are layered over any existing handler for the same
+ * name, and the two flag caches are refreshed from the merged result.
+ *
+ * @param {string} name - Format name used as the registry key.
+ * @param {object} handler - Handler fields to add or override.
+ */
+function registerFormat(name, handler) {
+  const combined = { ...registry.get(name), ...handler };
+  registry.set(name, combined);
+  // isLatinScript defaults to true; preservesCase defaults to isLatinScript.
+  const latin = combined.isLatinScript ?? true;
+  isLatinScriptCache.set(name, latin);
+  preservesCaseCache.set(name, combined.preservesCase ?? latin);
+}
+// src/ingglish-maps.ts
+// Default Ingglish spelling for each ARPAbet vowel, keyed by the stress-free symbol.
+// Keys are in alphabetical order, so monophthongs and diphthongs are interleaved.
+// The "but X+R" notes refer to the R_COLORED_VOWELS prefixes, which replace the
+// spelling here when the next phoneme is R.
+var INGGLISH_VOWEL_MAP = {
+  AA: "o",
+  // father, hot, rock (but AA+R → 'ar' in star, car)
+  AE: "a",
+  // cat, bat, had (but AE+R → 'arr' in arrow, barrow)
+  AH: "uh",
+  // but, cup, son (stressed /ʌ/; unstressed /ə/ AH0 → 'a' in conversion)
+  AO: "aw",
+  // thought, caught, law (but AO+R → 'or' in store, more)
+  AW: "ou",
+  // cow, how, out
+  AY: "ai",
+  // my, eye, time
+  EH: "e",
+  // bed, red, said (but EH+R → 'air' in air, care, there)
+  ER: "er",
+  // bird, her, nurse
+  EY: "ay",
+  // say, day, make
+  IH: "i",
+  // bit, sit, gym
+  IY: "ee",
+  // bee, see, machine
+  OW: "oh",
+  // go, show, coat
+  OY: "oi",
+  // boy, toy, coin
+  UH: "u",
+  // book, put, could
+  UW: "oo"
+  // too, blue, food
+};
+// Default Ingglish spelling for each ARPAbet consonant.
+// Keys are in alphabetical order, not grouped by manner of articulation.
+// The comment after each entry gives example words.
+var INGGLISH_CONSONANT_MAP = {
+  B: "b",
+  // bat, cab
+  CH: "ch",
+  // chat, batch
+  D: "d",
+  // dog, bed
+  DH: "dh",
+  // the, this (voiced) - distinguishes from TH
+  F: "f",
+  // fat, laugh
+  G: "g",
+  // go, big
+  HH: "h",
+  // hat, ahead
+  JH: "j",
+  // just, edge
+  K: "k",
+  // cat, back
+  L: "l",
+  // let, well
+  M: "m",
+  // man, come
+  N: "n",
+  // no, pen
+  NG: "ng",
+  // sing, thing
+  P: "p",
+  // pat, cup
+  R: "r",
+  // run, car
+  S: "s",
+  // sat, miss
+  SH: "sh",
+  // she, push
+  T: "t",
+  // top, cat
+  TH: "th",
+  // think, bath (voiceless)
+  V: "v",
+  // van, love
+  W: "w",
+  // wet, away
+  Y: "y",
+  // yes, you
+  Z: "z",
+  // zoo, is
+  ZH: "zh"
+  // measure, beige
+};
+// Vowel and consonant spellings combined into one ARPAbet -> Ingglish lookup.
+var ARPABET_TO_INGGLISH_MAP = {
+  ...INGGLISH_VOWEL_MAP,
+  ...INGGLISH_CONSONANT_MAP
+};
+// The same table inverted: Ingglish spelling -> ARPAbet symbol.
+var INGGLISH_TO_ARPABET_MAP = {};
+for (const arpabet of Object.keys(ARPABET_TO_INGGLISH_MAP)) {
+  INGGLISH_TO_ARPABET_MAP[ARPABET_TO_INGGLISH_MAP[arpabet]] = arpabet;
+}
+// Vowels that get a special spelling when the next phoneme is R. Only the
+// vowel's replacement (`prefix`) is stored; the "r" comes from the R phoneme.
+var R_COLORED_VOWELS = [
+  { arpabet: "AA", prefix: "a" },
+  // star, car, far → 'ar'
+  { arpabet: "AO", prefix: "o" },
+  // store, more, for → 'or'
+  { arpabet: "EH", prefix: "ai" },
+  // air, care, there → 'air'
+  { arpabet: "AE", prefix: "ar" },
+  // arrow, barrow, carrot → 'arr'
+  { arpabet: "IH", prefix: "ee" },
+  // beer, beard, fear → 'eer'
+  { arpabet: "UH", prefix: "u" },
+  // tour, cure, pure → 'ur' (CURE vowel, experimentable)
+  { arpabet: "AH", prefix: "uh" }
+  // curry, burroughs → 'uhr' (AH=uh, prevents AH0+R collision with 'ar')
+];
+// Forward lookup: vowel symbol -> prefix.
+var R_COLORED_FORWARD = new Map();
+// Reverse lookups: prefix + "r" -> [vowel, "R"], split by spelling length
+// (two-letter prefixes give three-letter keys, one-letter prefixes two-letter keys).
+var R_COLORED_REVERSE_3CHAR = {};
+var R_COLORED_REVERSE_2CHAR = {};
+for (const { arpabet, prefix } of R_COLORED_VOWELS) {
+  R_COLORED_FORWARD.set(arpabet, prefix);
+  if (prefix.length === 2) {
+    R_COLORED_REVERSE_3CHAR[prefix + "r"] = [arpabet, "R"];
+  } else if (prefix.length === 1) {
+    R_COLORED_REVERSE_2CHAR[prefix + "r"] = [arpabet, "R"];
+  }
+}
+// src/to-ingglish.ts
+/**
+ * Spells a single ARPAbet phoneme in Ingglish, without looking at its neighbours.
+ *
+ * @param {string} phoneme - Phoneme symbol, with or without a stress digit.
+ * @returns {string} "a" for "AH0"; otherwise the mapped spelling of the
+ *   stress-free symbol, or the lower-cased input when the symbol is unmapped.
+ */
+function arpabetPhonemeToIngglish(phoneme) {
+  const spelling = phoneme === "AH0" ? "a" : ARPABET_TO_INGGLISH_MAP[stripStress(phoneme)];
+  return spelling ?? phoneme.toLowerCase();
+}
+/**
+ * Spells a phoneme sequence using caller-supplied lookup tables.
+ *
+ * For each phoneme the first applicable rule wins:
+ *   1. the next phoneme is exactly "R" and `rColoredMap` has the stress-free symbol;
+ *   2. `stressOverrides` has the phoneme exactly as written (stress digit included);
+ *   3. `phonemeMap` has the stress-free symbol;
+ *   4. otherwise the phoneme is lower-cased and used as-is.
+ *
+ * @param {string[]} arpabet - Phoneme symbols, in order.
+ * @param {object} phonemeMap - Stress-free symbol -> spelling.
+ * @param {Map} rColoredMap - Stress-free vowel -> spelling used before R.
+ * @param {Map} stressOverrides - Exact phoneme -> spelling.
+ * @returns {string} The concatenated spellings.
+ */
+function convertArpabet(arpabet, phonemeMap, rColoredMap, stressOverrides) {
+  const count = arpabet.length;
+  let output = "";
+  let index = 0;
+  while (index < count) {
+    const symbol = arpabet[index];
+    const stressless = stripStress(symbol);
+    const beforeR = index + 1 < count && arpabet[index + 1] === "R";
+    index += 1;
+    const rSpelling = beforeR ? rColoredMap.get(stressless) : void 0;
+    if (rSpelling !== void 0) {
+      output += rSpelling;
+      continue;
+    }
+    const override = stressOverrides.get(symbol);
+    if (override !== void 0) {
+      output += override;
+    } else {
+      output += phonemeMap[stressless] ?? symbol.toLowerCase();
+    }
+  }
+  return output;
+}
+// Lookup keyed by every phoneme spelling arpabetToIngglish can meet: the bare
+// symbol and the symbol followed by 0, 1 or 2. All four share one spelling,
+// except AH0, which is overridden to "a" afterwards.
+var INGGLISH_FULL_MAP = {};
+for (const symbol of Object.keys(ARPABET_TO_INGGLISH_MAP)) {
+  for (const stressSuffix of ["", "0", "1", "2"]) {
+    INGGLISH_FULL_MAP[symbol + stressSuffix] = ARPABET_TO_INGGLISH_MAP[symbol];
+  }
+}
+INGGLISH_FULL_MAP.AH0 = "a";
+/**
+ * Spells a phoneme sequence in the default Ingglish format.
+ *
+ * A vowel listed in R_COLORED_FORWARD that is directly followed by "R" uses
+ * its r-coloured prefix. Every other phoneme is looked up in
+ * INGGLISH_FULL_MAP, and an unmapped phoneme is lower-cased and used as-is.
+ *
+ * @param {string[]} arpabet - Phoneme symbols, in order.
+ * @returns {string} The concatenated spellings.
+ */
+function arpabetToIngglish(arpabet) {
+  const count = arpabet.length;
+  let output = "";
+  for (let index = 0; index < count; index++) {
+    const symbol = arpabet[index];
+    const beforeR = index + 1 < count && arpabet[index + 1] === "R";
+    const rSpelling = beforeR ? R_COLORED_FORWARD.get(stripStress(symbol)) : void 0;
+    if (rSpelling !== void 0) {
+      output += rSpelling;
+    } else {
+      output += INGGLISH_FULL_MAP[symbol] ?? symbol.toLowerCase();
+    }
+  }
+  return output;
+}
+// Register the built-in "ingglish" format when this module is evaluated.
+registerFormat("ingglish", {
+  forward: arpabetToIngglish,
+  isLatinScript: true,
+  label: "Ingglish",
+  preservesCase: true
+});
+// Empty r-coloured table, passed to convertArpabet when r-colouring is disabled.
+var EMPTY_R_COLORED = /* @__PURE__ */ new Map();
+// Stress-specific spellings passed to convertArpabet: AH0 is spelled "a".
+var INGGLISH_STRESS_OVERRIDES = /* @__PURE__ */ new Map([["AH0", "a"]]);
+/**
+ * Spells a phoneme sequence in the named output format.
+ *
+ * @param {string[]} arpabet - Phoneme symbols, in order.
+ * @param {string} [format="ingglish"] - Registered format name.
+ * @param {object} [options] - Passed through to the format's `forward` function.
+ *   For "ingglish", `disableRColoring: true` turns off the r-coloured spellings.
+ * @returns {string} The converted text. A format with no registered `forward`
+ *   function falls back to the default Ingglish conversion.
+ */
+function arpabetToFormat(arpabet, format = "ingglish", options) {
+  if (format !== "ingglish") {
+    const handler = getFormatHandler(format);
+    return handler?.forward ? handler.forward(arpabet, options) : arpabetToIngglish(arpabet);
+  }
+  if (options?.disableRColoring !== true) {
+    return arpabetToIngglish(arpabet);
+  }
+  return convertArpabet(
+    arpabet,
+    ARPABET_TO_INGGLISH_MAP,
+    EMPTY_R_COLORED,
+    INGGLISH_STRESS_OVERRIDES
+  );
+}
+// src/to-pronunciation.ts
+// Spelling lookup for the pronunciation guide, keyed by the bare symbol and by
+// the symbol followed by 0, 1 or 2. AH0 is overridden to "a" afterwards.
+var GUIDE_MAP = {};
+for (const symbol of Object.keys(ARPABET_TO_INGGLISH_MAP)) {
+  for (const stressSuffix of ["", "0", "1", "2"]) {
+    GUIDE_MAP[symbol + stressSuffix] = ARPABET_TO_INGGLISH_MAP[symbol];
+  }
+}
+GUIDE_MAP.AH0 = "a";
+/**
+ * Builds a hyphen-separated pronunciation guide from a phoneme sequence.
+ *
+ * @param {string[]} arpabet - Phoneme symbols, in order.
+ * @returns {string} Syllable spellings joined by "-". Syllables whose stress
+ *   is 1 or higher are upper-cased.
+ */
+function arpabetToPronunciation(arpabet) {
+  const parts = [];
+  for (const { phonemes, stress } of syllabify(arpabet)) {
+    const spelling = syllableToSpelling(phonemes);
+    parts.push(stress >= 1 ? spelling.toUpperCase() : spelling);
+  }
+  return parts.join("-");
+}
+/**
+ * Registers the "pronunciation" format (label "Guide") in the format registry.
+ * Nothing is registered until this function is called.
+ */
+function registerPronunciation() {
+  const guideFormat = {
+    forward: arpabetToPronunciation,
+    isLatinScript: true,
+    label: "Guide",
+    preservesCase: false
+  };
+  registerFormat("pronunciation", guideFormat);
+}
+/**
+ * Splits a phoneme sequence into syllables, one per vowel.
+ *
+ * The consonants between two vowels are divided by findOnsetStart: the part
+ * from the returned index onward opens the next syllable, and the rest closes
+ * the current one. Leading consonants go to the first syllable and trailing
+ * consonants to the last.
+ *
+ * @param {string[]} arpabet - Phoneme symbols, in order.
+ * @returns {{ phonemes: string[], stress: number }[]} Syllables in order;
+ *   `stress` is the vowel's stress digit, or 0 when it has none. With no vowel
+ *   at all the result is a single syllable holding the input array itself.
+ */
+function syllabify(arpabet) {
+  const nuclei = [];
+  for (let i = 0; i < arpabet.length; i++) {
+    if (isVowel(arpabet[i])) {
+      nuclei.push(i);
+    }
+  }
+  if (nuclei.length === 0) {
+    return [{ phonemes: arpabet, stress: 0 }];
+  }
+  const lastNucleus = nuclei.length - 1;
+  const syllables = [];
+  let sliceFrom = 0;
+  for (const [n, vowelAt] of nuclei.entries()) {
+    const stress = getStress(arpabet[vowelAt]) ?? 0;
+    if (n === lastNucleus) {
+      // The final syllable takes everything that is left.
+      syllables.push({ phonemes: arpabet.slice(sliceFrom), stress });
+      break;
+    }
+    const clusterFrom = vowelAt + 1;
+    const cluster = arpabet.slice(clusterFrom, nuclei[n + 1]).map((p) => stripStress(p));
+    // Adjacent vowels: the boundary sits right after the current vowel.
+    const sliceTo = cluster.length === 0 ? clusterFrom : clusterFrom + findOnsetStart(cluster);
+    syllables.push({ phonemes: arpabet.slice(sliceFrom, sliceTo), stress });
+    sliceFrom = sliceTo;
+  }
+  return syllables;
+}
+/**
+ * Spells the phonemes of one syllable for the pronunciation guide.
+ *
+ * A vowel listed in R_COLORED_FORWARD that is directly followed by "R" inside
+ * the same syllable uses its r-coloured prefix. Every other phoneme is looked
+ * up in GUIDE_MAP, and an unmapped phoneme is lower-cased and used as-is.
+ *
+ * @param {string[]} phonemes - Phoneme symbols of a single syllable.
+ * @returns {string} The concatenated spellings.
+ */
+function syllableToSpelling(phonemes) {
+  const count = phonemes.length;
+  let output = "";
+  for (let index = 0; index < count; index++) {
+    const symbol = phonemes[index];
+    const beforeR = index + 1 < count && phonemes[index + 1] === "R";
+    const rSpelling = beforeR ? R_COLORED_FORWARD.get(stripStress(symbol)) : void 0;
+    if (rSpelling !== void 0) {
+      output += rSpelling;
+    } else {
+      output += GUIDE_MAP[symbol] ?? symbol.toLowerCase();
+    }
+  }
+  return output;
+}
+// src/from-ingglish.ts
+// Alternative readings used by expandArpabetAlternatives. Each key is a
+// phoneme, and each value is a list of replacement sequences that may stand in
+// for it (a replacement can be one phoneme or several).
+var ARPABET_ALTERNATIVES = {
+  AE: [["AH"]],
+  // "a" could be AE (cat) or AH (schwa: about, the)
+  ER: [["EH", "R"]],
+  // NOTE(review): presumably "er" could also be read as EH followed by R; confirm
+  SH: [["S", "HH"]]
+  // "sh" could be SH (ship) or S+HH (exhume)
+};
+// Entries of the table above, computed once for the loop in expandArpabetAlternatives.
+var ARPABET_ALTERNATIVES_ENTRIES = Object.entries(ARPABET_ALTERNATIVES);
+/**
+ * Lists alternative phoneme sequences for a sequence that contains ambiguous
+ * phonemes.
+ *
+ * The result holds, in this order:
+ *   1. the input array itself;
+ *   2. for every position with an entry in ARPABET_ALTERNATIVES, one copy per
+ *      replacement with only that position replaced;
+ *   3. for every single-phoneme replacement whose source phoneme occurs at
+ *      least twice, one copy with all occurrences replaced.
+ *
+ * @param {string[]} arpabet - Phoneme symbols; compared to the table keys exactly.
+ * @returns {string[][]} The candidate sequences.
+ */
+function expandArpabetAlternatives(arpabet) {
+  const variants = [arpabet];
+  for (const [at, symbol] of arpabet.entries()) {
+    const options = ARPABET_ALTERNATIVES[symbol];
+    if (options === void 0) {
+      continue;
+    }
+    for (const replacement of options) {
+      const copy = arpabet.slice();
+      copy.splice(at, 1, ...replacement);
+      variants.push(copy);
+    }
+  }
+  for (const [symbol, options] of ARPABET_ALTERNATIVES_ENTRIES) {
+    const singles = options.filter((option) => option.length === 1);
+    if (singles.length === 0) {
+      continue;
+    }
+    const occurrences = arpabet.filter((p) => p === symbol).length;
+    if (occurrences < 2) {
+      continue;
+    }
+    for (const [substitute] of singles) {
+      variants.push(arpabet.map((p) => (p === symbol ? substitute : p)));
+    }
+  }
+  return variants;
+}
+// Ingglish spellings from INGGLISH_TO_ARPABET_MAP, bucketed by length so the
+// parser can test two-letter spellings before one-letter spellings.
+var TWO_CHAR_SPELLINGS = new Set();
+var ONE_CHAR_SPELLINGS = new Set();
+for (const spelling of Object.keys(INGGLISH_TO_ARPABET_MAP)) {
+  if (spelling.length === 2) {
+    TWO_CHAR_SPELLINGS.add(spelling);
+  } else if (spelling.length === 1) {
+    ONE_CHAR_SPELLINGS.add(spelling);
+  }
+}
+/**
+ * Parses Ingglish text back into ARPAbet phonemes, without stress digits.
+ *
+ * The text is lower-cased and scanned left to right. At each position the
+ * first match wins: a three-letter r-coloured spelling, a two-letter
+ * r-coloured spelling, a two-letter spelling, then a one-letter spelling.
+ * A character that matches nothing is skipped.
+ *
+ * @param {string} ingglish - Text in Ingglish spelling.
+ * @returns {string[] | null} The phonemes found, or null when none were found.
+ */
+function ingglishToArpabet(ingglish) {
+  const text = ingglish.toLowerCase();
+  const phonemes = [];
+  let cursor = 0;
+  while (cursor < text.length) {
+    // Near the end of the text these slices come out shorter than requested,
+    // so each length check also doubles as the bounds check.
+    const three = text.slice(cursor, cursor + 3);
+    const two = text.slice(cursor, cursor + 2);
+    if (three.length === 3 && three in R_COLORED_REVERSE_3CHAR) {
+      phonemes.push(...R_COLORED_REVERSE_3CHAR[three]);
+      cursor += 3;
+    } else if (two.length === 2 && two in R_COLORED_REVERSE_2CHAR) {
+      phonemes.push(...R_COLORED_REVERSE_2CHAR[two]);
+      cursor += 2;
+    } else if (two.length === 2 && TWO_CHAR_SPELLINGS.has(two)) {
+      phonemes.push(INGGLISH_TO_ARPABET_MAP[two]);
+      cursor += 2;
+    } else {
+      const one = text[cursor];
+      if (ONE_CHAR_SPELLINGS.has(one)) {
+        phonemes.push(INGGLISH_TO_ARPABET_MAP[one]);
+      }
+      cursor += 1;
+    }
+  }
+  return phonemes.length === 0 ? null : phonemes;
+}
+// src/custom-format.ts
+/**
+ * Builds a converter that spells phonemes with caller-supplied overrides
+ * layered over the default Ingglish tables.
+ *
+ * @param {object} [config] - Converter configuration.
+ * @param {object} [config.phonemeMap] - Phoneme -> spelling overrides. A key
+ *   ending in 0, 1 or 2 applies only to that exact stressed phoneme.
+ * @param {object} [config.rColoredPrefixes] - Vowel -> spelling used when the
+ *   next phoneme is R. A default r-coloured vowel that is overridden in
+ *   `phonemeMap` but not listed here uses its `phonemeMap` spelling before R.
+ * @returns {(arpabet: string[], options?: { disableRColoring?: boolean }) => string}
+ *   Converter function. Passing `disableRColoring: true` skips every
+ *   r-coloured spelling.
+ */
+function createCustomConverter(config) {
+  // Either table may be omitted; treat a missing one as empty instead of
+  // throwing on the `in` checks and Object.entries calls below.
+  const phonemeMap = config?.phonemeMap ?? {};
+  const rColoredPrefixes = config?.rColoredPrefixes ?? {};
+  const mergedMap = { ...ARPABET_TO_INGGLISH_MAP, ...phonemeMap };
+  const mergedRColored = new Map(R_COLORED_FORWARD);
+  for (const vowel of R_COLORED_FORWARD.keys()) {
+    // A vowel the caller re-spelled keeps that spelling before R unless an
+    // explicit r-coloured prefix was also supplied for it.
+    if (vowel in phonemeMap && !(vowel in rColoredPrefixes)) {
+      mergedRColored.set(vowel, phonemeMap[vowel]);
+    }
+  }
+  for (const [key, value] of Object.entries(rColoredPrefixes)) {
+    mergedRColored.set(key, value);
+  }
+  const stressOverrides = /* @__PURE__ */ new Map([["AH0", "a"]]);
+  for (const [key, value] of Object.entries(phonemeMap)) {
+    // Keys that carry a stress digit are exact-phoneme overrides.
+    if (getStress(key) !== null) {
+      stressOverrides.set(key, value);
+    }
+  }
+  const emptyRColored = /* @__PURE__ */ new Map();
+  return (arpabet, options) => convertArpabet(
+    arpabet,
+    mergedMap,
+    options?.disableRColoring === true ? emptyRColored : mergedRColored,
+    stressOverrides
+  );
+}
+export {
+  ARPABET_CONSONANTS,
+  ARPABET_TO_INGGLISH_MAP,
+  ARPABET_VOWELS,
+  R_COLORED_FORWARD,
+  STRESS_MARKER_REGEX,
+  arpabetPhonemeToIngglish,
+  arpabetToFormat,
+  arpabetToIngglish,
+  createCustomConverter,
+  expandArpabetAlternatives,
+  findOnsetStart,
+  getFormatHandler,
+  getFormatIsLatinScript,
+  getFormatJoinSeparator,
+  getFormatLabel,
+  getFormatNativeLabel,
+  getFormatPreservesCase,
+  getStress,
+  ingglishToArpabet,
+  isVowel,
+  registerFormat,
+  registerPronunciation,
+  stripStress
+};

package/package.json ADDED Viewed

@@ -0,0 +1,48 @@
+{
+  "name": "@ingglish/phonemes",
+  "version": "0.1.0",
+  "description": "Phoneme definitions and ARPAbet/IPA/Ingglish conversion maps",
+  "type": "module",
+  "main": "./dist/index.js",
+  "module": "./dist/index.mjs",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "source": "./src/index.ts",
+      "import": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      },
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      }
+    }
+  },
+  "files": [
+    "dist"
+  ],
+  "sideEffects": false,
+  "engines": {
+    "node": ">=16"
+  },
+  "scripts": {
+    "build": "tsup",
+    "build:fast": "tsup src/index.ts --format esm",
+    "lint": "eslint --cache src",
+    "test": "vitest run --no-color",
+    "bench": "vitest bench --no-color",
+    "prepublishOnly": "npm run build"
+  },
+  "author": "Paul Tarjan",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ptarjan/ingglish.git",
+    "directory": "packages/phonemes"
+  },
+  "homepage": "https://github.com/ptarjan/ingglish#readme",
+  "bugs": {
+    "url": "https://github.com/ptarjan/ingglish/issues"
+  }
+}