npm - @ingglish/phonemes - Versions diffs - 0.1.0 - Mend

@ingglish/phonemes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.cts ADDED Viewed

@@ -0,0 +1,254 @@
+/**
+ * ARPAbet phoneme utilities and constants.
+ *
+ * ARPAbet is the phonetic notation system used by the CMU Pronouncing Dictionary.
+ * Each English phoneme is represented by a 1-2 letter code; vowels may carry a
+ * trailing stress digit (0=unstressed, 1=primary, 2=secondary).
+ */
+/**
+ * All ARPAbet vowel phonemes (without stress markers).
+ *
+ * Typed as a readonly tuple of 15 string literals, so
+ * `typeof ARPABET_VOWELS[number]` gives the union of bare vowel codes.
+ */
+declare const ARPABET_VOWELS: readonly ["AA", "AE", "AH", "AO", "AW", "AY", "EH", "ER", "EY", "IH", "IY", "OW", "OY", "UH", "UW"];
+/**
+ * All ARPAbet consonant phonemes.
+ *
+ * Typed as a readonly tuple of 24 string literals, so
+ * `typeof ARPABET_CONSONANTS[number]` gives the union of consonant codes.
+ * getStress is documented to return null for consonants.
+ */
+declare const ARPABET_CONSONANTS: readonly ["B", "D", "G", "K", "P", "T", "DH", "F", "HH", "S", "SH", "TH", "V", "Z", "ZH", "CH", "JH", "M", "N", "NG", "L", "R", "W", "Y"];
+/**
+ * Regex pattern to match ARPAbet stress markers (0, 1, 2) at end of phoneme.
+ *
+ * Only the `RegExp` type is visible in this declaration file, not the pattern
+ * itself. getStress and stripStress are documented as using a charCode check
+ * rather than a regex.
+ */
+declare const STRESS_MARKER_REGEX: RegExp;
+/**
+ * Extracts the stress level from a phoneme.
+ * Returns null for consonants or vowels without explicit stress.
+ * Uses charCode check instead of regex (consistent with stripStress).
+ *
+ * @param phoneme ARPAbet phoneme, optionally ending in a stress digit
+ * @returns 0 (unstressed), 1 (primary) or 2 (secondary), or null when the
+ *   phoneme carries no stress digit
+ *
+ * @example
+ * getStress('AH0') // 0
+ * getStress('EY1') // 1
+ * getStress('AO2') // 2
+ * getStress('B')   // null
+ */
+declare function getStress(phoneme: string): 0 | 1 | 2 | null;
+/**
+ * Checks if an ARPAbet phoneme is a vowel.
+ * Vowels can have stress markers (0, 1, 2).
+ *
+ * @param phoneme ARPAbet phoneme; a vowel may include its stress digit
+ * @returns true when the phoneme is a vowel, false otherwise
+ *
+ * @example
+ * isVowel('AH0') // true
+ * isVowel('EY1') // true
+ * isVowel('B')   // false
+ */
+declare function isVowel(phoneme: string): boolean;
+/**
+ * Strips stress markers (0, 1, 2) from a phoneme.
+ * Uses charCode check instead of regex (benchmarked 2x faster).
+ *
+ * @param phoneme ARPAbet phoneme, with or without a trailing stress digit
+ * @returns The phoneme without its stress digit; a phoneme that has no
+ *   stress digit is returned as-is (see the 'B' example)
+ *
+ * @example
+ * stripStress('AH0') // 'AH'
+ * stripStress('EY1') // 'EY'
+ * stripStress('B')   // 'B'
+ */
+declare function stripStress(phoneme: string): string;
+/**
+ * Finds the starting index of the maximal valid onset from a consonant cluster.
+ *
+ * Uses the Maximal Onset Principle: assign as many consonants as possible
+ * to the onset, as long as they form a legal onset cluster. The remaining
+ * consonants become the coda of the previous syllable.
+ *
+ * @param consonants Array of consonant phonemes between two vowels
+ * @returns Index within the consonant array where the valid onset begins;
+ *   consonants before this index belong to the previous syllable's coda
+ *
+ * @example
+ * // "extra" has consonants [K, S, T, R] between vowels
+ * // [S, T, R] is valid onset, so onset starts at index 1
+ * findOnsetStart(['K', 'S', 'T', 'R']) // 1
+ *
+ * // "instruct" has [N, S, T, R]
+ * // [S, T, R] is valid, so onset starts at index 1
+ * findOnsetStart(['N', 'S', 'T', 'R']) // 1
+ */
+declare function findOnsetStart(consonants: string[]): number;
+/**
+ * Describes an output format: optional converters plus display metadata.
+ * Every member is optional. This is the handler type accepted by
+ * registerFormat and returned by getFormatHandler.
+ */
+interface FormatHandler {
+    /** Converts an ARPAbet phoneme array into this format's text. */
+    forward?: ForwardConverter;
+    /** Whether the format uses Latin script characters. */
+    isLatinScript?: boolean;
+    /** Compound word join separator. Default '' */
+    joinSeparator?: string;
+    /** Display name ('Ingglish', 'IPA', etc.) */
+    label?: string;
+    /** Display name in the format's own script (e.g. '𐑖𐑱𐑝𐑾𐑯' for Shavian) */
+    nativeLabel?: string;
+    /** Whether case is preserved (caps, sentence start). Defaults to isLatinScript. */
+    preservesCase?: boolean;
+    /** Reverse converter, string in / string out (presumably format text back to English; confirm). */
+    reverseText?: ReverseTextConverter;
+    /** Reverse converter that returns one ReverseToken per token instead of a plain string. */
+    reverseTextWithMapping?: ReverseTextWithMappingConverter;
+}
+/**
+ * Function that renders an ARPAbet phoneme array as a string in some output
+ * format, optionally tuned by ForwardConverterOptions.
+ */
+type ForwardConverter = (arpabet: string[], options?: ForwardConverterOptions) => string;
+/** Options accepted by a ForwardConverter. */
+interface ForwardConverterOptions {
+    /** Disable English R-coloring rules (vowel+R fusion). Use for foreign text. */
+    disableRColoring?: boolean;
+}
+/**
+ * Token returned by reverse-with-mapping translation.
+ *
+ * NOTE(review): the fields mirror TranslatedToken (declared later in this
+ * file) except that `matched` is optional here. The per-field notes below
+ * assume the same meanings; confirm against the implementation.
+ */
+interface ReverseToken {
+    /** Presumably true for word tokens, false for punctuation/whitespace. */
+    isWord: boolean;
+    /** Optional match flag; presumably whether a dictionary entry was found. */
+    matched?: boolean;
+    /** Presumably the token text as it appeared in the input. */
+    original: string;
+    /** Presumably the token text after reverse translation. */
+    translated: string;
+}
+/** Reverse converter for whole text: takes a string and returns the converted string. */
+type ReverseTextConverter = (text: string) => string;
+/** Reverse converter for whole text that returns one ReverseToken per token rather than a joined string. */
+type ReverseTextWithMappingConverter = (text: string) => ReverseToken[];
+/**
+ * Looks up the handler for a format by name.
+ *
+ * @param name Format name (e.g. 'ingglish', 'ipa')
+ * @returns The handler, or undefined (presumably when no format is
+ *   registered under that name; confirm)
+ */
+declare function getFormatHandler(name: string): FormatHandler | undefined;
+/**
+ * Returns whether a format uses Latin script characters.
+ * Defaults to true for unknown formats (safe for case handling).
+ *
+ * @param name Format name
+ * @returns true for Latin-script formats and for unknown formats
+ */
+declare function getFormatIsLatinScript(name: string): boolean;
+/**
+ * Returns the join separator for compound word parts.
+ * Defaults to '' (no separator).
+ *
+ * @param name Format name
+ * @returns The separator string, or '' when the default applies
+ */
+declare function getFormatJoinSeparator(name: string): string;
+/**
+ * Returns the display label for a format (e.g. 'Ingglish', 'IPA').
+ * Falls back to the raw format name.
+ *
+ * @param name Format name
+ * @returns The display label, or `name` itself when no label is available
+ */
+declare function getFormatLabel(name: string): string;
+/**
+ * Returns the native-script label for a format (e.g. '𐑖𐑱𐑝𐑾𐑯' for Shavian).
+ * Falls back to the standard label, then the raw format name.
+ *
+ * @param name Format name
+ * @returns The native label, else the standard label, else `name`
+ */
+declare function getFormatNativeLabel(name: string): string;
+/**
+ * Returns whether a format preserves case (capitalization, sentence start).
+ * Falls back to isLatinScript, then true for unknown formats.
+ *
+ * @param name Format name
+ * @returns The format's preservesCase setting, else its isLatinScript
+ *   setting, else true
+ */
+declare function getFormatPreservesCase(name: string): boolean;
+/**
+ * Registers a format handler under the given name.
+ *
+ * @param name Format name to register the handler under
+ * @param handler Converters and display metadata for the format
+ *
+ * NOTE(review): what happens when `name` is already registered is not
+ * visible in this declaration; confirm whether it overwrites.
+ */
+declare function registerFormat(name: string, handler: FormatHandler): void;
+/**
+ * Output format for phoneme conversion.
+ *
+ * 'ingglish' and 'ipa' are listed as literals; the `(string & {})` member
+ * accepts any other string (e.g. a custom format name) while keeping the
+ * literals available for editor autocompletion.
+ */
+type OutputFormat = 'ingglish' | 'ipa' | (string & {});
+/**
+ * ARPAbet to Ingglish conversion.
+ *
+ * Converts CMU dictionary phoneme sequences to Ingglish spelling.
+ * Ingglish is a phonetic spelling system that uses only standard
+ * English letters with no ambiguity.
+ */
+/**
+ * Converts a single ARPAbet phoneme to Ingglish spelling.
+ *
+ * @param phoneme ARPAbet phoneme (e.g., "AH0", "EY1", "B")
+ * @returns Ingglish spelling (e.g., "a", "ay", "b")
+ *
+ * @example
+ * arpabetPhonemeToIngglish('AH0') // 'a'
+ * arpabetPhonemeToIngglish('EY1') // 'ay'
+ * arpabetPhonemeToIngglish('B')   // 'b'
+ */
+declare function arpabetPhonemeToIngglish(phoneme: string): string;
+/**
+ * Converts an array of ARPAbet phonemes to Ingglish spelling.
+ * Uses direct loop + string concat (benchmarked 60% faster than map+join).
+ *
+ * R-colored vowels: AA+R → 'ar', AO+R → 'or', IH+R → 'eer'
+ * (more intuitive than 'or'/'awr'/'ir', respectively).
+ *
+ * This function takes no options; arpabetToFormat accepts a
+ * `disableRColoring` option.
+ *
+ * @param arpabet Array of ARPAbet symbols (e.g., ["HH", "AH0", "L", "OW1"])
+ * @returns Ingglish spelling (e.g., "haloh")
+ *
+ * @example
+ * arpabetToIngglish(['HH', 'AH0', 'L', 'OW1']) // 'haloh'
+ */
+declare function arpabetToIngglish(arpabet: string[]): string;
+/**
+ * Options accepted by arpabetToFormat.
+ * Structurally the same as ForwardConverterOptions.
+ */
+interface FormatOptions {
+    /** Disable English R-coloring rules (vowel+R fusion). Use for foreign text. */
+    disableRColoring?: boolean;
+}
+/**
+ * Universal pipeline exit point: converts the ARPAbet IR to any output format.
+ *
+ * Every translation path (dictionary, fallback, G2P) produces an ARPAbet
+ * `string[]` and calls this function to get the final user-facing string.
+ *
+ * @param arpabet Array of ARPAbet symbols (the IR)
+ * @param format Output format (e.g. 'ingglish', 'ipa', 'shavian'). Optional;
+ *   the default is not visible in this declaration (presumably 'ingglish';
+ *   confirm against the implementation)
+ * @param options Conversion options (e.g. disable R-coloring for foreign text)
+ * @returns Formatted string
+ */
+declare function arpabetToFormat(arpabet: string[], format?: OutputFormat, options?: FormatOptions): string;
+/**
+ * Takes no arguments and returns nothing.
+ *
+ * NOTE(review): this declaration does not show what gets registered or
+ * whether repeated calls are safe; the name suggests a one-time setup call
+ * made for its side effect. Confirm against the implementation.
+ */
+declare function registerPronunciation(): void;
+/**
+ * Ingglish to ARPAbet conversion.
+ *
+ * Used to parse Ingglish spellings back to ARPAbet phonemes
+ * for reverse translation (Ingglish -> English).
+ */
+/**
+ * Generates alternative ARPAbet sequences for ambiguous spellings.
+ *
+ * For length-changing alternatives (ER→EH+R, SH→S+HH), generates
+ * single-position substitutions. For same-length alternatives (AE→AH),
+ * also generates an "all-replaced" variant to handle words with multiple
+ * ambiguous vowels (e.g., "difakalt" → D IH F AH K AH L T).
+ *
+ * @param arpabet ARPAbet phoneme sequence to expand
+ * @returns List of alternative sequences, each an array of phonemes.
+ *   NOTE(review): whether the input sequence itself is included in the
+ *   result is not visible in this declaration; confirm.
+ */
+declare function expandArpabetAlternatives(arpabet: string[]): string[][];
+/**
+ * Converts an Ingglish spelling to ARPAbet phonemes.
+ * Uses index-based parsing to avoid intermediate string allocations.
+ *
+ * @param ingglish - Ingglish string (e.g., "haloh" for "hello")
+ * @returns Array of ARPAbet phonemes (e.g., ["HH", "AH", "L", "OW"]), or null if empty.
+ *   The documented example output carries no stress digits.
+ *   NOTE(review): "empty" presumably means empty input or a parse that yields
+ *   no phonemes; confirm which. Callers must handle null either way.
+ */
+declare function ingglishToArpabet(ingglish: string): null | string[];
+/**
+ * Combined ARPAbet to Ingglish map.
+ *
+ * Keys are ARPAbet phonemes, values are Ingglish spellings. Because the type
+ * is `Record<string, string>`, a lookup with an unknown key type-checks as
+ * `string` even though it is undefined at runtime.
+ * NOTE(review): the CustomMappingConfig docs suggest AH0 is a separate key
+ * in addition to the stress-free codes; confirm.
+ */
+declare const ARPABET_TO_INGGLISH_MAP: Record<string, string>;
+/**
+ * Forward lookup: ARPAbet vowel → Ingglish prefix (before 'r').
+ * Used by arpabetToIngglish() to handle VOWEL+R sequences.
+ *
+ * The CustomMappingConfig docs describe these prefixes as keyed by base
+ * vowel (e.g. 'AA'), i.e. without a stress digit.
+ */
+declare const R_COLORED_FORWARD: Map<string, string>;
+/**
+ * Custom format converter builder.
+ *
+ * Creates ARPAbet-to-spelling converters using user-defined phoneme mappings.
+ * Mirrors arpabetToIngglish() logic but uses merged custom mappings.
+ */
+/**
+ * Configuration for a custom phoneme-to-spelling mapping.
+ * Only diffs from the default Ingglish mapping need to be specified.
+ *
+ * Both properties are required by the type, so pass an empty object (`{}`)
+ * for a map that has no overrides.
+ */
+interface CustomMappingConfig {
+    /** Overrides of ARPABET_TO_INGGLISH_MAP. Includes AH0 as a separate key. */
+    phonemeMap: Record<string, string>;
+    /** Overrides of R_COLORED_FORWARD prefixes (keyed by base vowel, e.g. 'AA'). */
+    rColoredPrefixes: Record<string, string>;
+}
+/**
+ * Creates a forward converter function from a custom mapping config.
+ *
+ * The converter checks phonemes in this order:
+ * 1. Exact phoneme with stress digit (e.g., AH0, EY0) in phonemeMap
+ * 2. Stress-stripped base (e.g., AH, EY) in phonemeMap
+ * 3. Default ARPABET_TO_INGGLISH_MAP
+ * 4. Lowercase phoneme as fallback
+ *
+ * @param config Custom phoneme and R-colored prefix overrides
+ * @returns A converter with the same signature as ForwardConverter, so it
+ *   can be used as a FormatHandler's `forward` member
+ */
+declare function createCustomConverter(config: CustomMappingConfig): (arpabet: string[], options?: ForwardConverterOptions) => string;
+/**
+ * A single token from a translated text, preserving the mapping between
+ * original and translated forms. Used by both forward and reverse translation.
+ *
+ * Same field names as ReverseToken, but `matched` is required here.
+ * NOTE(review): the value of `matched` for non-word tokens is not specified
+ * in this declaration; confirm.
+ */
+interface TranslatedToken {
+    /** Whether this token is a word (true) or punctuation/whitespace (false). */
+    isWord: boolean;
+    /** Whether the word was found in the dictionary (false = heuristic fallback). */
+    matched: boolean;
+    /** The original text of this token (English for forward, Ingglish for reverse). */
+    original: string;
+    /** The translated text (Ingglish for forward, English for reverse). */
+    translated: string;
+}
+// Public surface of the package. The helper types that appear in exported
+// signatures (FormatHandler, FormatOptions, ForwardConverter,
+// ForwardConverterOptions, ReverseTextConverter, ReverseTextWithMappingConverter)
+// are exported as well, so consumers can name them when calling
+// registerFormat / getFormatHandler / arpabetToFormat / createCustomConverter.
+// These are type-only exports and add nothing at runtime.
+export { ARPABET_CONSONANTS, ARPABET_TO_INGGLISH_MAP, ARPABET_VOWELS, type CustomMappingConfig, type FormatHandler, type FormatOptions, type ForwardConverter, type ForwardConverterOptions, type OutputFormat, R_COLORED_FORWARD, type ReverseTextConverter, type ReverseTextWithMappingConverter, type ReverseToken, STRESS_MARKER_REGEX, type TranslatedToken, arpabetPhonemeToIngglish, arpabetToFormat, arpabetToIngglish, createCustomConverter, expandArpabetAlternatives, findOnsetStart, getFormatHandler, getFormatIsLatinScript, getFormatJoinSeparator, getFormatLabel, getFormatNativeLabel, getFormatPreservesCase, getStress, ingglishToArpabet, isVowel, registerFormat, registerPronunciation, stripStress };

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,254 @@
+/**
+ * ARPAbet phoneme utilities and constants.
+ *
+ * ARPAbet is the phonetic notation system used by the CMU Pronouncing Dictionary.
+ * Each English phoneme is represented by a 1-2 letter code; vowels may carry a
+ * trailing stress digit (0=unstressed, 1=primary, 2=secondary).
+ */
+/**
+ * All ARPAbet vowel phonemes (without stress markers).
+ *
+ * Typed as a readonly tuple of 15 string literals, so
+ * `typeof ARPABET_VOWELS[number]` gives the union of bare vowel codes.
+ */
+declare const ARPABET_VOWELS: readonly ["AA", "AE", "AH", "AO", "AW", "AY", "EH", "ER", "EY", "IH", "IY", "OW", "OY", "UH", "UW"];
+/**
+ * All ARPAbet consonant phonemes.
+ *
+ * Typed as a readonly tuple of 24 string literals, so
+ * `typeof ARPABET_CONSONANTS[number]` gives the union of consonant codes.
+ * getStress is documented to return null for consonants.
+ */
+declare const ARPABET_CONSONANTS: readonly ["B", "D", "G", "K", "P", "T", "DH", "F", "HH", "S", "SH", "TH", "V", "Z", "ZH", "CH", "JH", "M", "N", "NG", "L", "R", "W", "Y"];
+/**
+ * Regex pattern to match ARPAbet stress markers (0, 1, 2) at end of phoneme.
+ *
+ * Only the `RegExp` type is visible in this declaration file, not the pattern
+ * itself. getStress and stripStress are documented as using a charCode check
+ * rather than a regex.
+ */
+declare const STRESS_MARKER_REGEX: RegExp;
+/**
+ * Extracts the stress level from a phoneme.
+ * Returns null for consonants or vowels without explicit stress.
+ * Uses charCode check instead of regex (consistent with stripStress).
+ *
+ * @param phoneme ARPAbet phoneme, optionally ending in a stress digit
+ * @returns 0 (unstressed), 1 (primary) or 2 (secondary), or null when the
+ *   phoneme carries no stress digit
+ *
+ * @example
+ * getStress('AH0') // 0
+ * getStress('EY1') // 1
+ * getStress('AO2') // 2
+ * getStress('B')   // null
+ */
+declare function getStress(phoneme: string): 0 | 1 | 2 | null;
+/**
+ * Checks if an ARPAbet phoneme is a vowel.
+ * Vowels can have stress markers (0, 1, 2).
+ *
+ * @param phoneme ARPAbet phoneme; a vowel may include its stress digit
+ * @returns true when the phoneme is a vowel, false otherwise
+ *
+ * @example
+ * isVowel('AH0') // true
+ * isVowel('EY1') // true
+ * isVowel('B')   // false
+ */
+declare function isVowel(phoneme: string): boolean;
+/**
+ * Strips stress markers (0, 1, 2) from a phoneme.
+ * Uses charCode check instead of regex (benchmarked 2x faster).
+ *
+ * @param phoneme ARPAbet phoneme, with or without a trailing stress digit
+ * @returns The phoneme without its stress digit; a phoneme that has no
+ *   stress digit is returned as-is (see the 'B' example)
+ *
+ * @example
+ * stripStress('AH0') // 'AH'
+ * stripStress('EY1') // 'EY'
+ * stripStress('B')   // 'B'
+ */
+declare function stripStress(phoneme: string): string;
+/**
+ * Finds the starting index of the maximal valid onset from a consonant cluster.
+ *
+ * Uses the Maximal Onset Principle: assign as many consonants as possible
+ * to the onset, as long as they form a legal onset cluster. The remaining
+ * consonants become the coda of the previous syllable.
+ *
+ * @param consonants Array of consonant phonemes between two vowels
+ * @returns Index within the consonant array where the valid onset begins;
+ *   consonants before this index belong to the previous syllable's coda
+ *
+ * @example
+ * // "extra" has consonants [K, S, T, R] between vowels
+ * // [S, T, R] is valid onset, so onset starts at index 1
+ * findOnsetStart(['K', 'S', 'T', 'R']) // 1
+ *
+ * // "instruct" has [N, S, T, R]
+ * // [S, T, R] is valid, so onset starts at index 1
+ * findOnsetStart(['N', 'S', 'T', 'R']) // 1
+ */
+declare function findOnsetStart(consonants: string[]): number;
+/**
+ * Describes an output format: optional converters plus display metadata.
+ * Every member is optional. This is the handler type accepted by
+ * registerFormat and returned by getFormatHandler.
+ */
+interface FormatHandler {
+    /** Converts an ARPAbet phoneme array into this format's text. */
+    forward?: ForwardConverter;
+    /** Whether the format uses Latin script characters. */
+    isLatinScript?: boolean;
+    /** Compound word join separator. Default '' */
+    joinSeparator?: string;
+    /** Display name ('Ingglish', 'IPA', etc.) */
+    label?: string;
+    /** Display name in the format's own script (e.g. '𐑖𐑱𐑝𐑾𐑯' for Shavian) */
+    nativeLabel?: string;
+    /** Whether case is preserved (caps, sentence start). Defaults to isLatinScript. */
+    preservesCase?: boolean;
+    /** Reverse converter, string in / string out (presumably format text back to English; confirm). */
+    reverseText?: ReverseTextConverter;
+    /** Reverse converter that returns one ReverseToken per token instead of a plain string. */
+    reverseTextWithMapping?: ReverseTextWithMappingConverter;
+}
+/**
+ * Function that renders an ARPAbet phoneme array as a string in some output
+ * format, optionally tuned by ForwardConverterOptions.
+ */
+type ForwardConverter = (arpabet: string[], options?: ForwardConverterOptions) => string;
+/** Options accepted by a ForwardConverter. */
+interface ForwardConverterOptions {
+    /** Disable English R-coloring rules (vowel+R fusion). Use for foreign text. */
+    disableRColoring?: boolean;
+}
+/**
+ * Token returned by reverse-with-mapping translation.
+ *
+ * NOTE(review): the fields mirror TranslatedToken (declared later in this
+ * file) except that `matched` is optional here. The per-field notes below
+ * assume the same meanings; confirm against the implementation.
+ */
+interface ReverseToken {
+    /** Presumably true for word tokens, false for punctuation/whitespace. */
+    isWord: boolean;
+    /** Optional match flag; presumably whether a dictionary entry was found. */
+    matched?: boolean;
+    /** Presumably the token text as it appeared in the input. */
+    original: string;
+    /** Presumably the token text after reverse translation. */
+    translated: string;
+}
+/** Reverse converter for whole text: takes a string and returns the converted string. */
+type ReverseTextConverter = (text: string) => string;
+/** Reverse converter for whole text that returns one ReverseToken per token rather than a joined string. */
+type ReverseTextWithMappingConverter = (text: string) => ReverseToken[];
+/**
+ * Looks up the handler for a format by name.
+ *
+ * @param name Format name (e.g. 'ingglish', 'ipa')
+ * @returns The handler, or undefined (presumably when no format is
+ *   registered under that name; confirm)
+ */
+declare function getFormatHandler(name: string): FormatHandler | undefined;
+/**
+ * Returns whether a format uses Latin script characters.
+ * Defaults to true for unknown formats (safe for case handling).
+ *
+ * @param name Format name
+ * @returns true for Latin-script formats and for unknown formats
+ */
+declare function getFormatIsLatinScript(name: string): boolean;
+/**
+ * Returns the join separator for compound word parts.
+ * Defaults to '' (no separator).
+ *
+ * @param name Format name
+ * @returns The separator string, or '' when the default applies
+ */
+declare function getFormatJoinSeparator(name: string): string;
+/**
+ * Returns the display label for a format (e.g. 'Ingglish', 'IPA').
+ * Falls back to the raw format name.
+ *
+ * @param name Format name
+ * @returns The display label, or `name` itself when no label is available
+ */
+declare function getFormatLabel(name: string): string;
+/**
+ * Returns the native-script label for a format (e.g. '𐑖𐑱𐑝𐑾𐑯' for Shavian).
+ * Falls back to the standard label, then the raw format name.
+ *
+ * @param name Format name
+ * @returns The native label, else the standard label, else `name`
+ */
+declare function getFormatNativeLabel(name: string): string;
+/**
+ * Returns whether a format preserves case (capitalization, sentence start).
+ * Falls back to isLatinScript, then true for unknown formats.
+ *
+ * @param name Format name
+ * @returns The format's preservesCase setting, else its isLatinScript
+ *   setting, else true
+ */
+declare function getFormatPreservesCase(name: string): boolean;
+/**
+ * Registers a format handler under the given name.
+ *
+ * @param name Format name to register the handler under
+ * @param handler Converters and display metadata for the format
+ *
+ * NOTE(review): what happens when `name` is already registered is not
+ * visible in this declaration; confirm whether it overwrites.
+ */
+declare function registerFormat(name: string, handler: FormatHandler): void;
+/**
+ * Output format for phoneme conversion.
+ *
+ * 'ingglish' and 'ipa' are listed as literals; the `(string & {})` member
+ * accepts any other string (e.g. a custom format name) while keeping the
+ * literals available for editor autocompletion.
+ */
+type OutputFormat = 'ingglish' | 'ipa' | (string & {});
+/**
+ * ARPAbet to Ingglish conversion.
+ *
+ * Converts CMU dictionary phoneme sequences to Ingglish spelling.
+ * Ingglish is a phonetic spelling system that uses only standard
+ * English letters with no ambiguity.
+ */
+/**
+ * Converts a single ARPAbet phoneme to Ingglish spelling.
+ *
+ * @param phoneme ARPAbet phoneme (e.g., "AH0", "EY1", "B")
+ * @returns Ingglish spelling (e.g., "a", "ay", "b")
+ *
+ * @example
+ * arpabetPhonemeToIngglish('AH0') // 'a'
+ * arpabetPhonemeToIngglish('EY1') // 'ay'
+ * arpabetPhonemeToIngglish('B')   // 'b'
+ */
+declare function arpabetPhonemeToIngglish(phoneme: string): string;
+/**
+ * Converts an array of ARPAbet phonemes to Ingglish spelling.
+ * Uses direct loop + string concat (benchmarked 60% faster than map+join).
+ *
+ * R-colored vowels: AA+R → 'ar', AO+R → 'or', IH+R → 'eer'
+ * (more intuitive than 'or'/'awr'/'ir', respectively).
+ *
+ * This function takes no options; arpabetToFormat accepts a
+ * `disableRColoring` option.
+ *
+ * @param arpabet Array of ARPAbet symbols (e.g., ["HH", "AH0", "L", "OW1"])
+ * @returns Ingglish spelling (e.g., "haloh")
+ *
+ * @example
+ * arpabetToIngglish(['HH', 'AH0', 'L', 'OW1']) // 'haloh'
+ */
+declare function arpabetToIngglish(arpabet: string[]): string;
+/**
+ * Options accepted by arpabetToFormat.
+ * Structurally the same as ForwardConverterOptions.
+ */
+interface FormatOptions {
+    /** Disable English R-coloring rules (vowel+R fusion). Use for foreign text. */
+    disableRColoring?: boolean;
+}
+/**
+ * Universal pipeline exit point: converts the ARPAbet IR to any output format.
+ *
+ * Every translation path (dictionary, fallback, G2P) produces an ARPAbet
+ * `string[]` and calls this function to get the final user-facing string.
+ *
+ * @param arpabet Array of ARPAbet symbols (the IR)
+ * @param format Output format (e.g. 'ingglish', 'ipa', 'shavian'). Optional;
+ *   the default is not visible in this declaration (presumably 'ingglish';
+ *   confirm against the implementation)
+ * @param options Conversion options (e.g. disable R-coloring for foreign text)
+ * @returns Formatted string
+ */
+declare function arpabetToFormat(arpabet: string[], format?: OutputFormat, options?: FormatOptions): string;
+/**
+ * Takes no arguments and returns nothing.
+ *
+ * NOTE(review): this declaration does not show what gets registered or
+ * whether repeated calls are safe; the name suggests a one-time setup call
+ * made for its side effect. Confirm against the implementation.
+ */
+declare function registerPronunciation(): void;
+/**
+ * Ingglish to ARPAbet conversion.
+ *
+ * Used to parse Ingglish spellings back to ARPAbet phonemes
+ * for reverse translation (Ingglish -> English).
+ */
+/**
+ * Generates alternative ARPAbet sequences for ambiguous spellings.
+ *
+ * For length-changing alternatives (ER→EH+R, SH→S+HH), generates
+ * single-position substitutions. For same-length alternatives (AE→AH),
+ * also generates an "all-replaced" variant to handle words with multiple
+ * ambiguous vowels (e.g., "difakalt" → D IH F AH K AH L T).
+ *
+ * @param arpabet ARPAbet phoneme sequence to expand
+ * @returns List of alternative sequences, each an array of phonemes.
+ *   NOTE(review): whether the input sequence itself is included in the
+ *   result is not visible in this declaration; confirm.
+ */
+declare function expandArpabetAlternatives(arpabet: string[]): string[][];
+/**
+ * Converts an Ingglish spelling to ARPAbet phonemes.
+ * Uses index-based parsing to avoid intermediate string allocations.
+ *
+ * @param ingglish - Ingglish string (e.g., "haloh" for "hello")
+ * @returns Array of ARPAbet phonemes (e.g., ["HH", "AH", "L", "OW"]), or null if empty.
+ *   The documented example output carries no stress digits.
+ *   NOTE(review): "empty" presumably means empty input or a parse that yields
+ *   no phonemes; confirm which. Callers must handle null either way.
+ */
+declare function ingglishToArpabet(ingglish: string): null | string[];
+/**
+ * Combined ARPAbet to Ingglish map.
+ *
+ * Keys are ARPAbet phonemes, values are Ingglish spellings. Because the type
+ * is `Record<string, string>`, a lookup with an unknown key type-checks as
+ * `string` even though it is undefined at runtime.
+ * NOTE(review): the CustomMappingConfig docs suggest AH0 is a separate key
+ * in addition to the stress-free codes; confirm.
+ */
+declare const ARPABET_TO_INGGLISH_MAP: Record<string, string>;
+/**
+ * Forward lookup: ARPAbet vowel → Ingglish prefix (before 'r').
+ * Used by arpabetToIngglish() to handle VOWEL+R sequences.
+ *
+ * The CustomMappingConfig docs describe these prefixes as keyed by base
+ * vowel (e.g. 'AA'), i.e. without a stress digit.
+ */
+declare const R_COLORED_FORWARD: Map<string, string>;
+/**
+ * Custom format converter builder.
+ *
+ * Creates ARPAbet-to-spelling converters using user-defined phoneme mappings.
+ * Mirrors arpabetToIngglish() logic but uses merged custom mappings.
+ */
+/**
+ * Configuration for a custom phoneme-to-spelling mapping.
+ * Only diffs from the default Ingglish mapping need to be specified.
+ *
+ * Both properties are required by the type, so pass an empty object (`{}`)
+ * for a map that has no overrides.
+ */
+interface CustomMappingConfig {
+    /** Overrides of ARPABET_TO_INGGLISH_MAP. Includes AH0 as a separate key. */
+    phonemeMap: Record<string, string>;
+    /** Overrides of R_COLORED_FORWARD prefixes (keyed by base vowel, e.g. 'AA'). */
+    rColoredPrefixes: Record<string, string>;
+}
+/**
+ * Creates a forward converter function from a custom mapping config.
+ *
+ * The converter checks phonemes in this order:
+ * 1. Exact phoneme with stress digit (e.g., AH0, EY0) in phonemeMap
+ * 2. Stress-stripped base (e.g., AH, EY) in phonemeMap
+ * 3. Default ARPABET_TO_INGGLISH_MAP
+ * 4. Lowercase phoneme as fallback
+ *
+ * @param config Custom phoneme and R-colored prefix overrides
+ * @returns A converter with the same signature as ForwardConverter, so it
+ *   can be used as a FormatHandler's `forward` member
+ */
+declare function createCustomConverter(config: CustomMappingConfig): (arpabet: string[], options?: ForwardConverterOptions) => string;
+/**
+ * A single token from a translated text, preserving the mapping between
+ * original and translated forms. Used by both forward and reverse translation.
+ *
+ * Same field names as ReverseToken, but `matched` is required here.
+ * NOTE(review): the value of `matched` for non-word tokens is not specified
+ * in this declaration; confirm.
+ */
+interface TranslatedToken {
+    /** Whether this token is a word (true) or punctuation/whitespace (false). */
+    isWord: boolean;
+    /** Whether the word was found in the dictionary (false = heuristic fallback). */
+    matched: boolean;
+    /** The original text of this token (English for forward, Ingglish for reverse). */
+    original: string;
+    /** The translated text (Ingglish for forward, English for reverse). */
+    translated: string;
+}
+// Public surface of the package. The helper types that appear in exported
+// signatures (FormatHandler, FormatOptions, ForwardConverter,
+// ForwardConverterOptions, ReverseTextConverter, ReverseTextWithMappingConverter)
+// are exported as well, so consumers can name them when calling
+// registerFormat / getFormatHandler / arpabetToFormat / createCustomConverter.
+// These are type-only exports and add nothing at runtime.
+export { ARPABET_CONSONANTS, ARPABET_TO_INGGLISH_MAP, ARPABET_VOWELS, type CustomMappingConfig, type FormatHandler, type FormatOptions, type ForwardConverter, type ForwardConverterOptions, type OutputFormat, R_COLORED_FORWARD, type ReverseTextConverter, type ReverseTextWithMappingConverter, type ReverseToken, STRESS_MARKER_REGEX, type TranslatedToken, arpabetPhonemeToIngglish, arpabetToFormat, arpabetToIngglish, createCustomConverter, expandArpabetAlternatives, findOnsetStart, getFormatHandler, getFormatIsLatinScript, getFormatJoinSeparator, getFormatLabel, getFormatNativeLabel, getFormatPreservesCase, getStress, ingglishToArpabet, isVowel, registerFormat, registerPronunciation, stripStress };