@ingglish/g2p 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ import { OutputFormat } from '@ingglish/phonemes';
2
+
3
+ /**
4
+ * NRL/Elovitz letter-to-sound rules for grapheme-to-phoneme conversion.
5
+ *
6
+ * Based on: Elovitz, H., Johnson, R., McHugh, A., and Shore, J. (1976)
7
+ * "Automatic translation of English text to phonetics by means of
8
+ * letter-to-sound rules." NRL Report 7948.
9
+ *
10
+ * ~960 context-sensitive rules that examine left and right letter context
11
+ * to determine pronunciation. ~150 are general spelling rules (silent E,
12
+ * digraphs, vowel teams, C/G softening, suffixes). The other ~800 are
13
+ * narrow context patches and whole-word memorizations for the cases where
14
+ * those rules break down.
15
+ *
16
+ * Each rule has the form:
17
+ *
18
+ * leftContext[TARGET]rightContext=/PHONEMES/
19
+ *
20
+ * Special context symbols:
21
+ * # = 1+ vowels (AEIOUY)
22
+ * . = voiced consonant (BDVGJLMNRWZ)
23
+ * % = suffix (ER, E, ES, ED, ING, ELY)
24
+ * & = sibilant (S, C, G, Z, X, J, CH, SH)
25
+ * @ = non-palate consonant (T, S, R, D, L, Z, N, J, TH, CH, SH)
26
+ * ^ = single consonant
27
+ * + = front vowel (E, I, Y)
28
+ * : = 0+ consonants
29
+ * ' ' = word boundary (space)
30
+ */
31
+
32
+ /** Result of wordToArpabetTraced: the phonemes for a word plus a trace of which rules fired. */ interface G2PTrace {
33
+ /** The phonemes produced for the word. */ phonemes: string[];
34
+ /** Trace of which rules fired (NOTE(review): presumably in firing order — confirm). */ steps: G2PTraceStep[];
35
+ }
36
+ /** One entry in the `steps` list of a G2PTrace. */ interface G2PTraceStep {
37
+ /** NOTE(review): presumably the letters of the word consumed by this step — confirm. */ letters: string;
38
+ /** NOTE(review): presumably the phonemes emitted by this step — confirm. */ phonemes: string[];
39
+ /** NOTE(review): presumably identifies the rule that fired, possibly in the leftContext[TARGET]rightContext=/PHONEMES/ notation — confirm. */ rule: string;
40
+ }
41
+ /**
42
+ * Converts a word to ARPAbet using NRL context-sensitive rules. See wordToArpabetTraced for a variant that also reports which rules fired.
43
+ *
44
+ * @param word The word to convert
45
+ * @returns Array of ARPAbet phonemes (NOTE(review): whether stress prediction has already been applied to them is not visible in this declaration — confirm)
46
+ */
47
+ declare function wordToArpabet(word: string): string[];
48
+ /**
49
+ * Converts a word to ARPAbet using NRL context-sensitive rules,
50
+ * returning both the phonemes and a trace of which rules fired.
51
+ * `word` is the word to convert; the result is a G2PTrace carrying `phonemes` and `steps`. */
52
+ declare function wordToArpabetTraced(word: string): G2PTrace;
53
+ /**
54
+ * Translates an unknown word using NRL letter-to-sound rules.
55
+ * This is a fallback when the word isn't in the dictionary.
56
+ *
57
+ * @param word The unknown word
58
+ * @param format The output format, an OutputFormat from '@ingglish/phonemes'; optional (NOTE(review): the default used when omitted is not visible in this declaration — confirm)
59
+ * @returns The best-effort translation, as a single string
60
+ */
61
+ declare function wordToPhonetic(word: string, format?: OutputFormat): string;
62
+
63
+ /**
64
+ * Stress prediction post-processor for G2P rules.
65
+ *
66
+ * The NRL rules assign stress-1 to ALL non-schwa vowels, meaning multi-syllable
67
+ * words get every vowel stressed. This module predicts which syllable gets
68
+ * primary stress using Rastle & Coltheart-style affix rules, then reduces
69
+ * unstressed vowels to schwa where appropriate.
70
+ */
71
+ /**
72
+ * Apply stress prediction to a phoneme array produced by NRL rules, returning the adjusted phoneme array.
73
+ * NOTE(review): `word` is presumably the spelling that `phonemes` was generated from, and whether `phonemes` is mutated in place is not visible in this declaration — confirm.
74
+ * - Finds all vowel positions (phonemes ending in 0/1/2)
75
+ * - Predicts which syllable gets primary stress
76
+ * - Reduces unstressed reducible vowels to AH0 (schwa)
77
+ * - Marks other unstressed vowels with 0
78
+ */
79
+ declare function applyStressPrediction(word: string, phonemes: string[]): string[];
80
+
81
+ export { type G2PTrace, type G2PTraceStep, applyStressPrediction, wordToArpabet, wordToArpabetTraced, wordToPhonetic };
@@ -0,0 +1,81 @@
1
+ import { OutputFormat } from '@ingglish/phonemes';
2
+
3
+ /**
4
+ * NRL/Elovitz letter-to-sound rules for grapheme-to-phoneme conversion.
5
+ *
6
+ * Based on: Elovitz, H., Johnson, R., McHugh, A., and Shore, J. (1976)
7
+ * "Automatic translation of English text to phonetics by means of
8
+ * letter-to-sound rules." NRL Report 7948.
9
+ *
10
+ * ~960 context-sensitive rules that examine left and right letter context
11
+ * to determine pronunciation. ~150 are general spelling rules (silent E,
12
+ * digraphs, vowel teams, C/G softening, suffixes). The other ~800 are
13
+ * narrow context patches and whole-word memorizations for the cases where
14
+ * those rules break down.
15
+ *
16
+ * Each rule has the form:
17
+ *
18
+ * leftContext[TARGET]rightContext=/PHONEMES/
19
+ *
20
+ * Special context symbols:
21
+ * # = 1+ vowels (AEIOUY)
22
+ * . = voiced consonant (BDVGJLMNRWZ)
23
+ * % = suffix (ER, E, ES, ED, ING, ELY)
24
+ * & = sibilant (S, C, G, Z, X, J, CH, SH)
25
+ * @ = non-palate consonant (T, S, R, D, L, Z, N, J, TH, CH, SH)
26
+ * ^ = single consonant
27
+ * + = front vowel (E, I, Y)
28
+ * : = 0+ consonants
29
+ * ' ' = word boundary (space)
30
+ */
31
+
32
+ /** Result of wordToArpabetTraced: the phonemes for a word plus a trace of which rules fired. */ interface G2PTrace {
33
+ /** The phonemes produced for the word. */ phonemes: string[];
34
+ /** Trace of which rules fired (NOTE(review): presumably in firing order — confirm). */ steps: G2PTraceStep[];
35
+ }
36
+ /** One entry in the `steps` list of a G2PTrace. */ interface G2PTraceStep {
37
+ /** NOTE(review): presumably the letters of the word consumed by this step — confirm. */ letters: string;
38
+ /** NOTE(review): presumably the phonemes emitted by this step — confirm. */ phonemes: string[];
39
+ /** NOTE(review): presumably identifies the rule that fired, possibly in the leftContext[TARGET]rightContext=/PHONEMES/ notation — confirm. */ rule: string;
40
+ }
41
+ /**
42
+ * Converts a word to ARPAbet using NRL context-sensitive rules. See wordToArpabetTraced for a variant that also reports which rules fired.
43
+ *
44
+ * @param word The word to convert
45
+ * @returns Array of ARPAbet phonemes (NOTE(review): whether stress prediction has already been applied to them is not visible in this declaration — confirm)
46
+ */
47
+ declare function wordToArpabet(word: string): string[];
48
+ /**
49
+ * Converts a word to ARPAbet using NRL context-sensitive rules,
50
+ * returning both the phonemes and a trace of which rules fired.
51
+ * `word` is the word to convert; the result is a G2PTrace carrying `phonemes` and `steps`. */
52
+ declare function wordToArpabetTraced(word: string): G2PTrace;
53
+ /**
54
+ * Translates an unknown word using NRL letter-to-sound rules.
55
+ * This is a fallback when the word isn't in the dictionary.
56
+ *
57
+ * @param word The unknown word
58
+ * @param format The output format, an OutputFormat from '@ingglish/phonemes'; optional (NOTE(review): the default used when omitted is not visible in this declaration — confirm)
59
+ * @returns The best-effort translation, as a single string
60
+ */
61
+ declare function wordToPhonetic(word: string, format?: OutputFormat): string;
62
+
63
+ /**
64
+ * Stress prediction post-processor for G2P rules.
65
+ *
66
+ * The NRL rules assign stress-1 to ALL non-schwa vowels, meaning multi-syllable
67
+ * words get every vowel stressed. This module predicts which syllable gets
68
+ * primary stress using Rastle & Coltheart-style affix rules, then reduces
69
+ * unstressed vowels to schwa where appropriate.
70
+ */
71
+ /**
72
+ * Apply stress prediction to a phoneme array produced by NRL rules, returning the adjusted phoneme array.
73
+ * NOTE(review): `word` is presumably the spelling that `phonemes` was generated from, and whether `phonemes` is mutated in place is not visible in this declaration — confirm.
74
+ * - Finds all vowel positions (phonemes ending in 0/1/2)
75
+ * - Predicts which syllable gets primary stress
76
+ * - Reduces unstressed reducible vowels to AH0 (schwa)
77
+ * - Marks other unstressed vowels with 0
78
+ */
79
+ declare function applyStressPrediction(word: string, phonemes: string[]): string[];
80
+
81
+ export { type G2PTrace, type G2PTraceStep, applyStressPrediction, wordToArpabet, wordToArpabetTraced, wordToPhonetic };