@ingglish/g2p 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1392 -0
- package/dist/index.d.cts +81 -0
- package/dist/index.d.ts +81 -0
- package/dist/index.js +1362 -0
- package/package.json +61 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { OutputFormat } from '@ingglish/phonemes';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* NRL/Elovitz letter-to-sound rules for grapheme-to-phoneme conversion.
|
|
5
|
+
*
|
|
6
|
+
* Based on: Elovitz, H., Johnson, R., McHugh, A., and Shore, J. (1976)
|
|
7
|
+
* "Automatic translation of English text to phonetics by means of
|
|
8
|
+
* letter-to-sound rules." NRL Report 7948.
|
|
9
|
+
*
|
|
10
|
+
* ~960 context-sensitive rules that examine left and right letter context
|
|
11
|
+
* to determine pronunciation. ~150 are general spelling rules (silent E,
|
|
12
|
+
* digraphs, vowel teams, C/G softening, suffixes). The other ~800 are
|
|
13
|
+
* narrow context patches and whole-word memorizations for the cases where
|
|
14
|
+
* those rules break down.
|
|
15
|
+
*
|
|
16
|
+
* Each rule has the form:
|
|
17
|
+
*
|
|
18
|
+
* leftContext[TARGET]rightContext=/PHONEMES/
|
|
19
|
+
*
|
|
20
|
+
* Special context symbols:
|
|
21
|
+
* # = 1+ vowels (AEIOUY)
|
|
22
|
+
* . = voiced consonant (BDVGJLMNRWZ)
|
|
23
|
+
* % = suffix (ER, E, ES, ED, ING, ELY)
|
|
24
|
+
* & = sibilant (S, C, G, Z, X, J, CH, SH)
|
|
25
|
+
* @ = non-palate consonant (T, S, R, D, L, Z, N, J, TH, CH, SH)
|
|
26
|
+
* ^ = single consonant
|
|
27
|
+
* + = front vowel (E, I, Y)
|
|
28
|
+
* : = 0+ consonants
|
|
29
|
+
* ' ' = word boundary (space)
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
interface G2PTrace {
|
|
33
|
+
phonemes: string[];
|
|
34
|
+
steps: G2PTraceStep[];
|
|
35
|
+
}
|
|
36
|
+
interface G2PTraceStep {
|
|
37
|
+
letters: string;
|
|
38
|
+
phonemes: string[];
|
|
39
|
+
rule: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Converts a word to ARPAbet using NRL context-sensitive rules.
|
|
43
|
+
*
|
|
44
|
+
* @param word The word to convert
|
|
45
|
+
* @returns Array of ARPAbet phonemes
|
|
46
|
+
*/
|
|
47
|
+
declare function wordToArpabet(word: string): string[];
|
|
48
|
+
/**
|
|
49
|
+
* Converts a word to ARPAbet using NRL context-sensitive rules,
|
|
50
|
+
* returning both the phonemes and a trace of which rules fired.
|
|
51
|
+
*/
|
|
52
|
+
declare function wordToArpabetTraced(word: string): G2PTrace;
|
|
53
|
+
/**
|
|
54
|
+
* Translates an unknown word using NRL letter-to-sound rules.
|
|
55
|
+
* This is a fallback when the word isn't in the dictionary.
|
|
56
|
+
*
|
|
57
|
+
* @param word The unknown word
|
|
58
|
+
* @param format The output format
|
|
59
|
+
* @returns The best-effort translation
|
|
60
|
+
*/
|
|
61
|
+
declare function wordToPhonetic(word: string, format?: OutputFormat): string;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Stress prediction post-processor for G2P rules.
|
|
65
|
+
*
|
|
66
|
+
* The NRL rules assign stress-1 to ALL non-schwa vowels, meaning multi-syllable
|
|
67
|
+
* words get every non-schwa vowel stressed. This module predicts which syllable gets
|
|
68
|
+
* primary stress using Rastle & Coltheart-style affix rules, then reduces
|
|
69
|
+
* unstressed vowels to schwa where appropriate.
|
|
70
|
+
*/
|
|
71
|
+
/**
|
|
72
|
+
* Apply stress prediction to a phoneme array produced by NRL rules.
|
|
73
|
+
*
|
|
74
|
+
* - Finds all vowel positions (phonemes ending in 0/1/2)
|
|
75
|
+
* - Predicts which syllable gets primary stress
|
|
76
|
+
* - Reduces unstressed reducible vowels to AH0 (schwa)
|
|
77
|
+
* - Marks other unstressed vowels with stress digit 0
|
|
78
|
+
*/
|
|
79
|
+
declare function applyStressPrediction(word: string, phonemes: string[]): string[];
|
|
80
|
+
|
|
81
|
+
export { type G2PTrace, type G2PTraceStep, applyStressPrediction, wordToArpabet, wordToArpabetTraced, wordToPhonetic };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { OutputFormat } from '@ingglish/phonemes';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* NRL/Elovitz letter-to-sound rules for grapheme-to-phoneme conversion.
|
|
5
|
+
*
|
|
6
|
+
* Based on: Elovitz, H., Johnson, R., McHugh, A., and Shore, J. (1976)
|
|
7
|
+
* "Automatic translation of English text to phonetics by means of
|
|
8
|
+
* letter-to-sound rules." NRL Report 7948.
|
|
9
|
+
*
|
|
10
|
+
* ~960 context-sensitive rules that examine left and right letter context
|
|
11
|
+
* to determine pronunciation. ~150 are general spelling rules (silent E,
|
|
12
|
+
* digraphs, vowel teams, C/G softening, suffixes). The other ~800 are
|
|
13
|
+
* narrow context patches and whole-word memorizations for the cases where
|
|
14
|
+
* those rules break down.
|
|
15
|
+
*
|
|
16
|
+
* Each rule has the form:
|
|
17
|
+
*
|
|
18
|
+
* leftContext[TARGET]rightContext=/PHONEMES/
|
|
19
|
+
*
|
|
20
|
+
* Special context symbols:
|
|
21
|
+
* # = 1+ vowels (AEIOUY)
|
|
22
|
+
* . = voiced consonant (BDVGJLMNRWZ)
|
|
23
|
+
* % = suffix (ER, E, ES, ED, ING, ELY)
|
|
24
|
+
* & = sibilant (S, C, G, Z, X, J, CH, SH)
|
|
25
|
+
* @ = non-palate consonant (T, S, R, D, L, Z, N, J, TH, CH, SH)
|
|
26
|
+
* ^ = single consonant
|
|
27
|
+
* + = front vowel (E, I, Y)
|
|
28
|
+
* : = 0+ consonants
|
|
29
|
+
* ' ' = word boundary (space)
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
interface G2PTrace {
|
|
33
|
+
phonemes: string[];
|
|
34
|
+
steps: G2PTraceStep[];
|
|
35
|
+
}
|
|
36
|
+
interface G2PTraceStep {
|
|
37
|
+
letters: string;
|
|
38
|
+
phonemes: string[];
|
|
39
|
+
rule: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Converts a word to ARPAbet using NRL context-sensitive rules.
|
|
43
|
+
*
|
|
44
|
+
* @param word The word to convert
|
|
45
|
+
* @returns Array of ARPAbet phonemes
|
|
46
|
+
*/
|
|
47
|
+
declare function wordToArpabet(word: string): string[];
|
|
48
|
+
/**
|
|
49
|
+
* Converts a word to ARPAbet using NRL context-sensitive rules,
|
|
50
|
+
* returning both the phonemes and a trace of which rules fired.
|
|
51
|
+
*/
|
|
52
|
+
declare function wordToArpabetTraced(word: string): G2PTrace;
|
|
53
|
+
/**
|
|
54
|
+
* Translates an unknown word using NRL letter-to-sound rules.
|
|
55
|
+
* This is a fallback when the word isn't in the dictionary.
|
|
56
|
+
*
|
|
57
|
+
* @param word The unknown word
|
|
58
|
+
* @param format The output format
|
|
59
|
+
* @returns The best-effort translation
|
|
60
|
+
*/
|
|
61
|
+
declare function wordToPhonetic(word: string, format?: OutputFormat): string;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Stress prediction post-processor for G2P rules.
|
|
65
|
+
*
|
|
66
|
+
* The NRL rules assign stress-1 to ALL non-schwa vowels, meaning multi-syllable
|
|
67
|
+
* words get every non-schwa vowel stressed. This module predicts which syllable gets
|
|
68
|
+
* primary stress using Rastle & Coltheart-style affix rules, then reduces
|
|
69
|
+
* unstressed vowels to schwa where appropriate.
|
|
70
|
+
*/
|
|
71
|
+
/**
|
|
72
|
+
* Apply stress prediction to a phoneme array produced by NRL rules.
|
|
73
|
+
*
|
|
74
|
+
* - Finds all vowel positions (phonemes ending in 0/1/2)
|
|
75
|
+
* - Predicts which syllable gets primary stress
|
|
76
|
+
* - Reduces unstressed reducible vowels to AH0 (schwa)
|
|
77
|
+
* - Marks other unstressed vowels with stress digit 0
|
|
78
|
+
*/
|
|
79
|
+
declare function applyStressPrediction(word: string, phonemes: string[]): string[];
|
|
80
|
+
|
|
81
|
+
export { type G2PTrace, type G2PTraceStep, applyStressPrediction, wordToArpabet, wordToArpabetTraced, wordToPhonetic };
|