phoonnx-0.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phoonnx/__init__.py +0 -0
- phoonnx/config.py +490 -0
- phoonnx/locale/ca/phonetic_spellings.txt +2 -0
- phoonnx/locale/en/phonetic_spellings.txt +1 -0
- phoonnx/locale/gl/phonetic_spellings.txt +2 -0
- phoonnx/locale/pt/phonetic_spellings.txt +2 -0
- phoonnx/phoneme_ids.py +453 -0
- phoonnx/phonemizers/__init__.py +45 -0
- phoonnx/phonemizers/ar.py +42 -0
- phoonnx/phonemizers/base.py +216 -0
- phoonnx/phonemizers/en.py +250 -0
- phoonnx/phonemizers/fa.py +46 -0
- phoonnx/phonemizers/gl.py +142 -0
- phoonnx/phonemizers/he.py +67 -0
- phoonnx/phonemizers/ja.py +119 -0
- phoonnx/phonemizers/ko.py +97 -0
- phoonnx/phonemizers/mul.py +606 -0
- phoonnx/phonemizers/vi.py +44 -0
- phoonnx/phonemizers/zh.py +308 -0
- phoonnx/thirdparty/__init__.py +0 -0
- phoonnx/thirdparty/arpa2ipa.py +249 -0
- phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- phoonnx/thirdparty/hangul2ipa.py +783 -0
- phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
- phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
- phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
- phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
- phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
- phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
- phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
- phoonnx/thirdparty/ko_tables/yale.csv +22 -0
- phoonnx/thirdparty/kog2p/__init__.py +385 -0
- phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
- phoonnx/thirdparty/mantoq/__init__.py +67 -0
- phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
- phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
- phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
- phoonnx/thirdparty/mantoq/num2words.py +37 -0
- phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
- phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
- phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
- phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
- phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
- phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
- phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
- phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
- phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
- phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
- phoonnx/thirdparty/tashkeel/LICENSE +22 -0
- phoonnx/thirdparty/tashkeel/SOURCE +1 -0
- phoonnx/thirdparty/tashkeel/__init__.py +212 -0
- phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
- phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
- phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
- phoonnx/thirdparty/zh_num.py +238 -0
- phoonnx/util.py +705 -0
- phoonnx/version.py +6 -0
- phoonnx/voice.py +521 -0
- phoonnx-0.0.0.dist-info/METADATA +255 -0
- phoonnx-0.0.0.dist-info/RECORD +86 -0
- phoonnx-0.0.0.dist-info/WHEEL +5 -0
- phoonnx-0.0.0.dist-info/top_level.txt +2 -0
- phoonnx_train/__main__.py +151 -0
- phoonnx_train/export_onnx.py +109 -0
- phoonnx_train/norm_audio/__init__.py +92 -0
- phoonnx_train/norm_audio/trim.py +54 -0
- phoonnx_train/norm_audio/vad.py +54 -0
- phoonnx_train/preprocess.py +420 -0
- phoonnx_train/vits/__init__.py +0 -0
- phoonnx_train/vits/attentions.py +427 -0
- phoonnx_train/vits/commons.py +147 -0
- phoonnx_train/vits/config.py +330 -0
- phoonnx_train/vits/dataset.py +214 -0
- phoonnx_train/vits/lightning.py +352 -0
- phoonnx_train/vits/losses.py +58 -0
- phoonnx_train/vits/mel_processing.py +139 -0
- phoonnx_train/vits/models.py +732 -0
- phoonnx_train/vits/modules.py +527 -0
- phoonnx_train/vits/monotonic_align/__init__.py +20 -0
- phoonnx_train/vits/monotonic_align/setup.py +13 -0
- phoonnx_train/vits/transforms.py +212 -0
- phoonnx_train/vits/utils.py +16 -0
- phoonnx_train/vits/wavfile.py +860 -0
phoonnx/phoneme_ids.py
ADDED
@@ -0,0 +1,453 @@
"""Utilities for converting phonemes to ids."""

from collections.abc import Sequence
from enum import Enum
from typing import Optional, TextIO, Dict, List, Union, Set, Mapping

try:
    from ovos_utils.log import LOG
except ImportError:
    import logging

    LOG = logging.getLogger(__name__)
    LOG.setLevel("DEBUG")

PHONEME_ID_LIST = List[int]
PHONEME_ID_MAP = Dict[str, int]
PHONEME_LIST = List[str]
PHONEME_WORD_LIST = List[PHONEME_LIST]

DEFAULT_IPA_PHONEME_ID_MAP: Dict[str, PHONEME_ID_LIST] = {
    "_": [0],
    "^": [1],
    "$": [2],
    " ": [3],
    "!": [4],
    "'": [5],
    "(": [6],
    ")": [7],
    ",": [8],
    "-": [9],
    ".": [10],
    ":": [11],
    ";": [12],
    "?": [13],
    "a": [14],
    "b": [15],
    "c": [16],
    "d": [17],
    "e": [18],
    "f": [19],
    "h": [20],
    "i": [21],
    "j": [22],
    "k": [23],
    "l": [24],
    "m": [25],
    "n": [26],
    "o": [27],
    "p": [28],
    "q": [29],
    "r": [30],
    "s": [31],
    "t": [32],
    "u": [33],
    "v": [34],
    "w": [35],
    "x": [36],
    "y": [37],
    "z": [38],
    "æ": [39],
    "ç": [40],
    "ð": [41],
    "ø": [42],
    "ħ": [43],
    "ŋ": [44],
    "œ": [45],
    "ǀ": [46],
    "ǁ": [47],
    "ǂ": [48],
    "ǃ": [49],
    "ɐ": [50],
    "ɑ": [51],
    "ɒ": [52],
    "ɓ": [53],
    "ɔ": [54],
    "ɕ": [55],
    "ɖ": [56],
    "ɗ": [57],
    "ɘ": [58],
    "ə": [59],
    "ɚ": [60],
    "ɛ": [61],
    "ɜ": [62],
    "ɞ": [63],
    "ɟ": [64],
    "ɠ": [65],
    "ɡ": [66],
    "ɢ": [67],
    "ɣ": [68],
    "ɤ": [69],
    "ɥ": [70],
    "ɦ": [71],
    "ɧ": [72],
    "ɨ": [73],
    "ɪ": [74],
    "ɫ": [75],
    "ɬ": [76],
    "ɭ": [77],
    "ɮ": [78],
    "ɯ": [79],
    "ɰ": [80],
    "ɱ": [81],
    "ɲ": [82],
    "ɳ": [83],
    "ɴ": [84],
    "ɵ": [85],
    "ɶ": [86],
    "ɸ": [87],
    "ɹ": [88],
    "ɺ": [89],
    "ɻ": [90],
    "ɽ": [91],
    "ɾ": [92],
    "ʀ": [93],
    "ʁ": [94],
    "ʂ": [95],
    "ʃ": [96],
    "ʄ": [97],
    "ʈ": [98],
    "ʉ": [99],
    "ʊ": [100],
    "ʋ": [101],
    "ʌ": [102],
    "ʍ": [103],
    "ʎ": [104],
    "ʏ": [105],
    "ʐ": [106],
    "ʑ": [107],
    "ʒ": [108],
    "ʔ": [109],
    "ʕ": [110],
    "ʘ": [111],
    "ʙ": [112],
    "ʛ": [113],
    "ʜ": [114],
    "ʝ": [115],
    "ʟ": [116],
    "ʡ": [117],
    "ʢ": [118],
    "ʲ": [119],
    "ˈ": [120],
    "ˌ": [121],
    "ː": [122],
    "ˑ": [123],
    "˞": [124],
    "β": [125],
    "θ": [126],
    "χ": [127],
    "ᵻ": [128],
    "ⱱ": [129],
    "0": [130],
    "1": [131],
    "2": [132],
    "3": [133],
    "4": [134],
    "5": [135],
    "6": [136],
    "7": [137],
    "8": [138],
    "9": [139],
    "̧": [140],
    "̃": [141],
    "̪": [142],
    "̯": [143],
    "̩": [144],
    "ʰ": [145],
    "ˤ": [146],
    "ε": [147],
    "↓": [148],
    "#": [149],
    '"': [150],
    "↑": [151],
    "̺": [152],
    "̻": [153],
    "g": [154],
    "ʦ": [155],
    "X": [156],
    "̝": [157],
    "̊": [158],
    "ɝ": [159],
    "ʷ": [160],
}

DEFAULT_PAD_TOKEN = DEFAULT_BLANK_TOKEN = "_"  # padding (0)
DEFAULT_BOS_TOKEN = "^"  # beginning of sentence
DEFAULT_EOS_TOKEN = "$"  # end of sentence
DEFAULT_BLANK_WORD_TOKEN = " "  # padding between words

STRESS: Set[str] = {"ˈ", "ˌ"}
"""Default stress characters"""

PUNCTUATION_MAP: Mapping[str, str] = {";": ",", ":": ",", "?": ".", "!": "."}
"""Default punctuation simplification into short (,) and long (.) pauses"""


class BlankBetween(str, Enum):
    """Placement of blank tokens"""

    TOKENS = "tokens"
    """Blank between every token/phoneme"""

    WORDS = "words"
    """Blank between every word"""

    TOKENS_AND_WORDS = "tokens_and_words"
    """Blank between every token/phoneme and every word (may be different symbols)"""


def phonemes_to_ids(
    phonemes: PHONEME_LIST,
    id_map: Optional[Mapping[str, Union[int, Sequence[int]]]] = None,
    blank_token: Optional[str] = DEFAULT_BLANK_TOKEN,
    bos_token: Optional[str] = DEFAULT_BOS_TOKEN,
    eos_token: Optional[str] = DEFAULT_EOS_TOKEN,
    word_sep_token: Optional[str] = DEFAULT_BLANK_WORD_TOKEN,
    include_whitespace: Optional[bool] = True,
    blank_at_start: bool = True,
    blank_at_end: bool = True,
    blank_between: BlankBetween = BlankBetween.TOKENS_AND_WORDS,
) -> PHONEME_ID_LIST:
    """Phonemes to ids."""
    if not phonemes:
        return []
    if not id_map:
        id_map = DEFAULT_IPA_PHONEME_ID_MAP

    # compat with piper-style mappings that use lists
    id_map = {k: v if isinstance(v, list) else [v]
              for k, v in id_map.items()}

    ids: list[int] = []
    blank_id = blank_token if isinstance(blank_token, int) \
        else id_map.get(blank_token, [len(id_map)]) if blank_token \
        else [len(id_map)]
    eos_id = eos_token if isinstance(eos_token, int) \
        else id_map.get(eos_token, [len(id_map)]) if eos_token \
        else [len(id_map)]
    bos_id = bos_token if isinstance(bos_token, int) \
        else id_map.get(bos_token, [len(id_map)]) if bos_token \
        else [len(id_map)]

    if bos_token is not None:
        ids.extend(bos_id)
    if blank_token is not None and blank_at_start:
        ids.extend(blank_id)

    blank_between_tokens = (blank_token is not None and
                            blank_between in [BlankBetween.TOKENS, BlankBetween.TOKENS_AND_WORDS])
    blank_between_words = (blank_token is not None and
                           blank_between in [BlankBetween.WORDS, BlankBetween.TOKENS_AND_WORDS])

    # first pre-process the phoneme map to check for diphthongs that have their own phoneme id
    # (common in mimic3 models)
    compound_phonemes = sorted((k for k in id_map if len(k) > 1), key=len, reverse=True)
    i = 0
    while i < len(phonemes):
        matched = False

        # Try to match compound phonemes starting at index i
        for compound in compound_phonemes:
            n = len(compound)
            joined = ''.join(phonemes[i:i + n])
            if joined == compound:
                ids.extend(id_map[compound])
                if blank_between_tokens and i + n < len(phonemes):
                    ids.extend(blank_id)
                i += n
                matched = True
                break

        if matched:
            continue

        phoneme = phonemes[i]
        if phoneme not in id_map:
            if phoneme == " " and not include_whitespace:
                i += 1
                continue
            LOG.warning("Missing phoneme from id map: %s", phoneme)
            i += 1
            continue

        if phoneme == " ":
            if include_whitespace:
                ids.extend(id_map[phoneme])
                if blank_between_tokens:
                    ids.extend(blank_id)
            elif blank_between_words:
                ids.extend(id_map[word_sep_token])
                if blank_between_tokens:
                    ids.extend(blank_id)
        else:
            ids.extend(id_map[phoneme])
            if blank_between_tokens and i < len(phonemes) - 1:
                ids.extend(blank_id)
        i += 1

    if blank_token is not None and blank_at_end:
        if not include_whitespace and word_sep_token and blank_between_words:
            if blank_between_tokens:
                ids.extend(blank_id)
            ids.extend(id_map[word_sep_token])
            if blank_between_tokens:
                ids.extend(blank_id)
        else:
            ids.extend(blank_id)
    if eos_token is not None:
        ids.extend(eos_id)

    return ids


def load_phoneme_ids(phonemes_file: TextIO) -> PHONEME_ID_MAP:
    """
    Load phoneme id mapping from a text file.
    Format is ID<space>PHONEME
    Comments start with #

    Args:
        phonemes_file: text file

    Returns:
        dict with phoneme -> id
    """
    phoneme_to_id = {}
    for line in phonemes_file:
        line = line.strip("\r\n")
        if (not line) or line.startswith("#") or (" " not in line):
            # Exclude blank lines, comments, or malformed lines
            continue

        if line.strip().isdigit():  # phoneme is whitespace
            phoneme_str = " "
            phoneme_id = int(line)
        else:
            phoneme_id, phoneme_str = line.split(" ", maxsplit=1)
            if phoneme_str.isdigit():
                phoneme_id, phoneme_str = phoneme_str, phoneme_id

        phoneme_to_id[phoneme_str] = int(phoneme_id)

    return phoneme_to_id


def load_phoneme_map(phoneme_map_file: TextIO) -> Dict[str, List[str]]:
    """
    Load phoneme/phoneme mapping from a text file.
    Format is FROM_PHONEME<space>TO_PHONEME[<space>TO_PHONEME...]
    Comments start with #

    Args:
        phoneme_map_file: text file

    Returns:
        dict with from_phoneme -> [to_phoneme, to_phoneme, ...]
    """
    phoneme_map = {}
    for line in phoneme_map_file:
        line = line.strip("\r\n")
        if (not line) or line.startswith("#") or (" " not in line):
            # Exclude blank lines, comments, or malformed lines
            continue

        from_phoneme, to_phonemes_str = line.split(" ", maxsplit=1)
        if not to_phonemes_str.strip():
            # To whitespace
            phoneme_map[from_phoneme] = [" "]
        else:
            # To one or more non-whitespace phonemes
            phoneme_map[from_phoneme] = to_phonemes_str.split()

    return phoneme_map


if __name__ == "__main__":
    phoneme_ids_path = "/home/miro/PycharmProjects/phoonnx_tts/mimic3_ap/phonemes.txt"
    with open(phoneme_ids_path, "r", encoding="utf-8") as ids_file:
        phoneme_to_id = load_phoneme_ids(ids_file)
    print(phoneme_to_id)

    phoneme_map_path = "/home/miro/PycharmProjects/phoonnx_tts/mimic3_ap/phoneme_map.txt"
    with open(phoneme_map_path, "r", encoding="utf-8") as map_file:
        phoneme_map = load_phoneme_map(map_file)
    # print(phoneme_map)

    from phoonnx.phonemizers import EspeakPhonemizer

    # test original mimic3 code
    from phonemes2ids import phonemes2ids as mimic3_phonemes2ids

    # test original piper code
    from piper.phoneme_ids import phonemes_to_ids as piper_phonemes_to_ids

    espeak = EspeakPhonemizer()
    phone_str: str = espeak.phonemize_string("hello world", "en")

    phones: PHONEME_LIST = list(phone_str)
    phone_words: PHONEME_WORD_LIST = [list(w) for w in phone_str.split()]
    print(phone_str)
    print(phones)  # piper style
    print(phone_words)  # mimic3 style

    mapping = {k: v[0] for k, v in DEFAULT_IPA_PHONEME_ID_MAP.items()}
    print("\n#### piper (tokens_and_words + include_whitespace)")
    print("reference", piper_phonemes_to_ids(phones))
    print("phonnx ", phonemes_to_ids(phones,
                                     id_map=mapping, include_whitespace=True))

    print("\n#### mimic3 (words)")
    print("reference", mimic3_phonemes2ids(phone_words,
                                           mapping,
                                           bos=DEFAULT_BOS_TOKEN,
                                           eos=DEFAULT_EOS_TOKEN,
                                           blank=DEFAULT_PAD_TOKEN,
                                           blank_at_end=True,
                                           blank_at_start=True,
                                           blank_word=DEFAULT_BLANK_WORD_TOKEN,
                                           blank_between=BlankBetween.WORDS,
                                           auto_bos_eos=True))
    print("phonnx ", phonemes_to_ids(phones,
                                     id_map=mapping,
                                     include_whitespace=False,
                                     blank_between=BlankBetween.WORDS))

    print("\n#### mimic3 (tokens)")
    print("reference", mimic3_phonemes2ids(phone_words,
                                           mapping,
                                           bos=DEFAULT_BOS_TOKEN,
                                           eos=DEFAULT_EOS_TOKEN,
                                           blank=DEFAULT_PAD_TOKEN,
                                           blank_at_end=True,
                                           blank_at_start=True,
                                           blank_word=DEFAULT_BLANK_WORD_TOKEN,
                                           blank_between=BlankBetween.TOKENS,
                                           auto_bos_eos=True))
    print("phonnx ", phonemes_to_ids(phones,
                                     id_map=mapping,
                                     include_whitespace=False,
                                     blank_between=BlankBetween.TOKENS))

    print("\n#### mimic3 (tokens_and_words)")
    print("reference", mimic3_phonemes2ids(phone_words,
                                           mapping,
                                           bos=DEFAULT_BOS_TOKEN,
                                           eos=DEFAULT_EOS_TOKEN,
                                           blank=DEFAULT_PAD_TOKEN,
                                           blank_at_end=True,
                                           blank_at_start=True,
                                           blank_word=DEFAULT_BLANK_WORD_TOKEN,
                                           blank_between=BlankBetween.TOKENS_AND_WORDS,
                                           auto_bos_eos=True))
    print("phonnx ", phonemes_to_ids(phones,
                                     id_map=mapping,
                                     include_whitespace=False,
                                     blank_between=BlankBetween.TOKENS_AND_WORDS))
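For reference, a minimal usage sketch (not part of the package) of `phonemes_to_ids` with the default IPA map: the BOS marker `^` (id 1) and EOS marker `$` (id 2) frame the sequence, and the pad/blank `_` (id 0) is interleaved between tokens under the default `blank_between=TOKENS_AND_WORDS` setting.

# usage sketch, assuming phoonnx is installed; ids follow DEFAULT_IPA_PHONEME_ID_MAP
from phoonnx.phoneme_ids import phonemes_to_ids

ids = phonemes_to_ids(["h", "ə", "l"])
print(ids)  # expected: [1, 0, 20, 0, 59, 0, 24, 0, 2]  ->  ^ _ h _ ə _ l _ $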
phoonnx/phonemizers/__init__.py
ADDED
@@ -0,0 +1,45 @@
from typing import Union

from phoonnx.phonemizers.base import BasePhonemizer, UnicodeCodepointPhonemizer, RawPhonemes, GraphemePhonemizer, TextChunks, RawPhonemizedChunks
from phoonnx.phonemizers.en import DeepPhonemizer, OpenPhonemizer, G2PEnPhonemizer
from phoonnx.phonemizers.gl import CotoviaPhonemizer
from phoonnx.phonemizers.vi import VIPhonemePhonemizer
from phoonnx.phonemizers.he import PhonikudPhonemizer
from phoonnx.phonemizers.ar import MantoqPhonemizer
from phoonnx.phonemizers.fa import PersianPhonemizer
from phoonnx.phonemizers.ja import PyKakasiPhonemizer, CutletPhonemizer, OpenJTaklPhonemizer
from phoonnx.phonemizers.ko import KoG2PPhonemizer, G2PKPhonemizer
from phoonnx.phonemizers.zh import (G2pCPhonemizer, G2pMPhonemizer, PypinyinPhonemizer,
                                    XpinyinPhonemizer, JiebaPhonemizer)
from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer,
                                     GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer)

Phonemizer = Union[
    MisakiPhonemizer,
    ByT5Phonemizer,
    UnicodeCodepointPhonemizer,
    CharsiuPhonemizer,
    EspeakPhonemizer,
    GruutPhonemizer,
    EpitranPhonemizer,
    OpenJTaklPhonemizer,
    CutletPhonemizer,
    PyKakasiPhonemizer,
    PersianPhonemizer,
    VIPhonemePhonemizer,
    G2PKPhonemizer,
    KoG2PPhonemizer,
    G2pCPhonemizer,
    G2pMPhonemizer,
    PypinyinPhonemizer,
    XpinyinPhonemizer,
    JiebaPhonemizer,
    PhonikudPhonemizer,
    CotoviaPhonemizer,
    MantoqPhonemizer,
    GraphemePhonemizer,
    RawPhonemes,
    OpenPhonemizer,
    G2PEnPhonemizer,
    DeepPhonemizer
]
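As an illustration (a sketch, not part of the package), any class in the `Phonemizer` union can be paired with `phonemes_to_ids` from `phoonnx.phoneme_ids`, mirroring the self-test at the bottom of `phoneme_ids.py`; the espeak backend assumes espeak-ng and its Python bindings are available.

# illustrative sketch: phonemizer output fed into the id mapper
from phoonnx.phonemizers import EspeakPhonemizer
from phoonnx.phoneme_ids import phonemes_to_ids

espeak = EspeakPhonemizer()
phone_str = espeak.phonemize_string("hello world", "en")  # IPA string, words separated by spaces
ids = phonemes_to_ids(list(phone_str))  # piper-style: one token per codepoint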
phoonnx/phonemizers/ar.py
ADDED
@@ -0,0 +1,42 @@
from phoonnx.phonemizers.base import BasePhonemizer
from phoonnx.thirdparty.mantoq import g2p as mantoq
from phoonnx.config import Alphabet


class MantoqPhonemizer(BasePhonemizer):

    def __init__(self):
        super().__init__(Alphabet.IPA)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if an invalid language is provided
        return cls.match_lang(target_lang, ["ar"])

    def phonemize_string(self, text: str, lang: str = "ar") -> str:
        """Phonemize Arabic text with mantoq and return the joined phoneme string."""
        lang = self.get_lang(lang)
        normalized_text, phonemes = mantoq(text)
        return "".join(phonemes).replace("_+_", " ")


if __name__ == "__main__":
    text = "مرحبا بالعالم"
    # gets normalized to
    # مَرْحَبًا بِالْعالَم

    pho = MantoqPhonemizer()
    print(pho.phonemize(text, "ar"))
    # [('m a r H a b a n aa b i l E aa l a m', '.', True)]