phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
phoonnx/__init__.py ADDED
File without changes
phoonnx/config.py ADDED
@@ -0,0 +1,490 @@
1
+ import json
2
+ from dataclasses import dataclass, field
3
+ from enum import Enum
4
+ from typing import Any, Mapping, Optional, Sequence
5
+ from phoonnx.phoneme_ids import (load_phoneme_ids, BlankBetween,
6
+ DEFAULT_BLANK_WORD_TOKEN, DEFAULT_BLANK_TOKEN,
7
+ DEFAULT_PAD_TOKEN, DEFAULT_BOS_TOKEN, DEFAULT_EOS_TOKEN)
8
+
9
+
10
# Fallback inference settings, used by VoiceConfig when a model config
# does not provide its own values under the "inference" section.
DEFAULT_NOISE_SCALE = 0.667
DEFAULT_LENGTH_SCALE = 1.0
DEFAULT_NOISE_W_SCALE = 0.8

# Prefer the ovos_utils logger when that optional package is installed;
# otherwise fall back to a standard-library logger named after this module.
try:
    from ovos_utils.log import LOG
except ImportError:
    import logging
    LOG = logging.getLogger(__name__)
19
+
20
+
21
class Alphabet(str, Enum):
    """Symbol inventories a phonemizer can emit.

    Members are plain strings (``str`` mixin), so they compare equal to
    their lowercase value, e.g. ``Alphabet.IPA == "ipa"``.
    """

    # language-independent
    UNICODE = "unicode"
    IPA = "ipa"

    # English
    ARPA = "arpa"

    # Korean
    HANGUL = "hangul"

    # Japanese scripts
    KANA = "kana"
    HIRA = "hira"

    # Japanese romanizations
    HEPBURN = "hepburn"
    KUNREI = "kunrei"
    NIHON = "nihon"

    # Chinese (romanized)
    PINYIN = "pinyin"

    # Persian
    ERAAB = "eraab"

    # Galician
    COTOVIA = "cotovia"

    # Chinese (characters)
    HANZI = "hanzi"
35
+
36
+
37
+
38
class PhonemeType(str, Enum):
    """Identifiers of the supported phonemization strategies.

    Members are plain strings (``str`` mixin), so a value read from a JSON
    config compares equal to the matching member.
    """

    # --- no phonemizer model involved ---
    RAW = "raw"  # input already consists of phonemes
    UNICODE = "unicode"  # unicode codepoints
    GRAPHEMES = "graphemes"  # text characters

    # --- general purpose phonemizers ---
    MISAKI = "misaki"
    ESPEAK = "espeak"
    GRUUT = "gruut"
    EPITRAN = "epitran"
    BYT5 = "byt5"
    # technically the same as byt5, but whitespace needs special handling
    CHARSIU = "charsiu"

    # --- English ---
    DEEPPHONEMIZER = "deepphonemizer"
    OPENPHONEMIZER = "openphonemizer"
    G2PEN = "g2pen"

    # --- language specific ---
    G2PFA = "g2pfa"
    OPENJTALK = "openjtalk"  # Japanese
    CUTLET = "cutlet"  # Japanese
    PYKAKASI = "pykakasi"  # Japanese
    COTOVIA = "cotovia"  # Galician (output is not IPA!)
    PHONIKUD = "phonikud"  # Hebrew
    MANTOQ = "mantoq"  # Arabic
    VIPHONEME = "viphoneme"  # Vietnamese
    G2PK = "g2pk"  # Korean
    KOG2PK = "kog2p"  # Korean
    G2PC = "g2pc"  # Chinese
    G2PM = "g2pm"  # Chinese
    PYPINYIN = "pypinyin"  # Chinese
    XPINYIN = "xpinyin"  # Chinese
    JIEBA = "jieba"  # Chinese (not a real phonemizer!)
69
+
70
+
71
@dataclass
class VoiceConfig:
    """TTS model configuration.

    Instances are normally built with :meth:`from_dict`, which recognises
    Piper, Mimic3 and Coqui VITS config layouts (see the ``is_*`` helpers)
    and otherwise reads the keys directly from the config.
    """

    num_symbols: int
    """Number of phonemes."""

    num_speakers: int
    """Number of speakers."""

    num_langs: int
    """Number of langs."""

    sample_rate: int
    """Sample rate of output audio."""

    lang_code: Optional[str]
    """Name of espeak-ng voice or alphabet."""

    phoneme_id_map: Optional[Mapping[str, Sequence[int]]]
    """Phoneme -> [id,]. Used for phoneme-based models."""

    phoneme_type: PhonemeType
    """espeak, byt5, text, cotovia, or graphemes."""

    alphabet: Optional[Alphabet]

    phonemizer_model: Optional[str]
    """for phonemizers that allow changing base model """

    speaker_id_map: Mapping[str, int] = field(default_factory=dict)
    """Speaker -> id"""

    lang_id_map: Mapping[str, int] = field(default_factory=dict)
    """lang-code -> id"""

    # Inference settings
    length_scale: float = DEFAULT_LENGTH_SCALE
    noise_scale: float = DEFAULT_NOISE_SCALE
    noise_w_scale: float = DEFAULT_NOISE_W_SCALE

    # tokenization settings
    blank_at_start: bool = True
    blank_at_end: bool = True
    include_whitespace: Optional[bool] = True
    pad_token: Optional[str] = DEFAULT_PAD_TOKEN
    # NOTE(review): defaults to the PAD token although DEFAULT_BLANK_TOKEN is
    # imported and used for Piper configs in from_dict -- confirm intended.
    blank_token: Optional[str] = DEFAULT_PAD_TOKEN
    bos_token: Optional[str] = DEFAULT_BOS_TOKEN
    eos_token: Optional[str] = DEFAULT_EOS_TOKEN
    word_sep_token: Optional[str] = DEFAULT_BLANK_WORD_TOKEN
    blank_between: BlankBetween = BlankBetween.TOKENS_AND_WORDS

    def __post_init__(self):
        # "und" stands in for a missing/empty language code
        self.lang_code = self.lang_code or "und"

    @staticmethod
    def is_mimic3(config: dict[str, Any]) -> bool:
        """Return True if ``config`` has the layout of a Mimic3 model config.

        Requires a string ``"phonemizer"`` entry naming one of the Mimic3
        strategies and a dict ``"phonemes"`` section.
        See https://huggingface.co/mukowaty/mimic3-voices
        """
        # mimic3 models indicate a phonemizer strategy in their config
        if not isinstance(config.get("phonemizer"), str):
            return False

        # mimic3 models include a "phonemes" section with token info
        if not isinstance(config.get("phonemes"), dict):
            return False

        # validate phonemizer type as expected by mimic3
        return config["phonemizer"] in ("symbols", "gruut", "espeak", "epitran")

    @staticmethod
    def is_piper(config: dict[str, Any]) -> bool:
        """Return True if ``config`` has the layout of a Piper model config.

        A ``"piper_version"`` key is sufficient on its own. Otherwise the
        config needs a string ``"phoneme_type"`` of ``"text"`` or
        ``"espeak"`` and a dict ``"phoneme_id_map"``.
        """
        if "piper_version" in config:
            return True

        # piper models indicate a phonemizer strategy in their config
        if not isinstance(config.get("phoneme_type"), str):
            return False

        # piper models include a "phoneme_id_map" section mapping phonemes to int
        if not isinstance(config.get("phoneme_id_map"), dict):
            return False

        # validate phonemizer type as expected by piper
        return config["phoneme_type"] in ("text", "espeak")

    @staticmethod
    def is_coqui_vits(config: dict[str, Any]) -> bool:
        """Return True if ``config`` has the layout of a Coqui VITS config.

        Requires a dict ``"characters"`` section whose ``"characters_class"``
        names one of the known Coqui character classes.
        """
        # coqui vits grapheme models include a "characters" section with token info
        characters = config.get("characters")
        if not isinstance(characters, dict):
            return False

        # double check this was trained with coqui
        return characters.get("characters_class", "") in (
            "TTS.tts.models.vits.VitsCharacters",
            "TTS.tts.utils.text.characters.Graphemes",
        )

    @staticmethod
    def is_phoonnx(config: dict[str, Any]) -> bool:
        """Return True if ``config`` has the layout of a phoonnx config.

        Requires a ``"lang_code"`` key and a string ``"phoneme_type"`` whose
        value is a member of :class:`PhonemeType`.
        """
        # phoonnx models indicate a phonemizer strategy in their config
        if not isinstance(config.get("phoneme_type"), str):
            return False

        if "lang_code" not in config:
            return False

        # validate phonemizer type as expected
        return config["phoneme_type"] in list(PhonemeType)

    @staticmethod
    def is_cotovia(config: dict[str, Any]) -> bool:
        """Return True if ``config`` is a Coqui VITS model using cotovia.

        There is no way to detect this unless it is explicitly configured:
        the config must pass both :meth:`is_coqui_vits` and
        :meth:`is_phoonnx` and declare ``"phoneme_type": "cotovia"``.
        Without that, such a model is handled as Coqui "graphemes", which
        works but makes mistakes.
        """
        # afaik only the sabela galician model uses this
        if not (VoiceConfig.is_coqui_vits(config)
                and VoiceConfig.is_phoonnx(config)):
            return False

        return config["phoneme_type"] == PhonemeType.COTOVIA

    @staticmethod
    def _coqui_phoneme_id_map(characters_config: Mapping[str, Any]) -> dict[str, int]:
        """Build the symbol -> index map of a Coqui VITS "characters" section.

        Symbols are laid out as [PAD, PUNCTUATIONS, CHARACTERS, BLANK]; each
        part is skipped when missing/empty. If a symbol occurs more than
        once, the index of its last occurrence is kept.
        """
        vocab: list[str] = []

        pad = characters_config.get("pad")
        if pad is not None:
            vocab.append(pad)

        # bos/eos are deliberately NOT added to the vocabulary
        # (no coqui model seen so far includes them)

        punctuations = characters_config.get("punctuations")
        if punctuations:
            vocab.extend(punctuations)

        characters = characters_config.get("characters")
        if characters:
            vocab.extend(characters)

        blank = characters_config.get("blank")
        if blank is not None:
            vocab.append(blank)

        # NOTE(review): values are plain ints here, while the phoneme_id_map
        # field is annotated as Mapping[str, Sequence[int]] -- confirm that
        # consumers accept both shapes.
        return {symbol: idx for idx, symbol in enumerate(vocab)}

    @staticmethod
    def from_dict(config: dict[str, Any],
                  phonemes_txt: Optional[str] = None,
                  lang_code: Optional[str] = None,
                  phoneme_type_str: Optional[str] = None) -> "VoiceConfig":
        """Load configuration from a dictionary.

        Args:
            config: parsed model config (Piper, Mimic3, Coqui VITS or
                phoonnx layout). It is not modified.
            phonemes_txt: optional path to an external phoneme id map,
                either a ``.txt`` file (parsed with ``load_phoneme_ids``)
                or a ``.json`` file. Required for Mimic3 configs.
            lang_code: optional language code override.
            phoneme_type_str: optional phoneme type, used when the config
                matches none of the Piper/Mimic3/Coqui layouts.

        Returns:
            The resulting :class:`VoiceConfig`.

        Raises:
            ValueError: if a Mimic3 config is given without ``phonemes_txt``
                or if no valid phoneme type could be determined.
        """
        # Work on copies: the branches below write resolved token settings
        # back into the dict, which must not leak into the caller's config
        # (it would e.g. replace the Coqui "characters" section by a string
        # and break format detection on a second call).
        config = dict(config)
        explicit_lang_code = lang_code

        blank_type = BlankBetween.TOKENS_AND_WORDS
        lang_code = lang_code or config.get("lang_code")
        phoneme_type_str = phoneme_type_str or config.get("phoneme_type")
        phoneme_id_map = config.get("phoneme_id_map")
        alphabet = config.get("alphabet")

        if phonemes_txt:
            if phonemes_txt.endswith(".txt"):
                # either from mimic3 models or as an override at runtime
                with open(phonemes_txt, "r", encoding="utf-8") as ids_file:
                    phoneme_id_map = load_phoneme_ids(ids_file)
            elif phonemes_txt.endswith(".json"):
                # explicit encoding: phoneme symbols are non-ASCII
                with open(phonemes_txt, "r", encoding="utf-8") as ids_file:
                    phoneme_id_map = json.load(ids_file)

        # NOTE(review): the three format branches below derive the phoneme
        # type from the config and ignore the phoneme_type_str argument --
        # confirm whether an explicit override should win there as well.

        # check if model was trained for PiperTTS
        if VoiceConfig.is_piper(config):
            lang_code = lang_code or (config.get("language", {}).get("code") or
                                      config.get("espeak", {}).get("voice"))
            phoneme_type_str = config.get("phoneme_type", PhonemeType.ESPEAK.value)
            if phoneme_type_str == "text":
                phoneme_type_str = PhonemeType.UNICODE.value
                alphabet = Alphabet.UNICODE
            else:
                alphabet = Alphabet.IPA

            # not configurable in piper
            config["pad"] = DEFAULT_PAD_TOKEN
            config["blank"] = DEFAULT_BLANK_TOKEN
            config["bos"] = DEFAULT_BOS_TOKEN
            config["eos"] = DEFAULT_EOS_TOKEN

        # check if model was trained for Mimic3
        elif VoiceConfig.is_mimic3(config):
            if not phonemes_txt:
                raise ValueError("mimic3 models require an external phonemes.txt file in addition to the config")
            # an explicitly passed lang_code wins, as in the other branches
            lang_code = explicit_lang_code or config.get("text_language")
            phoneme_type_str = config.get("phonemizer", PhonemeType.GRUUT.value)
            # read phoneme settings
            phoneme_cfg = config.get("phonemes", {})
            blank_type = BlankBetween(phoneme_cfg.get("blank_between", "tokens_and_words"))
            config.update(phoneme_cfg)

            if phoneme_type_str == "symbols":
                # Mimic3 "symbols" models are grapheme models
                # symbol map comes from phonemes_txt
                phoneme_type_str = PhonemeType.GRAPHEMES.value
                alphabet = Alphabet.UNICODE
            else:
                alphabet = Alphabet.IPA

        # check if model was trained with Coqui
        # NOTE: cotovia is included here
        elif VoiceConfig.is_coqui_vits(config):
            if VoiceConfig.is_cotovia(config):
                phoneme_type_str = PhonemeType.COTOVIA.value
                alphabet = Alphabet.COTOVIA
            else:
                phoneme_type_str = PhonemeType.GRAPHEMES.value
                alphabet = Alphabet.UNICODE

            # NOTE: lang code usually not provided and often wrong :(
            datasets = config.get("datasets", [])
            if datasets and not lang_code:
                lang_code = datasets[0].get("language")

            # copy: the section still belongs to the caller's config
            characters_config = dict(config.get("characters", {}))
            if config.get("add_blank", True):
                blank_type = BlankBetween.TOKENS
                characters_config["blank"] = characters_config.get("blank") or "<BLNK>"
            # flatten the section so pad/blank/bos/eos are read below;
            # this also turns config["characters"] into the character string
            config.update(characters_config)

            if not config.get("enable_eos_bos_chars", True):
                config["bos"] = config["eos"] = None

            # For Coqui VITS grapheme models, build phoneme_id_map from characters
            phoneme_id_map = VoiceConfig._coqui_phoneme_id_map(characters_config)

        if phoneme_type_str is None:
            raise ValueError(
                "could not determine the phoneme type: the config has no "
                "'phoneme_type' and no phoneme_type_str was given")
        phoneme_type = PhonemeType(phoneme_type_str)
        LOG.debug(f"phonemizer: {phoneme_type}")
        inference = config.get("inference", {})

        # "or" guards against keys that are present but null
        # NOTE(review): only the config is inspected here, not a map loaded
        # from phonemes_txt -- confirm that is intended.
        include_whitespace = (" " in (config.get("characters") or "")
                              or " " in (config.get("phoneme_id_map") or {}))
        return VoiceConfig(
            num_langs=config.get("num_langs", 1),
            num_symbols=config.get("num_symbols", 256),
            num_speakers=config.get("num_speakers", 1),
            sample_rate=config.get("audio", {}).get("sample_rate", 16000),
            noise_scale=inference.get("noise_scale", DEFAULT_NOISE_SCALE),
            length_scale=inference.get("length_scale", DEFAULT_LENGTH_SCALE),
            noise_w_scale=inference.get("noise_w", DEFAULT_NOISE_W_SCALE),
            lang_code=lang_code,
            alphabet=alphabet,
            phonemizer_model=config.get("phonemizer_model"),
            phoneme_id_map=phoneme_id_map,
            phoneme_type=phoneme_type,
            speaker_id_map=config.get("speaker_id_map", {}),
            # NOTE(review): key name assumed by analogy with speaker_id_map
            lang_id_map=config.get("lang_id_map") or {},
            blank_between=blank_type,
            include_whitespace=include_whitespace,
            blank_at_start=config.get("blank_at_start", True),
            blank_at_end=config.get("blank_at_end", True),
            pad_token=config.get("pad"),
            blank_token=config.get("blank"),
            bos_token=config.get("bos"),
            eos_token=config.get("eos"),
            word_sep_token=config.get("word_sep_token") or config.get("blank_word", " ")
        )
354
+
355
+
356
@dataclass
class SynthesisConfig:
    """Per-request synthesis options.

    The speaker/lang ids and the three scale fields default to ``None``.
    """

    # Index of speaker to use (multi-speaker voices only).
    speaker_id: Optional[int] = None

    # Index of lang to use (multi-lang voices only).
    lang_id: Optional[int] = None

    # Phoneme length scale (< 1 is faster, > 1 is slower).
    length_scale: Optional[float] = None

    # Amount of generator noise to add.
    noise_scale: Optional[float] = None

    # Amount of phoneme width noise to add.
    noise_w_scale: Optional[float] = None

    # Enable/disable scaling audio samples to fit full range.
    normalize_audio: bool = True

    # Multiplier for audio samples (< 1 is quieter, > 1 is louder).
    volume: float = 1.0

    # NOTE(review): presumably toggles the locale phonetic_spellings.txt
    # replacements -- the consumer is outside this file, confirm.
    enable_phonetic_spellings: bool = True
382
+
383
+
384
def get_phonemizer(phoneme_type: PhonemeType,
                   alphabet: Alphabet = Alphabet.IPA,
                   model: Optional[str] = None) -> 'Phonemizer':
    """Instantiate the phonemizer that implements ``phoneme_type``.

    Args:
        phoneme_type: phonemization strategy to build.
        alphabet: forwarded to the phonemizers that take an ``alphabet``
            keyword (see the table below); ignored by the others.
        model: forwarded to the ByT5, Charsiu and DeepPhonemizer backends;
            ignored by the others.

    Returns:
        A new phonemizer instance.

    Raises:
        ValueError: if ``phoneme_type`` matches no known strategy.
    """
    # imported lazily, inside the function, rather than at module import time
    from phoonnx.phonemizers import (EpitranPhonemizer, EspeakPhonemizer, OpenPhonemizer, OpenJTaklPhonemizer,
                                     ByT5Phonemizer, CharsiuPhonemizer, DeepPhonemizer, PersianPhonemizer,
                                     G2pCPhonemizer, G2pMPhonemizer, G2PKPhonemizer, G2PEnPhonemizer,
                                     GruutPhonemizer, GraphemePhonemizer, MantoqPhonemizer, MisakiPhonemizer,
                                     KoG2PPhonemizer, PypinyinPhonemizer, PyKakasiPhonemizer, CotoviaPhonemizer,
                                     CutletPhonemizer, PhonikudPhonemizer, VIPhonemePhonemizer, XpinyinPhonemizer,
                                     UnicodeCodepointPhonemizer, JiebaPhonemizer, RawPhonemes)

    # Ordered (type, zero-argument factory) pairs. Matching uses "==" so
    # that plain strings such as "espeak" are accepted as well; a dict
    # lookup would not do, since a str-mixin Enum member and its plain
    # string value hash differently.
    factories = (
        (PhonemeType.ESPEAK, EspeakPhonemizer),
        (PhonemeType.BYT5, lambda: ByT5Phonemizer(model)),
        (PhonemeType.CHARSIU, lambda: CharsiuPhonemizer(model)),
        (PhonemeType.GRUUT, GruutPhonemizer),
        (PhonemeType.EPITRAN, EpitranPhonemizer),
        (PhonemeType.MISAKI, MisakiPhonemizer),
        (PhonemeType.DEEPPHONEMIZER, lambda: DeepPhonemizer(model)),
        (PhonemeType.OPENPHONEMIZER, OpenPhonemizer),
        (PhonemeType.G2PEN, lambda: G2PEnPhonemizer(alphabet=alphabet)),
        (PhonemeType.OPENJTALK, lambda: OpenJTaklPhonemizer(alphabet=alphabet)),
        (PhonemeType.PYKAKASI, lambda: PyKakasiPhonemizer(alphabet=alphabet)),
        (PhonemeType.CUTLET, lambda: CutletPhonemizer(alphabet=alphabet)),
        (PhonemeType.G2PFA, lambda: PersianPhonemizer(alphabet=alphabet)),
        (PhonemeType.PHONIKUD, PhonikudPhonemizer),
        (PhonemeType.MANTOQ, MantoqPhonemizer),
        (PhonemeType.VIPHONEME, VIPhonemePhonemizer),
        (PhonemeType.KOG2PK, lambda: KoG2PPhonemizer(alphabet=alphabet)),
        (PhonemeType.G2PK, lambda: G2PKPhonemizer(alphabet=alphabet)),
        (PhonemeType.PYPINYIN, lambda: PypinyinPhonemizer(alphabet=alphabet)),
        (PhonemeType.XPINYIN, lambda: XpinyinPhonemizer(alphabet=alphabet)),
        (PhonemeType.JIEBA, JiebaPhonemizer),
        (PhonemeType.G2PC, lambda: G2pCPhonemizer(alphabet=alphabet)),
        (PhonemeType.G2PM, lambda: G2pMPhonemizer(alphabet=alphabet)),
        (PhonemeType.COTOVIA, CotoviaPhonemizer),
        (PhonemeType.UNICODE, UnicodeCodepointPhonemizer),
        (PhonemeType.GRAPHEMES, GraphemePhonemizer),
        (PhonemeType.RAW, RawPhonemes),
    )

    for candidate, make_phonemizer in factories:
        if phoneme_type == candidate:
            return make_phonemizer()

    raise ValueError("invalid phonemizer")
451
+
452
+
453
+
454
if __name__ == "__main__":
    # Manual smoke test: parse a set of sample model configs and print
    # which format each one is detected as, followed by the resulting
    # VoiceConfig.
    # Each entry pairs a config file with its external phoneme map, or
    # None when no external map is passed.
    samples = [
        ("/home/miro/PycharmProjects/phoonnx_tts/sabela_cotovia_vits.json",
         None),
        ("/home/miro/PycharmProjects/phoonnx_tts/celtia_vits.json",
         None),
        ("/home/miro/PycharmProjects/phoonnx_tts/mimic3_gruut.json",
         "/home/miro/PycharmProjects/phoonnx_tts/mimic3_ap/phonemes.txt"),
        ("/home/miro/PycharmProjects/phoonnx_tts/mimic3_espeak.json",
         "/home/miro/PycharmProjects/phoonnx_tts/mimic3_ap/phonemes.txt"),
        ("/home/miro/PycharmProjects/phoonnx_tts/mimic3_epitran.json",
         "/home/miro/PycharmProjects/phoonnx_tts/mimic3_ap/phonemes.txt"),
        ("/home/miro/PycharmProjects/phoonnx_tts/mimic3_symbols.json",
         "/home/miro/PycharmProjects/phoonnx_tts/mimic3_ap/phonemes.txt"),
        ("/home/miro/PycharmProjects/phoonnx_tts/piper_espeak.json",
         None),
        ("/home/miro/PycharmProjects/phoonnx_tts/vits-coqui-pt-cv/config.json",
         None),
        ("/home/miro/PycharmProjects/phoonnx_tts/phonikud/model.config.json",
         None),
    ]

    # label -> detector, in the order the results are printed
    detectors = (
        ("Mimic3:", VoiceConfig.is_mimic3),
        ("Piper:", VoiceConfig.is_piper),
        ("Coqui:", VoiceConfig.is_coqui_vits),
        ("Cotovia:", VoiceConfig.is_cotovia),
        ("Phoonx:", VoiceConfig.is_phoonnx),
    )

    print("Testing model config file parsing\n###############")
    for config_path, phonemes_path in samples:
        print(f"\nConfig file: {config_path}")
        with open(config_path) as handle:
            raw_config = json.load(handle)
        for label, detect in detectors:
            print(label, detect(raw_config))
        print(VoiceConfig.from_dict(raw_config, phonemes_path))
489
+
490
+
@@ -0,0 +1,2 @@
1
+ OpenVoiceOS: oupen vois ou es
2
+ mycroft: maicroft
@@ -0,0 +1 @@
1
+ OpenVoiceOS: Open Voice O S
@@ -0,0 +1,2 @@
1
+ OpenVoiceOS: oupen vois ou es
2
+ mycroft: maicroft
@@ -0,0 +1,2 @@
1
+ OpenVoiceOS: oupen voice ou s
2
+ mycroft: mái croft