phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,216 @@
1
+ import abc
2
+ import re
3
+ import string
4
+ import unicodedata
5
+ from typing import List, Tuple, Optional, Literal
6
+
7
+ from langcodes import tag_distance
8
+ from quebra_frases import sentence_tokenize
9
+ from phoonnx.config import Alphabet
10
+ from phoonnx.util import normalize
11
+
12
# Each entry is a (substring, terminator, end_of_sentence) tuple.
TextChunks = List[Tuple[str, str, bool]]
# Each entry is a (phonemes, terminator, end_of_sentence) tuple.
RawPhonemizedChunks = List[Tuple[str, str, bool]]

# One inner list of single-character phoneme symbols per emitted group.
PhonemizedChunks = list[list[str]]
18
+
19
+
20
class BasePhonemizer(metaclass=abc.ABCMeta):
    """Abstract base class for phonemizers.

    Subclasses implement :meth:`phonemize_string`. This class provides the
    shared pipeline around it: normalization, splitting into chunks,
    punctuation removal and grouping of the resulting phoneme characters.
    """

    def __init__(self, alphabet: Alphabet = Alphabet.UNICODE):
        """Store the alphabet this phonemizer emits."""
        super().__init__()
        self.alphabet = alphabet

    @abc.abstractmethod
    def phonemize_string(self, text: str, lang: str) -> str:
        """Convert ``text`` to a phoneme string for language ``lang``."""
        raise NotImplementedError

    def phonemize_to_list(self, text: str, lang: str) -> List[str]:
        """Return the output of :meth:`phonemize_string` split into characters."""
        return list(self.phonemize_string(text, lang))

    def phonemize(self, text: str, lang: str) -> PhonemizedChunks:
        """Phonemize ``text`` chunk by chunk.

        Args:
            text (str): The text to phonemize.
            lang (str): The language code handed to ``normalize`` and to
                :meth:`phonemize_string`.

        Returns:
            PhonemizedChunks: One list of phoneme characters per chunk.
            Empty input yields a single empty group.
        """
        if not text:
            # Keep the declared list-of-lists shape for empty input instead of
            # leaking the raw (phonemes, terminator, eos) tuple to callers.
            return self._process_phones([('', '', True)])
        results: RawPhonemizedChunks = []
        text = normalize(text, lang)
        for chunk, punct, _eos in self.chunk_text(text):
            phoneme_str = self.phonemize_string(self.remove_punctuation(chunk), lang)
            # NOTE(review): the chunker's end-of-sentence flag is not used;
            # every chunk is marked as a sentence end, so each chunk becomes
            # its own group. Confirm this is intended.
            results.append((phoneme_str, punct, True))
        return self._process_phones(results)

    @staticmethod
    def _process_phones(raw_phones: RawPhonemizedChunks) -> PhonemizedChunks:
        """Text to phonemes grouped by sentence.

        The terminator of each raw chunk is not included in the output.
        """
        all_phonemes: list[list[str]] = []
        sentence_phonemes: list[str] = []
        for phonemes_str, _terminator_str, end_of_sentence in raw_phones:
            # Filter out (lang) switch (flags).
            # These surround words from languages other than the current voice.
            phonemes_str = re.sub(r"\([^)]+\)", "", phonemes_str)
            sentence_phonemes.extend(list(phonemes_str))
            if end_of_sentence:
                all_phonemes.append(sentence_phonemes)
                sentence_phonemes = []
        # Flush a trailing group that was never closed by an end-of-sentence flag.
        if sentence_phonemes:
            all_phonemes.append(sentence_phonemes)
        return all_phonemes

    @staticmethod
    def match_lang(target_lang: str, valid_langs: List[str]) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.
            valid_langs (List[str]): The language codes that are supported.

        Returns:
            str: ``target_lang`` itself when it is listed in ``valid_langs``,
            otherwise the closest candidate (possibly shortened to its first
            two subtags when the full code could not be compared).

        Raises:
            ValueError: If the language code is unsupported.
        """
        if target_lang in valid_langs:
            return target_lang
        best_lang = "und"
        best_distance = 10000000
        for candidate in valid_langs:
            # Try the code as given, then its first two subtags, then the bare
            # first subtag; skip the candidate if none can be compared.
            # ``except Exception`` (not a bare except) so that
            # KeyboardInterrupt / SystemExit still propagate.
            try:
                distance: int = tag_distance(candidate, target_lang)
            except Exception:
                try:
                    candidate = f"{candidate.split('-')[0]}-{candidate.split('-')[1]}"
                    distance = tag_distance(candidate, target_lang)
                except Exception:
                    try:
                        distance = tag_distance(candidate.split('-')[0], target_lang)
                    except Exception:
                        continue
            if distance < best_distance:
                best_lang, best_distance = candidate, distance

        # If the score is low (meaning a good match), return the language
        if best_distance <= 10:
            return best_lang
        # Otherwise, raise an error for unsupported language
        raise ValueError(f"unsupported language code: {target_lang}")

    @staticmethod
    def remove_punctuation(text):
        """
        Removes all punctuation characters from a string.
        Punctuation characters are defined by string.punctuation.

        The result is also stripped of leading and trailing whitespace.
        """
        # Create a regex pattern that matches any character in string.punctuation
        punctuation_pattern = r"[" + re.escape(string.punctuation) + r"]"
        return re.sub(punctuation_pattern, '', text).strip()

    @staticmethod
    def chunk_text(text: str, delimiters: Optional[List[str]] = None) -> TextChunks:
        """Split ``text`` into sentences and then into delimiter-separated chunks.

        Args:
            text (str): The text to split.
            delimiters (Optional[List[str]]): Literal separators used inside a
                sentence. Defaults to ``[", ", ":", ";", "...", "|"]``.

        Returns:
            TextChunks: ``(chunk, terminator, is_last_in_sentence)`` tuples.
            The terminator is the delimiter that followed the chunk; for the
            last chunk it is the sentence's final punctuation character, or
            ``"."`` when the sentence does not end in punctuation.
        """
        if not text:
            return [('', '', True)]

        delimiters = delimiters or [", ", ":", ";", "...", "|"]

        # Create a regex pattern that matches any of the delimiters
        delimiter_pattern = "|".join(re.escape(delimiter) for delimiter in delimiters)

        results: TextChunks = []
        for sentence in sentence_tokenize(text):
            # Default punctuation if no specific punctuation found
            default_punc = sentence[-1] if sentence and sentence[-1] in string.punctuation else "."

            # The capturing group keeps the delimiters, so ``parts`` alternates
            # text, delimiter, text, ...
            parts = re.split(f'({delimiter_pattern})', sentence)

            for i in range(0, len(parts), 2):
                # If there's a delimiter after the text, use it
                delimiter = parts[i + 1] if i + 1 < len(parts) else default_punc

                # Last chunk is marked as complete
                is_last = (i + 2 >= len(parts))

                results.append((parts[i].strip(), delimiter.strip(), is_last))

        return results
142
+
143
+
144
+ ### all the 3 below are essentially the same thing
145
+ # no phonemization really happens
146
+
147
class RawPhonemes(BasePhonemizer):
    """Pass-through phonemizer: the input text is taken to be phonemes already."""

    def phonemize_string(self, text: str, lang: str) -> str:
        """Return ``text`` unchanged; ``lang`` is not used."""
        return text
152
+
153
+
154
class GraphemePhonemizer(BasePhonemizer):
    """
    Phonemizer that uses the characters of the (cleaned-up) text as phonemes.

    No grapheme-to-phoneme conversion takes place; the text is only
    lower-cased and lightly normalized.
    """
    # Regular expression matching whitespace:
    whitespace_re = re.compile(r"\s+")

    # Single-character substitutions applied after lower-casing.
    _separator_table = str.maketrans({";": ",", "-": " ", ":": ","})

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Clean up ``text`` and return it as a string of graphemes.

        Parameters:
            text (str): Input text to be converted to graphemes.
            lang (str): The language code (not used here, but required by
                BasePhonemizer).

        Returns:
            str: The lower-cased text with ``;`` and ``:`` turned into ``,``,
                ``-`` turned into a space, brackets and double quotes removed,
                and whitespace runs collapsed to one space.
        """
        cleaned = text.lower().translate(self._separator_table)
        cleaned = re.sub(r"[\<\>\(\)\[\]\"]+", "", cleaned)
        collapsed = self.whitespace_re.sub(" ", cleaned)
        return collapsed.strip()
183
+
184
+
185
class UnicodeCodepointPhonemizer(BasePhonemizer):
    """Phonemes = codepoints.

    The text is passed through Unicode normalization first, which (depending
    on the chosen form) splits accents and punctuation into their own
    codepoints.
    """

    def __init__(self, form: Literal["NFC", "NFD", "NFKC", "NFKD"] = "NFD"):
        """Remember the normalization form to apply; the alphabet is always UNICODE."""
        super().__init__(Alphabet.UNICODE)
        self.form = form

    def phonemize_string(self, text: str, lang: str) -> str:
        """Return ``text`` normalized to ``self.form``; ``lang`` is not used."""
        normalized = unicodedata.normalize(self.form, text)
        return normalized
197
+
198
+
199
if __name__ == "__main__":
    raw = RawPhonemes()
    grap = GraphemePhonemizer()
    uni = UnicodeCodepointPhonemizer()

    text = "olá, quem são vocês?"
    lang = "pt"

    # Exercise each entry point in turn, always in raw / grapheme / unicode order.
    for entry_point in ("phonemize", "phonemize_string", "phonemize_to_list"):
        for phonemizer in (raw, grap, uni):
            print(getattr(phonemizer, entry_point)(text, lang))
@@ -0,0 +1,250 @@
1
+ import os
2
+
3
+ import requests
4
+
5
+ from phoonnx.thirdparty.arpa2ipa import arpa_to_ipa_lookup
6
+ from phoonnx.phonemizers.base import BasePhonemizer
7
+ from phoonnx.config import Alphabet
8
+
9
+
10
class DeepPhonemizer(BasePhonemizer):
    """
    https://github.com/spring-media/DeepPhonemizer

    Wraps a DeepPhonemizer checkpoint. Known model names are downloaded on
    first use into ``~/.local/share/deepphonemizer``.
    """
    MODELS = {
        "latin_ipa_forward.pt": "https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/latin_ipa_forward.pt",
        "en_us_cmudict_ipa_forward.pt": "https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt",
        "en_us_cmudict_forward.pt": "https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_forward.pt"
    }

    # Seconds to wait for the connection and for each read while downloading,
    # so a stalled server cannot block construction forever.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self, model="latin_ipa_forward.pt"):
        """
        Load a DeepPhonemizer checkpoint.

        Args:
            model (str): Path to a checkpoint file, or one of the names in
                ``MODELS``. The alphabet is IPA when the value contains
                ``"ipa"`` and ARPA otherwise.

        Raises:
            ValueError: If ``model`` is neither an existing file nor a known
                model name.
        """
        import dp
        from dp.phonemizer import Phonemizer
        import torch
        # needed for latest torch version
        torch.serialization.add_safe_globals([dp.preprocessing.text.Preprocessor])
        torch.serialization.add_safe_globals([dp.preprocessing.text.LanguageTokenizer])
        torch.serialization.add_safe_globals([dp.preprocessing.text.SequenceTokenizer])

        if "ipa" in model:
            super().__init__(Alphabet.IPA)
        else:
            super().__init__(Alphabet.ARPA)

        if not os.path.isfile(model):
            if model not in self.MODELS:
                raise ValueError("invalid model")
            model = self._ensure_cached(model)

        self.phonemizer = Phonemizer.from_checkpoint(model)

    @classmethod
    def _ensure_cached(cls, model: str) -> str:
        """Return the cache path of a known model, downloading it if missing."""
        url = cls.MODELS[model]
        cache_dir = os.path.expanduser("~/.local/share/deepphonemizer")
        os.makedirs(cache_dir, exist_ok=True)
        model_path = os.path.join(cache_dir, model)
        if not os.path.isfile(model_path):
            print(f"Downloading {model} from {url}...")
            cls._download(url, model_path)
            print(f"Saved model to {model_path}")
        return model_path

    @classmethod
    def _download(cls, url: str, destination: str) -> None:
        """Stream ``url`` to ``destination`` without ever exposing a partial file."""
        # Write next to the destination and move into place only on success:
        # an interrupted download must not leave a truncated checkpoint that a
        # later run would accept as a valid cached model.
        tmp_path = f"{destination}.{os.getpid()}.part"
        try:
            with requests.get(url, stream=True, timeout=cls.DOWNLOAD_TIMEOUT) as r:
                r.raise_for_status()
                with open(tmp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(tmp_path, destination)
        finally:
            # After a successful replace the temp file is gone; otherwise
            # remove the leftover partial download.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        return cls.match_lang(target_lang, ['de', 'en_us'])

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize ``text`` with the loaded DeepPhonemizer model.

        Parameters:
            text (str): Input text to be phonemized.
            lang (str): The language code; it is matched against the supported
                codes and the matched code is passed to the model.

        Returns:
            str: The model output for ``text``.

        Raises:
            ValueError: If ``lang`` is unsupported.
        """
        lang = self.get_lang(lang)
        return self.phonemizer(text, lang)
86
+
87
+
88
class OpenPhonemizer(BasePhonemizer):
    """
    https://github.com/NeuralVox/OpenPhonemizer
    """

    def __init__(self):
        """Create the OpenPhonemizer backend; the alphabet is IPA."""
        from openphonemizer import OpenPhonemizer as _Backend
        import torch
        # needed for latest torch version
        import dp
        text_module = dp.preprocessing.text
        for safe_class in (text_module.Preprocessor,
                           text_module.LanguageTokenizer,
                           text_module.SequenceTokenizer):
            torch.serialization.add_safe_globals([safe_class])

        self.phonemizer = _Backend()
        super().__init__(Alphabet.IPA)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Check ``target_lang`` against the supported languages.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        supported = ["en"]
        return cls.match_lang(target_lang, supported)

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize ``text`` with the OpenPhonemizer backend.

        Parameters:
            text (str): Input text to be phonemized.
            lang (str): The language code; only validated, the backend is
                called with the text alone.

        Returns:
            str: The backend output for ``text``.
        """
        # Called for validation only: raises for unsupported languages.
        self.get_lang(lang)
        return self.phonemizer(text)
137
+
138
+
139
class G2PEnPhonemizer(BasePhonemizer):
    """
    https://github.com/Kyubyong/g2p
    """

    def __init__(self, alphabet=Alphabet.IPA):
        """Fetch the required NLTK resources and create the g2p_en backend.

        Args:
            alphabet: Either ``Alphabet.IPA`` or ``Alphabet.ARPA``.
        """
        assert alphabet in [Alphabet.IPA, Alphabet.ARPA]
        import nltk
        for resource in ('averaged_perceptron_tagger_eng', 'cmudict'):
            nltk.download(resource)
        from g2p_en import G2p
        self.g2p = G2p()
        super().__init__(alphabet)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Check ``target_lang`` against the supported languages.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        supported = ["en"]
        return cls.match_lang(target_lang, supported)

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize ``text`` with g2p_en.

        Parameters:
            text (str): Input text to be phonemized.
            lang (str): The language code; only validated.

        Returns:
            str: For the IPA alphabet, the backend's phonemes mapped through
                ``arpa_to_ipa_lookup`` (unknown symbols are kept as-is) and
                joined into one string. For the ARPA alphabet, the backend
                result as returned.
        """
        # Called for validation only: raises for unsupported languages.
        self.get_lang(lang)
        # NOTE: this model returns ARPA not IPA, may need to map phonemes
        arpa_phonemes = self.g2p(text)
        if self.alphabet == Alphabet.ARPA:
            # NOTE(review): returned without joining; if the backend yields a
            # sequence of tokens rather than a str, this does not match the
            # declared return type -- confirm the intended ARPA format.
            return arpa_phonemes
        return "".join(arpa_to_ipa_lookup.get(pho, pho) for pho in arpa_phonemes)
188
+
189
+
190
+
191
if __name__ == "__main__":
    # for comparison
    from phoonnx.phonemizers.mul import (ByT5Phonemizer, EspeakPhonemizer, GruutPhonemizer,
                                         EpitranPhonemizer, CharsiuPhonemizer)
    byt5 = ByT5Phonemizer()
    espeak = EspeakPhonemizer()
    gruut = GruutPhonemizer()
    epitr = EpitranPhonemizer()
    charsiu = CharsiuPhonemizer()
    openphon = OpenPhonemizer()
    g2pen = G2PEnPhonemizer()
    dp = DeepPhonemizer()

    lang = "en-gb"

    def _compare(text, engines, print_order=None):
        """Phonemize ``text`` with every engine first, then print the results."""
        print(f"\n--- Getting phonemes for '{text}' ---")
        outputs = {label: engine.phonemize(text, lang) for label, engine in engines}
        for label in print_order or list(outputs):
            print(f" {label} Phonemes: {outputs[label]}")

    multilingual = [
        ("Espeak", espeak),
        ("Gruut", gruut),
        ("byt5", byt5),
        ("Epitran", epitr),
        ("Charsiu", charsiu),
    ]
    english_only = [
        ("OpenPhonemizer", openphon),
        ("G2P_en", g2pen),
        ("DeepPhonemizer", dp),
    ]

    # The first sample also runs the English-only engines; DeepPhonemizer is
    # reported before G2P_en even though it runs after it.
    _compare(
        "Hello, world. How are you?",
        multilingual + english_only,
        print_order=[label for label, _ in multilingual]
        + ["OpenPhonemizer", "DeepPhonemizer", "G2P_en"],
    )
    _compare("This is a test: a quick one; and done!", multilingual)
    _compare("Just a phrase without punctuation", multilingual)
250
+
@@ -0,0 +1,46 @@
1
+ from phoonnx.phonemizers.base import BasePhonemizer
2
+ from phoonnx.config import Alphabet
3
+
4
+
5
class PersianPhonemizer(BasePhonemizer):
    """https://github.com/de-mh/persian_phonemizer"""

    def __init__(self, alphabet=Alphabet.IPA):
        """Create the persian_phonemizer backend.

        Args:
            alphabet: Either ``Alphabet.IPA`` or ``Alphabet.ERAAB``; selects
                the backend's output format.
        """
        from persian_phonemizer import Phonemizer
        assert alphabet in [Alphabet.ERAAB, Alphabet.IPA]
        if alphabet == Alphabet.IPA:
            output_format = "IPA"
        else:
            output_format = 'eraab'
        self.g2p = Phonemizer(output_format)
        super().__init__(alphabet)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Check ``target_lang`` against the supported languages.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        supported = ["fa"]
        return cls.match_lang(target_lang, supported)

    def phonemize_string(self, text: str, lang: str = "fa") -> str:
        """
        Phonemize ``text`` with the backend; ``lang`` is only validated.
        """
        # Called for validation only: raises for unsupported languages.
        self.get_lang(lang)
        return self.g2p.phonemize(text)
36
+
37
+
38
if __name__ == "__main__":
    text = "دوچرخه جدید علی گم شد."
    lang = "fa"

    pho = PersianPhonemizer()

    print(f"\n--- Getting phonemes for '{text}' ---")
    phonemes = pho.phonemize(text, lang)
    print(f" Phonemes: {phonemes}")
@@ -0,0 +1,142 @@
1
+ import os
2
+ import platform
3
+ import re
4
+ import subprocess
5
+ from typing import Optional
6
+
7
+ from phoonnx.phonemizers.base import BasePhonemizer
8
+ from phoonnx.config import Alphabet
9
+
10
class CotoviaError(Exception):
    """Error type for failures related to cotovia."""
13
+
14
+
15
class CotoviaPhonemizer(BasePhonemizer):
    """
    A phonemizer class that uses the Cotovia TTS binary to convert text into phonemes.
    It runs the input sentence through the command-line tool and applies multiple
    regular expression transformations to clean and normalize the output.
    """

    def __init__(self, cotovia_bin_path: Optional[str] = None):
        """
        Initializes the CotoviaPhonemizer.

        Args:
            cotovia_bin_path (str, optional): Path to the Cotovia TTS binary.
                If None, it will try to find it in common locations.

        Raises:
            FileNotFoundError: If the resolved binary path does not exist.
        """
        self.cotovia_bin = cotovia_bin_path or self.find_cotovia()
        if not os.path.exists(self.cotovia_bin):
            raise FileNotFoundError(f"Cotovia binary not found at {self.cotovia_bin}. "
                                    "Please ensure it's installed or provide the correct path.")
        super().__init__(Alphabet.COTOVIA)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        return cls.match_lang(target_lang, ["gl-ES"])

    @staticmethod
    def find_cotovia() -> str:
        """
        Attempts to find the cotovia binary in common locations.

        Lookup order: ``PATH``, the binary bundled for this machine type, then
        ``/usr/bin/cotovia``. Falls back to the bare name ``"cotovia"``.
        """
        # Local import: shutil is not among this module's top-level imports.
        import shutil

        # shutil.which avoids spawning the external ``which`` tool, which is
        # not available on every system.
        path = shutil.which("cotovia")
        if path and os.path.isfile(path):
            return path

        # Fallback to bundled binaries
        local_path = f"{os.path.dirname(os.path.dirname(__file__))}/thirdparty/cotovia/cotovia_{platform.machine()}"
        if os.path.isfile(local_path):
            return local_path

        # Last resort common system path
        if os.path.isfile("/usr/bin/cotovia"):
            return "/usr/bin/cotovia"

        return "cotovia"  # Bare name; the existence check in __init__ rejects it if it cannot be found

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Converts a given sentence into phonemes using the Cotovia TTS binary.

        Parameters:
            text (str): The input text to be phonemized
            lang (str): The language code (only validated; it is not passed to Cotovia)

        Returns:
            str: A cleaned and normalized representation of Cotovia's output

        Raises:
            ValueError: If ``lang`` is unsupported.

        Notes:
            - The binary is run directly with an argument list and the text is
              sent on stdin, so quotes, ``$`` or backticks in ``text`` are never
              interpreted by a shell.
            - The output is decoded as ISO-8859-1.
            - Cotovia's exit status is not checked.
        """
        # Called for validation only: raises for unsupported languages.
        self.get_lang(lang)

        # NOTE(review): the text is sent as UTF-8 followed by a newline, which
        # is what the previous ``echo "..." |`` pipeline produced; confirm the
        # input encoding Cotovia expects.
        completed = subprocess.run(
            [self.cotovia_bin, "-t", "-n", "-S"],
            input=f"{text}\n".encode("utf-8"),
            stdout=subprocess.PIPE,
        )
        # Same result as piping through ``iconv -f iso88591 -t utf8``.
        str_ext = completed.stdout.decode("iso-8859-1")
        return self._clean_output(str_ext)

    @staticmethod
    def _clean_output(str_ext: str) -> str:
        """Fix punctuation and spacing in raw Cotovia output."""
        ## fix punctuation in cotovia output - from official inference script

        # substitute ' ·\n' by ...
        str_ext = re.sub(r" ·", r"...", str_ext)

        # remove spaces before , . ! ? ; : ) ] of the extended string
        str_ext = re.sub(r"\s+([.,!?;:)\]])", r"\1", str_ext)

        # remove spaces after ( [ ¡ ¿ of the extended string
        str_ext = re.sub(r"([\(\[¡¿])\s+", r"\1", str_ext)

        # remove unwanted spaces between quotations marks
        str_ext = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', str_ext)

        # substitute '- text -' to '-text-'
        str_ext = re.sub(r"-\s*([^-]*?)\s*-", r"-\1-", str_ext)

        # remove initial question marks
        str_ext = re.sub(r"[¿¡]", r"", str_ext)

        # eliminate extra spaces
        str_ext = re.sub(r"\s+", r" ", str_ext)

        # a dash between two numbers becomes a space
        str_ext = re.sub(r"(\d+)\s*-\s*(\d+)", r"\1 \2", str_ext)

        ### - , ' and () by commas
        # substitute '- text -' to ', text,'
        # (the template must reference group 2 as ``\2``; ``\\2`` would emit
        # a literal backslash followed by "2" and drop the enclosed text)
        str_ext = re.sub(r"(\w+)\s+-([^-]*?)-\s+([^-]*?)", r"\1, \2, ", str_ext)

        # substitute ' - ' by ', '
        str_ext = re.sub(r"(\w+[!\?]?)\s+-\s*", r"\1, ", str_ext)

        # substitute ' ( text )' to ', text,'
        str_ext = re.sub(r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \2,", str_ext)

        return str_ext
131
+
132
+
133
+
134
if __name__ == "__main__":
    lang = "gl"
    text_gl = "Este é un sistema de conversión de texto a voz en lingua galega baseado en redes neuronais artificiais. Ten en conta que as funcionalidades incluídas nesta páxina ofrécense unicamente con fins de demostración. Se tes algún comentario, suxestión ou detectas algún problema durante a demostración, ponte en contacto connosco."

    cotovia = CotoviaPhonemizer()

    print(f"\n--- Getting phonemes for '{text_gl}' (Cotovia) ---")
    phonemes_cotovia = cotovia.phonemize(text_gl, lang)
    print(f" Cotovia Phonemes: {phonemes_cotovia}")