phoonnx 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phoonnx/__init__.py +0 -0
- phoonnx/config.py +490 -0
- phoonnx/locale/ca/phonetic_spellings.txt +2 -0
- phoonnx/locale/en/phonetic_spellings.txt +1 -0
- phoonnx/locale/gl/phonetic_spellings.txt +2 -0
- phoonnx/locale/pt/phonetic_spellings.txt +2 -0
- phoonnx/phoneme_ids.py +453 -0
- phoonnx/phonemizers/__init__.py +45 -0
- phoonnx/phonemizers/ar.py +42 -0
- phoonnx/phonemizers/base.py +216 -0
- phoonnx/phonemizers/en.py +250 -0
- phoonnx/phonemizers/fa.py +46 -0
- phoonnx/phonemizers/gl.py +142 -0
- phoonnx/phonemizers/he.py +67 -0
- phoonnx/phonemizers/ja.py +119 -0
- phoonnx/phonemizers/ko.py +97 -0
- phoonnx/phonemizers/mul.py +606 -0
- phoonnx/phonemizers/vi.py +44 -0
- phoonnx/phonemizers/zh.py +308 -0
- phoonnx/thirdparty/__init__.py +0 -0
- phoonnx/thirdparty/arpa2ipa.py +249 -0
- phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- phoonnx/thirdparty/hangul2ipa.py +783 -0
- phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
- phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
- phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
- phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
- phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
- phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
- phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
- phoonnx/thirdparty/ko_tables/yale.csv +22 -0
- phoonnx/thirdparty/kog2p/__init__.py +385 -0
- phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
- phoonnx/thirdparty/mantoq/__init__.py +67 -0
- phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
- phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
- phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
- phoonnx/thirdparty/mantoq/num2words.py +37 -0
- phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
- phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
- phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
- phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
- phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
- phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
- phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
- phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
- phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
- phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
- phoonnx/thirdparty/tashkeel/LICENSE +22 -0
- phoonnx/thirdparty/tashkeel/SOURCE +1 -0
- phoonnx/thirdparty/tashkeel/__init__.py +212 -0
- phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
- phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
- phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
- phoonnx/thirdparty/zh_num.py +238 -0
- phoonnx/util.py +705 -0
- phoonnx/version.py +6 -0
- phoonnx/voice.py +521 -0
- phoonnx-0.0.0.dist-info/METADATA +255 -0
- phoonnx-0.0.0.dist-info/RECORD +86 -0
- phoonnx-0.0.0.dist-info/WHEEL +5 -0
- phoonnx-0.0.0.dist-info/top_level.txt +2 -0
- phoonnx_train/__main__.py +151 -0
- phoonnx_train/export_onnx.py +109 -0
- phoonnx_train/norm_audio/__init__.py +92 -0
- phoonnx_train/norm_audio/trim.py +54 -0
- phoonnx_train/norm_audio/vad.py +54 -0
- phoonnx_train/preprocess.py +420 -0
- phoonnx_train/vits/__init__.py +0 -0
- phoonnx_train/vits/attentions.py +427 -0
- phoonnx_train/vits/commons.py +147 -0
- phoonnx_train/vits/config.py +330 -0
- phoonnx_train/vits/dataset.py +214 -0
- phoonnx_train/vits/lightning.py +352 -0
- phoonnx_train/vits/losses.py +58 -0
- phoonnx_train/vits/mel_processing.py +139 -0
- phoonnx_train/vits/models.py +732 -0
- phoonnx_train/vits/modules.py +527 -0
- phoonnx_train/vits/monotonic_align/__init__.py +20 -0
- phoonnx_train/vits/monotonic_align/setup.py +13 -0
- phoonnx_train/vits/transforms.py +212 -0
- phoonnx_train/vits/utils.py +16 -0
- phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,216 @@
|
|
1
|
+
import abc
|
2
|
+
import re
|
3
|
+
import string
|
4
|
+
import unicodedata
|
5
|
+
from typing import List, Tuple, Optional, Literal
|
6
|
+
|
7
|
+
from langcodes import tag_distance
|
8
|
+
from quebra_frases import sentence_tokenize
|
9
|
+
from phoonnx.config import Alphabet
|
10
|
+
from phoonnx.util import normalize
|
11
|
+
|
12
|
+
# list of (substring, terminator, end_of_sentence) tuples.
|
13
|
+
TextChunks = List[Tuple[str, str, bool]]
|
14
|
+
# list of (phonemes, terminator, end_of_sentence) tuples.
|
15
|
+
RawPhonemizedChunks = List[Tuple[str, str, bool]]
|
16
|
+
|
17
|
+
PhonemizedChunks = list[list[str]]
|
18
|
+
|
19
|
+
|
20
|
+
class BasePhonemizer(metaclass=abc.ABCMeta):
    """Abstract base class for text-to-phoneme converters.

    Subclasses implement :meth:`phonemize_string`. This class supplies the
    shared plumbing around it: splitting text into chunks, stripping
    punctuation, matching language tags and grouping the resulting phoneme
    characters.
    """

    def __init__(self, alphabet: Alphabet = Alphabet.UNICODE):
        """Remember which alphabet this phonemizer emits."""
        super().__init__()
        self.alphabet = alphabet

    @abc.abstractmethod
    def phonemize_string(self, text: str, lang: str) -> str:
        """Convert ``text`` (in language ``lang``) into a phoneme string."""
        raise NotImplementedError

    def phonemize_to_list(self, text: str, lang: str) -> List[str]:
        """Return the result of :meth:`phonemize_string` as a list of its elements."""
        return list(self.phonemize_string(text, lang))

    def phonemize(self, text: str, lang: str) -> PhonemizedChunks:
        """Phonemize ``text`` chunk by chunk.

        The text is passed through ``normalize``, split with
        :meth:`chunk_text`, and every chunk is phonemized after its
        punctuation has been removed.

        Args:
            text: Input text.
            lang: Language code forwarded to ``normalize`` and
                :meth:`phonemize_string`.

        Returns:
            One list of phoneme characters per chunk.
        """
        if not text:
            # A single empty group: the same shape _process_phones() produces
            # for one empty chunk, so the declared return type holds for
            # empty input as well.
            return [[]]
        text = normalize(text, lang)
        results: RawPhonemizedChunks = []
        for chunk, punct, _eos in self.chunk_text(text):
            phoneme_str = self.phonemize_string(self.remove_punctuation(chunk), lang)
            # Every chunk is flagged as end-of-sentence, so each chunk ends up
            # in its own group regardless of the flag chunk_text() reported.
            results.append((phoneme_str, punct, True))
        return self._process_phones(results)

    @staticmethod
    def _process_phones(raw_phones: RawPhonemizedChunks) -> PhonemizedChunks:
        """Group phoneme characters by sentence.

        Args:
            raw_phones: ``(phonemes, terminator, end_of_sentence)`` tuples.
                The terminator is not used here.

        Returns:
            A list with one list of single-character phonemes per sentence.
        """
        all_phonemes: list[list[str]] = []
        sentence_phonemes: list[str] = []
        for phonemes_str, _terminator, end_of_sentence in raw_phones:
            # Filter out (lang) switch (flags).
            # These surround words from languages other than the current voice.
            phonemes_str = re.sub(r"\([^)]+\)", "", phonemes_str)
            sentence_phonemes.extend(phonemes_str)
            if end_of_sentence:
                all_phonemes.append(sentence_phonemes)
                sentence_phonemes = []
        # Flush a trailing group that was never closed by an end-of-sentence flag.
        if sentence_phonemes:
            all_phonemes.append(sentence_phonemes)
        return all_phonemes

    @staticmethod
    def match_lang(target_lang: str, valid_langs: List[str]) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.
            valid_langs (List[str]): The language codes that are supported.

        Returns:
            str: ``target_lang`` itself when it is listed in ``valid_langs``,
                otherwise the candidate with the smallest ``tag_distance``.
                A candidate that could only be scored after being shortened
                to its first two subtags is returned in that shortened form.

        Raises:
            ValueError: If no candidate is within a distance of 10.
        """
        if target_lang in valid_langs:
            return target_lang
        best_lang = "und"
        best_distance = 10000000
        for candidate in valid_langs:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt / SystemExit are never swallowed here.
            try:
                distance: int = tag_distance(candidate, target_lang)
            except Exception:
                try:
                    # Retry with only the first two subtags of the candidate.
                    subtags = candidate.split('-')
                    candidate = f"{subtags[0]}-{subtags[1]}"
                    distance = tag_distance(candidate, target_lang)
                except Exception:
                    try:
                        # Last attempt: the first subtag alone.
                        distance = tag_distance(candidate.split('-')[0], target_lang)
                    except Exception:
                        continue
            if distance < best_distance:
                best_lang, best_distance = candidate, distance

        # If the score is low (meaning a good match), return the language
        if best_distance <= 10:
            return best_lang
        # Otherwise, raise an error for unsupported language
        raise ValueError(f"unsupported language code: {target_lang}")

    @staticmethod
    def remove_punctuation(text):
        """
        Removes all punctuation characters from a string.
        Punctuation characters are defined by string.punctuation.
        The result is also stripped of leading and trailing whitespace.
        """
        # Create a regex pattern that matches any character in string.punctuation
        punctuation_pattern = r"[" + re.escape(string.punctuation) + r"]"
        return re.sub(punctuation_pattern, '', text).strip()

    @staticmethod
    def chunk_text(text: str, delimiters: Optional[List[str]] = None) -> TextChunks:
        """Split text into sentences and then into delimiter-separated chunks.

        Args:
            text: Text to split.
            delimiters: Substrings that end a chunk inside a sentence.
                Defaults to ``[", ", ":", ";", "...", "|"]`` when omitted or
                empty.

        Returns:
            ``(chunk, delimiter, is_last)`` tuples. ``chunk`` and
            ``delimiter`` are whitespace-stripped. The final chunk of each
            sentence has ``is_last`` set and uses the sentence's last
            character as its delimiter when that character is punctuation,
            otherwise ``"."``.
        """
        if not text:
            return [('', '', True)]

        results: TextChunks = []
        delimiters = delimiters or [", ", ":", ";", "...", "|"]

        # Create a regex pattern that matches any of the delimiters
        delimiter_pattern = "|".join(re.escape(delimiter) for delimiter in delimiters)

        for sentence in sentence_tokenize(text):
            # Default punctuation if no specific punctuation found
            default_punc = sentence[-1] if sentence and sentence[-1] in string.punctuation else "."

            # The capturing group makes re.split keep the delimiters, so
            # parts alternates text, delimiter, text, ..., text.
            parts = re.split(f'({delimiter_pattern})', sentence)

            for i in range(0, len(parts), 2):
                # If there's a delimiter after the text, use it
                delimiter = parts[i + 1] if i + 1 < len(parts) else default_punc

                # Last chunk is marked as complete
                is_last = (i + 2 >= len(parts))

                results.append((parts[i].strip(), delimiter.strip(), is_last))

        return results
|
142
|
+
|
143
|
+
|
144
|
+
### All three classes below are essentially the same thing:
|
145
|
+
# no phonemization really happens
|
146
|
+
|
147
|
+
class RawPhonemes(BasePhonemizer):
    """Pass-through phonemizer: the input text is assumed to be phonemes already."""

    def phonemize_string(self, text: str, lang: str) -> str:
        """Return ``text`` unchanged; ``lang`` is not used."""
        phonemes = text
        return phonemes
|
152
|
+
|
153
|
+
|
154
|
+
class GraphemePhonemizer(BasePhonemizer):
    """
    Phonemizer whose "phonemes" are the characters of the cleaned-up text.
    No phonetic conversion takes place; the text is only normalized.
    """
    # Matches any run of whitespace characters.
    whitespace_re = re.compile(r"\s+")

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Clean up the input text and return it as a grapheme string.

        The text is lower-cased, ";" and ":" become ",", "-" becomes a space,
        the characters < > ( ) [ ] and double quotes are dropped, and every run
        of whitespace collapses into one space. The result is stripped.

        Parameters:
            text (str): Input text to be converted to graphemes.
            lang (str): Language code; not used by this phonemizer.

        Returns:
            str: The normalized grapheme string.
        """
        cleaned = text.lower()
        for old, new in ((";", ","), ("-", " "), (":", ",")):
            cleaned = cleaned.replace(old, new)
        cleaned = re.sub(r"[\<\>\(\)\[\]\"]+", "", cleaned)
        return self.whitespace_re.sub(" ", cleaned).strip()
|
183
|
+
|
184
|
+
|
185
|
+
class UnicodeCodepointPhonemizer(BasePhonemizer):
    """Phonemes = Unicode codepoints of the normalized text.

    The text is run through ``unicodedata.normalize`` with the configured
    form; a decomposing form such as the default NFD splits accents into
    their own codepoints.
    """

    def __init__(self, form: Literal["NFC", "NFD", "NFKC", "NFKD"] = "NFD"):
        """Store the Unicode normalization form applied to every input."""
        super().__init__(Alphabet.UNICODE)
        self.form = form

    def phonemize_string(self, text: str, lang: str) -> str:
        """Return ``text`` normalized to ``self.form``; ``lang`` is not used."""
        normalized = unicodedata.normalize(self.form, text)
        return normalized
|
197
|
+
|
198
|
+
|
199
|
+
if __name__ == "__main__":
    # Demo: run each pass-through phonemizer through every public entry point.
    phonemizers = (RawPhonemes(), GraphemePhonemizer(), UnicodeCodepointPhonemizer())

    text = "olá, quem são vocês?"
    lang = "pt"

    for method_name in ("phonemize", "phonemize_string", "phonemize_to_list"):
        for phonemizer in phonemizers:
            print(getattr(phonemizer, method_name)(text, lang))
|
@@ -0,0 +1,250 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
import requests
|
4
|
+
|
5
|
+
from phoonnx.thirdparty.arpa2ipa import arpa_to_ipa_lookup
|
6
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
7
|
+
from phoonnx.config import Alphabet
|
8
|
+
|
9
|
+
|
10
|
+
class DeepPhonemizer(BasePhonemizer):
    """
    Phonemizer backed by https://github.com/spring-media/DeepPhonemizer

    ``model`` is either a path to a local checkpoint or one of the names in
    ``MODELS``; named models are downloaded once and cached under
    ``~/.local/share/deepphonemizer``.
    """
    MODELS = {
        "latin_ipa_forward.pt": "https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/latin_ipa_forward.pt",
        "en_us_cmudict_ipa_forward.pt": "https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt",
        "en_us_cmudict_forward.pt": "https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_forward.pt"
    }

    def __init__(self, model="latin_ipa_forward.pt"):
        """
        Load a DeepPhonemizer checkpoint.

        Args:
            model (str): Path to a checkpoint file, or a key of ``MODELS``.
                The alphabet is IPA when the string contains "ipa",
                otherwise ARPA.

        Raises:
            ValueError: If ``model`` is neither an existing file nor a known
                model name.
        """
        import dp
        from dp.phonemizer import Phonemizer
        import torch
        # needed for latest torch version
        for safe_class in (dp.preprocessing.text.Preprocessor,
                           dp.preprocessing.text.LanguageTokenizer,
                           dp.preprocessing.text.SequenceTokenizer):
            torch.serialization.add_safe_globals([safe_class])

        super().__init__(Alphabet.IPA if "ipa" in model else Alphabet.ARPA)

        if not os.path.isfile(model):
            if model not in self.MODELS:
                raise ValueError("invalid model")
            model = self._fetch_model(model, self.MODELS[model])

        self.phonemizer = Phonemizer.from_checkpoint(model)

    @staticmethod
    def _fetch_model(name: str, url: str) -> str:
        """Return the cached path of model ``name``, downloading it from ``url`` if missing."""
        cache_dir = os.path.expanduser("~/.local/share/deepphonemizer")
        os.makedirs(cache_dir, exist_ok=True)
        model_path = os.path.join(cache_dir, name)
        if os.path.isfile(model_path):
            return model_path

        print(f"Downloading {name} from {url}...")
        # Download into a temporary sibling file and move it into place only
        # once complete, so an interrupted download can never be mistaken for
        # a valid cached checkpoint on the next run.
        partial_path = f"{model_path}.part"
        try:
            # (connect, read) timeouts: without them a stalled connection
            # would block forever.
            with requests.get(url, stream=True, timeout=(10, 60)) as r:
                r.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(partial_path, model_path)
        finally:
            # After a successful os.replace the partial file no longer exists;
            # this only cleans up after a failed or interrupted download.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"Saved model to {model_path}")
        return model_path

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code ('de' or 'en_us', or the form
                ``match_lang`` resolved it to).

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        return cls.match_lang(target_lang, ['de', 'en_us'])

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize text with the loaded DeepPhonemizer model.

        Parameters:
            text (str): Input text to phonemize.
            lang (str): Language code; it is resolved with ``get_lang`` and
                the resolved code is passed on to the model.

        Returns:
            str: Whatever the underlying model returns for ``text``.

        Raises:
            ValueError: If ``lang`` is not supported.
        """
        lang = self.get_lang(lang)
        return self.phonemizer(text, lang)
|
86
|
+
|
87
|
+
|
88
|
+
class OpenPhonemizer(BasePhonemizer):
    """
    English phonemizer backed by https://github.com/NeuralVox/OpenPhonemizer
    """

    def __init__(self):
        """Create the OpenPhonemizer backend and register this phonemizer as IPA."""
        # Aliased so the backend class is not confused with this wrapper class.
        from openphonemizer import OpenPhonemizer as _Backend
        import torch
        # needed for latest torch version
        import dp
        for safe_class in (dp.preprocessing.text.Preprocessor,
                           dp.preprocessing.text.LanguageTokenizer,
                           dp.preprocessing.text.SequenceTokenizer):
            torch.serialization.add_safe_globals([safe_class])

        self.phonemizer = _Backend()
        super().__init__(Alphabet.IPA)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Check a language code against the single supported language, "en".

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The code ``match_lang`` resolved ``target_lang`` to.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # only called to raise on unsupported languages
        supported = ["en"]
        return cls.match_lang(target_lang, supported)

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize text with the OpenPhonemizer backend.

        Parameters:
            text (str): Input text to phonemize.
            lang (str): Language code; it is only validated, the backend is
                called with the text alone.

        Returns:
            str: Whatever the backend returns for ``text``.

        Raises:
            ValueError: If ``lang`` is not supported.
        """
        self.get_lang(lang)
        return self.phonemizer(text)
|
137
|
+
|
138
|
+
|
139
|
+
class G2PEnPhonemizer(BasePhonemizer):
    """
    English phonemizer backed by https://github.com/Kyubyong/g2p

    The backend produces ARPA symbols; with ``Alphabet.IPA`` each symbol is
    mapped through ``arpa_to_ipa_lookup``.
    """

    def __init__(self, alphabet=Alphabet.IPA):
        """
        Download the required NLTK resources and create the g2p_en backend.

        Args:
            alphabet: ``Alphabet.IPA`` (default) or ``Alphabet.ARPA``.
        """
        assert alphabet in [Alphabet.IPA, Alphabet.ARPA]
        import nltk
        nltk.download('averaged_perceptron_tagger_eng')
        nltk.download('cmudict')
        from g2p_en import G2p
        self.g2p = G2p()
        super().__init__(alphabet)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        return cls.match_lang(target_lang, ["en"])

    def phonemize_to_list(self, text: str, lang: str) -> list[str]:
        """
        Return the phonemes of ``text`` as a list.

        In ARPA mode the backend's symbols are returned one per list element,
        which is what this method produced before ``phonemize_string`` started
        joining them. In IPA mode the base-class behaviour applies.

        Raises:
            ValueError: If ``lang`` is not supported.
        """
        if self.alphabet == Alphabet.ARPA:
            self.get_lang(lang)
            return list(self.g2p(text))
        return super().phonemize_to_list(text, lang)

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Phonemize English text with g2p_en.

        Parameters:
            text (str): Input text to phonemize.
            lang (str): Language code; it is only validated.

        Returns:
            str: The backend's symbols concatenated into one string, mapped
                to IPA first unless the alphabet is ARPA.

        Raises:
            ValueError: If ``lang`` is not supported.
        """
        self.get_lang(lang)
        # NOTE: this model returns ARPA not IPA, may need to map phonemes
        symbols = self.g2p(text)
        if self.alphabet != Alphabet.ARPA:
            symbols = [arpa_to_ipa_lookup.get(pho, pho) for pho in symbols]
        # Always hand back a str: BasePhonemizer.phonemize() applies re.sub()
        # to this value, which fails on anything that is not a string.
        # NOTE(review): ARPA symbols are joined without a separator, mirroring
        # the IPA branch - confirm this is the form downstream code expects.
        return "".join(symbols)
|
188
|
+
|
189
|
+
|
190
|
+
|
191
|
+
if __name__ == "__main__":
    # for comparison
    from phoonnx.phonemizers.mul import (ByT5Phonemizer, EspeakPhonemizer, GruutPhonemizer,
                                         EpitranPhonemizer, CharsiuPhonemizer)

    byt5 = ByT5Phonemizer()
    espeak = EspeakPhonemizer()
    gruut = GruutPhonemizer()
    epitr = EpitranPhonemizer()
    charsiu = CharsiuPhonemizer()
    openphon = OpenPhonemizer()
    g2pen = G2PEnPhonemizer()
    dp = DeepPhonemizer()

    lang = "en-gb"

    # (label, engine) pairs, listed in the order the engines are invoked.
    multilingual = [("Espeak", espeak), ("Gruut", gruut), ("byt5", byt5),
                    ("Epitran", epitr), ("Charsiu", charsiu)]
    english_only = [("OpenPhonemizer", openphon), ("G2P_en", g2pen), ("DeepPhonemizer", dp)]

    def compare(sentence, engines, print_order=None):
        """Phonemize ``sentence`` with every engine, then print the results."""
        print(f"\n--- Getting phonemes for '{sentence}' ---")
        outputs = {label: engine.phonemize(sentence, lang) for label, engine in engines}
        for label in print_order or outputs:
            print(f" {label} Phonemes: {outputs[label]}")

    # The first sentence also exercises the English-only engines; its report
    # lists DeepPhonemizer before G2P_en although G2P_en runs first.
    compare("Hello, world. How are you?", multilingual + english_only,
            print_order=["Espeak", "Gruut", "byt5", "Epitran", "Charsiu",
                         "OpenPhonemizer", "DeepPhonemizer", "G2P_en"])
    compare("This is a test: a quick one; and done!", multilingual)
    compare("Just a phrase without punctuation", multilingual)
|
250
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
2
|
+
from phoonnx.config import Alphabet
|
3
|
+
|
4
|
+
|
5
|
+
class PersianPhonemizer(BasePhonemizer):
    """Persian phonemizer backed by https://github.com/de-mh/persian_phonemizer"""

    def __init__(self, alphabet=Alphabet.IPA):
        """
        Create the persian_phonemizer backend.

        Args:
            alphabet: ``Alphabet.IPA`` (default) or ``Alphabet.ERAAB``; it
                selects the backend's output format.
        """
        from persian_phonemizer import Phonemizer
        assert alphabet in [Alphabet.ERAAB, Alphabet.IPA]
        if alphabet == Alphabet.IPA:
            output_format = "IPA"
        else:
            output_format = 'eraab'
        self.g2p = Phonemizer(output_format)
        super().__init__(alphabet)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Check a language code against the single supported language, "fa".

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The code ``match_lang`` resolved ``target_lang`` to.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # only called to raise on unsupported languages
        supported = ["fa"]
        return cls.match_lang(target_lang, supported)

    def phonemize_string(self, text: str, lang: str = "fa") -> str:
        """
        Phonemize Persian text with the backend.

        ``lang`` is only validated; the backend is called with the text alone.
        """
        self.get_lang(lang)
        return self.g2p.phonemize(text)
|
36
|
+
|
37
|
+
|
38
|
+
if __name__ == "__main__":
    # Demo: phonemize one Persian sentence and print the result.
    text = "دوچرخه جدید علی گم شد."
    phonemizer = PersianPhonemizer()
    lang = "fa"

    print(f"\n--- Getting phonemes for '{text}' ---")
    phonemes = phonemizer.phonemize(text, lang)
    print(f" Phonemes: {phonemes}")
|
@@ -0,0 +1,142 @@
|
|
1
|
+
import os
|
2
|
+
import platform
|
3
|
+
import re
|
4
|
+
import subprocess
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
8
|
+
from phoonnx.config import Alphabet
|
9
|
+
|
10
|
+
class CotoviaError(Exception):
    """Raised for errors related to cotovia."""
|
13
|
+
|
14
|
+
|
15
|
+
class CotoviaPhonemizer(BasePhonemizer):
    """
    A phonemizer class that uses the Cotovia TTS binary to convert text into phonemes.
    The text is piped through the command-line tool and the output is cleaned
    up with a series of regular expression substitutions.
    """

    def __init__(self, cotovia_bin_path: Optional[str] = None):
        """
        Initializes the CotoviaPhonemizer.

        Args:
            cotovia_bin_path (str, optional): Path to the Cotovia TTS binary.
                If None, it will try to find it in common locations.

        Raises:
            FileNotFoundError: If the resolved binary path does not exist.
        """
        self.cotovia_bin = cotovia_bin_path or self.find_cotovia()
        if not os.path.exists(self.cotovia_bin):
            raise FileNotFoundError(f"Cotovia binary not found at {self.cotovia_bin}. "
                                    "Please ensure it's installed or provide the correct path.")
        super().__init__(Alphabet.COTOVIA)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """
        Validates and returns the closest supported language code.

        Args:
            target_lang (str): The language code to validate.

        Returns:
            str: The validated language code.

        Raises:
            ValueError: If the language code is unsupported.
        """
        # this check is here only to throw an exception if invalid language is provided
        return cls.match_lang(target_lang, ["gl-ES"])

    @staticmethod
    def find_cotovia() -> str:
        """
        Attempts to find the cotovia binary in common locations.

        Lookup order: the PATH, the binary bundled for this machine
        architecture, then ``/usr/bin/cotovia``.

        Returns:
            str: The first existing candidate, or the bare name "cotovia"
                when none exists.
        """
        # Function-scope import: shutil is not among this module's imports.
        import shutil

        # shutil.which performs the PATH lookup in-process, so it does not
        # depend on an external `which` executable being installed.
        path = shutil.which("cotovia")
        if path and os.path.isfile(path):
            return path

        # Fallback to bundled binaries
        local_path = f"{os.path.dirname(os.path.dirname(__file__))}/thirdparty/cotovia/cotovia_{platform.machine()}"
        if os.path.isfile(local_path):
            return local_path

        # Last resort common system path
        if os.path.isfile("/usr/bin/cotovia"):
            return "/usr/bin/cotovia"

        return "cotovia"  # Return "cotovia" to let subprocess raise FileNotFoundError if not found in PATH

    def phonemize_string(self, text: str, lang: str) -> str:
        """
        Converts a given sentence into phonemes using the Cotovia TTS binary.

        Parameters:
            text (str): The input text to be phonemized
            lang (str): The language code; it is only validated

        Returns:
            str: A cleaned and normalized phonetic representation of the input sentence

        Raises:
            ValueError: If ``lang`` is not supported.

        Notes:
            - Runs ``cotovia -t -n -S`` with the text on standard input
            - The binary's output is decoded as ISO-8859-1
            - The exit status is not checked, so a failing binary yields
              whatever it wrote to standard output
        """
        self.get_lang(lang)
        # Pass the text on stdin with an argv list instead of interpolating it
        # into a shell pipeline: quotes, `$` or backticks in the text can then
        # neither break nor inject commands.
        process = subprocess.run(
            [self.cotovia_bin, "-t", "-n", "-S"],
            # Same bytes `echo "<text>"` fed to the binary: the text in the
            # filesystem encoding followed by a newline.
            input=os.fsencode(f"{text}\n"),
            stdout=subprocess.PIPE,
            check=False,
        )
        # Decoding as Latin-1 is what `iconv -f iso88591 -t utf8` followed by
        # a UTF-8 decode amounted to.
        return self._clean_output(process.stdout.decode("iso-8859-1"))

    @staticmethod
    def _clean_output(str_ext: str) -> str:
        """Normalize punctuation and spacing in raw cotovia output."""
        ## fix punctuation in cotovia output - from official inference script

        # substitute ' ·\n' by ...
        str_ext = re.sub(r" ·", r"...", str_ext)

        # remove spaces before , . ! ? ; : ) ] of the extended string
        str_ext = re.sub(r"\s+([.,!?;:)\]])", r"\1", str_ext)

        # remove spaces after ( [ ¡ ¿ of the extended string
        str_ext = re.sub(r"([\(\[¡¿])\s+", r"\1", str_ext)

        # remove unwanted spaces between quotations marks
        str_ext = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', str_ext)

        # substitute '- text -' to '-text-'
        str_ext = re.sub(r"-\s*([^-]*?)\s*-", r"-\1-", str_ext)

        # remove initial question marks
        str_ext = re.sub(r"[¿¡]", r"", str_ext)

        # eliminate extra spaces
        str_ext = re.sub(r"\s+", r" ", str_ext)

        # replace a dash between two numbers with a space
        str_ext = re.sub(r"(\d+)\s*-\s*(\d+)", r"\1 \2", str_ext)

        ### - , ' and () by commas
        # substitute '- text -' to ', text,'
        # (the template uses \2; a doubled backslash would emit a literal "\2")
        str_ext = re.sub(r"(\w+)\s+-([^-]*?)-\s+([^-]*?)", r"\1, \2, ", str_ext)

        # substitute ' - ' by ', '
        str_ext = re.sub(r"(\w+[!\?]?)\s+-\s*", r"\1, ", str_ext)

        # substitute ' ( text )' to ', text,'
        str_ext = re.sub(r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \2,", str_ext)

        return str_ext
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
if __name__ == "__main__":
    # Demo: phonemize a Galician paragraph with Cotovia and print the result.
    phonemizer = CotoviaPhonemizer()

    lang = "gl"
    text_gl = "Este é un sistema de conversión de texto a voz en lingua galega baseado en redes neuronais artificiais. Ten en conta que as funcionalidades incluídas nesta páxina ofrécense unicamente con fins de demostración. Se tes algún comentario, suxestión ou detectas algún problema durante a demostración, ponte en contacto connosco."

    print(f"\n--- Getting phonemes for '{text_gl}' (Cotovia) ---")
    phonemes = phonemizer.phonemize(text_gl, lang)
    print(f" Cotovia Phonemes: {phonemes}")
|