phoonnx 0.1.0a1__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phoonnx/config.py +11 -1
- phoonnx/phonemizers/__init__.py +4 -1
- phoonnx/phonemizers/ar.py +36 -44
- phoonnx/phonemizers/base.py +27 -1
- phoonnx/phonemizers/he.py +6 -25
- phoonnx/phonemizers/mul.py +617 -4
- phoonnx/phonemizers/mwl.py +36 -0
- phoonnx/thirdparty/hangul2ipa.py +1 -0
- phoonnx/thirdparty/mantoq/__init__.py +1 -26
- phoonnx/thirdparty/phonikud/__init__.py +24 -0
- phoonnx/version.py +5 -1
- phoonnx/voice.py +4 -16
- {phoonnx-0.1.0a1.dist-info → phoonnx-0.1.1a1.dist-info}/METADATA +4 -1
- {phoonnx-0.1.0a1.dist-info → phoonnx-0.1.1a1.dist-info}/RECORD +19 -17
- phoonnx_train/export_onnx.py +307 -56
- phoonnx_train/preprocess.py +36 -9
- phoonnx_train/vits/dataset.py +4 -0
- {phoonnx-0.1.0a1.dist-info → phoonnx-0.1.1a1.dist-info}/WHEEL +0 -0
- {phoonnx-0.1.0a1.dist-info → phoonnx-0.1.1a1.dist-info}/top_level.txt +0 -0
phoonnx/config.py
CHANGED
@@ -33,7 +33,7 @@ class Alphabet(str, Enum):
|
|
33
33
|
ERAAB = "eraab" # fa
|
34
34
|
COTOVIA = "cotovia" # gl
|
35
35
|
HANZI = "hanzi" # zh
|
36
|
-
|
36
|
+
BUCKWALTER = "buckwalter" # ar
|
37
37
|
|
38
38
|
|
39
39
|
|
@@ -48,6 +48,8 @@ class PhonemeType(str, Enum):
|
|
48
48
|
EPITRAN = "epitran"
|
49
49
|
BYT5 = "byt5"
|
50
50
|
CHARSIU = "charsiu" # technically same as byt5, but needs special handling for whitespace
|
51
|
+
TRANSPHONE = "transphone"
|
52
|
+
MIRANDESE = "mwl_phonemizer"
|
51
53
|
|
52
54
|
DEEPPHONEMIZER = "deepphonemizer" # en
|
53
55
|
OPENPHONEMIZER = "openphonemizer" # en
|
@@ -382,6 +384,9 @@ class SynthesisConfig:
|
|
382
384
|
|
383
385
|
enable_phonetic_spellings: bool = True
|
384
386
|
|
387
|
+
"""for arabic and hebrew models"""
|
388
|
+
add_diacritics: bool = True
|
389
|
+
|
385
390
|
|
386
391
|
def get_phonemizer(phoneme_type: PhonemeType,
|
387
392
|
alphabet: Alphabet = Alphabet.IPA,
|
@@ -389,6 +394,7 @@ def get_phonemizer(phoneme_type: PhonemeType,
|
|
389
394
|
from phoonnx.phonemizers import (EpitranPhonemizer, EspeakPhonemizer, OpenPhonemizer, OpenJTaklPhonemizer,
|
390
395
|
ByT5Phonemizer, CharsiuPhonemizer, DeepPhonemizer, PersianPhonemizer,
|
391
396
|
G2pCPhonemizer, G2pMPhonemizer, G2PKPhonemizer, G2PEnPhonemizer,
|
397
|
+
TransphonePhonemizer, MirandesePhonemizer,
|
392
398
|
GruutPhonemizer, GraphemePhonemizer, MantoqPhonemizer, MisakiPhonemizer,
|
393
399
|
KoG2PPhonemizer, PypinyinPhonemizer, PyKakasiPhonemizer, CotoviaPhonemizer,
|
394
400
|
CutletPhonemizer, PhonikudPhonemizer, VIPhonemePhonemizer, XpinyinPhonemizer,
|
@@ -405,6 +411,10 @@ def get_phonemizer(phoneme_type: PhonemeType,
|
|
405
411
|
phonemizer = EpitranPhonemizer()
|
406
412
|
elif phoneme_type == PhonemeType.MISAKI:
|
407
413
|
phonemizer = MisakiPhonemizer()
|
414
|
+
elif phoneme_type == PhonemeType.TRANSPHONE:
|
415
|
+
phonemizer = TransphonePhonemizer()
|
416
|
+
elif phoneme_type == PhonemeType.MIRANDESE:
|
417
|
+
phonemizer = MirandesePhonemizer()
|
408
418
|
elif phoneme_type == PhonemeType.DEEPPHONEMIZER:
|
409
419
|
phonemizer = DeepPhonemizer(model)
|
410
420
|
elif phoneme_type == PhonemeType.OPENPHONEMIZER:
|
phoonnx/phonemizers/__init__.py
CHANGED
@@ -12,7 +12,8 @@ from phoonnx.phonemizers.ko import KoG2PPhonemizer, G2PKPhonemizer
|
|
12
12
|
from phoonnx.phonemizers.zh import (G2pCPhonemizer, G2pMPhonemizer, PypinyinPhonemizer,
|
13
13
|
XpinyinPhonemizer, JiebaPhonemizer)
|
14
14
|
from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer,
|
15
|
-
GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer)
|
15
|
+
GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer, TransphonePhonemizer)
|
16
|
+
from phoonnx.phonemizers.mwl import MirandesePhonemizer
|
16
17
|
|
17
18
|
Phonemizer = Union[
|
18
19
|
MisakiPhonemizer,
|
@@ -22,6 +23,8 @@ Phonemizer = Union[
|
|
22
23
|
EspeakPhonemizer,
|
23
24
|
GruutPhonemizer,
|
24
25
|
EpitranPhonemizer,
|
26
|
+
TransphonePhonemizer,
|
27
|
+
MirandesePhonemizer,
|
25
28
|
OpenJTaklPhonemizer,
|
26
29
|
CutletPhonemizer,
|
27
30
|
PyKakasiPhonemizer,
|
phoonnx/phonemizers/ar.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
from phoonnx.phonemizers.base import BasePhonemizer
|
2
|
-
from phoonnx.thirdparty.mantoq import g2p as mantoq
|
3
1
|
from phoonnx.config import Alphabet
|
2
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
4
3
|
from phoonnx.thirdparty.bw2ipa import translate as bw2ipa
|
4
|
+
from phoonnx.thirdparty.mantoq import g2p as mantoq
|
5
5
|
|
6
6
|
|
7
7
|
class MantoqPhonemizer(BasePhonemizer):
|
8
8
|
|
9
|
-
def __init__(self, alphabet=Alphabet.
|
10
|
-
if alphabet not in [Alphabet.IPA, Alphabet.
|
9
|
+
def __init__(self, alphabet=Alphabet.BUCKWALTER):
|
10
|
+
if alphabet not in [Alphabet.IPA, Alphabet.BUCKWALTER]:
|
11
11
|
raise ValueError("unsupported alphabet")
|
12
12
|
super().__init__(alphabet)
|
13
13
|
|
@@ -39,7 +39,7 @@ class MantoqPhonemizer(BasePhonemizer):
|
|
39
39
|
|
40
40
|
# The phonemes are a list of characters, we join them into a string
|
41
41
|
# and replace the word separator token with a space.
|
42
|
-
phonemes =
|
42
|
+
phonemes = "".join(phonemes).replace("_+_", " ")
|
43
43
|
|
44
44
|
if self.alphabet == Alphabet.IPA:
|
45
45
|
# If the alphabet is IPA, we use the bw2ipa function to translate
|
@@ -51,56 +51,48 @@ class MantoqPhonemizer(BasePhonemizer):
|
|
51
51
|
|
52
52
|
|
53
53
|
if __name__ == "__main__":
|
54
|
+
from phoonnx.phonemizers.mul import EspeakPhonemizer
|
55
|
+
|
56
|
+
espeak = EspeakPhonemizer()
|
57
|
+
|
54
58
|
# Initialize phonemizers for both MANTOQ and IPA alphabets
|
55
|
-
pho_mantoq = MantoqPhonemizer()
|
56
|
-
|
59
|
+
pho_mantoq = MantoqPhonemizer(alphabet=Alphabet.IPA)
|
60
|
+
|
61
|
+
|
62
|
+
def compare(text):
|
63
|
+
print(f"Original Text: {text}")
|
64
|
+
print(f" Mantoq: {pho_mantoq.phonemize_string(text, 'ar')}")
|
65
|
+
print(f" Espeak: {espeak.phonemize_string(text, 'ar')}")
|
66
|
+
|
67
|
+
ts = pho_mantoq.add_diacritics(text, 'ar')
|
68
|
+
print(f"Tashkeel Text: {ts}")
|
69
|
+
print(f" Mantoq: {pho_mantoq.phonemize_string(ts, 'ar')}")
|
70
|
+
print(f" Espeak: {espeak.phonemize_string(ts, 'ar')}")
|
71
|
+
print("\n#########################")
|
57
72
|
|
58
|
-
text1 = "مرحبا بالعالم"
|
59
|
-
print(f"Original Text: {text1}")
|
60
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text1, 'ar')}")
|
61
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text1, 'ar')}")
|
62
|
-
print("-" * 20)
|
63
73
|
|
64
|
-
|
65
|
-
|
66
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text2, 'ar')}")
|
67
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text2, 'ar')}")
|
68
|
-
print("-" * 20)
|
74
|
+
text = "مرحبا بالعالم"
|
75
|
+
compare(text)
|
69
76
|
|
70
|
-
|
71
|
-
|
77
|
+
text = "ذهب الطالب إلى المكتبة لقراءة كتاب عن تاريخ الأندلس."
|
78
|
+
compare(text)
|
72
79
|
|
73
80
|
# 1. Test for gemination of a sun letter (e.g., ash-shams)
|
74
|
-
|
75
|
-
|
76
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text3, 'ar')}")
|
77
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text3, 'ar')}")
|
78
|
-
print("-" * 20)
|
81
|
+
text = "الشمس"
|
82
|
+
compare(text)
|
79
83
|
|
80
84
|
# 2. Test for long vowels (e.g., 'fil' - elephant)
|
81
|
-
|
82
|
-
|
83
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text4, 'ar')}")
|
84
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text4, 'ar')}")
|
85
|
-
print("-" * 20)
|
85
|
+
text = "فيل"
|
86
|
+
compare(text)
|
86
87
|
|
87
88
|
# 3. Test for glide (e.g., 'yawm' - day)
|
88
|
-
|
89
|
-
|
90
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text5, 'ar')}")
|
91
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text5, 'ar')}")
|
92
|
-
print("-" * 20)
|
89
|
+
text = "يوم"
|
90
|
+
compare(text)
|
93
91
|
|
94
92
|
# 4. Test for long vowels (e.g., 'suwr' - wall)
|
95
|
-
|
96
|
-
|
97
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text6, 'ar')}")
|
98
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text6, 'ar')}")
|
99
|
-
print("-" * 20)
|
93
|
+
text = "سور"
|
94
|
+
compare(text)
|
100
95
|
|
101
96
|
# 5. Test for glide (e.g., 'law' - if)
|
102
|
-
|
103
|
-
|
104
|
-
print(f" Mantoq Phonemizer: {pho_mantoq.phonemize_string(text7, 'ar')}")
|
105
|
-
print(f" IPA Phonemizer: {pho_ipa.phonemize_string(text7, 'ar')}")
|
106
|
-
print("-" * 20)
|
97
|
+
text = "لو"
|
98
|
+
compare(text)
|
phoonnx/phonemizers/base.py
CHANGED
@@ -8,6 +8,8 @@ from langcodes import tag_distance
|
|
8
8
|
from quebra_frases import sentence_tokenize
|
9
9
|
from phoonnx.config import Alphabet
|
10
10
|
from phoonnx.util import normalize
|
11
|
+
from phoonnx.thirdparty.phonikud import PhonikudDiacritizer
|
12
|
+
from phoonnx.thirdparty.tashkeel import TashkeelDiacritizer
|
11
13
|
|
12
14
|
# list of (substring, terminator, end_of_sentence) tuples.
|
13
15
|
TextChunks = List[Tuple[str, str, bool]]
|
@@ -18,10 +20,27 @@ PhonemizedChunks = list[list[str]]
|
|
18
20
|
|
19
21
|
|
20
22
|
class BasePhonemizer(metaclass=abc.ABCMeta):
|
21
|
-
def __init__(self, alphabet: Alphabet = Alphabet.UNICODE
|
23
|
+
def __init__(self, alphabet: Alphabet = Alphabet.UNICODE,
|
24
|
+
taskeen_threshold: Optional[float] = 0.8):
|
22
25
|
super().__init__()
|
23
26
|
self.alphabet = alphabet
|
24
27
|
|
28
|
+
self.taskeen_threshold = taskeen_threshold # arabic only
|
29
|
+
self._tashkeel: Optional[TashkeelDiacritizer] = None
|
30
|
+
self._phonikud: Optional[PhonikudDiacritizer] = None # hebrew only
|
31
|
+
|
32
|
+
@property
|
33
|
+
def phonikud(self) -> PhonikudDiacritizer:
|
34
|
+
if self._phonikud is None:
|
35
|
+
self._phonikud = PhonikudDiacritizer()
|
36
|
+
return self._phonikud
|
37
|
+
|
38
|
+
@property
|
39
|
+
def tashkeel(self) -> TashkeelDiacritizer:
|
40
|
+
if self._tashkeel is None:
|
41
|
+
self._tashkeel = TashkeelDiacritizer()
|
42
|
+
return self._tashkeel
|
43
|
+
|
25
44
|
@abc.abstractmethod
|
26
45
|
def phonemize_string(self, text: str, lang: str) -> str:
|
27
46
|
raise NotImplementedError
|
@@ -29,6 +48,13 @@ class BasePhonemizer(metaclass=abc.ABCMeta):
|
|
29
48
|
def phonemize_to_list(self, text: str, lang: str) -> List[str]:
|
30
49
|
return list(self.phonemize_string(text, lang))
|
31
50
|
|
51
|
+
def add_diacritics(self, text: str, lang: str) -> str:
|
52
|
+
if lang.startswith("he"):
|
53
|
+
return self.phonikud.diacritize(text)
|
54
|
+
elif lang.startswith("ar"):
|
55
|
+
return self.tashkeel.diacritize(text, self.taskeen_threshold)
|
56
|
+
return text
|
57
|
+
|
32
58
|
def phonemize(self, text: str, lang: str) -> PhonemizedChunks:
|
33
59
|
if not text:
|
34
60
|
return [('', '', True)]
|
phoonnx/phonemizers/he.py
CHANGED
@@ -1,30 +1,12 @@
|
|
1
|
-
import os.path
|
2
|
-
|
3
|
-
import requests
|
4
|
-
|
5
|
-
from phoonnx.phonemizers.base import BasePhonemizer
|
6
1
|
from phoonnx.config import Alphabet
|
2
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
7
3
|
|
8
4
|
|
9
5
|
class PhonikudPhonemizer(BasePhonemizer):
|
10
|
-
dl_url = "https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx"
|
11
6
|
|
12
|
-
def __init__(self
|
13
|
-
from phonikud_onnx import Phonikud
|
7
|
+
def __init__(self):
|
14
8
|
from phonikud import phonemize
|
15
9
|
self.g2p = phonemize
|
16
|
-
self.diacritics = diacritics
|
17
|
-
if model is None:
|
18
|
-
base_path = os.path.expanduser("~/.local/share/phonikud")
|
19
|
-
fname = self.dl_url.split("/")[-1]
|
20
|
-
model = f"{base_path}/{fname}"
|
21
|
-
if not os.path.isfile(model):
|
22
|
-
os.makedirs(base_path, exist_ok=True)
|
23
|
-
# TODO - streaming download
|
24
|
-
data = requests.get(self.dl_url).content
|
25
|
-
with open(model, "wb") as f:
|
26
|
-
f.write(data)
|
27
|
-
self.phonikud = Phonikud(model) if diacritics else None
|
28
10
|
super().__init__(Alphabet.IPA)
|
29
11
|
|
30
12
|
@classmethod
|
@@ -48,20 +30,19 @@ class PhonikudPhonemizer(BasePhonemizer):
|
|
48
30
|
"""
|
49
31
|
"""
|
50
32
|
lang = self.get_lang(lang)
|
51
|
-
if self.diacritics:
|
52
|
-
text = self.phonikud.add_diacritics(text)
|
53
33
|
return self.g2p(text)
|
54
34
|
|
55
35
|
|
56
36
|
if __name__ == "__main__":
|
57
|
-
#text = "מתכת יקרה"
|
37
|
+
# text = "מתכת יקרה"
|
58
38
|
text = 'שָׁלוֹם עוֹלָם'
|
59
39
|
|
60
|
-
pho = PhonikudPhonemizer(
|
40
|
+
pho = PhonikudPhonemizer()
|
61
41
|
lang = "he"
|
62
42
|
|
63
43
|
print(f"\n--- Getting phonemes for '{text}' ---")
|
44
|
+
# text = pho.add_diacritics(text, lang)
|
64
45
|
phonemes = pho.phonemize(text, lang)
|
65
46
|
print(f" Phonemes: {phonemes}")
|
66
47
|
# --- Getting phonemes for 'שָׁלוֹם עוֹלָם' ---
|
67
|
-
# Phonemes: [('ʃalˈom ʔolˈam', '.', True)]
|
48
|
+
# Phonemes: [('ʃalˈom ʔolˈam', '.', True)]
|