phoonnx 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phoonnx/__init__.py +0 -0
- phoonnx/config.py +490 -0
- phoonnx/locale/ca/phonetic_spellings.txt +2 -0
- phoonnx/locale/en/phonetic_spellings.txt +1 -0
- phoonnx/locale/gl/phonetic_spellings.txt +2 -0
- phoonnx/locale/pt/phonetic_spellings.txt +2 -0
- phoonnx/phoneme_ids.py +453 -0
- phoonnx/phonemizers/__init__.py +45 -0
- phoonnx/phonemizers/ar.py +42 -0
- phoonnx/phonemizers/base.py +216 -0
- phoonnx/phonemizers/en.py +250 -0
- phoonnx/phonemizers/fa.py +46 -0
- phoonnx/phonemizers/gl.py +142 -0
- phoonnx/phonemizers/he.py +67 -0
- phoonnx/phonemizers/ja.py +119 -0
- phoonnx/phonemizers/ko.py +97 -0
- phoonnx/phonemizers/mul.py +606 -0
- phoonnx/phonemizers/vi.py +44 -0
- phoonnx/phonemizers/zh.py +308 -0
- phoonnx/thirdparty/__init__.py +0 -0
- phoonnx/thirdparty/arpa2ipa.py +249 -0
- phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- phoonnx/thirdparty/hangul2ipa.py +783 -0
- phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
- phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
- phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
- phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
- phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
- phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
- phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
- phoonnx/thirdparty/ko_tables/yale.csv +22 -0
- phoonnx/thirdparty/kog2p/__init__.py +385 -0
- phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
- phoonnx/thirdparty/mantoq/__init__.py +67 -0
- phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
- phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
- phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
- phoonnx/thirdparty/mantoq/num2words.py +37 -0
- phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
- phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
- phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
- phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
- phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
- phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
- phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
- phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
- phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
- phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
- phoonnx/thirdparty/tashkeel/LICENSE +22 -0
- phoonnx/thirdparty/tashkeel/SOURCE +1 -0
- phoonnx/thirdparty/tashkeel/__init__.py +212 -0
- phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
- phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
- phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
- phoonnx/thirdparty/zh_num.py +238 -0
- phoonnx/util.py +705 -0
- phoonnx/version.py +6 -0
- phoonnx/voice.py +521 -0
- phoonnx-0.0.0.dist-info/METADATA +255 -0
- phoonnx-0.0.0.dist-info/RECORD +86 -0
- phoonnx-0.0.0.dist-info/WHEEL +5 -0
- phoonnx-0.0.0.dist-info/top_level.txt +2 -0
- phoonnx_train/__main__.py +151 -0
- phoonnx_train/export_onnx.py +109 -0
- phoonnx_train/norm_audio/__init__.py +92 -0
- phoonnx_train/norm_audio/trim.py +54 -0
- phoonnx_train/norm_audio/vad.py +54 -0
- phoonnx_train/preprocess.py +420 -0
- phoonnx_train/vits/__init__.py +0 -0
- phoonnx_train/vits/attentions.py +427 -0
- phoonnx_train/vits/commons.py +147 -0
- phoonnx_train/vits/config.py +330 -0
- phoonnx_train/vits/dataset.py +214 -0
- phoonnx_train/vits/lightning.py +352 -0
- phoonnx_train/vits/losses.py +58 -0
- phoonnx_train/vits/mel_processing.py +139 -0
- phoonnx_train/vits/models.py +732 -0
- phoonnx_train/vits/modules.py +527 -0
- phoonnx_train/vits/monotonic_align/__init__.py +20 -0
- phoonnx_train/vits/monotonic_align/setup.py +13 -0
- phoonnx_train/vits/transforms.py +212 -0
- phoonnx_train/vits/utils.py +16 -0
- phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,227 @@
|
|
1
|
+
#!/usr/bin/python
|
2
|
+
# -*- coding=utf-8 -*-
|
3
|
+
"""
|
4
|
+
Constants for named module
|
5
|
+
"""
|
6
|
+
|
7
|
+
# Words collected under the name RAFE3 -- presumably tokens after which the
# following noun is read in the nominative case (raf'); confirm against the
# code that consumes this set.
RAFE3_LIST = {
    # "anna" attached to a pronoun suffix, with and without prefixes
    "أنه",
    "أنك",
    "أنها",
    "بأنها",
    "بأنه",
    "وأنها",
    "فأنها",
    "فأنه",
    "كأنه",
    "كأنها",
    # entries credited to yahia alhadj
    "كان",
    "يكون",
    "كانت",
    "صار",
    "صارت",
    "يصير",
    "أمسى",
    "ليس",
    "ليست",
    "ظلّ",
    "ظلّت",
    "أضحى",
    "أضحت",
    "يضحي",
    "أصبح",
    "أصبحت",
    "يصبح",
    "بات",
    "باتت",
    "يبيت",
    "مازال",
    "لازال",
    "لايزال",
    "لازالت",
    "مايزال",
    "مازالت",
    "ماتزال",
    "مابرح",
    "مايبرح",
    "مابرحت",
    "ماانفك",
    "ماانفكّت",
    "ماينفك",
    "لاينفك",
    "مادام",
    "مادامت",
    "نعم",
    "بئس",
    "حبذا",
    # additional entries
    "هل",
    # u'من',
    "ما",
    "متى",
    "أين",
    "ماذا",
    "كيف",
    "أيان",
    # demonstratives prefixed with the resumptive "fa"
    "فهذا",
    "فذلك",
    "فتلك",
    "فهؤلاء",
    "فأولئك",
    "فذلكم",
    "فهذه",
    # detached (independent) nominative pronouns
    "هو",
    "هما",
    "هم",
    "هي",
    "هما",  # repeated entry; harmless because this is a set
    "هن",
    "أنت",
    "أنتما",
    "أنتم",
    "أنت",  # repeated entry; harmless because this is a set
    "أنتما",  # repeated entry; harmless because this is a set
    "أنتن",
    "أنا",
    "نحن",
    "إذ",
    # ------------
    # specific to heritage (classical) books
    "قال",
    "أخبرنا",
    "أخبرني",
    "ثنا",
}
|
100
|
+
|
101
|
+
# Words collected under the name JAR -- presumably prepositions and
# construct-state words after which the following noun is read in the
# genitive case (jarr); confirm against the code that consumes this set.
JAR_LIST = {
    # unvocalized forms
    "من",
    "عن",
    "إلى",
    "على",
    "في",
    "رب",
    "منذ",
    "مذ",
    "عدا",
    "خلا",
    "حاشا",
    "عند",
    "أمام",
    "وراء",
    "خلف",
    "مع",
    "قبل",
    "بعد",
    "تحت",
    "أي",
    "كلّ",
    "بعض",
    "غير",
    "سوى",
    "ليل",
    "شمال",
    "جنوب",
    "يمين",
    "شرق",
    "غرب",
    "شطر",
    "أسفل",
    "أعلى",
    "جنب",
    "جانب",
    "تلقاء",
    "قدام",
    "أعلى",  # repeated entry; harmless because this is a set
    "شهر",
    "سنة",
    "غروب",
    "شروق",
    "دون",
    "شهور",
    "يوم",
    "حين",
    "ساعة",
    "زمان",
    "أزمان",
    "أيام",
    "أوقات",
    "وقت",
    "لحظة",
    "خلال",
    "بدون",
    "أثناء",
    "ذات",
    "ذو",
    "ذوو",
    "ذوات",
    "ذوي",
    "بن",
    "ابن",
    "بنت",
    "بين",
    # forms that are clearly in the construct state (idafa)
    "أبو",
    "أخو",
    "بواسطة",
    # vocalized forms
    "فَوْقَ",
    "مِنْ",
    "إِلَى",
    "رُبَّ",
    "عَلَى",
    "عَنْ",
    "فِي",
    "مِنْ",  # repeated entry; harmless because this is a set
    "عَمَّا",
    "حَتَّى",
    "مُنْذُ",
    "مُذْ",
    # vocalized forms prefixed with "fa"
    "فَإِلَى",
    "فَرُبَّ",
    "فَعَلَى",
    "فَعَنْ",
    "فَفِي",
    "فَمِنْ",
    "فَعَمَّا",
    "فَحَتَّى",
    "فَمُنْذُ",
    "فَمُذْ",
    # vocalized forms prefixed with "wa"
    "وَإِلَى",
    "وَرُبَّ",
    "وَعَلَى",
    "وَعَنْ",
    "وَفِي",
    "وَمِنْ",
    "وَعَمَّا",
    "وَحَتَّى",
    "وَمُنْذُ",
    "وَمُذْ",
}
|
206
|
+
|
207
|
+
# Particles collected under the name NOUN_NASEB -- presumably the particles
# after which the following noun is read in the accusative case (nasb);
# confirm against the code that consumes this set.
NOUN_NASEB_LIST = {
    # unvocalized forms
    "أن",
    "إن",
    "فإن",
    "لأن",
    "كأن",
    "لكن",
    "ليت",
    "لعل",
    # vocalized forms
    "أَنَّ",
    # NOTE(review): the hamza-below carries a fatha here, which looks unusual;
    # kept byte-for-byte because lookups depend on the exact string.
    "فَإَنَّ",
}
|
222
|
+
|
223
|
+
# Sample proper nouns (three personal names), kept as an ordered list.
PROPER_NOUNS = ["عاصم", "جبريل", "أحمد"]
|
@@ -0,0 +1,161 @@
|
|
1
|
+
#!/usr/bin/python
|
2
|
+
# -*- coding=utf-8 -*-
|
3
|
+
"""
|
4
|
+
Normalize
|
5
|
+
Utility functions used to prepare Arabic text for searching and indexing.
|
6
|
+
@author: Taha Zerrouki <taha_zerrouki at gmail dot com>
|
7
|
+
@author: Taha Zerrouki
|
8
|
+
@contact: taha dot zerrouki at gmail dot com
|
9
|
+
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
|
10
|
+
@license: GPL
|
11
|
+
@date:2017/02/15
|
12
|
+
@version:0.3
|
13
|
+
"""
|
14
|
+
import re
|
15
|
+
|
16
|
+
from . import araby as arabconst
|
17
|
+
|
18
|
+
######################################################################
|
19
|
+
# { Individual Functions
|
20
|
+
######################################################################
|
21
|
+
|
22
|
+
|
23
|
+
# --------------------------------------
|
24
|
+
def strip_tashkeel(text):
    """Remove the Arabic vowel marks (tashkeel) from a text.

    This is a thin wrapper: the work is delegated to
    ``araby.strip_tashkeel``. The marks documented as stripped are:
        - FATHA, DAMMA, KASRA
        - SUKUN
        - SHADDA
        - FATHATAN, DAMMATAN, KASRATAN

    Example:
        >>> text = u"الْعَرَبِيّةُ"
        >>> strip_tashkeel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: the text with the vowel marks removed.
    @rtype: unicode.
    """
    unvocalized = arabconst.strip_tashkeel(text)
    return unvocalized
|
42
|
+
|
43
|
+
|
44
|
+
# strip tatweel from a text and return a result text
|
45
|
+
# --------------------------------------
|
46
|
+
def strip_tatweel(text):
    """Remove the tatweel (elongation) character from a text.

    This is a thin wrapper: the work is delegated to
    ``araby.strip_tatweel``.

    Example:
        >>> text = u"العـــــربية"
        >>> strip_tatweel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: the text with tatweel removed.
    @rtype: unicode.
    """
    without_tatweel = arabconst.strip_tatweel(text)
    return without_tatweel
|
61
|
+
|
62
|
+
|
63
|
+
# --------------------------------------
|
64
|
+
def normalize_hamza(text):
    """Collapse the different Hamza spellings into single canonical letters.

    Two substitutions run in sequence:
        1. every match of ``araby.ALEFAT_PATTERN`` becomes ALEF
           (documented as covering ALEF_MADDA, ALEF_HAMZA_ABOVE,
           ALEF_HAMZA_BELOW, HAMZA_ABOVE and HAMZA_BELOW);
        2. every match of ``araby.HAMZAT_PATTERN`` becomes HAMZA
           (documented as covering WAW_HAMZA and YEH_HAMZA).

    Example:
        >>> text = u"أهؤلاء من أولئكُ"
        >>> normalize_hamza(text)
        اهءلاء من اولءكُ

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    # Alef variants first, then the Hamza carriers -- same order as before.
    alef_unified = arabconst.ALEFAT_PATTERN.sub(arabconst.ALEF, text)
    hamza_unified = arabconst.HAMZAT_PATTERN.sub(arabconst.HAMZA, alef_unified)
    return hamza_unified
|
83
|
+
|
84
|
+
|
85
|
+
# --------------------------------------
|
86
|
+
def normalize_lamalef(text):
    """Expand Lam-Alef ligatures into the two letters LAM and ALEF.

    Some systems encode the Lam-Alef ligature as a single character; this
    function hands the text to ``araby.normalize_ligature`` so that it is
    written as two letters instead. The ligatures documented as converted
    are:
        - LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW,
          LAM_ALEF_MADDA_ABOVE

    Example:
        >>> text = u"لانها لالئ الاسلام"
        >>> normalize_lamalef(text)
        لانها لالئ الاسلام

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    expanded = arabconst.normalize_ligature(text)
    return expanded
|
106
|
+
|
107
|
+
|
108
|
+
# --------------------------------------
|
109
|
+
def normalize_spellerrors(text):
    """Fold two frequently confused letter pairs into one spelling each.

    Writers often do not distinguish TEH_MARBUTA from HEH, or ALEF_MAKSURA
    from YEH. The replacements applied, in this order, are:
        - TEH_MARBUTA into HEH
        - ALEF_MAKSURA into YEH

    Example:
        >>> text = u"اشترت سلمى دمية وحلوى"
        >>> normalize_spellerrors(text)
        اشترت سلمي دميه وحلوي

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    # (letter to replace, replacement) pairs, applied in order.
    replacements = (
        (arabconst.TEH_MARBUTA, arabconst.HEH),
        (arabconst.ALEF_MAKSURA, arabconst.YEH),
    )
    for confused_letter, canonical_letter in replacements:
        # The letter is wrapped in a regex character class, as before.
        text = re.sub("[%s]" % confused_letter, canonical_letter, text)
    return text
|
131
|
+
|
132
|
+
|
133
|
+
######################################################################
|
134
|
+
# { Normalize One Function
|
135
|
+
######################################################################
|
136
|
+
|
137
|
+
|
138
|
+
def normalize_searchtext(text):
    """Run the full normalization pipeline on a text and return the result.

    The steps are applied in this order:
        1. strip tashkeel
        2. strip tatweel
        3. normalize Lam Alef
        4. normalize Hamza
        5. normalize Teh Marbuta and Alef Maksura

    Example:
        >>> text = u'أستشتري دمـــى آلية لأبنائك قبل الإغلاق'
        >>> normalize_searchtext(text)
        استشتري دمي اليه لابناءك قبل الاغلاق

    @param text: arabic text.
    @type text: unicode.
    @return: the normalized text.
    @rtype: unicode.
    """
    # Each step receives the output of the previous one.
    pipeline = (
        strip_tashkeel,
        strip_tatweel,
        normalize_lamalef,
        normalize_hamza,
        normalize_spellerrors,
    )
    for step in pipeline:
        text = step(text)
    return text
|