phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,227 @@
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ """
4
+ Constants for named module
5
+ """
6
+
7
# Unvocalized Arabic words collected under the name RAFE3_LIST.
# NOTE(review): judging by the name and the entries (kana and its sisters,
# interrogatives, detached subject pronouns, ...), these are presumably words
# after which a following noun is expected in the nominative case -- confirm
# against the code that consumes this set.
#
# Written as a set literal (no intermediate list). Entries that were spelled
# identically more than once in the original list are written once here; the
# resulting set is unchanged.
RAFE3_LIST = {
    "أنه",
    "أنك",
    "أنها",
    "بأنها",
    "بأنه",
    "وأنها",
    "فأنها",
    "فأنه",
    "كأنه",
    "كأنها",
    # yahia alhadj
    "كان",
    "يكون",
    "كانت",
    "صار",
    "صارت",
    "يصير",
    "أمسى",
    "ليس",
    "ليست",
    "ظلّ",
    "ظلّت",
    "أضحى",
    "أضحت",
    "يضحي",
    "أصبح",
    "أصبحت",
    "يصبح",
    "بات",
    "باتت",
    "يبيت",
    "مازال",
    "لازال",
    "لايزال",
    "لازالت",
    "مايزال",
    "مازالت",
    "ماتزال",
    "مابرح",
    "مايبرح",
    "مابرحت",
    "ماانفك",
    "ماانفكّت",
    "ماينفك",
    "لاينفك",
    "مادام",
    "مادامت",
    "نعم",
    "بئس",
    "حبذا",
    # additional entries
    "هل",
    # the word "من" was commented out in the original list and stays excluded
    "ما",
    "متى",
    "أين",
    "ماذا",
    "كيف",
    "أيان",
    # demonstratives prefixed with the resumptive "fa"
    "فهذا",
    "فذلك",
    "فتلك",
    "فهؤلاء",
    "فأولئك",
    "فذلكم",
    "فهذه",
    # detached nominative (subject) pronouns
    "هو",
    "هما",
    "هم",
    "هي",
    "هن",
    "أنت",
    "أنتما",
    "أنتم",
    "أنتن",
    "أنا",
    "نحن",
    "إذ",
    # ------------
    # specific to heritage (classical) books
    "قال",
    "أخبرنا",
    "أخبرني",
    "ثنا",
}
100
+
101
# Arabic words collected under the name JAR_LIST, in both unvocalized and
# fully vocalized spellings.
# NOTE(review): judging by the name and the entries (prepositions, adverbs of
# place/time, construct-state nouns), these are presumably words after which
# a following noun is expected in the genitive case -- confirm against the
# code that consumes this set.
#
# Written as a set literal (no intermediate list). Entries that were spelled
# identically more than once in the original list are written once here; the
# resulting set is unchanged.
JAR_LIST = {
    "من",
    "عن",
    "إلى",
    "على",
    "في",
    "رب",
    "منذ",
    "مذ",
    "عدا",
    "خلا",
    "حاشا",
    "عند",
    "أمام",
    "وراء",
    "خلف",
    "مع",
    "قبل",
    "بعد",
    "تحت",
    "أي",
    "كلّ",
    "بعض",
    "غير",
    "سوى",
    "ليل",
    "شمال",
    "جنوب",
    "يمين",
    "شرق",
    "غرب",
    "شطر",
    "أسفل",
    "أعلى",
    "جنب",
    "جانب",
    "تلقاء",
    "قدام",
    "شهر",
    "سنة",
    "غروب",
    "شروق",
    "دون",
    "شهور",
    "يوم",
    "حين",
    "ساعة",
    "زمان",
    "أزمان",
    "أيام",
    "أوقات",
    "وقت",
    "لحظة",
    "خلال",
    "بدون",
    "أثناء",
    "ذات",
    "ذو",
    "ذوو",
    "ذوات",
    "ذوي",
    "بن",
    "ابن",
    "بنت",
    "بين",
    # forms that clearly head an annexation (idafa) construct
    "أبو",
    "أخو",
    "بواسطة",
    "فَوْقَ",
    "مِنْ",
    "إِلَى",
    "رُبَّ",
    "عَلَى",
    "عَنْ",
    "فِي",
    "عَمَّا",
    "حَتَّى",
    "مُنْذُ",
    "مُذْ",
    "فَإِلَى",
    "فَرُبَّ",
    "فَعَلَى",
    "فَعَنْ",
    "فَفِي",
    "فَمِنْ",
    "فَعَمَّا",
    "فَحَتَّى",
    "فَمُنْذُ",
    "فَمُذْ",
    "وَإِلَى",
    "وَرُبَّ",
    "وَعَلَى",
    "وَعَنْ",
    "وَفِي",
    "وَمِنْ",
    "وَعَمَّا",
    "وَحَتَّى",
    "وَمُنْذُ",
    "وَمُذْ",
}
206
+
207
# Arabic particles collected under the name NOUN_NASEB_LIST ("inna" and
# related particles), unvocalized plus two vocalized spellings.
# NOTE(review): presumably particles after which a following noun is expected
# in the accusative case -- confirm against the code that consumes this set.
#
# Written as a set literal (no intermediate list); contents are unchanged.
NOUN_NASEB_LIST = {
    "أن",
    "إن",
    "فإن",
    "لأن",
    "كأن",
    "لكن",
    "ليت",
    "لعل",
    # vocalized forms
    "أَنَّ",
    # NOTE(review): the hamza below carries a fatha; the usual vocalization
    # of this word uses a kasra. Left untouched because it is a runtime
    # lookup key -- confirm with upstream before changing.
    "فَإَنَّ",
}
222
+
223
# Short, ordered list of Arabic given names treated as proper nouns.
PROPER_NOUNS = ["عاصم", "جبريل", "أحمد"]
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ """
4
+ Normalize
5
+ Utility functions used to prepare Arabic text for searching and indexing.
6
+ @author: Taha Zerrouki <taha_zerrouki at gmail dot com>
7
+ @author: Taha Zerrouki
8
+ @contact: taha dot zerrouki at gmail dot com
9
+ @copyright: Arabtechies, Arabeyes, Taha Zerrouki
10
+ @license: GPL
11
+ @date:2017/02/15
12
+ @version:0.3
13
+ """
14
+ import re
15
+
16
+ from . import araby as arabconst
17
+
18
+ ######################################################################
19
+ # { Individual Functions
20
+ ######################################################################
21
+
22
+
23
+ # --------------------------------------
24
def strip_tashkeel(text):
    """Remove vocalization marks (tashkeel) from a text.

    This is a thin wrapper: the work is delegated to
    ``araby.strip_tashkeel``. The marks documented as stripped are:
        - FATHA, DAMMA, KASRA
        - SUKUN
        - SHADDA
        - FATHATAN, DAMMATAN, KASRATAN

    Example:
        >>> text = u"الْعَرَبِيّةُ"
        >>> strip_tashkeel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: the text without vocalization marks.
    @rtype: unicode.
    """
    unvocalized = arabconst.strip_tashkeel(text)
    return unvocalized
42
+
43
+
44
+ # strip tatweel from a text and return a result text
45
+ # --------------------------------------
46
def strip_tatweel(text):
    """Remove tatweel (elongation) characters from a text.

    This is a thin wrapper: the work is delegated to
    ``araby.strip_tatweel``.

    Example:
        >>> text = u"العـــــربية"
        >>> strip_tatweel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: the text without tatweel characters.
    @rtype: unicode.
    """
    without_tatweel = arabconst.strip_tatweel(text)
    return without_tatweel
61
+
62
+
63
+ # --------------------------------------
64
def normalize_hamza(text):
    """Unify the different hamza spellings and return the resulting text.

    Two substitutions are applied, in this order:
        1. every match of ``araby.ALEFAT_PATTERN`` becomes ``araby.ALEF``
           (documented as covering ALEF_MADDA, ALEF_HAMZA_ABOVE,
           ALEF_HAMZA_BELOW, HAMZA_ABOVE and HAMZA_BELOW);
        2. every match of ``araby.HAMZAT_PATTERN`` becomes ``araby.HAMZA``
           (documented as covering WAW_HAMZA and YEH_HAMZA).

    Example:
        >>> text = u"أهؤلاء من أولئكُ"
        >>> normalize_hamza(text)
        اهءلاء من اولءكُ

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    # (compiled pattern, replacement letter), applied sequentially.
    rewrites = (
        (arabconst.ALEFAT_PATTERN, arabconst.ALEF),
        (arabconst.HAMZAT_PATTERN, arabconst.HAMZA),
    )
    for pattern, letter in rewrites:
        text = pattern.sub(letter, text)
    return text
83
+
84
+
85
+ # --------------------------------------
86
def normalize_lamalef(text):
    """Expand Lam-Alef ligatures into separate letters.

    Some systems encode a Lam-Alef ligature as one character. This thin
    wrapper delegates to ``araby.normalize_ligature``, which is documented
    as rewriting the following ligatures as the two letters LAM and ALEF:
        - LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW,
          LAM_ALEF_MADDA_ABOVE

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    expanded = arabconst.normalize_ligature(text)
    return expanded
106
+
107
+
108
+ # --------------------------------------
109
def normalize_spellerrors(text):
    """Fold two commonly confused letter pairs and return the result.

    Writers often do not distinguish TEH_MARBUTA from HEH, nor
    ALEF_MAKSURA from YEH. To make such spellings compare equal, the
    following replacements are applied, in this order:
        - TEH_MARBUTA becomes HEH
        - ALEF_MAKSURA becomes YEH

    Example:
        >>> text = u"اشترت سلمى دمية وحلوى"
        >>> normalize_spellerrors(text)
        اشترت سلمي دميه وحلوي

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    # (letter to replace, canonical letter); each source letter is matched
    # through a one-entry regex character class.
    folds = (
        (arabconst.TEH_MARBUTA, arabconst.HEH),
        (arabconst.ALEF_MAKSURA, arabconst.YEH),
    )
    for variant, canonical in folds:
        text = re.sub("[%s]" % variant, canonical, text)
    return text
131
+
132
+
133
+ ######################################################################
134
+ # { Normalize One Function
135
+ ######################################################################
136
+
137
+
138
def normalize_searchtext(text):
    """Run the full normalization pipeline on a text and return the result.

    The individual normalizers are applied in this order:
        1. strip tashkeel
        2. strip tatweel
        3. normalize Lam Alef ligatures
        4. normalize Hamza forms
        5. normalize Teh Marbuta and Alef Maksura

    Example:
        >>> text = u'أستشتري دمـــى آلية لأبنائك قبل الإغلاق'
        >>> normalize_searchtext(text)
        استشتري دمي اليه لابناءك قبل الاغلاق

    @param text: arabic text.
    @type text: unicode.
    @return: the normalized text.
    @rtype: unicode.
    """
    # Ligatures are expanded before hamza normalization runs.
    pipeline = (
        strip_tashkeel,
        strip_tatweel,
        normalize_lamalef,
        normalize_hamza,
        normalize_spellerrors,
    )
    for step in pipeline:
        text = step(text)
    return text