phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,569 @@
1
+ #!/usr/bin/python
2
+ # -*- coding: UTF8 -*-
3
+
4
+ # adapted from: https://github.com/nawarhalabi/Arabic-Phonetiser/blob/master/phonetise-Buckwalter.py
5
+ # license: Creative Commons Attribution-NonCommercial 4.0 International License.
6
+ # https://creativecommons.org/licenses/by-nc/4.0/
7
+
8
+ import re
9
+
10
# One-to-one transliteration table: Arabic-script code points -> Buckwalter
# ASCII symbols.  Covers the plain consonants, the hamza carriers
# (U+0621-U+0626), alif variants, taa' marboota, the tanween marks
# (F/N/K), the short vowels (a/u/i), shadda (~) and sukun (o).
arabic_to_buckw_dict = {  # mapping from Arabic script to Buckwalter
    "\u0628": "b",
    "\u0630": "*",
    "\u0637": "T",
    "\u0645": "m",
    "\u062a": "t",
    "\u0631": "r",
    "\u0638": "Z",
    "\u0646": "n",
    "\u062b": "^",
    "\u0632": "z",
    "\u0639": "E",
    "\u0647": "h",
    "\u062c": "j",
    "\u0633": "s",
    "\u063a": "g",
    "\u062d": "H",
    "\u0642": "q",
    "\u0641": "f",
    "\u062e": "x",
    "\u0635": "S",
    "\u0634": "$",
    "\u062f": "d",
    "\u0636": "D",
    "\u0643": "k",
    "\u0623": ">",
    "\u0621": "'",
    "\u0626": "}",
    "\u0624": "&",
    "\u0625": "<",
    "\u0622": "|",
    "\u0627": "A",
    "\u0649": "Y",
    "\u0629": "p",
    "\u064a": "y",
    "\u0644": "l",
    "\u0648": "w",
    "\u064b": "F",
    "\u064c": "N",
    "\u064d": "K",
    "\u064e": "a",
    "\u064f": "u",
    "\u0650": "i",
    "\u0651": "~",
    "\u0652": "o",
}
56
+
57
# Inverse of arabic_to_buckw_dict: Buckwalter ASCII symbols back to the
# Arabic-script code points.  Kept as an explicit literal (rather than
# being derived at import time) so both directions are easy to audit.
buckw_to_arabic_dict = {  # mapping from Buckwalter to Arabic script
    "b": "\u0628",
    "*": "\u0630",
    "T": "\u0637",
    "m": "\u0645",
    "t": "\u062a",
    "r": "\u0631",
    "Z": "\u0638",
    "n": "\u0646",
    "^": "\u062b",
    "z": "\u0632",
    "E": "\u0639",
    "h": "\u0647",
    "j": "\u062c",
    "s": "\u0633",
    "g": "\u063a",
    "H": "\u062d",
    "q": "\u0642",
    "f": "\u0641",
    "x": "\u062e",
    "S": "\u0635",
    "$": "\u0634",
    "d": "\u062f",
    "D": "\u0636",
    "k": "\u0643",
    ">": "\u0623",
    "'": "\u0621",
    "}": "\u0626",
    "&": "\u0624",
    "<": "\u0625",
    "|": "\u0622",
    "A": "\u0627",
    "Y": "\u0649",
    "p": "\u0629",
    "y": "\u064a",
    "l": "\u0644",
    "w": "\u0648",
    "F": "\u064b",
    "N": "\u064c",
    "K": "\u064d",
    "a": "\u064e",
    "u": "\u064f",
    "i": "\u0650",
    "~": "\u0651",
    "o": "\u0652",
}
103
+
104
+
105
def arabic_to_buckwalter(word):  # Convert input string to Buckwalter
    """Transliterate an Arabic-script string into Buckwalter notation.

    Characters with no Buckwalter equivalent (digits, Latin letters,
    punctuation, whitespace) are copied through unchanged.
    """
    return "".join(arabic_to_buckw_dict.get(char, char) for char in word)
113
+
114
+
115
def buckwalter_to_arabic(word):  # Convert input string to Arabic
    """Transliterate a Buckwalter-encoded string back into Arabic script.

    Characters outside the Buckwalter symbol set are passed through
    unchanged.
    """
    converted = []
    for char in word:
        converted.append(buckw_to_arabic_dict.get(char, char))
    return "".join(converted)
123
+
124
+
125
+ # ----------------------------------------------------------------------------
126
+ # Grapheme to Phoneme mappings------------------------------------------------
127
+ # ----------------------------------------------------------------------------
128
# ----------------------------------------------------------------------------
# Grapheme to Phoneme mappings------------------------------------------------
# ----------------------------------------------------------------------------
# Consonants whose phone never depends on context.  All hamza carriers
# (> ' } & <) collapse onto the single glottal-stop phone "<".
unambiguousConsonantMap = {
    "b": "b",
    "*": "*",
    "T": "T",
    "m": "m",
    "t": "t",
    "r": "r",
    "Z": "Z",
    "n": "n",
    "^": "^",
    "z": "z",
    "E": "E",
    "h": "h",
    "j": "j",
    "s": "s",
    "g": "g",
    "H": "H",
    "q": "q",
    "f": "f",
    "x": "x",
    "S": "S",
    "$": "$",
    "d": "d",
    "D": "D",
    "k": "k",
    ">": "<",
    "'": "<",
    "}": "<",
    "&": "<",
    "<": "<",
}

ambiguousConsonantMap = {
    # These consonants are only unambiguous in certain contexts
    "l": ["l", ""],  # [pronounced, omitted] -- lam drops before sun letters
    "w": "w",
    "y": "y",
    "p": ["t", ""],  # taa' marboota: [pronounced /t/, silent]
}

# Madda (|): [plain, emphatic] variants, each a glottal stop + long vowel.
maddaMap = {"|": [["<", "aa"], ["<", "AA"]]}

# Vowels: value[0] is the plain-context form, value[1] the emphatic-context
# form.  Where the form is itself a pair, index [0] is the full phone and
# index [1] the shortened/"mildened" alternative used before a final silent
# consonant.
vowelMap = {
    "A": [["aa", ""], ["AA", ""]],
    "Y": [["aa", ""], ["AA", ""]],
    "w": [["uu0", "uu1"], ["UU0", "UU1"]],
    "y": [["ii0", "ii1"], ["II0", "II1"]],
    "a": ["a", "A"],
    "u": [["u0", "u1"], ["U0", "U1"]],
    "i": [["i0", "i1"], ["I0", "I1"]],
}

# Tanween marks expand to vowel + n; [plain, emphatic] like vowelMap.
nunationMap = {
    "F": [["a", "n"], ["A", "n"]],
    "N": [["u1", "n"], ["U1", "n"]],
    "K": [["i1", "n"], ["I1", "n"]],
}

diacritics = ["o", "a", "u", "i", "F", "N", "K", "~"]
diacriticsWithoutShadda = ["o", "a", "u", "i", "F", "N", "K"]
# Consonants that switch the emphatic context on.
emphatics = ["D", "S", "T", "Z", "g", "x", "q"]
# Emphatics that only spread emphasis forward, not backward.
forwardEmphatics = ["g", "x"]
consonants = [
    ">",
    "<",
    "}",
    "&",
    "'",
    "b",
    "t",
    "^",
    "j",
    "H",
    "x",
    "d",
    "*",
    "r",
    "z",
    "s",
    "$",
    "S",
    "D",
    "T",
    "Z",
    "E",
    "g",
    "f",
    "q",
    "k",
    "l",
    "m",
    "n",
    "h",
    "|",
]

# Punctuation tokens passed through the phonetiser unchanged.
punctuation = [".", ",", "?", "!"]
225
+
226
+ # ------------------------------------------------------------------------------------
227
+ # Words with fixed irregular pronunciations-------------------------------------------
228
+ # ------------------------------------------------------------------------------------
229
# ------------------------------------------------------------------------------------
# Words with fixed irregular pronunciations-------------------------------------------
# ------------------------------------------------------------------------------------
# Keyed by the word's consonant skeleton (see isFixedWord).  A value is
# either a single space-separated phone string, or a list of variants;
# for lists, isFixedWord selects variants whose final phone agrees with
# the word's final short-vowel/consonant.
fixedWords = {
    "h*A": [
        "h aa * aa",
        "h aa * a",
    ],
    "h*h": ["h aa * i0 h i0", "h aa * i1 h"],
    "h*An": ["h aa * aa n i0", "h aa * aa n"],
    "h&lA'": ["h aa < u0 l aa < i0", "h aa < u0 l aa <"],
    "*lk": ["* aa l i0 k a", "* aa l i0 k"],
    "k*lk": ["k a * aa l i0 k a", "k a * aa l i1 k"],
    "*lkm": "* aa l i0 k u1 m",
    ">wl}k": ["< u0 l aa < i0 k a", "< u0 l aa < i1 k"],
    "Th": "T aa h a",
    "lkn": ["l aa k i0 nn a", "l aa k i1 n"],
    "lknh": "l aa k i0 nn a h u0",
    "lknhm": "l aa k i0 nn a h u1 m",
    "lknk": ["l aa k i0 nn a k a", "l aa k i0 nn a k i0"],
    "lknkm": "l aa k i0 nn a k u1 m",
    "lknkmA": "l aa k i0 nn a k u0 m aa",
    "lknnA": "l aa k i0 nn a n aa",
    "AlrHmn": ["rr a H m aa n i0", "rr a H m aa n"],
    "Allh": ["ll aa h i0", "ll aa h", "ll AA h u0", "ll AA h a", "ll AA h", "ll A"],
    "h*yn": ["h aa * a y n i0", "h aa * a y n"],
    "nt": "n i1 t",  # loanword ("net")
    "fydyw": "v i0 d y uu1",  # loanword ("video"), uses the /v/ phone
    "lndn": "l A n d u1 n",  # loanword ("London")
}
256
+
257
+
258
def isFixedWord(word, results, orthography, pronunciations):
    """Look *word* up in the irregular-pronunciation table.

    Matching pronunciations are appended (as phone lists) to the mutable
    *pronunciations* argument; each match is also appended as a
    "word pronunciation" line to the string *results*, which is returned
    (strings are immutable, so the caller must use the return value).
    *orthography* is accepted for interface compatibility but unused.
    """
    final = word[-1] if word else ""
    # Map the word's final grapheme to the phone(s) it may surface as;
    # this is used to filter pronunciation variants below.
    if final == "a":
        final = ["a", "A"]
    elif final == "A":
        final = ["aa"]
    elif final == "u":
        final = ["u0"]
    elif final == "i":
        final = ["i0"]
    elif final in unambiguousConsonantMap:
        final = [unambiguousConsonantMap[final]]
    # Strip diacritics, keeping only the consonant skeleton used as the
    # lookup key into fixedWords.
    skeleton = re.sub(r"[^h*Ahn\'>wl}kmyTtfd]", "", word)
    entry = fixedWords.get(skeleton)
    if entry is not None:  # word has a fixed irregular pronunciation
        if isinstance(entry, list):
            for variant in entry:
                # Keep only variants whose last phone agrees with the ending.
                if variant.split(" ")[-1] in final:
                    results += word + " " + variant + "\n"
                    pronunciations.append(variant.split(" "))
        else:
            # Single fixed pronunciation: always accepted.
            results += word + " " + entry + "\n"
            pronunciations.append(entry.split(" "))
    return results
286
+
287
+
288
def preprocess_utterance(utterance):
    """Normalise a Buckwalter utterance and split it into words.

    Applies literal rewrites (tatweel removal, sukun removal, tanween
    expansion, madda decomposition, shadda/vowel reordering), then
    regex rules that insert the short vowels implied by hamza forms and
    detach sentence punctuation, and finally splits on single spaces.
    Returns the list of word tokens.
    """
    # Order matters: e.g. "AF" must be collapsed before "F" is expanded.
    literal_rules = [
        ("AF", "F"),
        ("\u0640", ""),  # tatweel carries no pronunciation
        ("o", ""),  # drop sukun
        ("aA", "A"),
        ("aY", "Y"),
        (" A", " "),
        ("F", "an"),  # expand tanween
        ("N", "un"),
        ("K", "in"),
        ("|", ">A"),  # decompose madda
        # Move shadda before its vowel so the rule engine sees "~v".
        ("i~", "~i"),
        ("a~", "~a"),
        ("u~", "~u"),
    ]
    for old, new in literal_rules:
        utterance = utterance.replace(old, new)

    regex_rules = [
        # Hamza types not followed by a short vowel get one added.
        (r"Ai", "<i"),
        (r"Aa", ">a"),
        (r"Au", ">u"),
        (r"^>([^auAw])", ">a\\1"),
        (r" >([^auAw ])", " >a\\1"),
        (r"<([^i])", "<i\\1"),
        # Detach sentence punctuation into its own token.
        (r"(\S)(\.|\?|,|!)", "\\1 \\2"),
    ]
    for pattern, replacement in regex_rules:
        utterance = re.sub(pattern, replacement, utterance)

    return utterance.split(" ")
319
+
320
+
321
def process_word(word):
    """Return one pronunciation (a list of phone symbols) for a single
    Buckwalter-encoded word.

    Punctuation tokens are returned unchanged.  Otherwise the word is
    wrapped in the boundary markers "bb"/"ee" and scanned character by
    character through the Modern Standard Arabic rule set below.  Rules
    that allow alternatives push a list of candidate phones; all
    combinations are expanded afterwards and the first candidate
    pronunciation is returned.
    """
    if word in punctuation:
        return word

    pronunciations = (
        []
    )  # Start with empty set of possible pronunciations of current word
    # Add fixed irregular pronuncations if possible
    isFixedWord(word, "", word, pronunciations)

    # Indicates whether current character is in an emphatic context or not. Starts with False
    emphaticContext = False
    # This is the end/beginning of word symbol. just for convenience
    word = "bb" + word + "ee"

    phones = []  # Empty list which will hold individual possible word's pronunciation

    # -----------------------------------------------------------------------------------
    # MAIN LOOP: here is where the Modern Standard Arabic phonetisation rule-set starts--
    # -----------------------------------------------------------------------------------
    for index in range(2, len(word) - 2):
        letter = word[index]  # Current Character
        letter1 = word[index + 1]  # Next Character
        letter2 = word[index + 2]  # Next-Next Character
        letter_1 = word[index - 1]  # Previous Character
        letter_2 = word[index - 2]  # Before Previous Character
        # ----------------------------------------------------------------------------------------------------------------
        # NOTE(review): the adjacent string literals below concatenate to the
        # single string "r, u'l'" (a commented-out u'l' folded into "r"), so
        # "r" itself is NOT actually excluded here — confirm against the
        # upstream Arabic-Phonetiser source whether this is intentional.
        if letter in consonants + ["w", "y"] and not letter in emphatics + [
            "r" """, u'l'"""
        ]:  # non-emphatic consonants (except for Lam and Ra) change emphasis back to False
            emphaticContext = False
        if letter in emphatics:  # Emphatic consonants change emphasis context to True
            emphaticContext = True
        # If following letter is backward emphatic, emphasis state is set to True
        if letter1 in emphatics and not letter1 in forwardEmphatics:
            emphaticContext = True
        # ----------------------------------------------------------------------------------------------------------------
        # ----------------------------------------------------------------------------------------------------------------
        # Unambiguous consonant phones. These map to a predetermined phoneme
        if letter in unambiguousConsonantMap:
            phones += [unambiguousConsonantMap[letter]]
        # ----------------------------------------------------------------------------------------------------------------
        if letter == "l":  # Lam is a consonant which requires special treatment
            # Lam could be omitted in definite article (sun letters)
            if (not letter1 in diacritics and not letter1 in vowelMap) and letter2 in [
                "~"
            ]:
                phones += [ambiguousConsonantMap["l"][1]]  # omit
            else:
                # do not omit
                phones += [ambiguousConsonantMap["l"][0]]
        # ----------------------------------------------------------------------------------------------------------------
        # shadda just doubles the letter before it
        if letter == "~" and not letter_1 in ["w", "y"] and len(phones) > 0:
            phones[-1] += phones[-1]
        # ----------------------------------------------------------------------------------------------------------------
        if letter == "|":  # Madda only changes based in emphaticness
            if emphaticContext:
                phones += [maddaMap["|"][1]]
            else:
                phones += [maddaMap["|"][0]]
        # ----------------------------------------------------------------------------------------------------------------
        if (
            letter == "p"
        ):  # Ta' marboota is determined by the following if it is a diacritic or not
            if letter1 in diacritics:
                phones += [ambiguousConsonantMap["p"][0]]
            else:
                phones += [ambiguousConsonantMap["p"][1]]
        # ----------------------------------------------------------------------------------------------------------------
        if letter in vowelMap:
            # Waw and Ya are complex they could be consonants or vowels and their gemination is complex as it could be a combination of a vowel and consonants
            if letter in ["w", "y"]:
                if (
                    letter1 in diacriticsWithoutShadda + ["A", "Y"]
                    or (
                        letter1 in ["w", "y"]
                        and not letter2 in diacritics + ["A", "w", "y"]
                    )
                    or (
                        letter_1 in diacriticsWithoutShadda
                        and letter1 in consonants + ["e"]
                    )
                ):
                    if (
                        letter in ["w"]
                        and letter_1 in ["u"]
                        and not letter1 in ["a", "i", "A", "Y"]
                    ) or (
                        letter in ["y"]
                        and letter_1 in ["i"]
                        and not letter1 in ["a", "u", "A", "Y"]
                    ):
                        # Long-vowel reading (uu/ii), emphatic or plain.
                        if emphaticContext:
                            phones += [vowelMap[letter][1][0]]
                        else:
                            phones += [vowelMap[letter][0][0]]
                    else:
                        if letter1 in ["A"] and letter in ["w"] and letter2 in ["e"]:
                            phones += [
                                [ambiguousConsonantMap[letter], vowelMap[letter][0][0]]
                            ]
                        else:
                            # Consonantal reading of waw/ya.
                            phones += [ambiguousConsonantMap[letter]]
                elif letter1 in ["~"]:
                    # Geminated waw/ya: either double consonant or vowel+consonant.
                    if (
                        letter_1 in ["a"]
                        or (letter in ["w"] and letter_1 in ["i", "y"])
                        or (letter in ["y"] and letter_1 in ["w", "u"])
                    ):
                        phones += [
                            ambiguousConsonantMap[letter],
                            ambiguousConsonantMap[letter],
                        ]
                    else:
                        phones += [
                            vowelMap[letter][0][0],
                            ambiguousConsonantMap[letter],
                        ]
                else:  # Waws and Ya's at the end of the word could be shortened
                    if emphaticContext:
                        if letter_1 in consonants + ["u", "i"] and letter1 in ["e"]:
                            phones += [
                                [vowelMap[letter][1][0], vowelMap[letter][1][0][1:]]
                            ]
                        else:
                            phones += [vowelMap[letter][1][0]]
                    else:
                        if letter_1 in consonants + ["u", "i"] and letter1 in ["e"]:
                            phones += [
                                [vowelMap[letter][0][0], vowelMap[letter][0][0][1:]]
                            ]
                        else:
                            phones += [vowelMap[letter][0][0]]
            # Kasra and Damma could be mildened if before a final silent consonant
            if letter in ["u", "i"]:
                if emphaticContext:
                    if (
                        (letter1 in unambiguousConsonantMap or letter1 == "l")
                        and letter2 == "e"
                        and len(word) > 7
                    ):
                        phones += [vowelMap[letter][1][1]]
                    else:
                        phones += [vowelMap[letter][1][0]]
                else:
                    if (
                        (letter1 in unambiguousConsonantMap or letter1 == "l")
                        and letter2 == "e"
                        and len(word) > 7
                    ):
                        phones += [vowelMap[letter][0][1]]
                    else:
                        phones += [vowelMap[letter][0][0]]
            # Alif could be ommited in definite article and beginning of some words
            if letter in ["a", "A", "Y"]:
                if letter in ["A"] and letter_1 in ["w", "k"] and letter_2 == "b":
                    phones += [["a", vowelMap[letter][0][0]]]
                elif letter in ["A"] and letter_1 in ["u", "i"]:
                    temp = True  # do nothing
                # Waw al jama3a: The Alif after is optional
                elif letter in ["A"] and letter_1 in ["w"] and letter1 in ["e"]:
                    phones += [[vowelMap[letter][0][0], vowelMap[letter][0][1]]]
                elif letter in ["A", "Y"] and letter1 in ["e"]:
                    if emphaticContext:
                        phones += [[vowelMap[letter][1][0], vowelMap["a"][1]]]
                    else:
                        phones += [[vowelMap[letter][0][0], vowelMap["a"][0]]]
                else:
                    if emphaticContext:
                        phones += [vowelMap[letter][1][0]]
                    else:
                        phones += [vowelMap[letter][0][0]]
    # -------------------------------------------------------------------------------------------------------------------------
    # End of main loop---------------------------------------------------------------------------------------------------------
    # -------------------------------------------------------------------------------------------------------------------------
    possibilities = 1  # Holds the number of possible pronunciations of a word

    # count the number of possible pronunciations
    for letter in phones:
        if isinstance(letter, list):
            possibilities = possibilities * len(letter)

    # Generate all possible pronunciations (mixed-radix enumeration over
    # every list-valued slot in `phones`).
    for i in range(0, possibilities):
        pronunciations.append([])
        iterations = 1
        for index, letter in enumerate(phones):
            if isinstance(letter, list):
                curIndex = int(i / iterations) % len(letter)
                if letter[curIndex] != "":
                    pronunciations[-1].append(letter[curIndex])
                iterations = iterations * len(letter)
            else:
                if letter != "":
                    pronunciations[-1].append(letter)

    # Iterate through each pronunciation to perform some house keeping. And append pronunciation to dictionary
    # 1- Remove duplicate vowels
    # 2- Remove duplicate y and w
    for pronunciation in pronunciations:
        prevLetter = ""
        toDelete = []
        for i in range(0, len(pronunciation)):
            letter = pronunciation[i]
            # Delete duplicate consecutive vowels
            if (
                letter in ["aa", "uu0", "ii0", "AA", "UU0", "II0"]
                and prevLetter.lower() == letter[1:].lower()
            ):
                toDelete.append(i - 1)
                pronunciation[i] = pronunciation[i - 1][0] + pronunciation[i - 1]
            # Delete duplicates
            if letter in ["u0", "i0"] and prevLetter.lower() == letter.lower():
                toDelete.append(i - 1)
                pronunciation[i] = pronunciation[i - 1]
            if letter in ["y", "w"] and prevLetter == letter:  # delete duplicate
                pronunciation[i - 1] += pronunciation[i - 1]
                toDelete.append(i)

            prevLetter = letter
        for i in reversed(range(0, len(toDelete))):
            del pronunciation[toDelete[i]]

    # Only the first candidate (fixed-word entries, when present, come first)
    # is returned to the caller.
    return pronunciations[0]
547
+
548
+
549
def process_utterance(utterance):
    """Phonetise a whole Buckwalter utterance.

    The utterance is normalised and split by preprocess_utterance(),
    each word is phonetised by process_word(), and word phone groups are
    joined with " + " (phones within a group joined by spaces).
    Punctuation tokens returned by process_word() are appended to the
    preceding word's group rather than forming their own.
    """
    tokens = preprocess_utterance(utterance)
    phonemes = []

    for token in tokens:
        if token in ("-", "sil"):
            # Explicit silence markers map straight to the sil phone.
            phonemes.append(["sil"])
            continue

        word_phones = process_word(token)
        # process_word returns punctuation strings unchanged; attach them
        # to the previous group when one exists.
        if word_phones in punctuation and phonemes:
            phonemes[-1] += word_phones
        else:
            phonemes.append(word_phones)

    return " + ".join(" ".join(group) for group in phonemes)
@@ -0,0 +1,64 @@
1
# Symbol inventory for the Buckwalter-based Arabic phonetiser output.
PADDING_TOKEN = "_pad_"
EOS_TOKEN = "_eos_"
DOUBLING_TOKEN = "_dbl_"
SEPARATOR_TOKEN = "_+_"

# Tokens treated as end-of-sequence markers.
EOS_TOKENS = [SEPARATOR_TOKEN, EOS_TOKEN]

symbols = [
    # special tokens
    PADDING_TOKEN,  # padding
    EOS_TOKEN,  # eos-token
    "_sil_",  # silence
    DOUBLING_TOKEN,  # doubling
    SEPARATOR_TOKEN,  # word separator
    # punctuation
    ".",
    ",",
    "?",
    "!",
    ":",
    ";",
    "-",
    ")",
    "(",
    '"',
    # consonants
    "<",  # hamza
    "b",  # baa'
    "t",  # taa'
    "^",  # thaa'
    "j",  # jiim
    "H",  # Haa'
    "x",  # xaa'
    "d",  # daal
    "*",  # dhaal
    "r",  # raa'
    "z",  # zaay
    "s",  # siin
    "$",  # shiin
    "S",  # Saad
    "D",  # Daad
    "T",  # Taa'
    "Z",  # Zhaa'
    "E",  # 3ayn
    "g",  # ghain
    "f",  # faa'
    "q",  # qaaf
    "k",  # kaaf
    "l",  # laam
    "m",  # miim
    "n",  # nuun
    "h",  # haa'
    "w",  # waaw
    "y",  # yaa'
    "v",  # /v/ for loanwords e.g. in u'fydyw': u'v i0 d y uu1',
    # vowels
    "a",  # short
    "u",
    "i",
    "aa",  # long
    "uu",
    "ii",
    # NOTE(review): "aaaa" appears to be a doubled long vowel; also, the
    # numbered/emphatic phone variants the phonetiser emits (u0/u1, i0/i1,
    # uu0/uu1, AA, ...) are absent from this list — confirm how phones are
    # normalised before being looked up against `symbols`.
    "aaaa",
]