phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,105 @@
1
+ from .phonetise_buckwalter import (arabic_to_buckwalter, buckwalter_to_arabic,
2
+ process_utterance)
3
+ from .symbols import DOUBLING_TOKEN, EOS_TOKEN, SEPARATOR_TOKEN, symbols
4
+
5
# Maps every vowel symbol variant (long/short, upper/lower case, with or
# without a trailing 0/1 digit) to its simplified form.
vowel_map = {
    # long vowels
    "aa": "aa",
    "AA": "aa",
    "uu0": "uu",
    "uu1": "uu",
    "UU0": "uu",
    "UU1": "uu",
    "ii0": "ii",
    "ii1": "ii",
    "II0": "ii",
    "II1": "ii",
    # short vowels
    "a": "a",
    "A": "a",
    "u0": "u",
    "u1": "u",
    "U0": "u",
    "U1": "u",
    "i0": "i",
    "i1": "i",
    "I0": "i",
    "I1": "i",
}

# All recognised vowel symbols, in the same order as the keys of `vowel_map`.
vowels = list(vowel_map)
50
+
51
# Default lookup table: symbol -> its position in `symbols`.
phon_to_id_ = dict((symbol, index) for index, symbol in enumerate(symbols))
52
+
53
+
54
def tokens_to_ids(phonemes, phon_to_id=None):
    """Translate a sequence of tokens into their integer ids.

    Args:
        phonemes: iterable of token strings.
        phon_to_id: optional token -> id mapping; when ``None`` the
            module-level ``phon_to_id_`` table is used.

    Returns:
        A list with one id per input token, in input order.

    Raises:
        KeyError: if a token is missing from the mapping in use.
    """
    table = phon_to_id_ if phon_to_id is None else phon_to_id
    return [table[token] for token in phonemes]
58
+
59
+
60
def ids_to_tokens(ids):
    """Return the entry of ``symbols`` at each index in ``ids``, in order."""
    return [symbols[index] for index in ids]
62
+
63
+
64
def arabic_to_phonemes(arabic):
    """Phonetise Arabic text.

    The input is converted with ``arabic_to_buckwalter`` and the result is
    handed to ``process_utterance``, whose return value is returned as is.
    """
    return process_utterance(arabic_to_buckwalter(arabic))
67
+
68
+
69
def buckwalter_to_phonemes(buckw):
    """Return the output of ``process_utterance`` for the input ``buckw``."""
    phonemes = process_utterance(buckw)
    return phonemes
71
+
72
+
73
def phonemes_to_tokens(phonemes: str, append_space=False):
    """Convert a whitespace-separated phoneme string into a token list.

    Processing steps:
      * every occurrence of ``"sil"`` is removed and every ``"+"`` becomes
        ``"_+_"`` before the string is split on whitespace;
      * a vowel (any member of ``vowels``) is replaced by its simplified
        form from ``vowel_map``;
      * a two-character non-vowel made of the same character twice is
        emitted as that single character followed by ``DOUBLING_TOKEN``;
      * anything else is passed through unchanged.

    Args:
        phonemes: whitespace-separated phoneme string.
        append_space: when true, ``SEPARATOR_TOKEN`` is appended before the
            end-of-sequence token.

    Returns:
        A new list of tokens that always ends with ``EOS_TOKEN``.
    """
    raw = phonemes.replace("sil", "").replace("+", "_+_").split()

    # Build a fresh list instead of inserting into the list being iterated:
    # inserted doubling tokens are never re-examined and each step is O(1).
    tokens = []
    for phon in raw:
        if phon in vowels:
            tokens.append(vowel_map[phon])
        elif len(phon) == 2 and phon[0] == phon[1]:
            tokens.append(phon[0])
            tokens.append(DOUBLING_TOKEN)
        else:
            tokens.append(phon)

    if append_space:
        tokens.append(SEPARATOR_TOKEN)

    tokens.append(EOS_TOKEN)

    return tokens
88
+
89
+
90
def buckwalter_to_tokens(buckw, append_space=False):
    """Turn ``buckw`` into tokens.

    Chains ``buckwalter_to_phonemes`` and ``phonemes_to_tokens``;
    ``append_space`` is forwarded to the latter.
    """
    return phonemes_to_tokens(
        buckwalter_to_phonemes(buckw),
        append_space=append_space,
    )
94
+
95
+
96
def arabic_to_tokens(arabic, append_space=False):
    """Turn Arabic text into tokens.

    The text is converted with ``arabic_to_buckwalter`` and then passed to
    ``buckwalter_to_tokens`` together with ``append_space``.
    """
    return buckwalter_to_tokens(
        arabic_to_buckwalter(arabic),
        append_space=append_space,
    )
100
+
101
+
102
def simplify_phonemes(phonemes):
    """Replace every vowel variant in a phoneme string by its simple form.

    The substitutions are plain substring replacements applied one after
    another in the key order of ``vowel_map``, so longer variants such as
    ``"uu0"`` are rewritten before shorter ones such as ``"u0"``.
    """
    simplified = phonemes
    for variant in vowel_map:
        simplified = simplified.replace(variant, vowel_map[variant])
    return simplified
@@ -0,0 +1,37 @@
1
+ import re
2
+ from functools import partial
3
+
4
+ from phoonnx.thirdparty.mantoq.pyarabic import araby
5
+ from phoonnx.thirdparty.mantoq.pyarabic import number as arnum
6
+ from phoonnx.thirdparty.mantoq.pyarabic.trans import normalize_digits
7
+
8
# Matches each run of one or more consecutive digits.
NUM_REGEX = re.compile(r"\d+")
# Text substituted for "%" by num2words when apply_tashkeel is false.
PERCENT_NO_DIAC = "بالمئة"
# Text substituted for "%" by num2words when apply_tashkeel is true.
PERCENT_DIAC = "بِالْمِئَة"
11
+
12
+
13
def _convert_num2words(m: re.Match, *, apply_tashkeel):
    """Regex substitution callback: spell out the matched number.

    Args:
        m: match whose full text (group 0) is passed to ``arnum.number2text``.
        apply_tashkeel: when true, the resulting words are split on single
            spaces, run through ``arnum.pre_tashkeel_number`` and re-joined
            with single spaces.

    Returns:
        The replacement text for the match.
    """
    words = arnum.number2text(m.group(0))
    if not apply_tashkeel:
        return words
    return " ".join(arnum.pre_tashkeel_number(words.split(" ")))
19
+
20
+
21
def num2words(text: str, handle_percent=True, apply_tashkeel: bool = True) -> str:
    """
    Converts numbers in `text` to Arabic words.
    Simple conversion. Does not check if the number is date/currency...etc.

    The text is first passed through `normalize_digits`; every digit run is
    then replaced by its word form, and the result is cleaned up with
    `araby.fix_spaces`.

    Args:
        text: input text that may contain numbers
        handle_percent: also replace every "%" by a space followed by the
            percent word (PERCENT_DIAC or PERCENT_NO_DIAC, chosen by
            `apply_tashkeel`)
        apply_tashkeel: diacritize added words
    """

    def _spell_out(match):
        # Forward the tashkeel preference to the per-match converter.
        return _convert_num2words(match, apply_tashkeel=apply_tashkeel)

    converted = NUM_REGEX.sub(_spell_out, normalize_digits(text))
    if handle_percent:
        if apply_tashkeel:
            percent_word = PERCENT_DIAC
        else:
            percent_word = PERCENT_NO_DIAC
        converted = converted.replace("%", f" {percent_word}")
    return araby.fix_spaces(converted)
@@ -0,0 +1,12 @@
1
+ # PyArabic -- Regression testing
2
+ #
3
+ # Copyright (C) 2010 Taha Zerrouki
4
+ # Author: Taha Zerrouki <taha zerrouki at gawab dot com>
5
+ # URL: <http://pyarabic.sf.net>
6
+ #
7
+ # $Id: __init__.py :
8
+
9
+ """
10
+ Main.
11
+ """
12
# Declares the markup/language of this package's docstrings (epytext,
# English) for documentation tools that read `__docformat__`.
__docformat__ = "epytext en"
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/python
2
+ # -*- coding: UTF-8 -*-
3
+ """
4
+ Improve the predefined repr function to better display objects containing unicode
5
+ Unicode representation of texts
6
+ @author: Taha Zerrouki
7
+ @contact: taha dot zerrouki at gmail dot com
8
+ @copyright: Taha Zerrouki
9
+ @license: GPL
10
+ @date:2014/03/01
11
+ @version: 0.1
12
+ """
13
+ from __future__ import (absolute_import, division, print_function,
14
+ unicode_literals)
15
+
16
+ import sys
17
+
18
+ if sys.version_info < (3, 0):
19
+ import repr as reprlib
20
+ else:
21
+ import reprlib
22
+
23
+
24
class ArabicRepr(reprlib.Repr):
    """A redefinition of the repr function for objects holding unicode text.

    Adds two hooks, ``repr_unicode`` and ``arepr_unicode``, that render a
    value as ``u'<text>'`` using plain ``%s`` formatting.

    NOTE(review): ``reprlib.Repr`` selects hooks by type name
    (``repr_<typename>``); on Python 3 text values are ``str``, so
    ``repr_unicode`` is presumably never selected there -- confirm before
    relying on this class under Python 3.

    You can use it like this (Python 2 syntax).

    Example:
        >>> import pyarabic.arabrepr as arabrepr
        >>> arepr = arabrepr.ArabicRepr()
        >>> repr = arepr.repr
        >>> word = u"السلام عليكم ورحمة الله"
        >>> wordlist = word.split(" ")
        >>> print wordlist
        [u'\u0627\u0644\u0633\u0644\u0627\u0645',
        u'\u0639\u0644\u064a\u0643\u0645',
        u'\u0648\u0631\u062d\u0645\u0629',
        u'\u0627\u0644\u0644\u0647']
        >>> print repr(wordlist)
        [u'السلام', u'عليكم', u'ورحمة', u'الله']
    """

    def repr_unicode(self, obj, level):
        """Return ``obj`` formatted as ``u'<obj>'``; ``level`` is unused."""
        return "u'%s'" % obj

    def arepr_unicode(self, obj, level):
        """Return ``obj`` formatted as ``u'<obj>'``; ``level`` is unused.

        Same body as ``repr_unicode``.
        """
        return "u'%s'" % obj
50
+
51
+
52
def arepr(data):
    """Return ``repr(data)`` with a line break inserted after every ``"},"``.

    On Python 2 the text is additionally decoded from ``unicode-escape`` and
    encoded as UTF-8 so that Arabic characters are shown as is; on Python 3
    the string is returned directly.
    """
    text = repr(data).replace("},", "},\n")
    if sys.version_info < (3, 0):
        # Python 2 only: un-escape \uXXXX sequences produced by repr().
        text = text.decode("unicode-escape").encode("utf8")
    return text