phoonnx 0.0.2a1__py3-none-any.whl → 0.0.2a2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
phoonnx/config.py CHANGED
@@ -6,7 +6,6 @@ from phoonnx.phoneme_ids import (load_phoneme_ids, BlankBetween,
6
6
  DEFAULT_BLANK_WORD_TOKEN, DEFAULT_BLANK_TOKEN,
7
7
  DEFAULT_PAD_TOKEN, DEFAULT_BOS_TOKEN, DEFAULT_EOS_TOKEN)
8
8
 
9
-
10
9
  DEFAULT_NOISE_SCALE = 0.667
11
10
  DEFAULT_LENGTH_SCALE = 1.0
12
11
  DEFAULT_NOISE_W_SCALE = 0.8
@@ -22,6 +21,8 @@ class Alphabet(str, Enum):
22
21
  UNICODE = "unicode"
23
22
  IPA = "ipa"
24
23
  ARPA = "arpa" # en
24
+ SAMPA = "sampa"
25
+ XSAMPA = "x-sampa"
25
26
  HANGUL = "hangul" # ko
26
27
  KANA = "kana" # ja
27
28
  HIRA = "hira" # ja
@@ -32,6 +33,7 @@ class Alphabet(str, Enum):
32
33
  ERAAB = "eraab" # fa
33
34
  COTOVIA = "cotovia" # gl
34
35
  HANZI = "hanzi" # zh
36
+ MANTOQ = "mantoq" # ar
35
37
 
36
38
 
37
39
 
phoonnx/phonemizers/ar.py CHANGED
@@ -5,7 +5,7 @@ from phoonnx.config import Alphabet
5
5
  class MantoqPhonemizer(BasePhonemizer):
6
6
 
7
7
  def __init__(self):
8
- super().__init__(Alphabet.IPA)
8
+ super().__init__(Alphabet.MANTOQ)
9
9
 
10
10
  @classmethod
11
11
  def get_lang(cls, target_lang: str) -> str:
phoonnx/phonemizers/gl.py CHANGED
@@ -12,6 +12,57 @@ class CotoviaError(Exception):
12
12
  pass
13
13
 
14
14
 
15
# Lookup table from Cotovía phoneme symbols to their IPA equivalents.
# Several Cotovía symbols ("Z", "jj", "L") share the same IPA target, and
# "pau" maps to a single space (presumably Cotovía's pause marker - confirm
# against the Cotovía documentation).
COTOVIA2IPA = {
    "pau": " ",
    "a": "a",
    "E": "ɛ",
    "e": "e",
    "i": "i",
    "j": "j",
    "O": "ɔ",
    "o": "o",
    "u": "u",
    "w": "w",
    "p": "p",
    "b": "b",
    "B": "β",
    "t": "t",
    "d": "d",
    "D": "ð",
    "k": "k",
    "g": "g",
    "G": "ɣ",
    "f": "f",
    "T": "θ",
    "s": "s",
    "S": "ʃ",
    "tS": "tʃ",
    "m": "m",
    "n": "n",
    "J": "ɲ",
    "N": "ŋ",
    "l": "l",
    "Z": "ʎ",
    "jj": "ʎ",
    "L": "ʎ",
    "r": "ɾ",
    "rr": "r",
    "X": "x",
}

# One alternation over every Cotovía symbol, longest first, so multi-character
# symbols ("pau", "tS", "jj", "rr") are matched before their single-character
# prefixes. Compiled once at import time; it is NOT rebuilt if COTOVIA2IPA is
# mutated afterwards.
_COTOVIA_SYMBOL_RE = re.compile(
    "|".join(
        re.escape(symbol)
        for symbol in sorted(COTOVIA2IPA, key=len, reverse=True)
    )
)


def cotovia2ipa(text: str) -> str:
    """
    Converts a string of Cotovía phonemes to IPA.

    The input is scanned once from left to right. At each position the
    longest matching Cotovía symbol is replaced with its IPA value from
    ``COTOVIA2IPA``; characters that are not Cotovía symbols (spaces,
    punctuation, ...) are copied through unchanged.

    A single pass is used instead of chained ``str.replace`` calls so that
    already-converted output is never converted again. With chained
    replacements "rr" became "r" and was then rewritten to "ɾ" by the
    later "r" -> "ɾ" pass, erasing the distinction between the two entries.

    Args:
        text: Phoneme string in Cotovía notation.

    Returns:
        The same string with every Cotovía symbol replaced by its IPA form.
    """
    return _COTOVIA_SYMBOL_RE.sub(
        lambda match: COTOVIA2IPA[match.group(0)], text
    )
64
+
65
+
15
66
  class CotoviaPhonemizer(BasePhonemizer):
16
67
  """
17
68
  A phonemizer class that uses the Cotovia TTS binary to convert text into phonemes.
@@ -19,7 +70,7 @@ class CotoviaPhonemizer(BasePhonemizer):
19
70
  regular expression transformations to clean and normalize the phonetic representation.
20
71
  """
21
72
 
22
- def __init__(self, cotovia_bin_path: Optional[str] = None):
73
+ def __init__(self, cotovia_bin_path: Optional[str] = None, alphabet: Alphabet = Alphabet.IPA):
23
74
  """
24
75
  Initializes the CotoviaPhonemizer.
25
76
 
@@ -31,7 +82,7 @@ class CotoviaPhonemizer(BasePhonemizer):
31
82
  if not os.path.exists(self.cotovia_bin):
32
83
  raise FileNotFoundError(f"Cotovia binary not found at {self.cotovia_bin}. "
33
84
  "Please ensure it's installed or provide the correct path.")
34
- super().__init__(Alphabet.COTOVIA)
85
+ super().__init__(alphabet)
35
86
 
36
87
  @classmethod
37
88
  def get_lang(cls, target_lang: str) -> str:
@@ -127,6 +178,8 @@ class CotoviaPhonemizer(BasePhonemizer):
127
178
  # substitute ' ( text )' to ', text,'
128
179
  str_ext = re.sub(r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \\2,", str_ext)
129
180
 
181
+ if self.alphabet == Alphabet.IPA:
182
+ return cotovia2ipa(str_ext)
130
183
  return str_ext
131
184
 
132
185
 
@@ -138,5 +191,5 @@ if __name__ == "__main__":
138
191
  lang = "gl"
139
192
  text_gl = "Este é un sistema de conversión de texto a voz en lingua galega baseado en redes neuronais artificiais. Ten en conta que as funcionalidades incluídas nesta páxina ofrécense unicamente con fins de demostración. Se tes algún comentario, suxestión ou detectas algún problema durante a demostración, ponte en contacto connosco."
140
193
  print(f"\n--- Getting phonemes for '{text_gl}' (Cotovia) ---")
141
- phonemes_cotovia = cotovia.phonemize(text_gl, lang)
194
+ phonemes_cotovia = cotovia.phonemize_string(text_gl, lang)
142
195
  print(f" Cotovia Phonemes: {phonemes_cotovia}")
phoonnx/version.py CHANGED
@@ -2,5 +2,5 @@
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 0
4
4
  VERSION_BUILD = 2
5
- VERSION_ALPHA = 1
5
+ VERSION_ALPHA = 2
6
6
  # END_VERSION_BLOCK
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.0.2a1
3
+ Version: 0.0.2a2
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -8,7 +8,7 @@ Requires-Dist: numpy
8
8
  Requires-Dist: onnxruntime
9
9
  Requires-Dist: quebra-frases
10
10
  Requires-Dist: langcodes
11
- Requires-Dist: ovos-number-parser>=0.3.3a1
11
+ Requires-Dist: ovos-number-parser>=0.4.0
12
12
  Requires-Dist: ovos-date-parser>=0.6.4a1
13
13
  Provides-Extra: aa
14
14
  Requires-Dist: epitran; extra == "aa"
@@ -214,6 +214,12 @@ Provides-Extra: tpi
214
214
  Requires-Dist: epitran; extra == "tpi"
215
215
  Provides-Extra: tr
216
216
  Requires-Dist: epitran; extra == "tr"
217
+ Provides-Extra: train
218
+ Requires-Dist: cython<1,>=0.29.0; extra == "train"
219
+ Requires-Dist: librosa<1,>=0.9.2; extra == "train"
220
+ Requires-Dist: numpy<2,>=1.19.0; extra == "train"
221
+ Requires-Dist: pytorch-lightning<2.0; extra == "train"
222
+ Requires-Dist: torch<2,>=1.11.0; extra == "train"
217
223
  Provides-Extra: uew
218
224
  Requires-Dist: epitran; extra == "uew"
219
225
  Provides-Extra: ug
@@ -1,19 +1,19 @@
1
1
  phoonnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- phoonnx/config.py,sha256=bO7dx2tfLotkohict3UKlCEVm-BRFB1feYYR1HarUkk,19382
2
+ phoonnx/config.py,sha256=81H34oPG2BaiOA6UM1KapoT341n068LqRprKb5ER6mY,19451
3
3
  phoonnx/phoneme_ids.py,sha256=FiNgZwV6naEsBh6XwFLh3_FyOgPiCsK9qo7S0v-CmI4,13667
4
4
  phoonnx/util.py,sha256=XSjFEoqSFcujFTHxednacgC9GrSYyF-Il5L6Utmxmu4,25909
5
- phoonnx/version.py,sha256=tnwL6H7F2rKiYfzZqG80Wt82MT0mHbzZ38RjwaJcsgs,114
5
+ phoonnx/version.py,sha256=E5lPoALEg4bqb4bmnVYnseCgozkzjZGnhpUom_OlcQI,114
6
6
  phoonnx/voice.py,sha256=FR_LafK1vSi_anPERJjZBuH3Bb9vUIof0MAW6TnALlA,20024
7
7
  phoonnx/locale/ca/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
8
8
  phoonnx/locale/en/phonetic_spellings.txt,sha256=xGQlWOABLzbttpQvopl9CU-NnwEJRqKx8iuylsdUoQA,27
9
9
  phoonnx/locale/gl/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
10
10
  phoonnx/locale/pt/phonetic_spellings.txt,sha256=KntS8QMynEJ5A3Clvcjq4qlmL-ThSbhfD6v0nKSrlqs,49
11
11
  phoonnx/phonemizers/__init__.py,sha256=QGBZk0QUgJdg2MwUWY9Kpk6ucwrEJYtHb07YcNvXCV4,1647
12
- phoonnx/phonemizers/ar.py,sha256=W3wJ_GI8gV3qb9fng5xeazX8ZlY-aNSywATTdxryiZg,1257
12
+ phoonnx/phonemizers/ar.py,sha256=rPAMGPlyXOlKY99IoHe_vWcLllepiG0YFUbOBia075E,1260
13
13
  phoonnx/phonemizers/base.py,sha256=yPg6-dvscYpl3rR3JEULG1PRF-i8DWC_C3HAZGLbxOo,7648
14
14
  phoonnx/phonemizers/en.py,sha256=N2SVoVhplQao7Ej5TXbxJU-YkAgkY0Fr9iYBFnsjFSE,9271
15
15
  phoonnx/phonemizers/fa.py,sha256=d_DZM2wqomf4gcRH_rFcNA3VkQWKHru8vwBwaNG8Ll8,1452
16
- phoonnx/phonemizers/gl.py,sha256=05DX_9lQa3Uws3RPcWPtw0nXX6Vo3bybrZkpMErguTo,5529
16
+ phoonnx/phonemizers/gl.py,sha256=jEFKJJViHufZtB7lGNwWQCdWGiNKDCVZ_GRYXTaw_2c,6614
17
17
  phoonnx/phonemizers/he.py,sha256=KbRI3XRZa8UtJdNWmn_fd-t5lmFSIp4Mw8UgcO5l-Po,2211
18
18
  phoonnx/phonemizers/ja.py,sha256=Xojsrt715ihnIiEk9K6giYqDo9Iykw-SHfIidrHtHSU,3834
19
19
  phoonnx/phonemizers/ko.py,sha256=kwWoOFqanCB8kv2JRx17A0hP78P1wbXlX6e8VBn1ezQ,2989
@@ -70,7 +70,7 @@ phoonnx_train/vits/attentions.py,sha256=yc_ViF8zR8z68DzphmVVVn27f9xK_5wi8S4ITLXV
70
70
  phoonnx_train/vits/commons.py,sha256=JsD8CdZ3ZcYYubYhw8So5hICBziFlCrKLrv1lMDRCDM,4645
71
71
  phoonnx_train/vits/config.py,sha256=oSuUIhw9Am7BQ5JwDgtCO-P1zRyN7nPgR-U1XuncJls,10789
72
72
  phoonnx_train/vits/dataset.py,sha256=DLLGSCkn3GF9uktoTprH1ERblZ18GO6-QsClQKWa98o,6804
73
- phoonnx_train/vits/lightning.py,sha256=iJJNAT1-jm0wYvTfBy9T_eZapDN62Pny9YdHEpnApso,12298
73
+ phoonnx_train/vits/lightning.py,sha256=ZBuSIiJ7EUU1Za2V8Uh6-_HGGRW_qwpXLLs1cEDirHA,12301
74
74
  phoonnx_train/vits/losses.py,sha256=j-uINhBcYxVXFvFutiewQpTuw-qF-J6M6hdJVeOKqNE,1401
75
75
  phoonnx_train/vits/mel_processing.py,sha256=huIjbQgewSmM39hdzRZvZUCI7fTNSMmLcAv3f8zYb8k,3956
76
76
  phoonnx_train/vits/models.py,sha256=9PziprRtkdBQ6AowFe1vG4QTCk02By-LDS9W8EtZGvE,24303
@@ -80,7 +80,7 @@ phoonnx_train/vits/utils.py,sha256=exiyrtPHbnnGvcHWSbaH9-gR6srH5ZPHlKiqV2IHUrQ,4
80
80
  phoonnx_train/vits/wavfile.py,sha256=oQZiTIrdw0oLTbcVwKfGXye1WtKte6qK_52qVwiMvfc,26396
81
81
  phoonnx_train/vits/monotonic_align/__init__.py,sha256=5IdAOD1Z7UloMb6d_9NRFsXoNIjEQ3h9mvOSh_AtO3k,636
82
82
  phoonnx_train/vits/monotonic_align/setup.py,sha256=0K5iJJ2mKIklx6ncEfCQS34skm5hHPiz9vRlQEvevvY,266
83
- phoonnx-0.0.2a1.dist-info/METADATA,sha256=i-4sLAwReU6JT44xM2JVBcAfKP0Rv9mMpWpiEdNUX7U,7868
84
- phoonnx-0.0.2a1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
- phoonnx-0.0.2a1.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
86
- phoonnx-0.0.2a1.dist-info/RECORD,,
83
+ phoonnx-0.0.2a2.dist-info/METADATA,sha256=-uMrs5iV1A4gqenGg1r8ZyhZN1DhBd9HCsezIy4UxPE,8145
84
+ phoonnx-0.0.2a2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
85
+ phoonnx-0.0.2a2.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
86
+ phoonnx-0.0.2a2.dist-info/RECORD,,
@@ -299,9 +299,9 @@ class VitsModel(pl.LightningModule):
299
299
  test_audio = test_audio * (1.0 / max(0.01, abs(test_audio.max())))
300
300
 
301
301
  tag = test_utt.text or str(utt_idx)
302
- self.logger.experiment.add_audio(
303
- tag, test_audio, sample_rate=self.hparams.sample_rate
304
- )
302
+ # self.logger.experiment.add_audio(
303
+ # tag, test_audio, sample_rate=self.hparams.sample_rate
304
+ # )
305
305
 
306
306
  return val_loss
307
307