phoonnx 0.1.0a1__tar.gz → 0.1.1a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/PKG-INFO +94 -93
  2. phoonnx-0.1.1a1/README.md +128 -0
  3. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/config.py +11 -1
  4. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/__init__.py +4 -1
  5. phoonnx-0.1.1a1/phoonnx/phonemizers/ar.py +98 -0
  6. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/base.py +27 -1
  7. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/he.py +6 -25
  8. phoonnx-0.1.1a1/phoonnx/phonemizers/mul.py +1219 -0
  9. phoonnx-0.1.1a1/phoonnx/phonemizers/mwl.py +36 -0
  10. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/hangul2ipa.py +1 -0
  11. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/__init__.py +1 -26
  12. phoonnx-0.1.1a1/phoonnx/thirdparty/phonikud/__init__.py +24 -0
  13. phoonnx-0.1.1a1/phoonnx/version.py +10 -0
  14. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/voice.py +4 -16
  15. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx.egg-info/PKG-INFO +94 -93
  16. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx.egg-info/SOURCES.txt +4 -0
  17. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx.egg-info/requires.txt +4 -0
  18. phoonnx-0.1.1a1/phoonnx_train/export_onnx.py +360 -0
  19. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/preprocess.py +36 -9
  20. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/dataset.py +4 -0
  21. phoonnx-0.1.0a1/README.md +0 -73
  22. phoonnx-0.1.0a1/phoonnx/phonemizers/ar.py +0 -106
  23. phoonnx-0.1.0a1/phoonnx/phonemizers/mul.py +0 -606
  24. phoonnx-0.1.0a1/phoonnx/version.py +0 -6
  25. phoonnx-0.1.0a1/phoonnx_train/export_onnx.py +0 -109
  26. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/__init__.py +0 -0
  27. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/locale/ca/phonetic_spellings.txt +0 -0
  28. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/locale/en/phonetic_spellings.txt +0 -0
  29. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/locale/gl/phonetic_spellings.txt +0 -0
  30. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/locale/pt/phonetic_spellings.txt +0 -0
  31. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phoneme_ids.py +0 -0
  32. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/en.py +0 -0
  33. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/fa.py +0 -0
  34. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/gl.py +0 -0
  35. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/ja.py +0 -0
  36. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/ko.py +0 -0
  37. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/vi.py +0 -0
  38. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/phonemizers/zh.py +0 -0
  39. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/__init__.py +0 -0
  40. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/arpa2ipa.py +0 -0
  41. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/bw2ipa.py +0 -0
  42. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  43. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  44. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/aspiration.csv +0 -0
  45. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/assimilation.csv +0 -0
  46. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/double_coda.csv +0 -0
  47. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/hanja.tsv +0 -0
  48. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/ipa.csv +0 -0
  49. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/neutralization.csv +0 -0
  50. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/tensification.csv +0 -0
  51. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/ko_tables/yale.csv +0 -0
  52. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/kog2p/__init__.py +0 -0
  53. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/kog2p/rulebook.txt +0 -0
  54. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  55. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +0 -0
  56. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/buck/symbols.py +0 -0
  57. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/buck/tokenization.py +0 -0
  58. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/num2words.py +0 -0
  59. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/__init__.py +0 -0
  60. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +0 -0
  61. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/araby.py +0 -0
  62. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/named_const.py +0 -0
  63. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/normalize.py +0 -0
  64. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/number.py +0 -0
  65. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/number_const.py +0 -0
  66. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/stack.py +0 -0
  67. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/pyarabic/trans.py +0 -0
  68. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/mantoq/unicode_symbol2label.py +0 -0
  69. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/LICENSE +0 -0
  70. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/SOURCE +0 -0
  71. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/__init__.py +0 -0
  72. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/hint_id_map.json +0 -0
  73. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/input_id_map.json +0 -0
  74. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  75. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/tashkeel/target_id_map.json +0 -0
  76. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/thirdparty/zh_num.py +0 -0
  77. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx/util.py +0 -0
  78. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx.egg-info/dependency_links.txt +0 -0
  79. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx.egg-info/top_level.txt +0 -0
  80. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/__main__.py +0 -0
  81. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/norm_audio/__init__.py +0 -0
  82. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/norm_audio/trim.py +0 -0
  83. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/norm_audio/vad.py +0 -0
  84. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/__init__.py +0 -0
  85. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/attentions.py +0 -0
  86. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/commons.py +0 -0
  87. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/config.py +0 -0
  88. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/lightning.py +0 -0
  89. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/losses.py +0 -0
  90. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/mel_processing.py +0 -0
  91. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/models.py +0 -0
  92. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/modules.py +0 -0
  93. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/monotonic_align/__init__.py +0 -0
  94. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/monotonic_align/setup.py +0 -0
  95. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/transforms.py +0 -0
  96. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/utils.py +0 -0
  97. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/phoonnx_train/vits/wavfile.py +0 -0
  98. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/setup.cfg +0 -0
  99. {phoonnx-0.1.0a1 → phoonnx-0.1.1a1}/setup.py +0 -0
@@ -1,111 +1,112 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.1.0a1
3
+ Version: 0.1.1a1
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
7
- Provides-Extra: ceb
8
- Provides-Extra: ka
9
- Provides-Extra: cjy
10
- Provides-Extra: th
11
- Provides-Extra: ta
12
- Provides-Extra: ti
7
+ Provides-Extra: cs
8
+ Provides-Extra: mr
9
+ Provides-Extra: tpi
10
+ Provides-Extra: so
13
11
  Provides-Extra: uz
14
- Provides-Extra: ca
12
+ Provides-Extra: de
13
+ Provides-Extra: th
14
+ Provides-Extra: ka
15
+ Provides-Extra: uk
16
+ Provides-Extra: en
17
+ Provides-Extra: uew
18
+ Provides-Extra: kbd
19
+ Provides-Extra: hr
20
+ Provides-Extra: sv
15
21
  Provides-Extra: ko
16
- Provides-Extra: quy
17
- Provides-Extra: ilo
18
- Provides-Extra: nan
19
- Provides-Extra: jv
20
- Provides-Extra: gan
21
- Provides-Extra: mr
22
- Provides-Extra: aa
23
- Provides-Extra: bn
24
- Provides-Extra: am
25
- Provides-Extra: rn
26
- Provides-Extra: ff
27
- Provides-Extra: km
28
- Provides-Extra: ha
29
- Provides-Extra: lv
30
- Provides-Extra: si
31
- Provides-Extra: ar
32
- Provides-Extra: cs
33
- Provides-Extra: rw
34
- Provides-Extra: mi
35
- Provides-Extra: wuu
36
- Provides-Extra: nl
37
- Provides-Extra: hak
38
- Provides-Extra: mt
22
+ Provides-Extra: got
23
+ Provides-Extra: id
24
+ Provides-Extra: sn
39
25
  Provides-Extra: ug
40
- Provides-Extra: ku
41
- Provides-Extra: hr
42
- Provides-Extra: tr
43
- Provides-Extra: za
44
- Provides-Extra: csb
45
- Provides-Extra: te
46
- Provides-Extra: ur
26
+ Provides-Extra: az
27
+ Provides-Extra: mt
28
+ Provides-Extra: pl
29
+ Provides-Extra: ceb
30
+ Provides-Extra: ky
31
+ Provides-Extra: sq
32
+ Provides-Extra: ny
33
+ Provides-Extra: ti
34
+ Provides-Extra: sg
35
+ Provides-Extra: et
36
+ Provides-Extra: eo
47
37
  Provides-Extra: tk
48
- Provides-Extra: lb
49
- Provides-Extra: ja
50
- Provides-Extra: es
51
- Provides-Extra: lo
52
- Provides-Extra: id
53
- Provides-Extra: pa
54
- Provides-Extra: it
38
+ Provides-Extra: yue
55
39
  Provides-Extra: luy
56
- Provides-Extra: de
57
- Provides-Extra: sn
58
- Provides-Extra: fi
59
- Provides-Extra: gl
60
- Provides-Extra: sr
61
- Provides-Extra: so
62
- Provides-Extra: yo
63
- Provides-Extra: or
64
- Provides-Extra: my
65
- Provides-Extra: xh
66
- Provides-Extra: fr
40
+ Provides-Extra: jam
41
+ Provides-Extra: hsn
42
+ Provides-Extra: fil
43
+ Provides-Extra: ml
44
+ Provides-Extra: mwl
45
+ Provides-Extra: lez
46
+ Provides-Extra: si
47
+ Provides-Extra: kk
67
48
  Provides-Extra: hi
68
- Provides-Extra: sg
69
- Provides-Extra: eu
70
- Provides-Extra: got
71
- Provides-Extra: tpi
72
- Provides-Extra: pt
73
- Provides-Extra: ckb
49
+ Provides-Extra: quy
50
+ Provides-Extra: zh
51
+ Provides-Extra: ru
52
+ Provides-Extra: aii
74
53
  Provides-Extra: train
75
- Provides-Extra: lsm
76
- Provides-Extra: fa
77
- Provides-Extra: av
78
- Provides-Extra: kk
79
- Provides-Extra: ky
80
- Provides-Extra: lez
81
- Provides-Extra: eo
82
- Provides-Extra: uew
83
- Provides-Extra: vi
54
+ Provides-Extra: my
55
+ Provides-Extra: yo
56
+ Provides-Extra: cjy
57
+ Provides-Extra: es
84
58
  Provides-Extra: he
59
+ Provides-Extra: sr
60
+ Provides-Extra: fr
61
+ Provides-Extra: ro
85
62
  Provides-Extra: hmn
86
- Provides-Extra: hsn
87
- Provides-Extra: et
88
63
  Provides-Extra: lt
89
- Provides-Extra: ro
64
+ Provides-Extra: ckb
65
+ Provides-Extra: tg
66
+ Provides-Extra: vi
67
+ Provides-Extra: hu
68
+ Provides-Extra: pt
69
+ Provides-Extra: ms
70
+ Provides-Extra: mi
71
+ Provides-Extra: pa
72
+ Provides-Extra: za
73
+ Provides-Extra: lv
74
+ Provides-Extra: am
75
+ Provides-Extra: ca
76
+ Provides-Extra: om
77
+ Provides-Extra: rw
78
+ Provides-Extra: nl
79
+ Provides-Extra: lsm
80
+ Provides-Extra: bn
81
+ Provides-Extra: fa
82
+ Provides-Extra: eu
83
+ Provides-Extra: te
84
+ Provides-Extra: km
90
85
  Provides-Extra: lij
86
+ Provides-Extra: tr
87
+ Provides-Extra: ku
88
+ Provides-Extra: lb
89
+ Provides-Extra: ta
90
+ Provides-Extra: av
91
+ Provides-Extra: ha
92
+ Provides-Extra: ff
93
+ Provides-Extra: fi
91
94
  Provides-Extra: sw
92
- Provides-Extra: az
93
- Provides-Extra: ms
94
- Provides-Extra: sv
95
- Provides-Extra: hu
95
+ Provides-Extra: ilo
96
+ Provides-Extra: ja
97
+ Provides-Extra: ar
96
98
  Provides-Extra: zu
97
- Provides-Extra: kbd
98
- Provides-Extra: jam
99
- Provides-Extra: sq
100
- Provides-Extra: tg
101
- Provides-Extra: ru
102
- Provides-Extra: ny
103
- Provides-Extra: yue
104
- Provides-Extra: zh
105
- Provides-Extra: uk
106
- Provides-Extra: om
107
- Provides-Extra: en
108
- Provides-Extra: aii
109
- Provides-Extra: fil
110
- Provides-Extra: pl
111
- Provides-Extra: ml
99
+ Provides-Extra: xh
100
+ Provides-Extra: lo
101
+ Provides-Extra: gl
102
+ Provides-Extra: aa
103
+ Provides-Extra: it
104
+ Provides-Extra: ur
105
+ Provides-Extra: hak
106
+ Provides-Extra: wuu
107
+ Provides-Extra: csb
108
+ Provides-Extra: nan
109
+ Provides-Extra: jv
110
+ Provides-Extra: or
111
+ Provides-Extra: gan
112
+ Provides-Extra: rn
@@ -0,0 +1,128 @@
1
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/TigreGotico/phoonnx)
2
+
3
+ # Phoonnx
4
+
5
+ A Python library for multilingual phonemization and Text-to-Speech (TTS) using ONNX models.
6
+
7
+ ## Introduction
8
+
9
+ `phoonnx` is a comprehensive toolkit for performing high-quality, efficient TTS inference using ONNX-compatible models.
10
+ It provides a flexible framework for text normalization, phonemization, and speech synthesis, with built-in support for
11
+ multiple languages and phonemic alphabets. The library is also designed to work with models trained using
12
+ `phoonnx_train`, including utilities for dataset preprocessing and exporting models to the ONNX format.
13
+
14
+ ## Features
15
+
16
+ - **Efficient Inference:** Leverages `onnxruntime` for fast and efficient TTS synthesis.
17
+ - **Multilingual Support:** Supports a wide range of languages and phonemic alphabets, including IPA, ARPA, Hangul (
18
+ Korean), and Pinyin (Chinese).
19
+ - **Multiple Phonemizers:** Integrates with various phonemizers like eSpeak, Gruut, and Epitran to convert text to
20
+ phonemes.
21
+ - **Advanced Text Normalization:** Includes robust utilities for expanding contractions and pronouncing numbers and
22
+ dates.
23
+ - **Dataset Preprocessing:** Provides a command-line tool to prepare LJSpeech-style datasets for training.
24
+ - **Model Export:** A script is included to convert trained models into the ONNX format, ready for deployment.
25
+
26
+ ## Installation
27
+
28
+ As `phoonnx` is available on PyPI, you can install it using pip.
29
+
30
+ ```bash
31
+ pip install phoonnx
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ ### Synthesizing Speech
37
+
38
+ The main component for inference is the `TTSVoice` class. You can load a model and synthesize speech from text as
39
+ follows:
40
+
41
+ ```python
42
+ import wave
43
+
44
+ from phoonnx.config import VoiceConfig, SynthesisConfig
45
+ from phoonnx.voice import TTSVoice
46
+
47
+ # Load a pre-trained ONNX model and its configuration
48
+ voice = TTSVoice.load("model.onnx", "config.json")
49
+
50
+ # Configure the synthesis parameters (optional)
51
+ synthesis_config = SynthesisConfig(
52
+ noise_scale=0.667,
53
+ length_scale=1.0,
54
+ noise_w_scale=0.8,
55
+ enable_phonetic_spellings=True, # apply pronunciation fixes, see "locale" folder in this repo
56
+ add_diacritics=False # for arabic and hebrew
57
+ )
58
+
59
+ # Synthesize audio from text
60
+ text = "Hello, this is a test of the phoonnx library."
61
+ slug = f"phoonnx_{voice.config.phoneme_type.value}_{voice.config.lang_code}"
62
+ with wave.open(f"{slug}.wav", "wb") as wav_file:
63
+ voice.synthesize_wav(text, wav_file, synthesis_config)
64
+
65
+ ```
66
+
67
+ ### Training
68
+
69
+ See the dedicated [training.md](/TRAINING.md)
70
+
71
+ ## Supported Phonemizers
72
+
73
+ `phoonnx` leverages several external Grapheme-to-Phoneme (G2P) and text-processing libraries to provide flexible and
74
+ high-quality phonemization across many languages.
75
+
76
+ When available, prefer phonemizers trained on full sentences over those trained on individual words.
77
+
78
+ The core phonemizer classes are summarized in the table below, listing the supported languages, the source library they
79
+ wrap, and the output alphabets they can generate.
80
+
81
+ ***
82
+
83
+ | Language(s) | Phonemizer Class | Source/Library | Output Alphabets | Description |
84
+ |:--------------------|:-----------------------|:-------------------------------------------------------------------------------------------------------------------|:-----------------------------|:------------|
85
+ | **Multilingual** | `ByT5Phonemizer` | [OpenVoiceOS ByT5](https://huggingface.co/collections/OpenVoiceOS/g2p-models-6886a8d612825c3fe65befa0) ONNX Models | IPA | High-quality, model-based G2P for an extensive list of languages. |
86
+ | **Multilingual** | `CharsiuPhonemizer` | [Charsiu](https://github.com/lingjzhu/CharsiuG2P) ByT5 ONNX Model | IPA | Very extensive multilingual support, including many regional dialects and variants (e.g., `eng-uk`, `spa-me`, `zho-s`). |
87
+ | **Multilingual** | `EspeakPhonemizer` | `espeak-ng` command-line tool | IPA | Broad language coverage, relying on the widely-used `espeak-ng` engine. |
88
+ | **Multilingual** | `GruutPhonemizer` | [gruut](https://github.com/rhasspy/gruut) | IPA | A tokenizer, text cleaner, and IPA phonemizer for several human languages that supports SSML. |
89
+ | **Multilingual** | `MisakiPhonemizer` | [misaki](https://github.com/hexgrad/misaki) | IPA | Misaki is a G2P engine designed for Kokoro models. |
90
+ | **Multilingual** | `TransphonePhonemizer` | [transphone](https://github.com/xinjli/transphone) | IPA | It provides approximated phoneme tokenizers and G2P model for 7546 languages registered in the Glottolog database. |
91
+ | **Multilingual** | `EpitranPhonemizer` | [epitran](https://github.com/dmort27/epitran) | IPA | A tool for transcribing orthographic text as IPA |
92
+ | **Mirandese (mwl)** | `MirandesePhonemizer` | [mwl_phonemizer](https://github.com/TigreGotico/mwl_phonemizer) | IPA | A tool for transcribing orthographic text as IPA |
93
+ | **Arabic (ar)** | `MantoqPhonemizer` | [mantoq](https://github.com/mush42/mantoq) | BUCKWALTER, IPA | Translates unvoweled Arabic to phonemes, with optional conversion to IPA. |
94
+ | **Chinese (zh)** | `JiebaPhonemizer` | [jieba](https://github.com/fxsjy/jieba) | HANZI | Segments Chinese text into words with spaces; useful for pre-processing. |
95
+ | **Chinese (zh)** | `G2pCPhonemizer` | [g2pC](https://github.com/Kyubyong/g2pC) | IPA, Pinyin | CRF-based Grapheme-to-Phoneme converter |
96
+ | **Chinese (zh)** | `G2pMPhonemizer` | [g2pm](https://github.com/kakaobrain/g2pm) | IPA, Pinyin | A Neural Grapheme-to-Phoneme Conversion Package for Mandarin Chinese |
97
+ | **Chinese (zh)** | `XpinyinPhonemizer` | [xpinyin](https://github.com/lxneng/xpinyin) | IPA, Pinyin | basic pinyin generator with optional tone marks |
98
+ | **Chinese (zh)** | `PypinyinPhonemizer` | [pypinyin](https://github.com/rainforest32/pypinyin) | IPA, Pinyin | comprehensive and accurate pinyin library |
99
+ | **English (en)** | `G2PEnPhonemizer` | [g2pE](https://github.com/Kyubyong/g2p) | IPA | A deep learning seq2seq framework based on TensorFlow |
100
+ | **English (en)** | `OpenPhonemizer` | [OpenPhonemizer](https://github.com/NeuralVox/OpenPhonemizer) | IPA | IPA Phonemizer powered by deep learning. This Phonemizer attempts to replicate the espeak Phonemizer while remaining permissively-licensed. |
101
+ | **English (en)** | `DeepPhonemizer` | [DeepPhonemizer](https://github.com/spring-media/DeepPhonemizer) | IPA / ARPA | Uses pre-trained deep learning models for English. |
102
+ | **Galician (gl)** | `CotoviaPhonemizer` | [cotovia](https://github.com/TigreGotico/cotovia-mirror) | IPA, Native Cotovia Phonemes | Relies on the `cotovia` executable for Galician phonemization. |
103
+ | **Hebrew (he)** | `PhonikudPhonemizer` | [phonikud](https://github.com/thewh1teagle/phonikud) | IPA | Converts Hebrew text to IPA phonemes. |
104
+ | **Japanese (ja)** | `OpenJTaklPhonemizer` | [pyopenjtalk](https://github.com/r9y9/pyopenjtalk) | HEPBURN, KANA | High-quality Japanese G2P. |
105
+ | **Japanese (ja)** | `CutletPhonemizer` | [cutlet](https://github.com/polm/cutlet) | HEPBURN, KUNREI, NIHON | Provides various Romanization standards. |
106
+ | **Japanese (ja)** | `PyKakasiPhonemizer` | [pykakasi](https://codeberg.org/miurahr/pykakasi) | HEPBURN, KANA, HIRA | Romanization and Kana conversion. |
107
+ | **Korean (ko)** | `G2PKPhonemizer` | [g2pK](https://github.com/Kyubyong/g2pK) | IPA, HANGUL | Provides G2P for Korean, with optional IPA conversion. |
108
+ | **Korean (ko)** | `KoG2PPhonemizer` | [KoG2P](https://github.com/scarletcho/KoG2P) | IPA, HANGUL | Provides G2P for Korean, with optional IPA conversion. |
109
+ | **Persian (fa)** | `PersianPhonemizer` | [persian_phonemizer](https://github.com/de-mh/persian_phonemizer) | ERAAB, IPA | Supports both standard IPA and the native ERAAB (diacritical) representations. |
110
+ | **Vietnamese (vi)** | `VIPhonemePhonemizer` | [Viphoneme](https://github.com/v-nhandt21/Viphoneme) | IPA | Uses the `viphoneme` library for Vietnamese G2P. |
111
+
112
+ ### Credits
113
+
114
+ Phoonnx is built on the shoulders of giants
115
+
116
+ - [jaywalnut310/vits](https://github.com/jaywalnut310/vits) - the original VITS implementation, the back-bone architecture of phoonnx models
117
+ - [MycroftAI/mimic3](https://github.com/MycroftAI/mimic3) and [rhasspy/piper](https://github.com/rhasspy/piper) - for inspiration and reference implementation of a phonemizer for pre-processing inputs
118
+
119
+ Individual languages greatly benefit from domain-specific knowledge. For convenience, phoonnx also bundles code from:
120
+
121
+ - [uvigo/cotovia](https://github.com/TigreGotico/cotovia-mirror) for galician phonemization (pre-compiled binaries bundled)
122
+ - [mush42/mantoq](https://github.com/mush42/mantoq) for arabic phonemization
123
+ - [mush42/libtashkeel](https://github.com/mush42/libtashkeel) for arabic diacritics
124
+ - [scarletcho/KoG2P](https://github.com/scarletcho/KoG2P) for korean phonemization
125
+ - [stannam/hangul_to_ipa](https://github.com/stannam/hangul_to_ipa) a converter from Hangul to IPA
126
+ - [chorusai/arpa2ipa](https://github.com/chorusai/arpa2ipa) a converter from Arpabet to IPA
127
+ - [PaddlePaddle/PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/8097a56be811a540f4f62a95a9094296c374351a/paddlespeech/t2s/frontend/zh_normalization/) for chinese number verbalization
128
+
@@ -33,7 +33,7 @@ class Alphabet(str, Enum):
33
33
  ERAAB = "eraab" # fa
34
34
  COTOVIA = "cotovia" # gl
35
35
  HANZI = "hanzi" # zh
36
- MANTOQ = "mantoq" # ar
36
+ BUCKWALTER = "buckwalter" # ar
37
37
 
38
38
 
39
39
 
@@ -48,6 +48,8 @@ class PhonemeType(str, Enum):
48
48
  EPITRAN = "epitran"
49
49
  BYT5 = "byt5"
50
50
  CHARSIU = "charsiu" # technically same as byt5, but needs special handling for whitespace
51
+ TRANSPHONE = "transphone"
52
+ MIRANDESE = "mwl_phonemizer"
51
53
 
52
54
  DEEPPHONEMIZER = "deepphonemizer" # en
53
55
  OPENPHONEMIZER = "openphonemizer" # en
@@ -382,6 +384,9 @@ class SynthesisConfig:
382
384
 
383
385
  enable_phonetic_spellings: bool = True
384
386
 
387
+ """for arabic and hebrew models"""
388
+ add_diacritics: bool = True
389
+
385
390
 
386
391
  def get_phonemizer(phoneme_type: PhonemeType,
387
392
  alphabet: Alphabet = Alphabet.IPA,
@@ -389,6 +394,7 @@ def get_phonemizer(phoneme_type: PhonemeType,
389
394
  from phoonnx.phonemizers import (EpitranPhonemizer, EspeakPhonemizer, OpenPhonemizer, OpenJTaklPhonemizer,
390
395
  ByT5Phonemizer, CharsiuPhonemizer, DeepPhonemizer, PersianPhonemizer,
391
396
  G2pCPhonemizer, G2pMPhonemizer, G2PKPhonemizer, G2PEnPhonemizer,
397
+ TransphonePhonemizer, MirandesePhonemizer,
392
398
  GruutPhonemizer, GraphemePhonemizer, MantoqPhonemizer, MisakiPhonemizer,
393
399
  KoG2PPhonemizer, PypinyinPhonemizer, PyKakasiPhonemizer, CotoviaPhonemizer,
394
400
  CutletPhonemizer, PhonikudPhonemizer, VIPhonemePhonemizer, XpinyinPhonemizer,
@@ -405,6 +411,10 @@ def get_phonemizer(phoneme_type: PhonemeType,
405
411
  phonemizer = EpitranPhonemizer()
406
412
  elif phoneme_type == PhonemeType.MISAKI:
407
413
  phonemizer = MisakiPhonemizer()
414
+ elif phoneme_type == PhonemeType.TRANSPHONE:
415
+ phonemizer = TransphonePhonemizer()
416
+ elif phoneme_type == PhonemeType.MIRANDESE:
417
+ phonemizer = MirandesePhonemizer()
408
418
  elif phoneme_type == PhonemeType.DEEPPHONEMIZER:
409
419
  phonemizer = DeepPhonemizer(model)
410
420
  elif phoneme_type == PhonemeType.OPENPHONEMIZER:
@@ -12,7 +12,8 @@ from phoonnx.phonemizers.ko import KoG2PPhonemizer, G2PKPhonemizer
12
12
  from phoonnx.phonemizers.zh import (G2pCPhonemizer, G2pMPhonemizer, PypinyinPhonemizer,
13
13
  XpinyinPhonemizer, JiebaPhonemizer)
14
14
  from phoonnx.phonemizers.mul import (EspeakPhonemizer, EpitranPhonemizer, MisakiPhonemizer,
15
- GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer)
15
+ GruutPhonemizer, ByT5Phonemizer, CharsiuPhonemizer, TransphonePhonemizer)
16
+ from phoonnx.phonemizers.mwl import MirandesePhonemizer
16
17
 
17
18
  Phonemizer = Union[
18
19
  MisakiPhonemizer,
@@ -22,6 +23,8 @@ Phonemizer = Union[
22
23
  EspeakPhonemizer,
23
24
  GruutPhonemizer,
24
25
  EpitranPhonemizer,
26
+ TransphonePhonemizer,
27
+ MirandesePhonemizer,
25
28
  OpenJTaklPhonemizer,
26
29
  CutletPhonemizer,
27
30
  PyKakasiPhonemizer,
@@ -0,0 +1,98 @@
1
+ from phoonnx.config import Alphabet
2
+ from phoonnx.phonemizers.base import BasePhonemizer
3
+ from phoonnx.thirdparty.bw2ipa import translate as bw2ipa
4
+ from phoonnx.thirdparty.mantoq import g2p as mantoq
5
+
6
+
7
+ class MantoqPhonemizer(BasePhonemizer):
8
+
9
+ def __init__(self, alphabet=Alphabet.BUCKWALTER):
10
+ if alphabet not in [Alphabet.IPA, Alphabet.BUCKWALTER]:
11
+ raise ValueError("unsupported alphabet")
12
+ super().__init__(alphabet)
13
+
14
+ @classmethod
15
+ def get_lang(cls, target_lang: str) -> str:
16
+ """
17
+ Validates and returns the closest supported language code.
18
+
19
+ Args:
20
+ target_lang (str): The language code to validate.
21
+
22
+ Returns:
23
+ str: The validated language code.
24
+
25
+ Raises:
26
+ ValueError: If the language code is unsupported.
27
+ """
28
+ # this check is here only to throw an exception if invalid language is provided
29
+ return cls.match_lang(target_lang, ["ar"])
30
+
31
+ def phonemize_string(self, text: str, lang: str = "ar") -> str:
32
+ """
33
+ Phonemizes an Arabic string using the Mantoq G2P tool.
34
+ If the alphabet is set to IPA, it then converts the result using bw2ipa.
35
+ """
36
+ lang = self.get_lang(lang)
37
+ # The mantoq function returns a tuple of (normalized_text, phonemes)
38
+ normalized_text, phonemes = mantoq(text)
39
+
40
+ # The phonemes are a list of characters, we join them into a string
41
+ # and replace the word separator token with a space.
42
+ phonemes = "".join(phonemes).replace("_+_", " ")
43
+
44
+ if self.alphabet == Alphabet.IPA:
45
+ # If the alphabet is IPA, we use the bw2ipa function to translate
46
+ # the Buckwalter-like phonemes into IPA.
47
+ return bw2ipa(phonemes)
48
+
49
+ # Otherwise, we return the phonemes in the default Mantoq alphabet.
50
+ return phonemes
51
+
52
+
53
+ if __name__ == "__main__":
54
+ from phoonnx.phonemizers.mul import EspeakPhonemizer
55
+
56
+ espeak = EspeakPhonemizer()
57
+
58
+ # Initialize a Mantoq phonemizer that outputs IPA, for comparison against espeak
59
+ pho_mantoq = MantoqPhonemizer(alphabet=Alphabet.IPA)
60
+
61
+
62
+ def compare(text):
63
+ print(f"Original Text: {text}")
64
+ print(f" Mantoq: {pho_mantoq.phonemize_string(text, 'ar')}")
65
+ print(f" Espeak: {espeak.phonemize_string(text, 'ar')}")
66
+
67
+ ts = pho_mantoq.add_diacritics(text, 'ar')
68
+ print(f"Tashkeel Text: {ts}")
69
+ print(f" Mantoq: {pho_mantoq.phonemize_string(ts, 'ar')}")
70
+ print(f" Espeak: {espeak.phonemize_string(ts, 'ar')}")
71
+ print("\n#########################")
72
+
73
+
74
+ text = "مرحبا بالعالم"
75
+ compare(text)
76
+
77
+ text = "ذهب الطالب إلى المكتبة لقراءة كتاب عن تاريخ الأندلس."
78
+ compare(text)
79
+
80
+ # 1. Test for gemination of a sun letter (e.g., ash-shams)
81
+ text = "الشمس"
82
+ compare(text)
83
+
84
+ # 2. Test for long vowels (e.g., 'fil' - elephant)
85
+ text = "فيل"
86
+ compare(text)
87
+
88
+ # 3. Test for glide (e.g., 'yawm' - day)
89
+ text = "يوم"
90
+ compare(text)
91
+
92
+ # 4. Test for long vowels (e.g., 'suwr' - wall)
93
+ text = "سور"
94
+ compare(text)
95
+
96
+ # 5. Test for glide (e.g., 'law' - if)
97
+ text = "لو"
98
+ compare(text)
@@ -8,6 +8,8 @@ from langcodes import tag_distance
8
8
  from quebra_frases import sentence_tokenize
9
9
  from phoonnx.config import Alphabet
10
10
  from phoonnx.util import normalize
11
+ from phoonnx.thirdparty.phonikud import PhonikudDiacritizer
12
+ from phoonnx.thirdparty.tashkeel import TashkeelDiacritizer
11
13
 
12
14
  # list of (substring, terminator, end_of_sentence) tuples.
13
15
  TextChunks = List[Tuple[str, str, bool]]
@@ -18,10 +20,27 @@ PhonemizedChunks = list[list[str]]
18
20
 
19
21
 
20
22
  class BasePhonemizer(metaclass=abc.ABCMeta):
21
- def __init__(self, alphabet: Alphabet = Alphabet.UNICODE):
23
+ def __init__(self, alphabet: Alphabet = Alphabet.UNICODE,
24
+ taskeen_threshold: Optional[float] = 0.8):
22
25
  super().__init__()
23
26
  self.alphabet = alphabet
24
27
 
28
+ self.taskeen_threshold = taskeen_threshold # arabic only
29
+ self._tashkeel: Optional[TashkeelDiacritizer] = None
30
+ self._phonikud: Optional[PhonikudDiacritizer] = None # hebrew only
31
+
32
+ @property
33
+ def phonikud(self) -> PhonikudDiacritizer:
34
+ if self._phonikud is None:
35
+ self._phonikud = PhonikudDiacritizer()
36
+ return self._phonikud
37
+
38
+ @property
39
+ def tashkeel(self) -> TashkeelDiacritizer:
40
+ if self._tashkeel is None:
41
+ self._tashkeel = TashkeelDiacritizer()
42
+ return self._tashkeel
43
+
25
44
  @abc.abstractmethod
26
45
  def phonemize_string(self, text: str, lang: str) -> str:
27
46
  raise NotImplementedError
@@ -29,6 +48,13 @@ class BasePhonemizer(metaclass=abc.ABCMeta):
29
48
  def phonemize_to_list(self, text: str, lang: str) -> List[str]:
30
49
  return list(self.phonemize_string(text, lang))
31
50
 
51
+ def add_diacritics(self, text: str, lang: str) -> str:
52
+ if lang.startswith("he"):
53
+ return self.phonikud.diacritize(text)
54
+ elif lang.startswith("ar"):
55
+ return self.tashkeel.diacritize(text, self.taskeen_threshold)
56
+ return text
57
+
32
58
  def phonemize(self, text: str, lang: str) -> PhonemizedChunks:
33
59
  if not text:
34
60
  return [('', '', True)]
@@ -1,30 +1,12 @@
1
- import os.path
2
-
3
- import requests
4
-
5
- from phoonnx.phonemizers.base import BasePhonemizer
6
1
  from phoonnx.config import Alphabet
2
+ from phoonnx.phonemizers.base import BasePhonemizer
7
3
 
8
4
 
9
5
  class PhonikudPhonemizer(BasePhonemizer):
10
- dl_url = "https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx"
11
6
 
12
- def __init__(self, model: str = None, diacritics=True):
13
- from phonikud_onnx import Phonikud
7
+ def __init__(self):
14
8
  from phonikud import phonemize
15
9
  self.g2p = phonemize
16
- self.diacritics = diacritics
17
- if model is None:
18
- base_path = os.path.expanduser("~/.local/share/phonikud")
19
- fname = self.dl_url.split("/")[-1]
20
- model = f"{base_path}/{fname}"
21
- if not os.path.isfile(model):
22
- os.makedirs(base_path, exist_ok=True)
23
- # TODO - streaming download
24
- data = requests.get(self.dl_url).content
25
- with open(model, "wb") as f:
26
- f.write(data)
27
- self.phonikud = Phonikud(model) if diacritics else None
28
10
  super().__init__(Alphabet.IPA)
29
11
 
30
12
  @classmethod
@@ -48,20 +30,19 @@ class PhonikudPhonemizer(BasePhonemizer):
48
30
  """
49
31
  """
50
32
  lang = self.get_lang(lang)
51
- if self.diacritics:
52
- text = self.phonikud.add_diacritics(text)
53
33
  return self.g2p(text)
54
34
 
55
35
 
56
36
  if __name__ == "__main__":
57
- #text = "מתכת יקרה"
37
+ # text = "מתכת יקרה"
58
38
  text = 'שָׁלוֹם עוֹלָם'
59
39
 
60
- pho = PhonikudPhonemizer(diacritics=False)
40
+ pho = PhonikudPhonemizer()
61
41
  lang = "he"
62
42
 
63
43
  print(f"\n--- Getting phonemes for '{text}' ---")
44
+ # text = pho.add_diacritics(text, lang)
64
45
  phonemes = pho.phonemize(text, lang)
65
46
  print(f" Phonemes: {phonemes}")
66
47
  # --- Getting phonemes for 'שָׁלוֹם עוֹלָם' ---
67
- # Phonemes: [('ʃalˈom ʔolˈam', '.', True)]
48
+ # Phonemes: [('ʃalˈom ʔolˈam', '.', True)]