phoonnx 0.0.2a1__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/PKG-INFO +93 -92
- phoonnx-0.1.0/README.md +127 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/config.py +6 -1
- phoonnx-0.1.0/phoonnx/phonemizers/ar.py +98 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/base.py +27 -1
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/gl.py +56 -3
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/he.py +6 -25
- phoonnx-0.1.0/phoonnx/phonemizers/mul.py +1219 -0
- phoonnx-0.1.0/phoonnx/thirdparty/bw2ipa.py +66 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/hangul2ipa.py +1 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/__init__.py +1 -26
- phoonnx-0.1.0/phoonnx/thirdparty/phonikud/__init__.py +24 -0
- phoonnx-0.1.0/phoonnx/version.py +10 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/voice.py +4 -16
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx.egg-info/PKG-INFO +93 -92
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx.egg-info/SOURCES.txt +4 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx.egg-info/requires.txt +9 -1
- phoonnx-0.1.0/phoonnx_train/export_onnx.py +360 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/preprocess.py +36 -9
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/dataset.py +4 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/lightning.py +3 -3
- phoonnx-0.0.2a1/README.md +0 -73
- phoonnx-0.0.2a1/phoonnx/phonemizers/ar.py +0 -42
- phoonnx-0.0.2a1/phoonnx/phonemizers/mul.py +0 -606
- phoonnx-0.0.2a1/phoonnx/version.py +0 -6
- phoonnx-0.0.2a1/phoonnx_train/export_onnx.py +0 -109
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/locale/ca/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/locale/en/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/locale/gl/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/locale/pt/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phoneme_ids.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/en.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/fa.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/ja.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/ko.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/vi.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/phonemizers/zh.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/arpa2ipa.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/aspiration.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/assimilation.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/double_coda.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/hanja.tsv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/ipa.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/neutralization.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/tensification.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/ko_tables/yale.csv +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/kog2p/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/kog2p/rulebook.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/buck/symbols.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/buck/tokenization.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/num2words.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/araby.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/named_const.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/normalize.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/number.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/number_const.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/stack.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/pyarabic/trans.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/mantoq/unicode_symbol2label.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/LICENSE +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/SOURCE +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/hint_id_map.json +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/input_id_map.json +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/tashkeel/target_id_map.json +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/thirdparty/zh_num.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx/util.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx.egg-info/dependency_links.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx.egg-info/top_level.txt +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/__main__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/norm_audio/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/norm_audio/trim.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/norm_audio/vad.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/attentions.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/commons.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/config.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/losses.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/mel_processing.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/models.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/modules.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/monotonic_align/__init__.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/monotonic_align/setup.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/transforms.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/utils.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/phoonnx_train/vits/wavfile.py +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/setup.cfg +0 -0
- {phoonnx-0.0.2a1 → phoonnx-0.1.0}/setup.py +0 -0
@@ -1,110 +1,111 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: phoonnx
|
3
|
-
Version: 0.0
|
3
|
+
Version: 0.1.0
|
4
4
|
Home-page: https://github.com/TigreGotico/phoonnx
|
5
5
|
Author: JarbasAi
|
6
6
|
Author-email: jarbasai@mailfence.com
|
7
|
-
Provides-Extra:
|
8
|
-
Provides-Extra:
|
9
|
-
Provides-Extra:
|
10
|
-
Provides-Extra:
|
11
|
-
Provides-Extra: ta
|
12
|
-
Provides-Extra: ti
|
7
|
+
Provides-Extra: cs
|
8
|
+
Provides-Extra: mr
|
9
|
+
Provides-Extra: tpi
|
10
|
+
Provides-Extra: so
|
13
11
|
Provides-Extra: uz
|
14
|
-
Provides-Extra:
|
12
|
+
Provides-Extra: de
|
13
|
+
Provides-Extra: th
|
14
|
+
Provides-Extra: ka
|
15
|
+
Provides-Extra: uk
|
16
|
+
Provides-Extra: en
|
17
|
+
Provides-Extra: uew
|
18
|
+
Provides-Extra: kbd
|
19
|
+
Provides-Extra: hr
|
20
|
+
Provides-Extra: sv
|
15
21
|
Provides-Extra: ko
|
16
|
-
Provides-Extra:
|
17
|
-
Provides-Extra:
|
18
|
-
Provides-Extra:
|
19
|
-
Provides-Extra: jv
|
20
|
-
Provides-Extra: gan
|
21
|
-
Provides-Extra: mr
|
22
|
-
Provides-Extra: aa
|
23
|
-
Provides-Extra: bn
|
24
|
-
Provides-Extra: am
|
25
|
-
Provides-Extra: rn
|
26
|
-
Provides-Extra: ff
|
27
|
-
Provides-Extra: km
|
28
|
-
Provides-Extra: ha
|
29
|
-
Provides-Extra: lv
|
30
|
-
Provides-Extra: si
|
31
|
-
Provides-Extra: ar
|
32
|
-
Provides-Extra: cs
|
33
|
-
Provides-Extra: rw
|
34
|
-
Provides-Extra: mi
|
35
|
-
Provides-Extra: wuu
|
36
|
-
Provides-Extra: nl
|
37
|
-
Provides-Extra: hak
|
38
|
-
Provides-Extra: mt
|
22
|
+
Provides-Extra: got
|
23
|
+
Provides-Extra: id
|
24
|
+
Provides-Extra: sn
|
39
25
|
Provides-Extra: ug
|
40
|
-
Provides-Extra:
|
41
|
-
Provides-Extra:
|
42
|
-
Provides-Extra:
|
43
|
-
Provides-Extra:
|
44
|
-
Provides-Extra:
|
45
|
-
Provides-Extra:
|
46
|
-
Provides-Extra:
|
26
|
+
Provides-Extra: az
|
27
|
+
Provides-Extra: mt
|
28
|
+
Provides-Extra: pl
|
29
|
+
Provides-Extra: ceb
|
30
|
+
Provides-Extra: ky
|
31
|
+
Provides-Extra: sq
|
32
|
+
Provides-Extra: ny
|
33
|
+
Provides-Extra: ti
|
34
|
+
Provides-Extra: sg
|
35
|
+
Provides-Extra: et
|
36
|
+
Provides-Extra: eo
|
47
37
|
Provides-Extra: tk
|
48
|
-
Provides-Extra:
|
49
|
-
Provides-Extra: ja
|
50
|
-
Provides-Extra: es
|
51
|
-
Provides-Extra: lo
|
52
|
-
Provides-Extra: id
|
53
|
-
Provides-Extra: pa
|
54
|
-
Provides-Extra: it
|
38
|
+
Provides-Extra: yue
|
55
39
|
Provides-Extra: luy
|
56
|
-
Provides-Extra:
|
57
|
-
Provides-Extra:
|
58
|
-
Provides-Extra:
|
59
|
-
Provides-Extra:
|
60
|
-
Provides-Extra:
|
61
|
-
Provides-Extra:
|
62
|
-
Provides-Extra:
|
63
|
-
Provides-Extra:
|
40
|
+
Provides-Extra: jam
|
41
|
+
Provides-Extra: hsn
|
42
|
+
Provides-Extra: fil
|
43
|
+
Provides-Extra: ml
|
44
|
+
Provides-Extra: lez
|
45
|
+
Provides-Extra: si
|
46
|
+
Provides-Extra: kk
|
47
|
+
Provides-Extra: hi
|
48
|
+
Provides-Extra: quy
|
49
|
+
Provides-Extra: zh
|
50
|
+
Provides-Extra: ru
|
51
|
+
Provides-Extra: aii
|
52
|
+
Provides-Extra: train
|
64
53
|
Provides-Extra: my
|
65
|
-
Provides-Extra:
|
54
|
+
Provides-Extra: yo
|
55
|
+
Provides-Extra: cjy
|
56
|
+
Provides-Extra: es
|
57
|
+
Provides-Extra: he
|
58
|
+
Provides-Extra: sr
|
66
59
|
Provides-Extra: fr
|
67
|
-
Provides-Extra:
|
68
|
-
Provides-Extra:
|
69
|
-
Provides-Extra:
|
70
|
-
Provides-Extra: got
|
71
|
-
Provides-Extra: tpi
|
72
|
-
Provides-Extra: pt
|
60
|
+
Provides-Extra: ro
|
61
|
+
Provides-Extra: hmn
|
62
|
+
Provides-Extra: lt
|
73
63
|
Provides-Extra: ckb
|
64
|
+
Provides-Extra: tg
|
65
|
+
Provides-Extra: vi
|
66
|
+
Provides-Extra: hu
|
67
|
+
Provides-Extra: pt
|
68
|
+
Provides-Extra: ms
|
69
|
+
Provides-Extra: mi
|
70
|
+
Provides-Extra: pa
|
71
|
+
Provides-Extra: za
|
72
|
+
Provides-Extra: lv
|
73
|
+
Provides-Extra: am
|
74
|
+
Provides-Extra: ca
|
75
|
+
Provides-Extra: om
|
76
|
+
Provides-Extra: rw
|
77
|
+
Provides-Extra: nl
|
74
78
|
Provides-Extra: lsm
|
79
|
+
Provides-Extra: bn
|
75
80
|
Provides-Extra: fa
|
76
|
-
Provides-Extra:
|
77
|
-
Provides-Extra:
|
78
|
-
Provides-Extra:
|
79
|
-
Provides-Extra: lez
|
80
|
-
Provides-Extra: eo
|
81
|
-
Provides-Extra: uew
|
82
|
-
Provides-Extra: vi
|
83
|
-
Provides-Extra: he
|
84
|
-
Provides-Extra: hmn
|
85
|
-
Provides-Extra: hsn
|
86
|
-
Provides-Extra: et
|
87
|
-
Provides-Extra: lt
|
88
|
-
Provides-Extra: ro
|
81
|
+
Provides-Extra: eu
|
82
|
+
Provides-Extra: te
|
83
|
+
Provides-Extra: km
|
89
84
|
Provides-Extra: lij
|
85
|
+
Provides-Extra: tr
|
86
|
+
Provides-Extra: ku
|
87
|
+
Provides-Extra: lb
|
88
|
+
Provides-Extra: ta
|
89
|
+
Provides-Extra: av
|
90
|
+
Provides-Extra: ha
|
91
|
+
Provides-Extra: ff
|
92
|
+
Provides-Extra: fi
|
90
93
|
Provides-Extra: sw
|
91
|
-
Provides-Extra:
|
92
|
-
Provides-Extra:
|
93
|
-
Provides-Extra:
|
94
|
-
Provides-Extra: hu
|
94
|
+
Provides-Extra: ilo
|
95
|
+
Provides-Extra: ja
|
96
|
+
Provides-Extra: ar
|
95
97
|
Provides-Extra: zu
|
96
|
-
Provides-Extra:
|
97
|
-
Provides-Extra:
|
98
|
-
Provides-Extra:
|
99
|
-
Provides-Extra:
|
100
|
-
Provides-Extra:
|
101
|
-
Provides-Extra:
|
102
|
-
Provides-Extra:
|
103
|
-
Provides-Extra:
|
104
|
-
Provides-Extra:
|
105
|
-
Provides-Extra:
|
106
|
-
Provides-Extra:
|
107
|
-
Provides-Extra:
|
108
|
-
Provides-Extra:
|
109
|
-
Provides-Extra:
|
110
|
-
Provides-Extra: ml
|
98
|
+
Provides-Extra: xh
|
99
|
+
Provides-Extra: lo
|
100
|
+
Provides-Extra: gl
|
101
|
+
Provides-Extra: aa
|
102
|
+
Provides-Extra: it
|
103
|
+
Provides-Extra: ur
|
104
|
+
Provides-Extra: hak
|
105
|
+
Provides-Extra: wuu
|
106
|
+
Provides-Extra: csb
|
107
|
+
Provides-Extra: nan
|
108
|
+
Provides-Extra: jv
|
109
|
+
Provides-Extra: or
|
110
|
+
Provides-Extra: gan
|
111
|
+
Provides-Extra: rn
|
phoonnx-0.1.0/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
[Ask DeepWiki](https://deepwiki.com/TigreGotico/phoonnx)
|
2
|
+
|
3
|
+
# Phoonnx
|
4
|
+
|
5
|
+
A Python library for multilingual phonemization and Text-to-Speech (TTS) using ONNX models.
|
6
|
+
|
7
|
+
## Introduction
|
8
|
+
|
9
|
+
`phoonnx` is a comprehensive toolkit for performing high-quality, efficient TTS inference using ONNX-compatible models.
|
10
|
+
It provides a flexible framework for text normalization, phonemization, and speech synthesis, with built-in support for
|
11
|
+
multiple languages and phonemic alphabets. The library is also designed to work with models trained using
|
12
|
+
`phoonnx_train`, including utilities for dataset preprocessing and exporting models to the ONNX format.
|
13
|
+
|
14
|
+
## Features
|
15
|
+
|
16
|
+
- **Efficient Inference:** Leverages `onnxruntime` for fast and efficient TTS synthesis.
|
17
|
+
- **Multilingual Support:** Supports a wide range of languages and phonemic alphabets, including IPA, ARPA, Hangul (
|
18
|
+
Korean), and Pinyin (Chinese).
|
19
|
+
- **Multiple Phonemizers:** Integrates with various phonemizers like eSpeak, Gruut, and Epitran to convert text to
|
20
|
+
phonemes.
|
21
|
+
- **Advanced Text Normalization:** Includes robust utilities for expanding contractions and pronouncing numbers and
|
22
|
+
dates.
|
23
|
+
- **Dataset Preprocessing:** Provides a command-line tool to prepare LJSpeech-style datasets for training.
|
24
|
+
- **Model Export:** A script is included to convert trained models into the ONNX format, ready for deployment.
|
25
|
+
|
26
|
+
## Installation
|
27
|
+
|
28
|
+
As `phoonnx` is available on PyPI, you can install it using pip.
|
29
|
+
|
30
|
+
```bash
|
31
|
+
pip install phoonnx
|
32
|
+
```
|
33
|
+
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
### Synthesizing Speech
|
37
|
+
|
38
|
+
The main component for inference is the `TTSVoice` class. You can load a model and synthesize speech from text as
|
39
|
+
follows:
|
40
|
+
|
41
|
+
```python
|
42
|
+
import wave
|
43
|
+
|
44
|
+
from phoonnx.config import VoiceConfig, SynthesisConfig
|
45
|
+
from phoonnx.voice import TTSVoice
|
46
|
+
|
47
|
+
# Load a pre-trained ONNX model and its configuration
|
48
|
+
voice = TTSVoice.load("model.onnx", "config.json")
|
49
|
+
|
50
|
+
# Configure the synthesis parameters (optional)
|
51
|
+
synthesis_config = SynthesisConfig(
|
52
|
+
noise_scale=0.667,
|
53
|
+
length_scale=1.0,
|
54
|
+
noise_w_scale=0.8,
|
55
|
+
enable_phonetic_spellings=True, # apply pronunciation fixes, see "locale" folder in this repo
|
56
|
+
add_diacritics=False # for arabic and hebrew
|
57
|
+
)
|
58
|
+
|
59
|
+
# Synthesize audio from text
|
60
|
+
text = "Hello, this is a test of the phoonnx library."
|
61
|
+
slug = f"phoonnx_{voice.config.phoneme_type.value}_{voice.config.lang_code}"
|
62
|
+
with wave.open(f"{slug}.wav", "wb") as wav_file:
|
63
|
+
voice.synthesize_wav(text, wav_file, synthesis_config)
|
64
|
+
|
65
|
+
```
|
66
|
+
|
67
|
+
### Training
|
68
|
+
|
69
|
+
See the dedicated [training.md](/TRAINING.md)
|
70
|
+
|
71
|
+
## Supported Phonemizers
|
72
|
+
|
73
|
+
`phoonnx` leverages several external Grapheme-to-Phoneme (G2P) and text-processing libraries to provide flexible and
|
74
|
+
high-quality phonemization across many languages.
|
75
|
+
|
76
|
+
When available, prefer phonemizers trained on full sentences over those trained on individual words.
|
77
|
+
|
78
|
+
The core phonemizer classes are summarized in the table below, listing the supported languages, the source library they
|
79
|
+
wrap, and the output alphabets they can generate.
|
80
|
+
|
81
|
+
***
|
82
|
+
|
83
|
+
| Language(s) | Phonemizer Class | Source/Library | Output Alphabets | Description |
|
84
|
+
|:--------------------|:-----------------------|:-------------------------------------------------------------------------------------------------------------------|:-----------------------------|:------------|
|
85
|
+
| **Multilingual** | `ByT5Phonemizer` | [OpenVoiceOS ByT5](https://huggingface.co/collections/OpenVoiceOS/g2p-models-6886a8d612825c3fe65befa0) ONNX Models | IPA | High-quality, model-based G2P for an extensive list of languages. |
|
86
|
+
| **Multilingual** | `CharsiuPhonemizer` | [Charsiu](https://github.com/lingjzhu/CharsiuG2P) ByT5 ONNX Model | IPA | Very extensive multilingual support, including many regional dialects and variants (e.g., `eng-uk`, `spa-me`, `zho-s`). |
|
87
|
+
| **Multilingual** | `EspeakPhonemizer` | `espeak-ng` command-line tool | IPA | Broad language coverage, relying on the widely-used `espeak-ng` engine. |
|
88
|
+
| **Multilingual** | `GruutPhonemizer` | [gruut](https://github.com/rhasspy/gruut) | IPA | A tokenizer, text cleaner, and IPA phonemizer for several human languages that supports SSML. |
|
89
|
+
| **Multilingual** | `MisakiPhonemizer` | [misaki](https://github.com/hexgrad/misaki) | IPA | Misaki is a G2P engine designed for Kokoro models. |
|
90
|
+
| **Multilingual** | `TransphonePhonemizer` | [transphone](https://github.com/xinjli/transphone) | IPA | It provides approximated phoneme tokenizers and G2P model for 7546 languages registered in the Glottolog database. |
|
91
|
+
| **Multilingual** | `EpitranPhonemizer` | [epitran](https://github.com/dmort27/epitran) | IPA | A tool for transcribing orthographic text as IPA |
|
92
|
+
| **Arabic (ar)** | `MantoqPhonemizer` | [mantoq](https://github.com/mush42/mantoq) | BUCKWALTER, IPA | Translates unvoweled Arabic to phonemes, with optional conversion to IPA. |
|
93
|
+
| **Chinese (zh)** | `JiebaPhonemizer` | [jieba](https://github.com/fxsjy/jieba) | HANZI | Segments Chinese text into words with spaces; useful for pre-processing. |
|
94
|
+
| **Chinese (zh)** | `G2pCPhonemizer` | [g2pC](https://github.com/Kyubyong/g2pC) | IPA, Pinyin | CRF-based Grapheme-to-Phoneme converter |
|
95
|
+
| **Chinese (zh)** | `G2pMPhonemizer` | [g2pm](https://github.com/kakaobrain/g2pm) | IPA, Pinyin | A Neural Grapheme-to-Phoneme Conversion Package for Mandarin Chinese |
|
96
|
+
| **Chinese (zh)** | `XpinyinPhonemizer` | [xpinyin](https://github.com/lxneng/xpinyin) | IPA, Pinyin | basic pinyin generator with optional tone marks |
|
97
|
+
| **Chinese (zh)** | `PypinyinPhonemizer` | [pypinyin](https://github.com/rainforest32/pypinyin) | IPA, Pinyin | comprehensive and accurate pinyin library |
|
98
|
+
| **English (en)** | `G2PEnPhonemizer` | [g2pE](https://github.com/Kyubyong/g2p) | IPA | A deep learning seq2seq framework based on TensorFlow |
|
99
|
+
| **English (en)** | `OpenPhonemizer` | [OpenPhonemizer](https://github.com/NeuralVox/OpenPhonemizer) | IPA | IPA Phonemizer powered by deep learning. This Phonemizer attempts to replicate the espeak Phonemizer while remaining permissively-licensed. |
|
100
|
+
| **English (en)** | `DeepPhonemizer` | [DeepPhonemizer](https://github.com/spring-media/DeepPhonemizer) | IPA / ARPA | Uses pre-trained deep learning models for English. |
|
101
|
+
| **Galician (gl)** | `CotoviaPhonemizer` | [cotovia](https://github.com/TigreGotico/cotovia-mirror) | IPA, Native Cotovia Phonemes | Relies on the `cotovia` executable for Galician phonemization. |
|
102
|
+
| **Hebrew (he)** | `PhonikudPhonemizer` | [phonikud](https://github.com/thewh1teagle/phonikud) | IPA | Converts Hebrew text to IPA phonemes. |
|
103
|
+
| **Japanese (ja)** | `OpenJTaklPhonemizer` | [pyopenjtalk](https://github.com/r9y9/pyopenjtalk) | HEPBURN, KANA | High-quality Japanese G2P. |
|
104
|
+
| **Japanese (ja)** | `CutletPhonemizer` | [cutlet](https://github.com/polm/cutlet) | HEPBURN, KUNREI, NIHON | Provides various Romanization standards. |
|
105
|
+
| **Japanese (ja)** | `PyKakasiPhonemizer` | [pykakasi](https://codeberg.org/miurahr/pykakasi) | HEPBURN, KANA, HIRA | Romanization and Kana conversion. |
|
106
|
+
| **Korean (ko)** | `G2PKPhonemizer` | [g2pK](https://github.com/Kyubyong/g2pK) | IPA, HANGUL | Provides G2P for Korean, with optional IPA conversion. |
|
107
|
+
| **Korean (ko)** | `KoG2PPhonemizer` | [KoG2P](https://github.com/scarletcho/KoG2P) | IPA, HANGUL | Provides G2P for Korean, with optional IPA conversion. |
|
108
|
+
| **Persian (fa)** | `PersianPhonemizer` | [persian_phonemizer](https://github.com/de-mh/persian_phonemizer) | ERAAB, IPA | Supports both standard IPA and the native ERAAB (diacritical) representations. |
|
109
|
+
| **Vietnamese (vi)** | `VIPhonemePhonemizer` | [Viphoneme](https://github.com/v-nhandt21/Viphoneme) | IPA | Uses the `viphoneme` library for Vietnamese G2P. |
|
110
|
+
|
111
|
+
### Credits
|
112
|
+
|
113
|
+
Phoonnx is built on the shoulders of giants:
|
114
|
+
|
115
|
+
- [jaywalnut310/vits](https://github.com/jaywalnut310/vits) - the original VITS implementation, the back-bone architecture of phoonnx models
|
116
|
+
- [MycroftAI/mimic3](https://github.com/MycroftAI/mimic3) and [rhasspy/piper](https://github.com/rhasspy/piper) - for inspiration and reference implementation of a phonemizer for pre-processing inputs
|
117
|
+
|
118
|
+
Individual languages greatly benefit from domain-specific knowledge. For convenience, phoonnx also bundles code from:
|
119
|
+
|
120
|
+
- [uvigo/cotovia](https://github.com/TigreGotico/cotovia-mirror) for Galician phonemization (pre-compiled binaries bundled)
|
121
|
+
- [mush42/mantoq](https://github.com/mush42/mantoq) for Arabic phonemization
|
122
|
+
- [mush42/libtashkeel](https://github.com/mush42/libtashkeel) for Arabic diacritics
|
123
|
+
- [scarletcho/KoG2P](https://github.com/scarletcho/KoG2P) for Korean phonemization
|
124
|
+
- [stannam/hangul_to_ipa](https://github.com/stannam/hangul_to_ipa) a converter from Hangul to IPA
|
125
|
+
- [chorusai/arpa2ipa](https://github.com/chorusai/arpa2ipa) a converter from Arpabet to IPA
|
126
|
+
- [PaddlePaddle/PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/8097a56be811a540f4f62a95a9094296c374351a/paddlespeech/t2s/frontend/zh_normalization/) for Chinese number verbalization
|
127
|
+
|
@@ -6,7 +6,6 @@ from phoonnx.phoneme_ids import (load_phoneme_ids, BlankBetween,
|
|
6
6
|
DEFAULT_BLANK_WORD_TOKEN, DEFAULT_BLANK_TOKEN,
|
7
7
|
DEFAULT_PAD_TOKEN, DEFAULT_BOS_TOKEN, DEFAULT_EOS_TOKEN)
|
8
8
|
|
9
|
-
|
10
9
|
DEFAULT_NOISE_SCALE = 0.667
|
11
10
|
DEFAULT_LENGTH_SCALE = 1.0
|
12
11
|
DEFAULT_NOISE_W_SCALE = 0.8
|
@@ -22,6 +21,8 @@ class Alphabet(str, Enum):
|
|
22
21
|
UNICODE = "unicode"
|
23
22
|
IPA = "ipa"
|
24
23
|
ARPA = "arpa" # en
|
24
|
+
SAMPA = "sampa"
|
25
|
+
XSAMPA = "x-sampa"
|
25
26
|
HANGUL = "hangul" # ko
|
26
27
|
KANA = "kana" # ja
|
27
28
|
HIRA = "hira" # ja
|
@@ -32,6 +33,7 @@ class Alphabet(str, Enum):
|
|
32
33
|
ERAAB = "eraab" # fa
|
33
34
|
COTOVIA = "cotovia" # gl
|
34
35
|
HANZI = "hanzi" # zh
|
36
|
+
BUCKWALTER = "buckwalter" # ar
|
35
37
|
|
36
38
|
|
37
39
|
|
@@ -380,6 +382,9 @@ class SynthesisConfig:
|
|
380
382
|
|
381
383
|
enable_phonetic_spellings: bool = True
|
382
384
|
|
385
|
+
"""for arabic and hebrew models"""
|
386
|
+
add_diacritics: bool = True
|
387
|
+
|
383
388
|
|
384
389
|
def get_phonemizer(phoneme_type: PhonemeType,
|
385
390
|
alphabet: Alphabet = Alphabet.IPA,
|
@@ -0,0 +1,98 @@
|
|
1
|
+
from phoonnx.config import Alphabet
|
2
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
3
|
+
from phoonnx.thirdparty.bw2ipa import translate as bw2ipa
|
4
|
+
from phoonnx.thirdparty.mantoq import g2p as mantoq
|
5
|
+
|
6
|
+
|
7
|
+
class MantoqPhonemizer(BasePhonemizer):
    """Arabic phonemizer backed by the bundled Mantoq G2P.

    Output is either Mantoq's Buckwalter-like phoneme string or, when the
    phonemizer is configured with ``Alphabet.IPA``, that string translated
    to IPA through ``bw2ipa``.
    """

    def __init__(self, alphabet=Alphabet.BUCKWALTER):
        """Create the phonemizer.

        Args:
            alphabet: Output alphabet; only ``Alphabet.IPA`` and
                ``Alphabet.BUCKWALTER`` are accepted.

        Raises:
            ValueError: If any other alphabet is requested.
        """
        supported = (Alphabet.IPA, Alphabet.BUCKWALTER)
        if alphabet not in supported:
            raise ValueError("unsupported alphabet")
        super().__init__(alphabet)

    @classmethod
    def get_lang(cls, target_lang: str) -> str:
        """Resolve ``target_lang`` against the single supported language, ``"ar"``.

        Args:
            target_lang (str): The language code requested by the caller.

        Returns:
            str: Whatever ``match_lang`` returns for the candidate list ``["ar"]``.

        Note:
            ``match_lang`` is not defined in this class; it is expected to
            reject unsupported codes by raising (presumably ValueError --
            confirm against its definition).
        """
        # Called purely for validation: an unsupported language should fail here.
        supported_langs = ["ar"]
        return cls.match_lang(target_lang, supported_langs)

    def phonemize_string(self, text: str, lang: str = "ar") -> str:
        """Convert Arabic ``text`` into a phoneme string.

        Args:
            text (str): Arabic input text.
            lang (str): Language code; validated through ``get_lang``.

        Returns:
            str: IPA when ``self.alphabet`` is ``Alphabet.IPA``, otherwise the
            phonemes in Mantoq's own alphabet.
        """
        # Validation only -- the resolved code is not needed afterwards.
        self.get_lang(lang)

        # mantoq() yields (normalized_text, phonemes); only the phonemes are used.
        _, phoneme_tokens = mantoq(text)

        # Glue the tokens together and turn the word-separator token into a space.
        phoneme_str = "".join(phoneme_tokens).replace("_+_", " ")

        # IPA output is derived from the Buckwalter-like string via bw2ipa.
        return bw2ipa(phoneme_str) if self.alphabet == Alphabet.IPA else phoneme_str
|
51
|
+
|
52
|
+
|
53
|
+
if __name__ == "__main__":
    from phoonnx.phonemizers.mul import EspeakPhonemizer

    espeak = EspeakPhonemizer()

    # Only the IPA variant of Mantoq is exercised; its output is printed next to espeak's.
    pho_mantoq = MantoqPhonemizer(alphabet=Alphabet.IPA)


    def _print_both_engines(sentence):
        """Print the Mantoq and the espeak phonemization of ``sentence``."""
        print(f" Mantoq: {pho_mantoq.phonemize_string(sentence, 'ar')}")
        print(f" Espeak: {espeak.phonemize_string(sentence, 'ar')}")


    def compare(text):
        """Show both engines on the raw text, then again on the diacritized text."""
        print(f"Original Text: {text}")
        _print_both_engines(text)

        ts = pho_mantoq.add_diacritics(text, 'ar')
        print(f"Tashkeel Text: {ts}")
        _print_both_engines(ts)
        print("\n#########################")


    samples = (
        "مرحبا بالعالم",
        "ذهب الطالب إلى المكتبة لقراءة كتاب عن تاريخ الأندلس.",
        # 1. Test for gemination of a sun letter (e.g., ash-shams)
        "الشمس",
        # 2. Test for long vowels (e.g., 'fil' - elephant)
        "فيل",
        # 3. Test for glide (e.g., 'yawm' - day)
        "يوم",
        # 4. Test for long vowels (e.g., 'suwr' - wall)
        "سور",
        # 5. Test for glide (e.g., 'law' - if)
        "لو",
    )
    for sample in samples:
        compare(sample)
|
@@ -8,6 +8,8 @@ from langcodes import tag_distance
|
|
8
8
|
from quebra_frases import sentence_tokenize
|
9
9
|
from phoonnx.config import Alphabet
|
10
10
|
from phoonnx.util import normalize
|
11
|
+
from phoonnx.thirdparty.phonikud import PhonikudDiacritizer
|
12
|
+
from phoonnx.thirdparty.tashkeel import TashkeelDiacritizer
|
11
13
|
|
12
14
|
# list of (substring, terminator, end_of_sentence) tuples.
|
13
15
|
TextChunks = List[Tuple[str, str, bool]]
|
@@ -18,10 +20,27 @@ PhonemizedChunks = list[list[str]]
|
|
18
20
|
|
19
21
|
|
20
22
|
class BasePhonemizer(metaclass=abc.ABCMeta):
|
21
|
-
def __init__(self, alphabet: Alphabet = Alphabet.UNICODE
|
23
|
+
def __init__(self, alphabet: Alphabet = Alphabet.UNICODE,
|
24
|
+
taskeen_threshold: Optional[float] = 0.8):
|
22
25
|
super().__init__()
|
23
26
|
self.alphabet = alphabet
|
24
27
|
|
28
|
+
self.taskeen_threshold = taskeen_threshold # arabic only
|
29
|
+
self._tashkeel: Optional[TashkeelDiacritizer] = None
|
30
|
+
self._phonikud: Optional[PhonikudDiacritizer] = None # hebrew only
|
31
|
+
|
32
|
+
@property
def phonikud(self) -> PhonikudDiacritizer:
    """Hebrew diacritizer, created on first access and reused afterwards."""
    diacritizer = self._phonikud
    if diacritizer is None:
        # First use: build the diacritizer and cache it on the instance.
        diacritizer = PhonikudDiacritizer()
        self._phonikud = diacritizer
    return diacritizer
|
37
|
+
|
38
|
+
@property
def tashkeel(self) -> TashkeelDiacritizer:
    """Arabic diacritizer, created on first access and reused afterwards."""
    diacritizer = self._tashkeel
    if diacritizer is None:
        # First use: build the diacritizer and cache it on the instance.
        diacritizer = TashkeelDiacritizer()
        self._tashkeel = diacritizer
    return diacritizer
|
43
|
+
|
25
44
|
@abc.abstractmethod
|
26
45
|
def phonemize_string(self, text: str, lang: str) -> str:
|
27
46
|
raise NotImplementedError
|
@@ -29,6 +48,13 @@ class BasePhonemizer(metaclass=abc.ABCMeta):
|
|
29
48
|
def phonemize_to_list(self, text: str, lang: str) -> List[str]:
|
30
49
|
return list(self.phonemize_string(text, lang))
|
31
50
|
|
51
|
+
def add_diacritics(self, text: str, lang: str) -> str:
    """Add diacritics to ``text`` for Hebrew or Arabic; leave other languages untouched.

    Args:
        text (str): Input text.
        lang (str): Language code; only the prefixes ``"he"`` and ``"ar"`` trigger
            diacritization.

    Returns:
        str: The diacritized text, or ``text`` unchanged for any other language.
    """
    # Guard clause: anything that is neither Hebrew nor Arabic passes straight through.
    if not lang.startswith(("he", "ar")):
        return text

    if lang.startswith("he"):
        return self.phonikud.diacritize(text)

    # Arabic: the instance-level threshold is forwarded to the diacritizer.
    return self.tashkeel.diacritize(text, self.taskeen_threshold)
|
57
|
+
|
32
58
|
def phonemize(self, text: str, lang: str) -> PhonemizedChunks:
|
33
59
|
if not text:
|
34
60
|
return [('', '', True)]
|
@@ -12,6 +12,57 @@ class CotoviaError(Exception):
|
|
12
12
|
pass
|
13
13
|
|
14
14
|
|
15
|
+
# Cotovia phoneme symbol -> IPA symbol.
COTOVIA2IPA = {
    "pau": " ",
    "a": "a",
    "E": "ɛ",
    "e": "e",
    "i": "i",
    "j": "j",
    "O": "ɔ",
    "o": "o",
    "u": "u",
    "w": "w",
    "p": "p",
    "b": "b",
    "B": "β",
    "t": "t",
    "d": "d",
    "D": "ð",
    "k": "k",
    "g": "g",
    "G": "ɣ",
    "f": "f",
    "T": "θ",
    "s": "s",
    "S": "ʃ",
    "tS": "tʃ",
    "m": "m",
    "n": "n",
    "J": "ɲ",
    "N": "ŋ",
    "l": "l",
    "Z": "ʎ",
    "jj": "ʎ",
    "L": "ʎ",
    "r": "ɾ",
    "rr": "r",
    "X": "x"
}

# Single alternation over every Cotovia symbol, longest first, so multi-character
# symbols ("pau", "tS", "jj", "rr") take precedence over their one-character prefixes.
# Built once at import time from COTOVIA2IPA.
_COTOVIA_SYMBOL_RE = re.compile(
    "|".join(re.escape(symbol)
             for symbol in sorted(COTOVIA2IPA, key=len, reverse=True))
)


def cotovia2ipa(text: str) -> str:
    """
    Converts a string of Cotovía phonemes to IPA.

    The text is scanned once, left to right; at each position the longest
    matching Cotovia symbol is replaced by its IPA counterpart, and characters
    without a mapping are kept as they are.

    The conversion is deliberately single-pass: replacing symbol after symbol
    with chained ``str.replace`` calls re-processes already converted output,
    so the trill produced by "rr" -> "r" would be rewritten to the tap "ɾ" by
    the later "r" -> "ɾ" step.

    Args:
        text (str): Phoneme string in the Cotovia alphabet.

    Returns:
        str: The same string with every Cotovia symbol mapped to IPA.
    """
    return _COTOVIA_SYMBOL_RE.sub(lambda match: COTOVIA2IPA[match.group(0)], text)
|
64
|
+
|
65
|
+
|
15
66
|
class CotoviaPhonemizer(BasePhonemizer):
|
16
67
|
"""
|
17
68
|
A phonemizer class that uses the Cotovia TTS binary to convert text into phonemes.
|
@@ -19,7 +70,7 @@ class CotoviaPhonemizer(BasePhonemizer):
|
|
19
70
|
regular expression transformations to clean and normalize the phonetic representation.
|
20
71
|
"""
|
21
72
|
|
22
|
-
def __init__(self, cotovia_bin_path: Optional[str] = None):
|
73
|
+
def __init__(self, cotovia_bin_path: Optional[str] = None, alphabet: Alphabet = Alphabet.IPA):
|
23
74
|
"""
|
24
75
|
Initializes the CotoviaPhonemizer.
|
25
76
|
|
@@ -31,7 +82,7 @@ class CotoviaPhonemizer(BasePhonemizer):
|
|
31
82
|
if not os.path.exists(self.cotovia_bin):
|
32
83
|
raise FileNotFoundError(f"Cotovia binary not found at {self.cotovia_bin}. "
|
33
84
|
"Please ensure it's installed or provide the correct path.")
|
34
|
-
super().__init__(
|
85
|
+
super().__init__(alphabet)
|
35
86
|
|
36
87
|
@classmethod
|
37
88
|
def get_lang(cls, target_lang: str) -> str:
|
@@ -127,6 +178,8 @@ class CotoviaPhonemizer(BasePhonemizer):
|
|
127
178
|
# substitute ' ( text )' to ', text,'
|
128
179
|
str_ext = re.sub(r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \\2,", str_ext)
|
129
180
|
|
181
|
+
if self.alphabet == Alphabet.IPA:
|
182
|
+
return cotovia2ipa(str_ext)
|
130
183
|
return str_ext
|
131
184
|
|
132
185
|
|
@@ -138,5 +191,5 @@ if __name__ == "__main__":
|
|
138
191
|
lang = "gl"
|
139
192
|
text_gl = "Este é un sistema de conversión de texto a voz en lingua galega baseado en redes neuronais artificiais. Ten en conta que as funcionalidades incluídas nesta páxina ofrécense unicamente con fins de demostración. Se tes algún comentario, suxestión ou detectas algún problema durante a demostración, ponte en contacto connosco."
|
140
193
|
print(f"\n--- Getting phonemes for '{text_gl}' (Cotovia) ---")
|
141
|
-
phonemes_cotovia = cotovia.
|
194
|
+
phonemes_cotovia = cotovia.phonemize_string(text_gl, lang)
|
142
195
|
print(f" Cotovia Phonemes: {phonemes_cotovia}")
|
@@ -1,30 +1,12 @@
|
|
1
|
-
import os.path
|
2
|
-
|
3
|
-
import requests
|
4
|
-
|
5
|
-
from phoonnx.phonemizers.base import BasePhonemizer
|
6
1
|
from phoonnx.config import Alphabet
|
2
|
+
from phoonnx.phonemizers.base import BasePhonemizer
|
7
3
|
|
8
4
|
|
9
5
|
class PhonikudPhonemizer(BasePhonemizer):
|
10
|
-
dl_url = "https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx"
|
11
6
|
|
12
|
-
def __init__(self
|
13
|
-
from phonikud_onnx import Phonikud
|
7
|
+
def __init__(self):
|
14
8
|
from phonikud import phonemize
|
15
9
|
self.g2p = phonemize
|
16
|
-
self.diacritics = diacritics
|
17
|
-
if model is None:
|
18
|
-
base_path = os.path.expanduser("~/.local/share/phonikud")
|
19
|
-
fname = self.dl_url.split("/")[-1]
|
20
|
-
model = f"{base_path}/{fname}"
|
21
|
-
if not os.path.isfile(model):
|
22
|
-
os.makedirs(base_path, exist_ok=True)
|
23
|
-
# TODO - streaming download
|
24
|
-
data = requests.get(self.dl_url).content
|
25
|
-
with open(model, "wb") as f:
|
26
|
-
f.write(data)
|
27
|
-
self.phonikud = Phonikud(model) if diacritics else None
|
28
10
|
super().__init__(Alphabet.IPA)
|
29
11
|
|
30
12
|
@classmethod
|
@@ -48,20 +30,19 @@ class PhonikudPhonemizer(BasePhonemizer):
|
|
48
30
|
"""
|
49
31
|
"""
|
50
32
|
lang = self.get_lang(lang)
|
51
|
-
if self.diacritics:
|
52
|
-
text = self.phonikud.add_diacritics(text)
|
53
33
|
return self.g2p(text)
|
54
34
|
|
55
35
|
|
56
36
|
if __name__ == "__main__":
|
57
|
-
#text = "מתכת יקרה"
|
37
|
+
# text = "מתכת יקרה"
|
58
38
|
text = 'שָׁלוֹם עוֹלָם'
|
59
39
|
|
60
|
-
pho = PhonikudPhonemizer(
|
40
|
+
pho = PhonikudPhonemizer()
|
61
41
|
lang = "he"
|
62
42
|
|
63
43
|
print(f"\n--- Getting phonemes for '{text}' ---")
|
44
|
+
# text = pho.add_diacritics(text, lang)
|
64
45
|
phonemes = pho.phonemize(text, lang)
|
65
46
|
print(f" Phonemes: {phonemes}")
|
66
47
|
# --- Getting phonemes for 'שָׁלוֹם עוֹלָם' ---
|
67
|
-
# Phonemes: [('ʃalˈom ʔolˈam', '.', True)]
|
48
|
+
# Phonemes: [('ʃalˈom ʔolˈam', '.', True)]
|