phoonnx 0.0.1a1__tar.gz → 0.0.2a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/PKG-INFO +1 -1
- phoonnx-0.0.2a1/README.md +73 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/util.py +19 -12
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/version.py +1 -1
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx.egg-info/PKG-INFO +1 -1
- phoonnx-0.0.1a1/README.md +0 -3
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/config.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/locale/ca/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/locale/en/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/locale/gl/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/locale/pt/phonetic_spellings.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phoneme_ids.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/ar.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/base.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/en.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/fa.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/gl.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/he.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/ja.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/ko.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/mul.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/vi.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/phonemizers/zh.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/arpa2ipa.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/hangul2ipa.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/aspiration.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/assimilation.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/double_coda.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/hanja.tsv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/ipa.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/neutralization.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/tensification.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/ko_tables/yale.csv +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/kog2p/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/kog2p/rulebook.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/buck/symbols.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/buck/tokenization.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/num2words.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/araby.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/named_const.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/normalize.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/number.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/number_const.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/stack.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/pyarabic/trans.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/mantoq/unicode_symbol2label.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/LICENSE +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/SOURCE +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/hint_id_map.json +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/input_id_map.json +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/tashkeel/target_id_map.json +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/thirdparty/zh_num.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx/voice.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx.egg-info/SOURCES.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx.egg-info/dependency_links.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx.egg-info/requires.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx.egg-info/top_level.txt +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/__main__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/export_onnx.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/norm_audio/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/norm_audio/trim.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/norm_audio/vad.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/preprocess.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/attentions.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/commons.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/config.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/dataset.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/lightning.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/losses.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/mel_processing.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/models.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/modules.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/monotonic_align/__init__.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/monotonic_align/setup.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/transforms.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/utils.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/phoonnx_train/vits/wavfile.py +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/setup.cfg +0 -0
- {phoonnx-0.0.1a1 → phoonnx-0.0.2a1}/setup.py +0 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
[phoonnx on DeepWiki](https://deepwiki.com/TigreGotico/phoonnx)
|
2
|
+
|
3
|
+
# Phoonnx
|
4
|
+
|
5
|
+
A Python library for multilingual phonemization and Text-to-Speech (TTS) using ONNX models.
|
6
|
+
|
7
|
+
## Introduction
|
8
|
+
|
9
|
+
`phoonnx` is a comprehensive toolkit for performing high-quality, efficient TTS inference using ONNX-compatible models. It provides a flexible framework for text normalization, phonemization, and speech synthesis, with built-in support for multiple languages and phonemic alphabets. The library is also designed to work with models trained using `phoonnx_train`, including utilities for dataset preprocessing and exporting models to the ONNX format.
|
10
|
+
|
11
|
+
## Features
|
12
|
+
|
13
|
+
- **Efficient Inference:** Leverages `onnxruntime` for fast and efficient TTS synthesis.
|
14
|
+
- **Multilingual Support:** Supports a wide range of languages and phonemic alphabets, including IPA, ARPA, Hangul (Korean), and Pinyin (Chinese).
|
15
|
+
- **Multiple Phonemizers:** Integrates with various phonemizers like eSpeak, Gruut, and Epitran to convert text to phonemes.
|
16
|
+
- **Advanced Text Normalization:** Includes robust utilities for expanding contractions and pronouncing numbers and dates.
|
17
|
+
- **Dataset Preprocessing:** Provides a command-line tool to prepare LJSpeech-style datasets for training.
|
18
|
+
- **Model Export:** A script is included to convert trained models into the ONNX format, ready for deployment.
|
19
|
+
|
20
|
+
## Installation
|
21
|
+
|
22
|
+
`phoonnx` is published on PyPI, so you can install it with pip:
|
23
|
+
|
24
|
+
```bash
|
25
|
+
pip install phoonnx
|
26
|
+
```
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
### Synthesizing Speech
|
31
|
+
|
32
|
+
The main component for inference is the `TTSVoice` class. You can load a model and synthesize speech from text as follows:
|
33
|
+
|
34
|
+
```python
|
35
|
+
from phoonnx.config import VoiceConfig, SynthesisConfig
|
36
|
+
from phoonnx.voice import TTSVoice
|
37
|
+
|
38
|
+
# Load a pre-trained ONNX model and its configuration
|
39
|
+
# Assume 'model.onnx' and 'config.json' are available
|
40
|
+
voice = TTSVoice.load("model.onnx", "config.json")
|
41
|
+
|
42
|
+
# Configure the synthesis parameters (optional)
|
43
|
+
synthesis_config = SynthesisConfig(
|
44
|
+
noise_scale=0.667,
|
45
|
+
length_scale=1.0,
|
46
|
+
noise_w_scale=0.8
|
47
|
+
)
|
48
|
+
|
49
|
+
# Synthesize audio from text
|
50
|
+
text = "Hello, this is a test of the phoonnx library."
|
51
|
+
audio_chunk = voice.synthesize(text, synthesis_config=synthesis_config)
|
52
|
+
|
53
|
+
# Save the audio to a WAV file
|
54
|
+
audio_chunk.write_wav("output.wav")
|
55
|
+
```
|
56
|
+
|
57
|
+
### Preprocessing Datasets
|
58
|
+
|
59
|
+
Use the `preprocess.py` script to prepare your audio and text data for training:
|
60
|
+
|
61
|
+
```bash
|
62
|
+
python phoonnx_train/preprocess.py --dataset-dir /path/to/my/dataset --output-dir /path/to/output
|
63
|
+
```
|
64
|
+
|
65
|
+
### Exporting Models
|
66
|
+
|
67
|
+
After training, you can export a PyTorch Lightning checkpoint (`.ckpt`) to an ONNX model:
|
68
|
+
|
69
|
+
```bash
|
70
|
+
python phoonnx_train/export_onnx.py /path/to/my/model.ckpt output.onnx
|
71
|
+
```
|
72
|
+
|
73
|
+
This script will convert the model to an ONNX file with an `opset_version` of 15.
|
@@ -567,15 +567,18 @@ def _normalize_units(text: str, full_lang: str) -> str:
|
|
567
567
|
symbolic_pattern = re.compile(number_pattern_str + r"\s*(" + symbolic_pattern_str + r")", re.IGNORECASE)
|
568
568
|
|
569
569
|
def replace_symbolic(match):
|
570
|
-
|
570
|
+
number = match.group(1)
|
571
571
|
# Remove thousands separator and replace decimal separator for parsing
|
572
|
-
number
|
572
|
+
if thousands_separator in number and decimal_separator in number:
|
573
|
+
number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
|
574
|
+
elif decimal_separator != "." and decimal_separator in number:
|
575
|
+
number = number.replace(decimal_separator, ".")
|
573
576
|
unit_symbol = match.group(2)
|
574
577
|
unit_word = symbolic_units[unit_symbol]
|
575
578
|
try:
|
576
|
-
return f"{pronounce_number(float(number), full_lang)} {unit_word}"
|
579
|
+
return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
|
577
580
|
except Exception as e:
|
578
|
-
LOG.error(f"Failed to pronounce number with unit: {
|
581
|
+
LOG.error(f"Failed to pronounce number with unit: {number}{unit_symbol} - ({e})")
|
579
582
|
return match.group(0)
|
580
583
|
text = symbolic_pattern.sub(replace_symbolic, text)
|
581
584
|
|
@@ -588,12 +591,15 @@ def _normalize_units(text: str, full_lang: str) -> str:
|
|
588
591
|
re.IGNORECASE)
|
589
592
|
|
590
593
|
def replace_alphanumeric(match):
|
591
|
-
|
594
|
+
number = match.group(1)
|
592
595
|
# Remove thousands separator and replace decimal separator for parsing
|
593
|
-
number
|
596
|
+
if thousands_separator in number and decimal_separator in number:
|
597
|
+
number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
|
598
|
+
elif decimal_separator != "." and decimal_separator in number:
|
599
|
+
number = number.replace(decimal_separator, ".")
|
594
600
|
unit_symbol = match.group(2)
|
595
601
|
unit_word = alphanumeric_units[unit_symbol]
|
596
|
-
return f"{pronounce_number(float(number), full_lang)} {unit_word}"
|
602
|
+
return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
|
597
603
|
|
598
604
|
text = alphanumeric_pattern.sub(replace_alphanumeric, text)
|
599
605
|
return text
|
@@ -667,7 +673,8 @@ if __name__ == "__main__":
|
|
667
673
|
|
668
674
|
# General normalization examples
|
669
675
|
print("General English example: " + normalize('I\'m Dr. Prof. 3/3 0.5% of 12345€, 5ft, and 10kg', 'en'))
|
670
|
-
print(f"
|
676
|
+
print(f"Word Salad Portuguese (Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg', 'pt')}")
|
677
|
+
print(f"Word Salad Portuguese (Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg', 'pt')}")
|
671
678
|
|
672
679
|
# Portuguese examples with comma decimal separator
|
673
680
|
print("\n--- Portuguese Decimal Separator Examples ---")
|
@@ -691,10 +698,10 @@ if __name__ == "__main__":
|
|
691
698
|
|
692
699
|
# Portuguese dates and times
|
693
700
|
print("\n--- Portuguese Date & Time Examples ---")
|
694
|
-
print(f"Portuguese date (
|
695
|
-
print(f"Portuguese ambiguous date (
|
696
|
-
print(f"Portuguese date with dashes: {normalize('O evento é no dia 25-10-2024', 'pt')}")
|
697
|
-
print(f"Portuguese military time: {normalize('O encontro é às 14h30', 'pt')}")
|
701
|
+
print(f"Portuguese date (A data é 03/08/2025): {normalize('A data é 03/08/2025', 'pt')}")
|
702
|
+
print(f"Portuguese ambiguous date (O relatório é para 15/05/2025): {normalize('O relatório é para 15/05/2025', 'pt')}")
|
703
|
+
print(f"Portuguese date with dashes (O evento é no dia 25-10-2024): {normalize('O evento é no dia 25-10-2024', 'pt')}")
|
704
|
+
print(f"Portuguese military time (O encontro é às 14h30): {normalize('O encontro é às 14h30', 'pt')}")
|
698
705
|
|
699
706
|
# Other examples
|
700
707
|
print(f"\n--- Other Examples ---")
|
phoonnx-0.0.1a1/README.md
DELETED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|