PyPI - phoonnx - Versions diffs - 0.0.1a1__tar.gz → 0.0.2a2__tar.gz - Mend

phoonnx 0.0.1a1.tar.gz → 0.0.2a2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92)

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: phoonnx
-Version: 0.0.1a1
+Version: 0.0.2a2
 Home-page: https://github.com/TigreGotico/phoonnx
 Author: JarbasAi
 Author-email: jarbasai@mailfence.com
@@ -71,6 +71,7 @@ Provides-Extra: got
 Provides-Extra: tpi
 Provides-Extra: pt
 Provides-Extra: ckb
+Provides-Extra: train
 Provides-Extra: lsm
 Provides-Extra: fa
 Provides-Extra: av

phoonnx-0.0.2a2/README.md ADDED Viewed

@@ -0,0 +1,73 @@
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/TigreGotico/phoonnx)
+# Phoonnx
+A Python library for multilingual phonemization and Text-to-Speech (TTS) using ONNX models.
+## Introduction
+`phoonnx` is a comprehensive toolkit for performing high-quality, efficient TTS inference using ONNX-compatible models. It provides a flexible framework for text normalization, phonemization, and speech synthesis, with built-in support for multiple languages and phonemic alphabets. The library is also designed to work with models trained using `phoonnx_train`, including utilities for dataset preprocessing and exporting models to the ONNX format.
+## Features
+  - **Efficient Inference:** Leverages `onnxruntime` for fast and efficient TTS synthesis.
+  - **Multilingual Support:** Supports a wide range of languages and phonemic alphabets, including IPA, ARPA, Hangul (Korean), and Pinyin (Chinese).
+  - **Multiple Phonemizers:** Integrates with various phonemizers like eSpeak, Gruut, and Epitran to convert text to phonemes.
+  - **Advanced Text Normalization:** Includes robust utilities for expanding contractions and pronouncing numbers and dates.
+  - **Dataset Preprocessing:** Provides a command-line tool to prepare LJSpeech-style datasets for training.
+  - **Model Export:** A script is included to convert trained models into the ONNX format, ready for deployment.
+## Installation
+As `phoonnx` is available on PyPI, you can install it using pip.
+```bash
+pip install phoonnx
+```
+## Usage
+### Synthesizing Speech
+The main component for inference is the `TTSVoice` class. You can load a model and synthesize speech from text as follows:
+```python
+from phoonnx.config import VoiceConfig, SynthesisConfig
+from phoonnx.voice import TTSVoice
+# Load a pre-trained ONNX model and its configuration
+# Assume 'model.onnx' and 'config.json' are available
+voice = TTSVoice.load("model.onnx", "config.json")
+# Configure the synthesis parameters (optional)
+synthesis_config = SynthesisConfig(
+    noise_scale=0.667,
+    length_scale=1.0,
+    noise_w_scale=0.8
+)
+# Synthesize audio from text
+text = "Hello, this is a test of the phoonnx library."
+audio_chunk = voice.synthesize(text, synthesis_config=synthesis_config)
+# Save the audio to a WAV file
+audio_chunk.write_wav("output.wav")
+```
+### Preprocessing Datasets
+Use the `preprocess.py` script to prepare your audio and text data for training:
+```bash
+python phoonnx_train/preprocess.py --dataset-dir /path/to/my/dataset --output-dir /path/to/output
+```
+### Exporting Models
+After training, you can export a PyTorch Lightning checkpoint (`.ckpt`) to an ONNX model:
+```bash
+python phoonnx_train/export_onnx.py /path/to/my/model.ckpt output.onnx
+```
+This script will convert the model to an ONNX file with an `opset_version` of 15.

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx/config.py RENAMED Viewed

@@ -6,7 +6,6 @@ from phoonnx.phoneme_ids import (load_phoneme_ids, BlankBetween,
                                  DEFAULT_BLANK_WORD_TOKEN, DEFAULT_BLANK_TOKEN,
                                  DEFAULT_PAD_TOKEN, DEFAULT_BOS_TOKEN, DEFAULT_EOS_TOKEN)
 DEFAULT_NOISE_SCALE = 0.667
 DEFAULT_LENGTH_SCALE = 1.0
 DEFAULT_NOISE_W_SCALE = 0.8
@@ -22,6 +21,8 @@ class Alphabet(str, Enum):
     UNICODE = "unicode"
     IPA = "ipa"
     ARPA = "arpa" # en
+    SAMPA = "sampa"
+    XSAMPA = "x-sampa"
     HANGUL = "hangul" # ko
     KANA = "kana" # ja
     HIRA = "hira" # ja
@@ -32,6 +33,7 @@ class Alphabet(str, Enum):
     ERAAB = "eraab" # fa
     COTOVIA = "cotovia" # gl
     HANZI = "hanzi" # zh
+    MANTOQ = "mantoq" # ar

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx/phonemizers/ar.py RENAMED Viewed

@@ -5,7 +5,7 @@ from phoonnx.config import Alphabet
 class MantoqPhonemizer(BasePhonemizer):
     def __init__(self):
-        super().__init__(Alphabet.IPA)
+        super().__init__(Alphabet.MANTOQ)
     @classmethod
     def get_lang(cls, target_lang: str) -> str:

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx/phonemizers/gl.py RENAMED Viewed

@@ -12,6 +12,57 @@ class CotoviaError(Exception):
     pass
+COTOVIA2IPA = {
+    "pau": " ",
+    "a": "a",
+    "E": "ɛ",
+    "e": "e",
+    "i": "i",
+    "j": "j",
+    "O": "ɔ",
+    "o": "o",
+    "u": "u",
+    "w": "w",
+    "p": "p",
+    "b": "b",
+    "B": "β",
+    "t": "t",
+    "d": "d",
+    "D": "ð",
+    "k": "k",
+    "g": "g",
+    "G": "ɣ",
+    "f": "f",
+    "T": "θ",
+    "s": "s",
+    "S": "ʃ",
+    "tS": "tʃ",
+    "m": "m",
+    "n": "n",
+    "J": "ɲ",
+    "N": "ŋ",
+    "l": "l",
+    "Z": "ʎ",
+    "jj": "ʎ",
+    "L": "ʎ",
+    "r": "ɾ",
+    "rr": "r",
+    "X": "x"
+}
+def cotovia2ipa(text: str) -> str:
+    """
+    Converts a string of Cotovía phonemes to IPA.
+    """
+    # Sort the dictionary keys by length in descending order to handle multi-character phonemes first
+    sorted_cotovia_keys = sorted(COTOVIA2IPA.keys(), key=len, reverse=True)
+    ipa_str = text
+    for cotovia_char in sorted_cotovia_keys:
+        ipa_str = ipa_str.replace(cotovia_char, COTOVIA2IPA[cotovia_char])
+    return ipa_str
 class CotoviaPhonemizer(BasePhonemizer):
     """
     A phonemizer class that uses the Cotovia TTS binary to convert text into phonemes.
@@ -19,7 +70,7 @@ class CotoviaPhonemizer(BasePhonemizer):
     regular expression transformations to clean and normalize the phonetic representation.
     """
-    def __init__(self, cotovia_bin_path: Optional[str] = None):
+    def __init__(self, cotovia_bin_path: Optional[str] = None, alphabet: Alphabet = Alphabet.IPA):
         """
         Initializes the CotoviaPhonemizer.
@@ -31,7 +82,7 @@ class CotoviaPhonemizer(BasePhonemizer):
         if not os.path.exists(self.cotovia_bin):
             raise FileNotFoundError(f"Cotovia binary not found at {self.cotovia_bin}. "
                                     "Please ensure it's installed or provide the correct path.")
-        super().__init__(Alphabet.COTOVIA)
+        super().__init__(alphabet)
     @classmethod
     def get_lang(cls, target_lang: str) -> str:
@@ -127,6 +178,8 @@ class CotoviaPhonemizer(BasePhonemizer):
         # substitute ' ( text )' to ', text,'
         str_ext = re.sub(r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \\2,", str_ext)
+        if self.alphabet == Alphabet.IPA:
+            return cotovia2ipa(str_ext)
         return str_ext
@@ -138,5 +191,5 @@ if __name__ == "__main__":
     lang = "gl"
     text_gl = "Este é un sistema de conversión de texto a voz en lingua galega baseado en redes neuronais artificiais. Ten en conta que as funcionalidades incluídas nesta páxina ofrécense unicamente con fins de demostración. Se tes algún comentario, suxestión ou detectas algún problema durante a demostración, ponte en contacto connosco."
     print(f"\n--- Getting phonemes for '{text_gl}' (Cotovia) ---")
-    phonemes_cotovia = cotovia.phonemize(text_gl, lang)
+    phonemes_cotovia = cotovia.phonemize_string(text_gl, lang)
     print(f"  Cotovia Phonemes: {phonemes_cotovia}")

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx/util.py RENAMED Viewed

@@ -567,15 +567,18 @@ def _normalize_units(text: str, full_lang: str) -> str:
             symbolic_pattern = re.compile(number_pattern_str + r"\s*(" + symbolic_pattern_str + r")", re.IGNORECASE)
             def replace_symbolic(match):
-                number_str = match.group(1)
+                number = match.group(1)
                 # Remove thousands separator and replace decimal separator for parsing
-                number = number_str.replace(thousands_separator, "").replace(decimal_separator, ".")
+                if thousands_separator in number and decimal_separator in number:
+                    number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
+                elif decimal_separator != "." and decimal_separator in number:
+                    number = number.replace(decimal_separator, ".")
                 unit_symbol = match.group(2)
                 unit_word = symbolic_units[unit_symbol]
                 try:
-                    return f"{pronounce_number(float(number), full_lang)} {unit_word}"
+                    return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
                 except Exception as e:
-                    LOG.error(f"Failed to pronounce number with unit: {number_str}{unit_symbol} - ({e})")
+                    LOG.error(f"Failed to pronounce number with unit: {number}{unit_symbol} - ({e})")
                     return match.group(0)
             text = symbolic_pattern.sub(replace_symbolic, text)
@@ -588,12 +591,15 @@ def _normalize_units(text: str, full_lang: str) -> str:
                                               re.IGNORECASE)
             def replace_alphanumeric(match):
-                number_str = match.group(1)
+                number = match.group(1)
                 # Remove thousands separator and replace decimal separator for parsing
-                number = number_str.replace(thousands_separator, "").replace(decimal_separator, ".")
+                if thousands_separator in number and decimal_separator in number:
+                    number = number.replace(thousands_separator, "").replace(decimal_separator, ".")
+                elif decimal_separator != "." and decimal_separator in number:
+                    number = number.replace(decimal_separator, ".")
                 unit_symbol = match.group(2)
                 unit_word = alphanumeric_units[unit_symbol]
-                return f"{pronounce_number(float(number), full_lang)} {unit_word}"
+                return f"{pronounce_number(float(number) if '.' in number else int(number), full_lang)} {unit_word}"
             text = alphanumeric_pattern.sub(replace_alphanumeric, text)
     return text
@@ -667,7 +673,8 @@ if __name__ == "__main__":
     # General normalization examples
     print("General English example: " + normalize('I\'m Dr. Prof. 3/3 0.5% of 12345€, 5ft, and 10kg', 'en'))
-    print(f"General Portuguese example: {normalize('Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg', 'pt')}")
+    print(f"Word Salad Portuguese (Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0,5% de 12345€, 5m, e 10kg', 'pt')}")
+    print(f"Word Salad Portuguese (Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg): {normalize('Dr. Prof. 3/3 0.5% de 12345€, 5m, e 10kg', 'pt')}")
     # Portuguese examples with comma decimal separator
     print("\n--- Portuguese Decimal Separator Examples ---")
@@ -691,10 +698,10 @@ if __name__ == "__main__":
     # Portuguese dates and times
     print("\n--- Portuguese Date & Time Examples ---")
-    print(f"Portuguese date (DMY format): {normalize('A data é 03/08/2025', 'pt')}")
-    print(f"Portuguese ambiguous date (DMY assumed): {normalize('O relatório é para 15/05/2025', 'pt')}")
-    print(f"Portuguese date with dashes: {normalize('O evento é no dia 25-10-2024', 'pt')}")
-    print(f"Portuguese military time: {normalize('O encontro é às 14h30', 'pt')}")
+    print(f"Portuguese date (A data é 03/08/2025): {normalize('A data é 03/08/2025', 'pt')}")
+    print(f"Portuguese ambiguous date (O relatório é para 15/05/2025): {normalize('O relatório é para 15/05/2025', 'pt')}")
+    print(f"Portuguese date with dashes (O evento é no dia 25-10-2024): {normalize('O evento é no dia 25-10-2024', 'pt')}")
+    print(f"Portuguese military time (O encontro é às 14h30): {normalize('O encontro é às 14h30', 'pt')}")
     # Other examples
     print(f"\n--- Other Examples ---")

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx/version.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # START_VERSION_BLOCK
 VERSION_MAJOR = 0
 VERSION_MINOR = 0
-VERSION_BUILD = 1
-VERSION_ALPHA = 1
+VERSION_BUILD = 2
+VERSION_ALPHA = 2
 # END_VERSION_BLOCK

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: phoonnx
-Version: 0.0.1a1
+Version: 0.0.2a2
 Home-page: https://github.com/TigreGotico/phoonnx
 Author: JarbasAi
 Author-email: jarbasai@mailfence.com
@@ -71,6 +71,7 @@ Provides-Extra: got
 Provides-Extra: tpi
 Provides-Extra: pt
 Provides-Extra: ckb
+Provides-Extra: train
 Provides-Extra: lsm
 Provides-Extra: fa
 Provides-Extra: av

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx.egg-info/requires.txt RENAMED Viewed

@@ -2,7 +2,7 @@ numpy
 onnxruntime
 quebra-frases
 langcodes
-ovos-number-parser>=0.3.3a1
+ovos-number-parser>=0.4.0
 ovos-date-parser>=0.6.4a1
 [aa]
@@ -300,6 +300,13 @@ epitran
 [tr]
 epitran
+[train]
+cython<1,>=0.29.0
+librosa<1,>=0.9.2
+numpy<2,>=1.19.0
+pytorch-lightning<2.0
+torch<2,>=1.11.0
 [uew]
 epitran

{phoonnx-0.0.1a1 → phoonnx-0.0.2a2}/phoonnx_train/vits/lightning.py RENAMED Viewed

@@ -299,9 +299,9 @@ class VitsModel(pl.LightningModule):
             test_audio = test_audio * (1.0 / max(0.01, abs(test_audio.max())))
             tag = test_utt.text or str(utt_idx)
-            self.logger.experiment.add_audio(
-                tag, test_audio, sample_rate=self.hparams.sample_rate
-            )
+           # self.logger.experiment.add_audio(
+           #     tag, test_audio, sample_rate=self.hparams.sample_rate
+           # )
         return val_loss