phoonnx 0.2.1a1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/PKG-INFO +1 -1
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/version.py +2 -2
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx.egg-info/PKG-INFO +1 -1
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/preprocess.py +58 -14
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/train.py +2 -2
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/README.md +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/config.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/locale/ca/phonetic_spellings.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/locale/en/phonetic_spellings.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/locale/gl/phonetic_spellings.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/locale/pt/phonetic_spellings.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phoneme_ids.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/ar.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/base.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/en.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/fa.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/gl.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/he.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/ja.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/ko.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/mul.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/mwl.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/vi.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/phonemizers/zh.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/arpa2ipa.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/bw2ipa.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/hangul2ipa.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/aspiration.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/assimilation.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/double_coda.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/hanja.tsv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/ipa.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/neutralization.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/tensification.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/ko_tables/yale.csv +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/kog2p/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/kog2p/rulebook.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/buck/symbols.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/buck/tokenization.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/num2words.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/araby.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/named_const.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/normalize.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/number.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/number_const.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/stack.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/pyarabic/trans.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/mantoq/unicode_symbol2label.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/phonikud/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/LICENSE +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/SOURCE +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/hint_id_map.json +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/input_id_map.json +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/tashkeel/target_id_map.json +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/thirdparty/zh_num.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/util.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx/voice.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx.egg-info/SOURCES.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx.egg-info/dependency_links.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx.egg-info/requires.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx.egg-info/top_level.txt +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/export_onnx.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/norm_audio/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/norm_audio/trim.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/norm_audio/vad.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/attentions.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/commons.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/config.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/dataset.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/lightning.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/losses.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/mel_processing.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/models.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/modules.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/monotonic_align/__init__.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/monotonic_align/setup.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/transforms.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/utils.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/phoonnx_train/vits/wavfile.py +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/setup.cfg +0 -0
- {phoonnx-0.2.1a1 → phoonnx-0.2.3}/setup.py +0 -0
@@ -218,7 +218,8 @@ def phonemize_worker(
|
|
218
218
|
utterance = phonemizer.add_diacritics(utterance, config.language)
|
219
219
|
|
220
220
|
# Phonemize the text
|
221
|
-
utt.phonemes = phonemizer.phonemize_to_list(utterance, config.language)
|
221
|
+
utt.phonemes = [p for p in phonemizer.phonemize_to_list(utterance, config.language)
|
222
|
+
if p != "\n"] # HACK: not sure where this is coming from
|
222
223
|
if not utt.phonemes:
|
223
224
|
raise RuntimeError(f"Phonemes not found for '{utterance}'")
|
224
225
|
|
@@ -268,13 +269,28 @@ def phonemize_worker(
|
|
268
269
|
required=True,
|
269
270
|
help="phonemizer language code (e.g., 'en', 'es', 'fr')",
|
270
271
|
)
|
272
|
+
@click.option(
|
273
|
+
"-c",
|
274
|
+
"--prev-config",
|
275
|
+
"prev_config",
|
276
|
+
type=click.Path(exists=True, dir_okay=False, path_type=Path),
|
277
|
+
default=None,
|
278
|
+
help="Optional path to a previous config.json from which to reuse phoneme_id_map. (for fine-tuning only)",
|
279
|
+
)
|
280
|
+
@click.option(
|
281
|
+
"--drop-extra-phonemes",
|
282
|
+
"drop_extra_phonemes",
|
283
|
+
type=bool,
|
284
|
+
default=True,
|
285
|
+
help="If training data has more symbols than base model, discard new symbols. (for fine-tuning only)",
|
286
|
+
)
|
271
287
|
@click.option(
|
272
288
|
"-r",
|
273
289
|
"--sample-rate",
|
274
290
|
"sample_rate",
|
275
291
|
type=int,
|
276
|
-
|
277
|
-
help="Target sample rate for voice (hertz,
|
292
|
+
default=22050,
|
293
|
+
help="Target sample rate for voice (hertz, Default: 22050)",
|
278
294
|
)
|
279
295
|
@click.option(
|
280
296
|
"--cache-dir",
|
@@ -365,6 +381,8 @@ def cli(
|
|
365
381
|
input_dir: Path,
|
366
382
|
output_dir: Path,
|
367
383
|
language: str,
|
384
|
+
prev_config: Path,
|
385
|
+
drop_extra_phonemes: bool,
|
368
386
|
sample_rate: int,
|
369
387
|
cache_dir: Optional[Path],
|
370
388
|
max_workers: Optional[int],
|
@@ -493,23 +511,49 @@ def cli(
|
|
493
511
|
for proc in processes:
|
494
512
|
proc.join()
|
495
513
|
|
496
|
-
# --- Build the final phoneme map from the collected phonemes ---
|
497
|
-
_LOGGER.info("Building a complete phoneme map from collected phonemes...")
|
498
514
|
|
499
|
-
|
500
|
-
|
501
|
-
|
515
|
+
# --- Build the final phoneme map from the collected phonemes ---
|
516
|
+
_LOGGER.info("Building a phoneme map from collected dataset phonemes...")
|
517
|
+
|
518
|
+
if prev_config:
|
519
|
+
with open(prev_config) as f:
|
520
|
+
prev_phoneme_id_map = json.load(f)["phoneme_id_map"]
|
521
|
+
_LOGGER.info(f"Loaded phoneme map from previous config: '{prev_config}'")
|
522
|
+
all_phonemes.update(prev_phoneme_id_map.keys())
|
523
|
+
final_phoneme_id_map = prev_phoneme_id_map
|
524
|
+
_LOGGER.info("previous phoneme map contains %d symbols.", len(final_phoneme_id_map))
|
525
|
+
else:
|
526
|
+
final_phoneme_id_map: Dict[str, int] = DEFAULT_SPECIAL_PHONEME_ID_MAP.copy()
|
527
|
+
if phonemizer.alphabet == Alphabet.IPA:
|
528
|
+
all_phonemes.update(DEFAULT_IPA_PHONEME_ID_MAP.keys())
|
502
529
|
|
503
|
-
# Filter out
|
530
|
+
# Filter out tokens that are already in the map
|
504
531
|
existing_keys: Set[str] = set(final_phoneme_id_map.keys())
|
505
|
-
new_phonemes: List[str] = sorted([p for p in all_phonemes
|
532
|
+
new_phonemes: List[str] = sorted([p for p in all_phonemes
|
533
|
+
if p not in existing_keys]
|
534
|
+
)
|
535
|
+
|
536
|
+
_LOGGER.info("Collected %d new symbols.", len(new_phonemes))
|
537
|
+
|
538
|
+
finetune_error = prev_config and len(new_phonemes)
|
539
|
+
if finetune_error:
|
540
|
+
if not drop_extra_phonemes:
|
541
|
+
raise ValueError("training data contains different phonemes than previous phoneme map! Can not finetune model")
|
542
|
+
else:
|
543
|
+
_LOGGER.error("training data contains different phonemes than previous phoneme map! "
|
544
|
+
"Discarding new phonemes to still allow model finetuning")
|
506
545
|
|
507
546
|
current_id: int = len(final_phoneme_id_map)
|
508
547
|
for pho in new_phonemes:
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
548
|
+
if finetune_error:
|
549
|
+
_LOGGER.info(f"Discarded phoneme: {pho}")
|
550
|
+
else:
|
551
|
+
final_phoneme_id_map[pho] = current_id
|
552
|
+
current_id += 1
|
553
|
+
_LOGGER.debug(f"New phoneme: {pho}")
|
554
|
+
|
555
|
+
if new_phonemes:
|
556
|
+
_LOGGER.info("Final phoneme map contains %d symbols.", len(final_phoneme_id_map))
|
513
557
|
|
514
558
|
# --- Write the final config.json ---
|
515
559
|
_LOGGER.info("Writing dataset config...")
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
3
|
from pathlib import Path
|
4
|
-
|
4
|
+
import os
|
5
5
|
import torch
|
6
6
|
import click
|
7
7
|
from pytorch_lightning import Trainer
|
@@ -42,7 +42,7 @@ def load_state_dict(model, saved_state_dict):
|
|
42
42
|
# Model-specific arguments
|
43
43
|
@click.option('--learning-rate', type=float, default=2e-4, help='Learning rate for optimizer (default: 2e-4)')
|
44
44
|
@click.option('--batch-size', type=int, default=16, help='Training batch size (default: 16)')
|
45
|
-
@click.option('--num-workers', type=click.IntRange(min=1), default=1, help='Number of data loader workers (default: 1)')
|
45
|
+
@click.option('--num-workers', type=click.IntRange(min=1), default=os.cpu_count() or 1, help='Number of data loader workers (default: CPU count)')
|
46
46
|
@click.option('--validation-split', type=float, default=0.05, help='Proportion of data used for validation (default: 0.05)')
|
47
47
|
def main(
|
48
48
|
dataset_dir,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|