phoonnx 0.2.1a1__tar.gz → 0.2.3a1__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/PKG-INFO +1 -1
  2. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/version.py +1 -1
  3. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx.egg-info/PKG-INFO +1 -1
  4. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/preprocess.py +58 -14
  5. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/train.py +2 -2
  6. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/README.md +0 -0
  7. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/__init__.py +0 -0
  8. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/config.py +0 -0
  9. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/locale/ca/phonetic_spellings.txt +0 -0
  10. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/locale/en/phonetic_spellings.txt +0 -0
  11. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/locale/gl/phonetic_spellings.txt +0 -0
  12. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/locale/pt/phonetic_spellings.txt +0 -0
  13. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phoneme_ids.py +0 -0
  14. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/__init__.py +0 -0
  15. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/ar.py +0 -0
  16. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/base.py +0 -0
  17. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/en.py +0 -0
  18. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/fa.py +0 -0
  19. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/gl.py +0 -0
  20. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/he.py +0 -0
  21. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/ja.py +0 -0
  22. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/ko.py +0 -0
  23. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/mul.py +0 -0
  24. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/mwl.py +0 -0
  25. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/vi.py +0 -0
  26. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/phonemizers/zh.py +0 -0
  27. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/__init__.py +0 -0
  28. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/arpa2ipa.py +0 -0
  29. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/bw2ipa.py +0 -0
  30. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  31. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  32. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/hangul2ipa.py +0 -0
  33. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/aspiration.csv +0 -0
  34. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/assimilation.csv +0 -0
  35. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/double_coda.csv +0 -0
  36. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/hanja.tsv +0 -0
  37. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/ipa.csv +0 -0
  38. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/neutralization.csv +0 -0
  39. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/tensification.csv +0 -0
  40. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/ko_tables/yale.csv +0 -0
  41. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/kog2p/__init__.py +0 -0
  42. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/kog2p/rulebook.txt +0 -0
  43. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/__init__.py +0 -0
  44. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  45. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +0 -0
  46. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/buck/symbols.py +0 -0
  47. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/buck/tokenization.py +0 -0
  48. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/num2words.py +0 -0
  49. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/__init__.py +0 -0
  50. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +0 -0
  51. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/araby.py +0 -0
  52. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/named_const.py +0 -0
  53. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/normalize.py +0 -0
  54. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/number.py +0 -0
  55. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/number_const.py +0 -0
  56. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/stack.py +0 -0
  57. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/pyarabic/trans.py +0 -0
  58. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/mantoq/unicode_symbol2label.py +0 -0
  59. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/phonikud/__init__.py +0 -0
  60. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/LICENSE +0 -0
  61. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/SOURCE +0 -0
  62. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/__init__.py +0 -0
  63. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/hint_id_map.json +0 -0
  64. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/input_id_map.json +0 -0
  65. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  66. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/tashkeel/target_id_map.json +0 -0
  67. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/thirdparty/zh_num.py +0 -0
  68. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/util.py +0 -0
  69. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx/voice.py +0 -0
  70. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx.egg-info/SOURCES.txt +0 -0
  71. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx.egg-info/dependency_links.txt +0 -0
  72. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx.egg-info/requires.txt +0 -0
  73. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx.egg-info/top_level.txt +0 -0
  74. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/export_onnx.py +0 -0
  75. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/norm_audio/__init__.py +0 -0
  76. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/norm_audio/trim.py +0 -0
  77. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/norm_audio/vad.py +0 -0
  78. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/__init__.py +0 -0
  79. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/attentions.py +0 -0
  80. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/commons.py +0 -0
  81. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/config.py +0 -0
  82. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/dataset.py +0 -0
  83. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/lightning.py +0 -0
  84. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/losses.py +0 -0
  85. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/mel_processing.py +0 -0
  86. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/models.py +0 -0
  87. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/modules.py +0 -0
  88. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/monotonic_align/__init__.py +0 -0
  89. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/monotonic_align/setup.py +0 -0
  90. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/transforms.py +0 -0
  91. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/utils.py +0 -0
  92. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/phoonnx_train/vits/wavfile.py +0 -0
  93. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/setup.cfg +0 -0
  94. {phoonnx-0.2.1a1 → phoonnx-0.2.3a1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.2.1a1
3
+ Version: 0.2.3a1
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -1,7 +1,7 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 2
4
- VERSION_BUILD = 1
4
+ VERSION_BUILD = 3
5
5
  VERSION_ALPHA = 1
6
6
  # END_VERSION_BLOCK
7
7
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.2.1a1
3
+ Version: 0.2.3a1
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -218,7 +218,8 @@ def phonemize_worker(
218
218
  utterance = phonemizer.add_diacritics(utterance, config.language)
219
219
 
220
220
  # Phonemize the text
221
- utt.phonemes = phonemizer.phonemize_to_list(utterance, config.language)
221
+ utt.phonemes = [p for p in phonemizer.phonemize_to_list(utterance, config.language)
222
+ if p != "\n"] # HACK: not sure where this is coming from
222
223
  if not utt.phonemes:
223
224
  raise RuntimeError(f"Phonemes not found for '{utterance}'")
224
225
 
@@ -268,13 +269,28 @@ def phonemize_worker(
268
269
  required=True,
269
270
  help="phonemizer language code (e.g., 'en', 'es', 'fr')",
270
271
  )
272
+ @click.option(
273
+ "-c",
274
+ "--prev-config",
275
+ "prev_config",
276
+ type=click.Path(exists=True, dir_okay=False, path_type=Path),
277
+ default=None,
278
+ help="Optional path to a previous config.json from which to reuse phoneme_id_map. (for fine-tuning only)",
279
+ )
280
+ @click.option(
281
+ "--drop-extra-phonemes",
282
+ "drop_extra_phonemes",
283
+ type=bool,
284
+ default=True,
285
+ help="If training data has more symbols than base model, discard new symbols. (for fine-tuning only)",
286
+ )
271
287
  @click.option(
272
288
  "-r",
273
289
  "--sample-rate",
274
290
  "sample_rate",
275
291
  type=int,
276
- required=True,
277
- help="Target sample rate for voice (hertz, e.g., 22050)",
292
+ default=22050,
293
+ help="Target sample rate for voice (hertz, Default: 22050)",
278
294
  )
279
295
  @click.option(
280
296
  "--cache-dir",
@@ -365,6 +381,8 @@ def cli(
365
381
  input_dir: Path,
366
382
  output_dir: Path,
367
383
  language: str,
384
+ prev_config: Path,
385
+ drop_extra_phonemes: bool,
368
386
  sample_rate: int,
369
387
  cache_dir: Optional[Path],
370
388
  max_workers: Optional[int],
@@ -493,23 +511,49 @@ def cli(
493
511
  for proc in processes:
494
512
  proc.join()
495
513
 
496
- # --- Build the final phoneme map from the collected phonemes ---
497
- _LOGGER.info("Building a complete phoneme map from collected phonemes...")
498
514
 
499
- final_phoneme_id_map: Dict[str, int] = DEFAULT_SPECIAL_PHONEME_ID_MAP.copy()
500
- if phonemizer.alphabet == Alphabet.IPA:
501
- all_phonemes.update(DEFAULT_IPA_PHONEME_ID_MAP.keys())
515
+ # --- Build the final phoneme map from the collected phonemes ---
516
+ _LOGGER.info("Building a phoneme map from collected dataset phonemes...")
517
+
518
+ if prev_config:
519
+ with open(prev_config) as f:
520
+ prev_phoneme_id_map = json.load(f)["phoneme_id_map"]
521
+ _LOGGER.info(f"Loaded phoneme map from previous config: '{prev_config}'")
522
+ all_phonemes.update(prev_phoneme_id_map.keys())
523
+ final_phoneme_id_map = prev_phoneme_id_map
524
+ _LOGGER.info("previous phoneme map contains %d symbols.", len(final_phoneme_id_map))
525
+ else:
526
+ final_phoneme_id_map: Dict[str, int] = DEFAULT_SPECIAL_PHONEME_ID_MAP.copy()
527
+ if phonemizer.alphabet == Alphabet.IPA:
528
+ all_phonemes.update(DEFAULT_IPA_PHONEME_ID_MAP.keys())
502
529
 
503
- # Filter out special tokens that are already in the map
530
+ # Filter out tokens that are already in the map
504
531
  existing_keys: Set[str] = set(final_phoneme_id_map.keys())
505
- new_phonemes: List[str] = sorted([p for p in all_phonemes if p not in existing_keys])
532
+ new_phonemes: List[str] = sorted([p for p in all_phonemes
533
+ if p not in existing_keys]
534
+ )
535
+
536
+ _LOGGER.info("Collected %d new symbols.", len(new_phonemes))
537
+
538
+ finetune_error = prev_config and len(new_phonemes)
539
+ if finetune_error:
540
+ if not drop_extra_phonemes:
541
+ raise ValueError("training data contains different phonemes than previous phoneme map! Can not finetune model")
542
+ else:
543
+ _LOGGER.error("training data contains different phonemes than previous phoneme map! "
544
+ "Discarding new phonemes to still allow model finetuning")
506
545
 
507
546
  current_id: int = len(final_phoneme_id_map)
508
547
  for pho in new_phonemes:
509
- final_phoneme_id_map[pho] = current_id
510
- current_id += 1
511
-
512
- _LOGGER.info("Final phoneme map contains %d symbols.", len(final_phoneme_id_map))
548
+ if finetune_error:
549
+ _LOGGER.info(f"Discarded phoneme: {pho}")
550
+ else:
551
+ final_phoneme_id_map[pho] = current_id
552
+ current_id += 1
553
+ _LOGGER.debug(f"New phoneme: {pho}")
554
+
555
+ if new_phonemes:
556
+ _LOGGER.info("Final phoneme map contains %d symbols.", len(final_phoneme_id_map))
513
557
 
514
558
  # --- Write the final config.json ---
515
559
  _LOGGER.info("Writing dataset config...")
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  from pathlib import Path
4
-
4
+ import os
5
5
  import torch
6
6
  import click
7
7
  from pytorch_lightning import Trainer
@@ -42,7 +42,7 @@ def load_state_dict(model, saved_state_dict):
42
42
  # Model-specific arguments
43
43
  @click.option('--learning-rate', type=float, default=2e-4, help='Learning rate for optimizer (default: 2e-4)')
44
44
  @click.option('--batch-size', type=int, default=16, help='Training batch size (default: 16)')
45
- @click.option('--num-workers', type=click.IntRange(min=1), default=1, help='Number of data loader workers (default: 1)')
45
+ @click.option('--num-workers', type=click.IntRange(min=1), default=os.cpu_count() or 1, help='Number of data loader workers (default: CPU count)')
46
46
  @click.option('--validation-split', type=float, default=0.05, help='Proportion of data used for validation (default: 0.05)')
47
47
  def main(
48
48
  dataset_dir,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes