phoonnx 0.2.6a2__py3-none-any.whl → 0.2.7a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
phoonnx/version.py CHANGED
@@ -1,8 +1,8 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
3
  VERSION_MINOR = 2
4
- VERSION_BUILD = 6
5
- VERSION_ALPHA = 2
4
+ VERSION_BUILD = 7
5
+ VERSION_ALPHA = 1
6
6
  # END_VERSION_BLOCK
7
7
 
8
8
  VERSION_STR = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phoonnx
3
- Version: 0.2.6a2
3
+ Version: 0.2.7a1
4
4
  Home-page: https://github.com/TigreGotico/phoonnx
5
5
  Author: JarbasAi
6
6
  Author-email: jarbasai@mailfence.com
@@ -2,7 +2,7 @@ phoonnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  phoonnx/config.py,sha256=DKgsU03g8jrAuMcVqbu-w3MWPXOUihFtRnavg6WGQ1Y,19983
3
3
  phoonnx/phoneme_ids.py,sha256=FiNgZwV6naEsBh6XwFLh3_FyOgPiCsK9qo7S0v-CmI4,13667
4
4
  phoonnx/util.py,sha256=XSjFEoqSFcujFTHxednacgC9GrSYyF-Il5L6Utmxmu4,25909
5
- phoonnx/version.py,sha256=XtQ8LLEKPJw1x3fELi5iAV6qNgCNVf_oTmwVBTXYBqA,237
5
+ phoonnx/version.py,sha256=_KNtUP6KVTuU3xs9ekGaRTElqbX7r7YOhKIJPjmLQtc,237
6
6
  phoonnx/voice.py,sha256=Fh60o87-_sIw4zTP8NEGvEV7XzTC0FhW9Jkq7Fr60lQ,19382
7
7
  phoonnx/locale/ca/phonetic_spellings.txt,sha256=igv3t7jxLSRE5GHsdn57HOpxiWNcEmECPql6m02wbO0,47
8
8
  phoonnx/locale/en/phonetic_spellings.txt,sha256=xGQlWOABLzbttpQvopl9CU-NnwEJRqKx8iuylsdUoQA,27
@@ -63,7 +63,7 @@ phoonnx/thirdparty/tashkeel/input_id_map.json,sha256=cnpJqjx-k53AbzKyfC4GxMS771l
63
63
  phoonnx/thirdparty/tashkeel/model.onnx,sha256=UsQNQsoJT_n_B6CR0KHq_XuqXPI4jmCpzIm6zY5elV8,4788213
64
64
  phoonnx/thirdparty/tashkeel/target_id_map.json,sha256=baNAJL_UwP9U91mLt01aAEBRRNdGr-csFB_O6roh7TA,181
65
65
  phoonnx_train/export_onnx.py,sha256=UFn1id5-p9tIXxK9FvHOry59WkotNqXtupinkeKcSDs,12881
66
- phoonnx_train/preprocess.py,sha256=VYyU8XaCroa6tnLhQkQJ8wq1w1Gf9PPnIIM1N0PIcNE,22042
66
+ phoonnx_train/preprocess.py,sha256=dAuHEF4-yEVhVpsVTRNat_S6_jljlwa4VpM5SZezyWc,23146
67
67
  phoonnx_train/train.py,sha256=nsINvDQ3dYvBne5UWPgLZ0a4qZFdSsOKk8HzZHGTLY4,8757
68
68
  phoonnx_train/norm_audio/__init__.py,sha256=Al_YwqMnENXRWp0c79cDZqbdd7pFYARXKxCfBaedr1c,3030
69
69
  phoonnx_train/norm_audio/trim.py,sha256=_ZsE3SYhahQSdEdBLeSwyFJGcvEbt-5E_lnWwTT4tcY,1698
@@ -83,7 +83,7 @@ phoonnx_train/vits/utils.py,sha256=exiyrtPHbnnGvcHWSbaH9-gR6srH5ZPHlKiqV2IHUrQ,4
83
83
  phoonnx_train/vits/wavfile.py,sha256=oQZiTIrdw0oLTbcVwKfGXye1WtKte6qK_52qVwiMvfc,26396
84
84
  phoonnx_train/vits/monotonic_align/__init__.py,sha256=5IdAOD1Z7UloMb6d_9NRFsXoNIjEQ3h9mvOSh_AtO3k,636
85
85
  phoonnx_train/vits/monotonic_align/setup.py,sha256=0K5iJJ2mKIklx6ncEfCQS34skm5hHPiz9vRlQEvevvY,266
86
- phoonnx-0.2.6a2.dist-info/METADATA,sha256=E3QnJVGElqhpgVBWDPL5ZT1PGH2fxB4lQaeXZYO99Kg,8250
87
- phoonnx-0.2.6a2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
88
- phoonnx-0.2.6a2.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
89
- phoonnx-0.2.6a2.dist-info/RECORD,,
86
+ phoonnx-0.2.7a1.dist-info/METADATA,sha256=h_Ob321ENQZXjPpllqP5W3IO6ifQVgTtz8vK4AoKosE,8250
87
+ phoonnx-0.2.7a1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
88
+ phoonnx-0.2.7a1.dist-info/top_level.txt,sha256=ZrnHXe-4HqbOSX6fbdY-JiP7YEu2Bok9T0ji351MrmM,22
89
+ phoonnx-0.2.7a1.dist-info/RECORD,,
@@ -377,6 +377,16 @@ def phonemize_worker(
377
377
  is_flag=True,
378
378
  help="Add diacritics to text (phonemizer specific, e.g., to denote stress).",
379
379
  )
380
+ @click.option(
381
+ "--jsonl-audio-path",
382
+ default=None,
383
+ help="override audio_path base directory (everything before '/wav') in generated dataset.jsonl"
384
+ )
385
+ @click.option(
386
+ "--jsonl-audio-spec-path",
387
+ default=None,
388
+ help="override audio_norm_path/audio_spec_path base directory (everything before '/cache') in generated dataset.jsonl"
389
+ )
380
390
  def cli(
381
391
  input_dir: Path,
382
392
  output_dir: Path,
@@ -397,6 +407,8 @@ def cli(
397
407
  skip_audio: bool,
398
408
  debug: bool,
399
409
  add_diacritics: bool,
410
+ jsonl_audio_path: Optional[str],
411
+ jsonl_audio_spec_path: Optional[str],
400
412
  ) -> None:
401
413
  """
402
414
  Preprocess a TTS dataset (e.g., LJSpeech format) for training a VITS-style model.
@@ -609,6 +621,17 @@ def cli(
609
621
  _LOGGER.warning("Skipping utterance with invalid phoneme_ids before writing: %s", utt.audio_path)
610
622
  continue
611
623
 
624
+ # apply path overrides if needed
625
+ # this allows pre-processing the dataset on one system and then training on another
626
+ if jsonl_audio_path:
627
+ base_path, fname = str(utt.audio_path).split("/wav/")
628
+ utt.audio_path = Path(f"{jsonl_audio_path}/wav/{fname}")
629
+ if jsonl_audio_spec_path:
630
+ base_path, fname = str(utt.audio_norm_path).split("/cache/")
631
+ utt.audio_norm_path = Path(f"{jsonl_audio_spec_path}/cache/{fname}")
632
+ base_path, fname = str(utt.audio_spec_path).split("/cache/")
633
+ utt.audio_spec_path = Path(f"{jsonl_audio_spec_path}/cache/{fname}")
634
+
612
635
  json.dump(
613
636
  utt.asdict(),
614
637
  dataset_file,