lt-tensor 0.0.1a40__py3-none-any.whl → 0.0.1a41__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
lt_tensor/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.0.1a40"
1
+ __version__ = "0.0.1a41"
2
2
 
3
3
  from . import (
4
4
  lr_schedulers,
lt_tensor/losses.py CHANGED
@@ -140,7 +140,6 @@ class MultiMelScaleLoss(Model):
140
140
  onesided: Optional[bool] = None,
141
141
  std: int = 4,
142
142
  mean: int = -4,
143
- use_istft_norm: bool = True,
144
143
  use_pitch_loss: bool = True,
145
144
  use_rms_loss: bool = True,
146
145
  norm_pitch_fn: Callable[[Tensor], Tensor] = normalize_minmax,
@@ -165,7 +164,6 @@ class MultiMelScaleLoss(Model):
165
164
  self.loss_rms_fn = loss_rms_fn
166
165
  self.lambda_mel = lambda_mel
167
166
  self.weight = weight
168
- self.use_istft_norm = use_istft_norm
169
167
  self.use_pitch_loss = use_pitch_loss
170
168
  self.use_rms_loss = use_rms_loss
171
169
  self.lambda_pitch = lambda_pitch
@@ -251,36 +249,26 @@ class MultiMelScaleLoss(Model):
251
249
  def forward(
252
250
  self, input_wave: torch.Tensor, target_wave: torch.Tensor
253
251
  ) -> torch.Tensor:
254
- assert self.use_istft_norm or input_wave.shape[-1] == target_wave.shape[-1], (
255
- f"Size mismatch! input_wave {input_wave.shape[-1]} must match target_wave: {target_wave.shape[-1]}. "
256
- "Alternatively 'use_istft_norm' can be set to Trie with will automatically force the audio to that size."
257
- )
258
252
  target_wave = target_wave.to(input_wave.device)
259
253
  losses = 0.0
260
254
  for M in self.mel_spectrograms:
261
- if self.use_istft_norm and input_proc.shape[-1] != target_proc.shape[-1]:
262
- input_proc = M.istft_norm(input_wave, length=target_wave.shape[-1])
263
- target_proc = M.istft_norm(target_wave, length=target_wave.shape[-1])
264
- else:
265
- input_proc, target_proc = input_wave, target_wave
266
-
267
- x_mels = M(input_proc)
268
- y_mels = M(target_proc)
255
+ x_mels = M.compute_mel(input_wave)
256
+ y_mels = M.compute_mel(target_wave)
269
257
 
270
258
  loss = self.loss_mel_fn(x_mels.squeeze(), y_mels.squeeze())
271
259
  losses += loss * self.lambda_mel
272
260
 
273
261
  # pitch/f0 loss
274
262
  if self.use_pitch_loss:
275
- x_pitch = self.norm_pitch_fn(M.compute_pitch(input_proc))
276
- y_pitch = self.norm_pitch_fn(M.compute_pitch(target_proc))
263
+ x_pitch = self.norm_pitch_fn(M.compute_pitch(input_wave))
264
+ y_pitch = self.norm_pitch_fn(M.compute_pitch(target_wave))
277
265
  f0_loss = self.loss_pitch_fn(x_pitch, y_pitch)
278
266
  losses += f0_loss * self.lambda_pitch
279
267
 
280
268
  # energy/rms loss
281
269
  if self.use_rms_loss:
282
- x_rms = self.norm_rms(M.compute_rms(input_proc, x_mels))
283
- y_rms = self.norm_rms(M.compute_rms(target_proc, y_mels))
270
+ x_rms = self.norm_rms(M.compute_rms(input_wave, x_mels))
271
+ y_rms = self.norm_rms(M.compute_rms(target_wave, y_mels))
284
272
  rms_loss = self.loss_rms_fn(x_rms, y_rms)
285
273
  losses += rms_loss * self.lambda_rms
286
274
 
@@ -1,3 +1,3 @@
1
- from . import diffwave, istft, hifigan, bigvgan
1
+ from . import diffwave, istft, hifigan, bigvgan, bemaganv2
2
2
 
3
- __all__ = ["diffwave", "istft", "hifigan", "bigvgan"]
3
+ __all__ = ["diffwave", "istft", "hifigan", "bigvgan", "bemaganv2"]
@@ -698,66 +698,4 @@ class AudioProcessor(Model):
698
698
  rms_batch_dim: bool = False,
699
699
  spec_phase_batch_dim: bool = False,
700
700
  ):
701
- results = {
702
- "wave": None,
703
- "mel": None,
704
- "pitch": None,
705
- "rms": None,
706
- "spec": None,
707
- "phase": None,
708
- }
709
- results["wave"] = (
710
- x.squeeze()
711
- if isinstance(x, Tensor)
712
- else self.load_audio(x, istft_norm=True).squeeze()
713
- )
714
- results["mel"] = self.compute_mel_librosa(
715
- wave=(
716
- results["wave"]
717
- if results["wave"].ndim == 3
718
- else results["wave"].unsqueeze(0)
719
- ),
720
- spectral_norm=spectral_norm,
721
- ).squeeze()
722
- try:
723
- results["pitch"] = self.compute_pitch(results["wave"]).squeeze()
724
- except Exception as e:
725
- results["pitch"] = e
726
- try:
727
- results["rms"] = self.compute_rms(results["wave"], results["mel"]).squeeze()
728
- except Exception as e:
729
- results["rms"] = e
730
- try:
731
- sp_ph = self.stft(results["wave"], return_complex=False)
732
- spec, phase = sp_ph.split(1, -1)
733
- results["spec"] = spec.squeeze()
734
- results["phase"] = phase.squeeze()
735
- except Exception as e:
736
- results["spec"] = e
737
- results["phase"] = e
738
-
739
- if (add_batch_to_all or wave_batch_dim) and results["wave"].ndim == 1:
740
- results["wave"] = results["wave"].unsqueeze(0)
741
- if (add_batch_to_all or mel_batch_dim) and results["mel"].ndim == 2:
742
- results["mel"] = results["mel"].unsqueeze(0)
743
- if (
744
- isinstance(results["rms"], Tensor)
745
- and (add_batch_to_all or rms_batch_dim)
746
- and results["rms"].ndim == 1
747
- ):
748
- results["rms"] = results["rms"].unsqueeze(0)
749
- if (
750
- isinstance(results["pitch"], Tensor)
751
- and (add_batch_to_all or pitch_batch_dim)
752
- and results["pitch"].ndim == 1
753
- ):
754
- results["pitch"] = results["pitch"].unsqueeze(0)
755
- if (
756
- isinstance(results["spec"], Tensor)
757
- and (add_batch_to_all or spec_phase_batch_dim)
758
- and results["spec"].ndim == 2
759
- ):
760
- results["spec"] = results["spec"].unsqueeze(0)
761
- results["phase"] = results["phase"].unsqueeze(0)
762
-
763
- return results
701
+ pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lt-tensor
3
- Version: 0.0.1a40
3
+ Version: 0.0.1a41
4
4
  Summary: General utilities for PyTorch and others. Built for general use.
5
5
  Home-page: https://github.com/gr1336/lt-tensor/
6
6
  Author: gr1336
@@ -1,6 +1,6 @@
1
- lt_tensor/__init__.py,sha256=rw5hRFaXgcEkqKGJfvXoWIxoKGgaCn9D8VvyncRStZ4,441
1
+ lt_tensor/__init__.py,sha256=amg5g81XgBpy_yEwvZpFNx40cR1nWGSv4QfUuHsajKQ,441
2
2
  lt_tensor/config_templates.py,sha256=F9UvL8paAjkSvio890kp8WznpYeI50pYnm9iqQroBxk,2797
3
- lt_tensor/losses.py,sha256=e-YyKMmI0FwWQ3VLfJLDGSH4_rNpnYj0-htuk4eYboE,9283
3
+ lt_tensor/losses.py,sha256=1Fb7urFxyrxXQBJg_COvCfg5s8UfhXb-MlWimSQvotI,8555
4
4
  lt_tensor/lr_schedulers.py,sha256=6_vcfaPHrozfH3wvmNEdKSFYl6iTIijYoHL8vuG-45U,7651
5
5
  lt_tensor/math_ops.py,sha256=ahX6Z1Mt3X-FhmwSZYZea5mB1B0S8GDuvKPfAm5e_FQ,2646
6
6
  lt_tensor/misc_utils.py,sha256=stL6q3M7S2N4FBICFYbgYpdPDrJRlwmr24-iCXMRifM,28933
@@ -23,7 +23,7 @@ lt_tensor/model_zoo/activations/alias_free/act.py,sha256=1wxmab2kMD88L6wsQgf3t25
23
23
  lt_tensor/model_zoo/activations/alias_free/filter.py,sha256=5TvXESv31toD5sePBe_OUJJfMXv6Ohwmx2YawjQL-pk,6004
24
24
  lt_tensor/model_zoo/activations/alias_free/resample.py,sha256=3iM4fNr9fLNXXMyXvzW-MwkSjOZOrMZLfS80UHs6zk0,3386
25
25
  lt_tensor/model_zoo/activations/snake/__init__.py,sha256=AtOAbJuMinxmKkppITGMzRbcbPQaALnl9mCtl1c3x0Q,4356
26
- lt_tensor/model_zoo/audio_models/__init__.py,sha256=WwiP9MekJreMOfKPWLl24VkRJIpLk6hhL8ch0aKgOss,103
26
+ lt_tensor/model_zoo/audio_models/__init__.py,sha256=naBBinbzrykogFd9O6hbP80bMjfX2W6_RG3c7KfkCn0,127
27
27
  lt_tensor/model_zoo/audio_models/resblocks.py,sha256=u-foHxaFDUICjxSkpyHXljQYQG9zMxVYaOGqLR_nJ-k,7978
28
28
  lt_tensor/model_zoo/audio_models/bemaganv2/__init__.py,sha256=dR849yDsnj7qwHOpmuRFVxPA7o55MNQalm7c2cPDDBw,7374
29
29
  lt_tensor/model_zoo/audio_models/bigvgan/__init__.py,sha256=mkjtErhENXCN8XeQe_eZ9iYJMH5p1wy1m6G2wvgp2j0,8099
@@ -37,9 +37,9 @@ lt_tensor/model_zoo/losses/CQT/transforms.py,sha256=Vkid0J9dqLnlINfyyUlQf-qB3gOQ
37
37
  lt_tensor/model_zoo/losses/CQT/utils.py,sha256=twGw6FVD7V5Ksfx_1BUEN3EP1tAS6wo-9LL3VnuHB8c,16751
38
38
  lt_tensor/model_zoo/losses/_envelope_disc/__init__.py,sha256=EIPat8Q1sjxYBKxL3qdLENYtPkVs0RIuIblx2KrtkB0,4503
39
39
  lt_tensor/processors/__init__.py,sha256=Pvxhh0KR65zLCgUd53_k5Z0y5JWWcO0ZBXFK9rv0o5w,109
40
- lt_tensor/processors/audio.py,sha256=_chhfH1hN7_Zx_yCqzhExrOlh21vmsHUThWAnVV5Pt8,25763
41
- lt_tensor-0.0.1a40.dist-info/licenses/LICENSE,sha256=TbiyJWLgNqqgqhfCnrGwFIxy7EqGNrIZZcKhHrefcuU,11354
42
- lt_tensor-0.0.1a40.dist-info/METADATA,sha256=S0WvGGrixOwsATo-hNcL0eJ0-dEHgNKAdR5kZ-DQr7w,1071
43
- lt_tensor-0.0.1a40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
- lt_tensor-0.0.1a40.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
45
- lt_tensor-0.0.1a40.dist-info/RECORD,,
40
+ lt_tensor/processors/audio.py,sha256=zjnGLDHcViId-mYcDce5IwnqDlJsN9-6fHUsx1VgX4Q,23480
41
+ lt_tensor-0.0.1a41.dist-info/licenses/LICENSE,sha256=TbiyJWLgNqqgqhfCnrGwFIxy7EqGNrIZZcKhHrefcuU,11354
42
+ lt_tensor-0.0.1a41.dist-info/METADATA,sha256=s9cZCc_4E3_m1VxI3jX7MWXiiHBpYtkauJTcXwRn-1c,1071
43
+ lt_tensor-0.0.1a41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
+ lt_tensor-0.0.1a41.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
45
+ lt_tensor-0.0.1a41.dist-info/RECORD,,