lt-tensor 0.0.1a40__py3-none-any.whl → 0.0.1a41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lt_tensor/__init__.py +1 -1
- lt_tensor/losses.py +6 -18
- lt_tensor/model_zoo/audio_models/__init__.py +2 -2
- lt_tensor/processors/audio.py +1 -63
- {lt_tensor-0.0.1a40.dist-info → lt_tensor-0.0.1a41.dist-info}/METADATA +1 -1
- {lt_tensor-0.0.1a40.dist-info → lt_tensor-0.0.1a41.dist-info}/RECORD +9 -9
- {lt_tensor-0.0.1a40.dist-info → lt_tensor-0.0.1a41.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a40.dist-info → lt_tensor-0.0.1a41.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a40.dist-info → lt_tensor-0.0.1a41.dist-info}/top_level.txt +0 -0
lt_tensor/__init__.py
CHANGED
lt_tensor/losses.py
CHANGED
@@ -140,7 +140,6 @@ class MultiMelScaleLoss(Model):
|
|
140
140
|
onesided: Optional[bool] = None,
|
141
141
|
std: int = 4,
|
142
142
|
mean: int = -4,
|
143
|
-
use_istft_norm: bool = True,
|
144
143
|
use_pitch_loss: bool = True,
|
145
144
|
use_rms_loss: bool = True,
|
146
145
|
norm_pitch_fn: Callable[[Tensor], Tensor] = normalize_minmax,
|
@@ -165,7 +164,6 @@ class MultiMelScaleLoss(Model):
|
|
165
164
|
self.loss_rms_fn = loss_rms_fn
|
166
165
|
self.lambda_mel = lambda_mel
|
167
166
|
self.weight = weight
|
168
|
-
self.use_istft_norm = use_istft_norm
|
169
167
|
self.use_pitch_loss = use_pitch_loss
|
170
168
|
self.use_rms_loss = use_rms_loss
|
171
169
|
self.lambda_pitch = lambda_pitch
|
@@ -251,36 +249,26 @@ class MultiMelScaleLoss(Model):
|
|
251
249
|
def forward(
|
252
250
|
self, input_wave: torch.Tensor, target_wave: torch.Tensor
|
253
251
|
) -> torch.Tensor:
|
254
|
-
assert self.use_istft_norm or input_wave.shape[-1] == target_wave.shape[-1], (
|
255
|
-
f"Size mismatch! input_wave {input_wave.shape[-1]} must match target_wave: {target_wave.shape[-1]}. "
|
256
|
-
"Alternatively 'use_istft_norm' can be set to Trie with will automatically force the audio to that size."
|
257
|
-
)
|
258
252
|
target_wave = target_wave.to(input_wave.device)
|
259
253
|
losses = 0.0
|
260
254
|
for M in self.mel_spectrograms:
|
261
|
-
|
262
|
-
|
263
|
-
target_proc = M.istft_norm(target_wave, length=target_wave.shape[-1])
|
264
|
-
else:
|
265
|
-
input_proc, target_proc = input_wave, target_wave
|
266
|
-
|
267
|
-
x_mels = M(input_proc)
|
268
|
-
y_mels = M(target_proc)
|
255
|
+
x_mels = M.compute_mel(input_wave)
|
256
|
+
y_mels = M.compute_mel(target_wave)
|
269
257
|
|
270
258
|
loss = self.loss_mel_fn(x_mels.squeeze(), y_mels.squeeze())
|
271
259
|
losses += loss * self.lambda_mel
|
272
260
|
|
273
261
|
# pitch/f0 loss
|
274
262
|
if self.use_pitch_loss:
|
275
|
-
x_pitch = self.norm_pitch_fn(M.compute_pitch(
|
276
|
-
y_pitch = self.norm_pitch_fn(M.compute_pitch(
|
263
|
+
x_pitch = self.norm_pitch_fn(M.compute_pitch(input_wave))
|
264
|
+
y_pitch = self.norm_pitch_fn(M.compute_pitch(target_wave))
|
277
265
|
f0_loss = self.loss_pitch_fn(x_pitch, y_pitch)
|
278
266
|
losses += f0_loss * self.lambda_pitch
|
279
267
|
|
280
268
|
# energy/rms loss
|
281
269
|
if self.use_rms_loss:
|
282
|
-
x_rms = self.norm_rms(M.compute_rms(
|
283
|
-
y_rms = self.norm_rms(M.compute_rms(
|
270
|
+
x_rms = self.norm_rms(M.compute_rms(input_wave, x_mels))
|
271
|
+
y_rms = self.norm_rms(M.compute_rms(target_wave, y_mels))
|
284
272
|
rms_loss = self.loss_rms_fn(x_rms, y_rms)
|
285
273
|
losses += rms_loss * self.lambda_rms
|
286
274
|
|
@@ -1,3 +1,3 @@
|
|
1
|
-
from . import diffwave, istft, hifigan, bigvgan
|
1
|
+
from . import diffwave, istft, hifigan, bigvgan, bemaganv2
|
2
2
|
|
3
|
-
__all__ = ["diffwave", "istft", "hifigan", "bigvgan"]
|
3
|
+
__all__ = ["diffwave", "istft", "hifigan", "bigvgan", "bemaganv2"]
|
lt_tensor/processors/audio.py
CHANGED
@@ -698,66 +698,4 @@ class AudioProcessor(Model):
|
|
698
698
|
rms_batch_dim: bool = False,
|
699
699
|
spec_phase_batch_dim: bool = False,
|
700
700
|
):
|
701
|
-
|
702
|
-
"wave": None,
|
703
|
-
"mel": None,
|
704
|
-
"pitch": None,
|
705
|
-
"rms": None,
|
706
|
-
"spec": None,
|
707
|
-
"phase": None,
|
708
|
-
}
|
709
|
-
results["wave"] = (
|
710
|
-
x.squeeze()
|
711
|
-
if isinstance(x, Tensor)
|
712
|
-
else self.load_audio(x, istft_norm=True).squeeze()
|
713
|
-
)
|
714
|
-
results["mel"] = self.compute_mel_librosa(
|
715
|
-
wave=(
|
716
|
-
results["wave"]
|
717
|
-
if results["wave"].ndim == 3
|
718
|
-
else results["wave"].unsqueeze(0)
|
719
|
-
),
|
720
|
-
spectral_norm=spectral_norm,
|
721
|
-
).squeeze()
|
722
|
-
try:
|
723
|
-
results["pitch"] = self.compute_pitch(results["wave"]).squeeze()
|
724
|
-
except Exception as e:
|
725
|
-
results["pitch"] = e
|
726
|
-
try:
|
727
|
-
results["rms"] = self.compute_rms(results["wave"], results["mel"]).squeeze()
|
728
|
-
except Exception as e:
|
729
|
-
results["rms"] = e
|
730
|
-
try:
|
731
|
-
sp_ph = self.stft(results["wave"], return_complex=False)
|
732
|
-
spec, phase = sp_ph.split(1, -1)
|
733
|
-
results["spec"] = spec.squeeze()
|
734
|
-
results["phase"] = phase.squeeze()
|
735
|
-
except Exception as e:
|
736
|
-
results["spec"] = e
|
737
|
-
results["phase"] = e
|
738
|
-
|
739
|
-
if (add_batch_to_all or wave_batch_dim) and results["wave"].ndim == 1:
|
740
|
-
results["wave"] = results["wave"].unsqueeze(0)
|
741
|
-
if (add_batch_to_all or mel_batch_dim) and results["mel"].ndim == 2:
|
742
|
-
results["mel"] = results["mel"].unsqueeze(0)
|
743
|
-
if (
|
744
|
-
isinstance(results["rms"], Tensor)
|
745
|
-
and (add_batch_to_all or rms_batch_dim)
|
746
|
-
and results["rms"].ndim == 1
|
747
|
-
):
|
748
|
-
results["rms"] = results["rms"].unsqueeze(0)
|
749
|
-
if (
|
750
|
-
isinstance(results["pitch"], Tensor)
|
751
|
-
and (add_batch_to_all or pitch_batch_dim)
|
752
|
-
and results["pitch"].ndim == 1
|
753
|
-
):
|
754
|
-
results["pitch"] = results["pitch"].unsqueeze(0)
|
755
|
-
if (
|
756
|
-
isinstance(results["spec"], Tensor)
|
757
|
-
and (add_batch_to_all or spec_phase_batch_dim)
|
758
|
-
and results["spec"].ndim == 2
|
759
|
-
):
|
760
|
-
results["spec"] = results["spec"].unsqueeze(0)
|
761
|
-
results["phase"] = results["phase"].unsqueeze(0)
|
762
|
-
|
763
|
-
return results
|
701
|
+
pass
|
@@ -1,6 +1,6 @@
|
|
1
|
-
lt_tensor/__init__.py,sha256=
|
1
|
+
lt_tensor/__init__.py,sha256=amg5g81XgBpy_yEwvZpFNx40cR1nWGSv4QfUuHsajKQ,441
|
2
2
|
lt_tensor/config_templates.py,sha256=F9UvL8paAjkSvio890kp8WznpYeI50pYnm9iqQroBxk,2797
|
3
|
-
lt_tensor/losses.py,sha256=
|
3
|
+
lt_tensor/losses.py,sha256=1Fb7urFxyrxXQBJg_COvCfg5s8UfhXb-MlWimSQvotI,8555
|
4
4
|
lt_tensor/lr_schedulers.py,sha256=6_vcfaPHrozfH3wvmNEdKSFYl6iTIijYoHL8vuG-45U,7651
|
5
5
|
lt_tensor/math_ops.py,sha256=ahX6Z1Mt3X-FhmwSZYZea5mB1B0S8GDuvKPfAm5e_FQ,2646
|
6
6
|
lt_tensor/misc_utils.py,sha256=stL6q3M7S2N4FBICFYbgYpdPDrJRlwmr24-iCXMRifM,28933
|
@@ -23,7 +23,7 @@ lt_tensor/model_zoo/activations/alias_free/act.py,sha256=1wxmab2kMD88L6wsQgf3t25
|
|
23
23
|
lt_tensor/model_zoo/activations/alias_free/filter.py,sha256=5TvXESv31toD5sePBe_OUJJfMXv6Ohwmx2YawjQL-pk,6004
|
24
24
|
lt_tensor/model_zoo/activations/alias_free/resample.py,sha256=3iM4fNr9fLNXXMyXvzW-MwkSjOZOrMZLfS80UHs6zk0,3386
|
25
25
|
lt_tensor/model_zoo/activations/snake/__init__.py,sha256=AtOAbJuMinxmKkppITGMzRbcbPQaALnl9mCtl1c3x0Q,4356
|
26
|
-
lt_tensor/model_zoo/audio_models/__init__.py,sha256=
|
26
|
+
lt_tensor/model_zoo/audio_models/__init__.py,sha256=naBBinbzrykogFd9O6hbP80bMjfX2W6_RG3c7KfkCn0,127
|
27
27
|
lt_tensor/model_zoo/audio_models/resblocks.py,sha256=u-foHxaFDUICjxSkpyHXljQYQG9zMxVYaOGqLR_nJ-k,7978
|
28
28
|
lt_tensor/model_zoo/audio_models/bemaganv2/__init__.py,sha256=dR849yDsnj7qwHOpmuRFVxPA7o55MNQalm7c2cPDDBw,7374
|
29
29
|
lt_tensor/model_zoo/audio_models/bigvgan/__init__.py,sha256=mkjtErhENXCN8XeQe_eZ9iYJMH5p1wy1m6G2wvgp2j0,8099
|
@@ -37,9 +37,9 @@ lt_tensor/model_zoo/losses/CQT/transforms.py,sha256=Vkid0J9dqLnlINfyyUlQf-qB3gOQ
|
|
37
37
|
lt_tensor/model_zoo/losses/CQT/utils.py,sha256=twGw6FVD7V5Ksfx_1BUEN3EP1tAS6wo-9LL3VnuHB8c,16751
|
38
38
|
lt_tensor/model_zoo/losses/_envelope_disc/__init__.py,sha256=EIPat8Q1sjxYBKxL3qdLENYtPkVs0RIuIblx2KrtkB0,4503
|
39
39
|
lt_tensor/processors/__init__.py,sha256=Pvxhh0KR65zLCgUd53_k5Z0y5JWWcO0ZBXFK9rv0o5w,109
|
40
|
-
lt_tensor/processors/audio.py,sha256=
|
41
|
-
lt_tensor-0.0.
|
42
|
-
lt_tensor-0.0.
|
43
|
-
lt_tensor-0.0.
|
44
|
-
lt_tensor-0.0.
|
45
|
-
lt_tensor-0.0.
|
40
|
+
lt_tensor/processors/audio.py,sha256=zjnGLDHcViId-mYcDce5IwnqDlJsN9-6fHUsx1VgX4Q,23480
|
41
|
+
lt_tensor-0.0.1a41.dist-info/licenses/LICENSE,sha256=TbiyJWLgNqqgqhfCnrGwFIxy7EqGNrIZZcKhHrefcuU,11354
|
42
|
+
lt_tensor-0.0.1a41.dist-info/METADATA,sha256=s9cZCc_4E3_m1VxI3jX7MWXiiHBpYtkauJTcXwRn-1c,1071
|
43
|
+
lt_tensor-0.0.1a41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
44
|
+
lt_tensor-0.0.1a41.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
45
|
+
lt_tensor-0.0.1a41.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|