lt-tensor 0.0.1a35__py3-none-any.whl → 0.0.1a36__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- lt_tensor/__init__.py +1 -1
- lt_tensor/model_zoo/audio_models/bigvgan/__init__.py +10 -10
- lt_tensor/model_zoo/audio_models/hifigan/__init__.py +6 -10
- lt_tensor/model_zoo/losses/CQT/__init__.py +0 -0
- lt_tensor/model_zoo/losses/CQT/transforms.py +336 -0
- lt_tensor/model_zoo/losses/CQT/utils.py +519 -0
- lt_tensor/model_zoo/losses/discriminators.py +232 -0
- lt_tensor/processors/audio.py +67 -57
- {lt_tensor-0.0.1a35.dist-info → lt_tensor-0.0.1a36.dist-info}/METADATA +1 -1
- {lt_tensor-0.0.1a35.dist-info → lt_tensor-0.0.1a36.dist-info}/RECORD +13 -10
- {lt_tensor-0.0.1a35.dist-info → lt_tensor-0.0.1a36.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a35.dist-info → lt_tensor-0.0.1a36.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a35.dist-info → lt_tensor-0.0.1a36.dist-info}/top_level.txt +0 -0
lt_tensor/model_zoo/losses/discriminators.py
CHANGED
@@ -726,3 +726,235 @@ class MultiResolutionDiscriminator(_MultiDiscriminatorT):
             y_d_gs.append(y_d_g)
             fmap_gs.append(fmap_g)
         return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
+class DiscriminatorCQT(ConvNets):
+    """Adapted from https://github.com/open-mmlab/Amphion/blob/main/models/vocoders/gan/discriminator/mssbcqtd.py under the MIT license."""
+
+    def __init__(
+        self,
+        hop_length: int,
+        n_octaves: int,
+        bins_per_octave: int,
+        sampling_rate: int,
+        cqtd_filters: int = 128,
+        cqtd_max_filters: int = 1024,
+        cqtd_filters_scale: int = 1,
+        cqtd_dilations: list = [1, 2, 4],
+        cqtd_in_channels: int = 1,
+        cqtd_out_channels: int = 1,
+        cqtd_normalize_volume: bool = False,
+    ):
+        super().__init__()
+        self.filters = cqtd_filters
+        self.max_filters = cqtd_max_filters
+        self.filters_scale = cqtd_filters_scale
+        self.kernel_size = (3, 9)
+        self.dilations = cqtd_dilations
+        self.stride = (1, 2)
+
+        self.fs = sampling_rate
+        self.in_channels = cqtd_in_channels
+        self.out_channels = cqtd_out_channels
+        self.hop_length = hop_length
+        self.n_octaves = n_octaves
+        self.bins_per_octave = bins_per_octave
+
+        # Lazy-load
+        from lt_tensor.model_zoo.losses.CQT.transforms import CQT2010v2
+
+        self.cqt_transform = CQT2010v2(
+            sr=self.fs * 2,
+            hop_length=self.hop_length,
+            n_bins=self.bins_per_octave * self.n_octaves,
+            bins_per_octave=self.bins_per_octave,
+            output_format="Complex",
+            pad_mode="constant",
+        )
+
+        self.conv_pres = nn.ModuleList()
+        for _ in range(self.n_octaves):
+            self.conv_pres.append(
+                nn.Conv2d(
+                    self.in_channels * 2,
+                    self.in_channels * 2,
+                    kernel_size=self.kernel_size,
+                    padding=self.get_2d_padding(self.kernel_size),
+                )
+            )
+
+        self.convs = nn.ModuleList()
+
+        self.convs.append(
+            nn.Conv2d(
+                self.in_channels * 2,
+                self.filters,
+                kernel_size=self.kernel_size,
+                padding=self.get_2d_padding(self.kernel_size),
+            )
+        )
+
+        in_chs = min(self.filters_scale * self.filters, self.max_filters)
+        for i, dilation in enumerate(self.dilations):
+            out_chs = min(
+                (self.filters_scale ** (i + 1)) * self.filters, self.max_filters
+            )
+            self.convs.append(
+                weight_norm(
+                    nn.Conv2d(
+                        in_chs,
+                        out_chs,
+                        kernel_size=self.kernel_size,
+                        stride=self.stride,
+                        dilation=(dilation, 1),
+                        padding=self.get_2d_padding(self.kernel_size, (dilation, 1)),
+                    )
+                )
+            )
+            in_chs = out_chs
+        out_chs = min(
+            (self.filters_scale ** (len(self.dilations) + 1)) * self.filters,
+            self.max_filters,
+        )
+        self.convs.append(
+            weight_norm(
+                nn.Conv2d(
+                    in_chs,
+                    out_chs,
+                    kernel_size=(self.kernel_size[0], self.kernel_size[0]),
+                    padding=self.get_2d_padding(
+                        (self.kernel_size[0], self.kernel_size[0])
+                    ),
+                )
+            )
+        )
+
+        self.conv_post = weight_norm(
+            nn.Conv2d(
+                out_chs,
+                self.out_channels,
+                kernel_size=(self.kernel_size[0], self.kernel_size[0]),
+                padding=self.get_2d_padding((self.kernel_size[0], self.kernel_size[0])),
+            )
+        )
+
+        self.activation = torch.nn.LeakyReLU(negative_slope=0.1)
+        self.resample = T.Resample(orig_freq=self.fs, new_freq=self.fs * 2)
+
+        self.cqtd_normalize_volume = cqtd_normalize_volume
+        if self.cqtd_normalize_volume:
+            print(
+                f"[INFO] cqtd_normalize_volume set to True. Will apply DC offset removal & peak volume normalization in CQTD!"
+            )
+
+    def get_2d_padding(
+        self,
+        kernel_size: Tuple[int, int],
+        dilation: Tuple[int, int] = (1, 1),
+    ):
+        return (
+            ((kernel_size[0] - 1) * dilation[0]) // 2,
+            ((kernel_size[1] - 1) * dilation[1]) // 2,
+        )
+
+    def forward(self, x: torch.tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
+        fmap = []
+
+        if self.cqtd_normalize_volume:
+            # Remove DC offset
+            x = x - x.mean(dim=-1, keepdims=True)
+            # Peak normalize the volume of input audio
+            x = 0.8 * x / (x.abs().max(dim=-1, keepdim=True)[0] + 1e-9)
+
+        x = self.resample(x)
+
+        z = self.cqt_transform(x)
+
+        z_amplitude = z[:, :, :, 0].unsqueeze(1)
+        z_phase = z[:, :, :, 1].unsqueeze(1)
+
+        z = torch.cat([z_amplitude, z_phase], dim=1)
+        z = torch.permute(z, (0, 1, 3, 2))  # [B, C, W, T] -> [B, C, T, W]
+
+        latent_z = []
+        for i in range(self.n_octaves):
+            latent_z.append(
+                self.conv_pres[i](
+                    z[
+                        :,
+                        :,
+                        :,
+                        i * self.bins_per_octave : (i + 1) * self.bins_per_octave,
+                    ]
+                )
+            )
+        latent_z = torch.cat(latent_z, dim=-1)
+
+        for i, l in enumerate(self.convs):
+            latent_z = l(latent_z)
+
+            latent_z = self.activation(latent_z)
+            fmap.append(latent_z)
+
+        latent_z = self.conv_post(latent_z)
+
+        return latent_z, fmap
+
+
+class MultiScaleSubbandCQTDiscriminator(_MultiDiscriminatorT):
+    def __init__(
+        self,
+        sampling_rate: int,
+        cqtd_filters: int = 128,
+        cqtd_max_filters: int = 1024,
+        cqtd_filters_scale: Number = 1,
+        cqtd_dilations: list = [1, 2, 4],
+        cqtd_hop_lengths: list = [512, 256, 256],
+        cqtd_n_octaves: list = [9, 9, 9],
+        cqtd_bins_per_octaves: list = [24, 36, 48],
+        cqtd_in_channels: int = 1,
+        cqtd_out_channels: int = 1,
+        cqtd_normalize_volume: bool = False,
+    ):
+        super().__init__()
+
+        self.discriminators = nn.ModuleList(
+            [
+                DiscriminatorCQT(
+                    hop_length=cqtd_hop_lengths[i],
+                    n_octaves=cqtd_n_octaves[i],
+                    bins_per_octave=cqtd_bins_per_octaves[i],
+                    sampling_rate=sampling_rate,
+                    cqtd_filters=cqtd_filters,
+                    cqtd_max_filters=cqtd_max_filters,
+                    cqtd_filters_scale=cqtd_filters_scale,
+                    cqtd_dilations=cqtd_dilations,
+                    cqtd_in_channels=cqtd_in_channels,
+                    cqtd_out_channels=cqtd_out_channels,
+                    cqtd_normalize_volume=cqtd_normalize_volume,
+                )
+                for i in range(len(cqtd_hop_lengths))
+            ]
+        )
+
+    def forward(self, y: torch.Tensor, y_hat: torch.Tensor) -> Tuple[
+        List[torch.Tensor],
+        List[torch.Tensor],
+        List[List[torch.Tensor]],
+        List[List[torch.Tensor]],
+    ]:
+
+        y_d_rs = []
+        y_d_gs = []
+        fmap_rs = []
+        fmap_gs = []
+
+        for disc in self.discriminators:
+            y_d_r, fmap_r = disc(y)
+            y_d_g, fmap_g = disc(y_hat)
+            y_d_rs.append(y_d_r)
+            fmap_rs.append(fmap_r)
+            y_d_gs.append(y_d_g)
+            fmap_gs.append(fmap_g)
+
+        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
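For context, the new MultiScaleSubbandCQTDiscriminator follows the same real/generated convention as the other multi-discriminators in this file: forward(y, y_hat) returns per-scale logits plus feature maps. A minimal usage sketch follows; the import path is taken from the module diffed above, while the [B, T] mono-waveform shape and the printed lengths are assumptions, not verified against the released wheel.

```python
import torch

# Import path taken from the module shown in the diff above; availability in an
# installed wheel is assumed, not verified.
from lt_tensor.model_zoo.losses.discriminators import MultiScaleSubbandCQTDiscriminator

disc = MultiScaleSubbandCQTDiscriminator(sampling_rate=24000)  # three CQT scales by default

real = torch.randn(2, 24000)  # assumed mono waveforms shaped [B, T]
fake = torch.randn(2, 24000)

# One logit tensor and one feature-map list per scale (3 with the default hop lengths).
y_d_rs, y_d_gs, fmap_rs, fmap_gs = disc(real, fake)
print(len(y_d_rs), len(y_d_gs), len(fmap_rs), len(fmap_gs))
```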
lt_tensor/processors/audio.py
CHANGED
@@ -73,7 +73,7 @@ class AudioProcessorConfig(ModelConfig):
     def post_process(self):
         self.n_stft = self.n_fft // 2 + 1
         # some functions needs this to be a non-zero or not None value.
-        self.f_min = max(self.f_min, (self.sample_rate / (self.n_fft - 1))
+        self.f_min = max(self.f_min, (self.sample_rate / (self.n_fft - 1)) * 2)
         self.default_f_max = min(
             default(self.f_max, self.sample_rate // 2), self.sample_rate // 2
         )
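The only substantive change in this hunk is the trailing * 2, which doubles the lower bound that post_process enforces on f_min. A quick numeric check with hypothetical config values (sample rate and FFT size chosen for illustration only, not the package defaults):

```python
sample_rate, n_fft = 24000, 1024            # hypothetical values, not package defaults
f_min_floor = (sample_rate / (n_fft - 1)) * 2
print(round(f_min_floor, 2))                # 46.92 -> any configured f_min below this is raised to it
```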
@@ -105,7 +105,6 @@ class AudioProcessor(Model):
             f_min=self.cfg.f_min,
             f_max=self.cfg.f_max,
             mel_scale=self.cfg.mel_scale,
-            onesided=self.cfg.onesided,
             normalized=self.cfg.normalized,
         )
         self._mel_rscale = torchaudio.transforms.InverseMelScale(
@@ -122,17 +121,15 @@ class AudioProcessor(Model):
             (torch.hann_window(self.cfg.win_length) if window is None else window),
         )
 
-
-
     def compute_mel(
         self,
         wave: Tensor,
-        raw_mel_only: bool = False,
         eps: float = 1e-5,
+        raw_mel_only: bool = False,
         *,
         _recall: bool = False,
     ) -> Tensor:
-        """Returns:
+        """Returns: (M, T) or (B, M, T) if batched"""
         try:
             mel_tensor = self._mel_spec(wave.to(self.device))  # [M, T]
             if not raw_mel_only:
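Because this hunk swaps the order of eps and raw_mel_only in compute_mel, call sites that passed them positionally would silently change meaning across a35 → a36; keyword arguments sidestep that. A hedged sketch, where the AudioProcessor instance ap is assumed to be constructed elsewhere and only the call shape is illustrated:

```python
from torch import Tensor

def mel_features(ap, wave: Tensor) -> Tensor:
    # 'ap' is assumed to be an already-constructed lt_tensor AudioProcessor.
    # Keywords keep the call valid regardless of the eps/raw_mel_only ordering.
    return ap.compute_mel(wave, raw_mel_only=False, eps=1e-5)
```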
@@ -203,13 +200,16 @@ class AudioProcessor(Model):
         rms_ = []
         for i in range(B):
             _t = _comp_rms_helper(i, audio, mel)
-            _r = librosa.feature.rms(**_t, **rms_kwargs)[
-                0
-            ]
+            _r = librosa.feature.rms(**_t, **rms_kwargs)[0]
             rms_.append(_r)
         return self.from_numpy_batch(rms_, default_device, default_dtype).squeeze()
 
-    def pitch_shift(
+    def pitch_shift(
+        self,
+        audio: torch.Tensor,
+        sample_rate: Optional[int] = None,
+        n_steps: float = 2.0,
+    ):
         """
         Shifts the pitch of an audio tensor by `n_steps` semitones.
 
@@ -225,21 +225,25 @@ class AudioProcessor(Model):
         src_dtype = audio.dtype
         audio = audio.squeeze()
         sample_rate = default(sample_rate, self.cfg.sample_rate)
+
         def _shift_one(wav):
             wav_np = self.to_numpy_safe(wav)
-            shifted_np = librosa.effects.pitch_shift(
+            shifted_np = librosa.effects.pitch_shift(
+                wav_np, sr=sample_rate, n_steps=n_steps
+            )
             return torch.from_numpy(shifted_np)
 
         if audio.ndim == 1:
             return _shift_one(audio).to(device=src_device, dtype=src_dtype)
-        return torch.stack([_shift_one(a) for a in audio]).to(
-
+        return torch.stack([_shift_one(a) for a in audio]).to(
+            device=src_device, dtype=src_dtype
+        )
 
     @staticmethod
-    def calc_pitch_fmin(sr:int, frame_length:float):
+    def calc_pitch_fmin(sr: int, frame_length: float):
         """For pitch f_min"""
         return (sr / (frame_length - 1)) * 2
-
+
     def compute_pitch(
         self,
         audio: Tensor,
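With the reflowed signature above, pitch_shift takes the waveform, an optional sample rate (falling back to the processor config when None), and a semitone count. A small usage sketch, again assuming an already-constructed AudioProcessor instance ap; values are illustrative:

```python
import torch

def shift_up_two_semitones(ap, audio: torch.Tensor) -> torch.Tensor:
    # 'ap' is assumed to be a constructed lt_tensor AudioProcessor; n_steps is in
    # semitones and sample_rate=None defers to ap.cfg.sample_rate.
    return ap.pitch_shift(audio, sample_rate=None, n_steps=2.0)
```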
@@ -261,8 +265,10 @@ class AudioProcessor(Model):
             B = 1
         sr = default(sr, self.cfg.sample_rate)
         frame_length = default(frame_length, self.cfg.n_fft)
-        fmin = max(
-
+        fmin = max(
+            default(fmin, self.cfg.f_min), self.calc_pitch_fmin(sr, frame_length)
+        )
+        fmax = min(max(default(fmax, self.cfg.default_f_max), fmin + 1), sr // 2)
         hop_length = default(hop_length, self.cfg.hop_length)
         center = default(center, self.cfg.center)
         yn_kwargs = dict(
@@ -361,7 +367,7 @@ class AudioProcessor(Model):
         The modes available for upsampling are: `nearest`, `linear` (3D-only),
         `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only)
         """
-
+        tensor = tensor.squeeze()
         if tensor.ndim == 2:  # [1, T]
             tensor = tensor.unsqueeze(1)  # [1, 1, T]
         elif tensor.ndim == 1:
@@ -384,7 +390,7 @@ class AudioProcessor(Model):
         hop_length: Optional[int] = None,
         win_length: Optional[int] = None,
         length: Optional[int] = None,
-        center:
+        center: bool = True,
         normalized: Optional[bool] = None,
         onesided: Optional[bool] = None,
         return_complex: bool = False,
@@ -403,7 +409,7 @@ class AudioProcessor(Model):
             hop_length=default(hop_length, self.cfg.hop_length),
             win_length=default(win_length, self.cfg.win_length),
             window=window,
-            center=
+            center=center,
             normalized=default(normalized, self.cfg.normalized),
             onesided=default(onesided, self.cfg.onesided),
             length=length,
@@ -421,44 +427,48 @@ class AudioProcessor(Model):
         self,
         wave: Tensor,
         length: Optional[int] = None,
-
-
+        center: bool = True,
+        n_fft: Optional[int] = None,
+        hop_length: Optional[int] = None,
+        win_length: Optional[int] = None,
+        normalized: Optional[bool] = None,
+        onesided: Optional[bool] = None,
+        return_complex: bool = False,
     ):
-            ...
-            raise e
+
+        if win_length is not None and win_length != self.cfg.win_length:
+            window = torch.hann_window(win_length, device=wave.device)
+        else:
+            window = self.window
+        spectrogram = torch.stft(
+            input=wave,
+            n_fft=default(n_fft, self.cfg.n_fft),
+            hop_length=default(hop_length, self.cfg.hop_length),
+            win_length=default(win_length, self.cfg.win_length),
+            window=window,
+            center=center,
+            pad_mode="reflect",
+            normalized=default(normalized, self.cfg.normalized),
+            onesided=default(onesided, self.cfg.onesided),
+            return_complex=True,
+        )
+        return torch.istft(
+            spectrogram
+            * torch.full(
+                spectrogram.size(),
+                fill_value=1,
+                device=spectrogram.device,
+            ),
+            n_fft=default(n_fft, self.cfg.n_fft),
+            hop_length=default(hop_length, self.cfg.hop_length),
+            win_length=default(win_length, self.cfg.win_length),
+            window=self.window,
+            length=length,
+            center=center,
+            normalized=default(normalized, self.cfg.normalized),
+            onesided=default(onesided, self.cfg.onesided),
+            return_complex=return_complex,
+        )
 
     def load_audio(
         self,
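The rewritten body above is essentially an STFT → inverse-STFT round trip: it builds (or reuses) a Hann window, takes a complex STFT of the input, scales it by an all-ones tensor, and inverts it back to a waveform of the requested length. A stand-alone sketch of the same round trip with plain torch calls; the parameter defaults here are illustrative and are not the package's configured values:

```python
import torch
from typing import Optional

def stft_istft_roundtrip(
    wave: torch.Tensor,
    n_fft: int = 1024,
    hop_length: int = 256,
    win_length: int = 1024,
    length: Optional[int] = None,
) -> torch.Tensor:
    # Forward STFT with a Hann window, then inverse STFT back to a waveform.
    window = torch.hann_window(win_length, device=wave.device)
    spec = torch.stft(
        wave, n_fft=n_fft, hop_length=hop_length, win_length=win_length,
        window=window, center=True, pad_mode="reflect",
        normalized=False, onesided=True, return_complex=True,
    )
    return torch.istft(
        spec, n_fft=n_fft, hop_length=hop_length, win_length=win_length,
        window=window, center=True, length=length,
        normalized=False, onesided=True,
    )
```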
{lt_tensor-0.0.1a35.dist-info → lt_tensor-0.0.1a36.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-lt_tensor/__init__.py,sha256=
+lt_tensor/__init__.py,sha256=nBbiGH1byHU0aTTKKorRj8MIEO2oEMBXl7kt5DOCatU,441
 lt_tensor/config_templates.py,sha256=F9UvL8paAjkSvio890kp8WznpYeI50pYnm9iqQroBxk,2797
 lt_tensor/losses.py,sha256=Heco_WyoC1HkNkcJEircOAzS9umusATHiNAG-FKGyzc,8918
 lt_tensor/lr_schedulers.py,sha256=6_vcfaPHrozfH3wvmNEdKSFYl6iTIijYoHL8vuG-45U,7651
@@ -25,16 +25,19 @@ lt_tensor/model_zoo/activations/alias_free/resample.py,sha256=3iM4fNr9fLNXXMyXvz
 lt_tensor/model_zoo/activations/snake/__init__.py,sha256=AtOAbJuMinxmKkppITGMzRbcbPQaALnl9mCtl1c3x0Q,4356
 lt_tensor/model_zoo/audio_models/__init__.py,sha256=WwiP9MekJreMOfKPWLl24VkRJIpLk6hhL8ch0aKgOss,103
 lt_tensor/model_zoo/audio_models/resblocks.py,sha256=u-foHxaFDUICjxSkpyHXljQYQG9zMxVYaOGqLR_nJ-k,7978
-lt_tensor/model_zoo/audio_models/bigvgan/__init__.py,sha256=
+lt_tensor/model_zoo/audio_models/bigvgan/__init__.py,sha256=4EZG8Non75dHoDCizMHbMTvPrKwdUlPYGHc7hkfT_nw,8526
 lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=PDuDYN1omD1RoAXcmxH3tEgfAuM3ZHAWzimD6ElMqEQ,9073
-lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=
+lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=ITSXHg3c0Um1P2HaPaXkQKI7meG5Ne60wTbyyYju3hY,6360
 lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=blICjLX_z_IFmR3_TCz_dJiSayLYGza9eG6fd9aKyvE,7448
 lt_tensor/model_zoo/losses/__init__.py,sha256=B9RAUxBiOZwooztnij1oLeRwZ7_MjnN3mPoum7saD6s,59
-lt_tensor/model_zoo/losses/discriminators.py,sha256=
+lt_tensor/model_zoo/losses/discriminators.py,sha256=o4cicNdOv0jH3ink7jTNeDqOnwmkmRtEj9E7IUIGnEI,31866
+lt_tensor/model_zoo/losses/CQT/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lt_tensor/model_zoo/losses/CQT/transforms.py,sha256=Vkid0J9dqLnlINfyyUlQf-qB3gOQAgU7W9j7xLOjDFw,13218
+lt_tensor/model_zoo/losses/CQT/utils.py,sha256=twGw6FVD7V5Ksfx_1BUEN3EP1tAS6wo-9LL3VnuHB8c,16751
 lt_tensor/processors/__init__.py,sha256=Pvxhh0KR65zLCgUd53_k5Z0y5JWWcO0ZBXFK9rv0o5w,109
-lt_tensor/processors/audio.py,sha256=
-lt_tensor-0.0.
-lt_tensor-0.0.
-lt_tensor-0.0.
-lt_tensor-0.0.
-lt_tensor-0.0.
+lt_tensor/processors/audio.py,sha256=3YzyEpMwh124rb1KMAly62qweeruF200BnM-vQIbzy0,18645
+lt_tensor-0.0.1a36.dist-info/licenses/LICENSE,sha256=TbiyJWLgNqqgqhfCnrGwFIxy7EqGNrIZZcKhHrefcuU,11354
+lt_tensor-0.0.1a36.dist-info/METADATA,sha256=mTmnoWn8EG48j_VOM3rr_8RLLgaxB5pWZE1tkPdFrac,1062
+lt_tensor-0.0.1a36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lt_tensor-0.0.1a36.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+lt_tensor-0.0.1a36.dist-info/RECORD,,