lt-tensor 0.0.1a13__py3-none-any.whl → 0.0.1a14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lt_tensor/misc_utils.py +1 -1
- lt_tensor/model_zoo/istft/generator.py +5 -65
- lt_tensor/model_zoo/istft/trainer.py +209 -66
- lt_tensor/model_zoo/residual.py +36 -1
- lt_tensor/processors/audio.py +5 -16
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a14.dist-info}/METADATA +2 -2
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a14.dist-info}/RECORD +10 -10
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a14.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a14.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a14.dist-info}/top_level.txt +0 -0
lt_tensor/misc_utils.py
CHANGED
@@ -240,7 +240,7 @@ class LogTensor:
|
|
240
240
|
stored_items: List[
|
241
241
|
Dict[str, Union[str, Number, Tensor, List[Union[Tensor, Number, str]]]]
|
242
242
|
] = []
|
243
|
-
max_stored_items: int =
|
243
|
+
max_stored_items: int = 8
|
244
244
|
|
245
245
|
def _setup_message(self, title: str, t: Union[Tensor, str, int]):
|
246
246
|
try:
|
@@ -1,52 +1,7 @@
|
|
1
|
-
__all__ = ["iSTFTGenerator"
|
2
|
-
import gc
|
3
|
-
import math
|
4
|
-
import itertools
|
1
|
+
__all__ = ["iSTFTGenerator"]
|
5
2
|
from lt_utils.common import *
|
6
3
|
from lt_tensor.torch_commons import *
|
7
|
-
from lt_tensor.
|
8
|
-
from lt_tensor.misc_utils import log_tensor
|
9
|
-
from lt_tensor.model_zoo.residual import ResBlock1D, ConvNets, get_weight_norm
|
10
|
-
from lt_utils.misc_utils import log_traceback
|
11
|
-
from lt_tensor.processors import AudioProcessor
|
12
|
-
from lt_utils.type_utils import is_dir, is_pathlike
|
13
|
-
from lt_tensor.misc_utils import set_seed, clear_cache
|
14
|
-
from lt_tensor.model_zoo.discriminator import MultiPeriodDiscriminator, MultiScaleDiscriminator
|
15
|
-
import torch.nn.functional as F
|
16
|
-
from lt_tensor.config_templates import updateDict, ModelConfig
|
17
|
-
|
18
|
-
|
19
|
-
class ResBlocks(ConvNets):
|
20
|
-
def __init__(
|
21
|
-
self,
|
22
|
-
channels: int,
|
23
|
-
resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
|
24
|
-
resblock_dilation_sizes: List[Union[int, List[int]]] = [
|
25
|
-
[1, 3, 5],
|
26
|
-
[1, 3, 5],
|
27
|
-
[1, 3, 5],
|
28
|
-
],
|
29
|
-
activation: nn.Module = nn.LeakyReLU(0.1),
|
30
|
-
):
|
31
|
-
super().__init__()
|
32
|
-
self.num_kernels = len(resblock_kernel_sizes)
|
33
|
-
self.rb = nn.ModuleList()
|
34
|
-
self.activation = activation
|
35
|
-
|
36
|
-
for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
|
37
|
-
self.rb.append(ResBlock1D(channels, k, j, activation))
|
38
|
-
|
39
|
-
self.rb.apply(self.init_weights)
|
40
|
-
|
41
|
-
def forward(self, x: torch.Tensor):
|
42
|
-
xs = None
|
43
|
-
for i, block in enumerate(self.rb):
|
44
|
-
if i == 0:
|
45
|
-
xs = block(x)
|
46
|
-
else:
|
47
|
-
xs += block(x)
|
48
|
-
x = xs / self.num_kernels
|
49
|
-
return self.activation(x)
|
4
|
+
from lt_tensor.model_zoo.residual import ConvNets, ResBlocks
|
50
5
|
|
51
6
|
|
52
7
|
class iSTFTGenerator(ConvNets):
|
@@ -91,19 +46,6 @@ class iSTFTGenerator(ConvNets):
|
|
91
46
|
self.conv_post.apply(self.init_weights)
|
92
47
|
self.reflection_pad = nn.ReflectionPad1d((1, 0))
|
93
48
|
|
94
|
-
self.phase = nn.Sequential(
|
95
|
-
nn.LeakyReLU(0.2),
|
96
|
-
nn.Conv1d(self.post_n_fft, self.post_n_fft, kernel_size=3, padding=1),
|
97
|
-
nn.LeakyReLU(0.2),
|
98
|
-
nn.Conv1d(self.post_n_fft, self.post_n_fft, kernel_size=3, padding=1),
|
99
|
-
)
|
100
|
-
self.spec = nn.Sequential(
|
101
|
-
nn.LeakyReLU(0.2),
|
102
|
-
nn.Conv1d(self.post_n_fft, self.post_n_fft, kernel_size=3, padding=1),
|
103
|
-
nn.LeakyReLU(0.2),
|
104
|
-
nn.Conv1d(self.post_n_fft, self.post_n_fft, kernel_size=3, padding=1),
|
105
|
-
)
|
106
|
-
|
107
49
|
def _make_blocks(
|
108
50
|
self,
|
109
51
|
state: Tuple[int, int, int],
|
@@ -142,9 +84,7 @@ class iSTFTGenerator(ConvNets):
|
|
142
84
|
x = block["up"](x)
|
143
85
|
x = block["residual"](x)
|
144
86
|
|
145
|
-
x = self.reflection_pad(x)
|
146
|
-
|
147
|
-
|
148
|
-
phase = torch.sin(self.phase(x[:, self.post_n_fft :, :]))
|
149
|
-
|
87
|
+
x = self.conv_post(self.activation(self.reflection_pad(x)))
|
88
|
+
spec = torch.exp(x[:, : self.post_n_fft, :])
|
89
|
+
phase = torch.sin(x[:, self.post_n_fft :, :])
|
150
90
|
return spec, phase
|
@@ -1,20 +1,20 @@
|
|
1
|
-
__all__ = ["AudioSettings", "
|
1
|
+
__all__ = ["AudioSettings", "AudioDecoderTrainer", "AudioGeneratorOnlyTrainer"]
|
2
2
|
import gc
|
3
|
-
import math
|
4
3
|
import itertools
|
5
4
|
from lt_utils.common import *
|
6
5
|
import torch.nn.functional as F
|
7
6
|
from lt_tensor.torch_commons import *
|
8
7
|
from lt_tensor.model_base import Model
|
9
|
-
from lt_tensor.misc_utils import log_tensor
|
10
8
|
from lt_utils.misc_utils import log_traceback
|
11
9
|
from lt_tensor.processors import AudioProcessor
|
12
10
|
from lt_tensor.misc_utils import set_seed, clear_cache
|
13
|
-
from lt_utils.type_utils import is_dir, is_pathlike
|
14
|
-
from lt_tensor.config_templates import
|
11
|
+
from lt_utils.type_utils import is_dir, is_pathlike
|
12
|
+
from lt_tensor.config_templates import ModelConfig
|
15
13
|
from lt_tensor.model_zoo.istft.generator import iSTFTGenerator
|
16
|
-
from lt_tensor.model_zoo.
|
17
|
-
|
14
|
+
from lt_tensor.model_zoo.discriminator import (
|
15
|
+
MultiPeriodDiscriminator,
|
16
|
+
MultiScaleDiscriminator,
|
17
|
+
)
|
18
18
|
|
19
19
|
|
20
20
|
def feature_loss(fmap_r, fmap_g):
|
@@ -29,7 +29,6 @@ def generator_adv_loss(disc_outputs):
|
|
29
29
|
loss = 0
|
30
30
|
for dg in disc_outputs:
|
31
31
|
l = torch.mean((1 - dg) ** 2)
|
32
|
-
|
33
32
|
loss += l
|
34
33
|
return loss
|
35
34
|
|
@@ -44,29 +43,6 @@ def discriminator_loss(disc_real_outputs, disc_generated_outputs):
|
|
44
43
|
return loss
|
45
44
|
|
46
45
|
|
47
|
-
"""def feature_loss(fmap_r, fmap_g):
|
48
|
-
loss = 0
|
49
|
-
for dr, dg in zip(fmap_r, fmap_g):
|
50
|
-
for rl, gl in zip(dr, dg):
|
51
|
-
loss += torch.mean(torch.abs(rl - gl))
|
52
|
-
return loss * 2
|
53
|
-
|
54
|
-
|
55
|
-
def generator_adv_loss(fake_preds):
|
56
|
-
loss = 0.0
|
57
|
-
for f in fake_preds:
|
58
|
-
loss += torch.mean((f - 1.0) ** 2)
|
59
|
-
return loss
|
60
|
-
|
61
|
-
|
62
|
-
def discriminator_loss(real_preds, fake_preds):
|
63
|
-
loss = 0.0
|
64
|
-
for r, f in zip(real_preds, fake_preds):
|
65
|
-
loss += torch.mean((r - 1.0) ** 2) + torch.mean(f**2)
|
66
|
-
return loss
|
67
|
-
"""
|
68
|
-
|
69
|
-
|
70
46
|
class AudioSettings(ModelConfig):
|
71
47
|
def __init__(
|
72
48
|
self,
|
@@ -107,12 +83,12 @@ class AudioSettings(ModelConfig):
|
|
107
83
|
self.scheduler_template = scheduler_template
|
108
84
|
|
109
85
|
|
110
|
-
class
|
86
|
+
class AudioDecoderTrainer(Model):
|
111
87
|
def __init__(
|
112
88
|
self,
|
113
89
|
audio_processor: AudioProcessor,
|
114
90
|
settings: Optional[AudioSettings] = None,
|
115
|
-
generator: Optional[Union[Model, "iSTFTGenerator"]] = None, # non
|
91
|
+
generator: Optional[Union[Model, "iSTFTGenerator"]] = None, # non initialized!
|
116
92
|
):
|
117
93
|
super().__init__()
|
118
94
|
if settings is None:
|
@@ -324,7 +300,7 @@ class AudioDecoder(Model):
|
|
324
300
|
else:
|
325
301
|
disc_out = self._discriminator_step(**disc_kwargs)
|
326
302
|
|
327
|
-
|
303
|
+
generator_kwargs = dict(
|
328
304
|
mels=mels,
|
329
305
|
real_audio=real_audio,
|
330
306
|
fake_audio=fake_audio,
|
@@ -339,8 +315,8 @@ class AudioDecoder(Model):
|
|
339
315
|
|
340
316
|
if is_generator_frozen:
|
341
317
|
with torch.no_grad():
|
342
|
-
return self._generator_step(**
|
343
|
-
return self._generator_step(**
|
318
|
+
return self._generator_step(**generator_kwargs)
|
319
|
+
return self._generator_step(**generator_kwargs)
|
344
320
|
|
345
321
|
def _discriminator_step(
|
346
322
|
self,
|
@@ -349,7 +325,8 @@ class AudioDecoder(Model):
|
|
349
325
|
am_i_frozen: bool = False,
|
350
326
|
):
|
351
327
|
# ========== Discriminator Forward Pass ==========
|
352
|
-
|
328
|
+
if not am_i_frozen:
|
329
|
+
self.d_optim.zero_grad()
|
353
330
|
# MPD
|
354
331
|
real_mpd_preds, _ = self.mpd(real_audio)
|
355
332
|
fake_mpd_preds, _ = self.mpd(fake_audio)
|
@@ -362,7 +339,6 @@ class AudioDecoder(Model):
|
|
362
339
|
loss_d = loss_d_mpd + loss_d_msd
|
363
340
|
|
364
341
|
if not am_i_frozen:
|
365
|
-
self.d_optim.zero_grad()
|
366
342
|
loss_d.backward()
|
367
343
|
self.d_optim.step()
|
368
344
|
|
@@ -384,6 +360,8 @@ class AudioDecoder(Model):
|
|
384
360
|
am_i_frozen: bool = False,
|
385
361
|
):
|
386
362
|
# ========== Generator Loss ==========
|
363
|
+
if not am_i_frozen:
|
364
|
+
self.g_optim.zero_grad()
|
387
365
|
real_mpd_feats = self.mpd(real_audio)[1]
|
388
366
|
real_msd_feats = self.msd(real_audio)[1]
|
389
367
|
|
@@ -403,11 +381,12 @@ class AudioDecoder(Model):
|
|
403
381
|
|
404
382
|
loss_adv = (loss_adv_mpd + loss_adv_msd) * adv_scale
|
405
383
|
|
406
|
-
loss_g = loss_adv + loss_fm + loss_stft
|
384
|
+
loss_g = loss_adv + loss_fm + loss_stft + loss_mel
|
407
385
|
if not am_i_frozen:
|
408
|
-
self.g_optim.zero_grad()
|
409
386
|
loss_g.backward()
|
410
387
|
self.g_optim.step()
|
388
|
+
|
389
|
+
lr_g, lr_d = self.get_lr()
|
411
390
|
return {
|
412
391
|
"loss_g": loss_g.item(),
|
413
392
|
"loss_d": loss_d,
|
@@ -415,8 +394,8 @@ class AudioDecoder(Model):
|
|
415
394
|
"loss_fm": loss_fm.item(),
|
416
395
|
"loss_stft": loss_stft.item(),
|
417
396
|
"loss_mel": loss_mel.item(),
|
418
|
-
"lr_g":
|
419
|
-
"lr_d":
|
397
|
+
"lr_g": lr_g,
|
398
|
+
"lr_d": lr_d,
|
420
399
|
}
|
421
400
|
|
422
401
|
def step_scheduler(
|
@@ -442,34 +421,198 @@ class AudioDecoder(Model):
|
|
442
421
|
self.g_scheduler = self.settings.scheduler_template(self.g_optim)
|
443
422
|
|
444
423
|
|
445
|
-
class
|
424
|
+
class AudioGeneratorOnlyTrainer(Model):
|
446
425
|
def __init__(
|
447
426
|
self,
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
[1, 3, 5],
|
452
|
-
[1, 3, 5],
|
453
|
-
[1, 3, 5],
|
454
|
-
],
|
455
|
-
activation: nn.Module = nn.LeakyReLU(0.1),
|
427
|
+
audio_processor: AudioProcessor,
|
428
|
+
settings: Optional[AudioSettings] = None,
|
429
|
+
generator: Optional[Union[Model, "iSTFTGenerator"]] = None, # non initialized!
|
456
430
|
):
|
457
431
|
super().__init__()
|
458
|
-
|
459
|
-
|
460
|
-
|
432
|
+
if settings is None:
|
433
|
+
self.settings = AudioSettings()
|
434
|
+
elif isinstance(settings, dict):
|
435
|
+
self.settings = AudioSettings(**settings)
|
436
|
+
elif isinstance(settings, AudioSettings):
|
437
|
+
self.settings = settings
|
438
|
+
else:
|
439
|
+
raise ValueError(
|
440
|
+
"Cannot initialize the waveDecoder with the given settings. "
|
441
|
+
"Use either a dictionary, or the class WaveSettings to setup the settings. "
|
442
|
+
"Alternatively, leave it None to use the default values."
|
443
|
+
)
|
444
|
+
if self.settings.seed is not None:
|
445
|
+
set_seed(self.settings.seed)
|
446
|
+
if generator is None:
|
447
|
+
generator = iSTFTGenerator
|
448
|
+
self.generator: iSTFTGenerator = generator(
|
449
|
+
in_channels=self.settings.in_channels,
|
450
|
+
upsample_rates=self.settings.upsample_rates,
|
451
|
+
upsample_kernel_sizes=self.settings.upsample_kernel_sizes,
|
452
|
+
upsample_initial_channel=self.settings.upsample_initial_channel,
|
453
|
+
resblock_kernel_sizes=self.settings.resblock_kernel_sizes,
|
454
|
+
resblock_dilation_sizes=self.settings.resblock_dilation_sizes,
|
455
|
+
n_fft=self.settings.n_fft,
|
456
|
+
activation=self.settings.activation,
|
457
|
+
)
|
458
|
+
self.generator.eval()
|
459
|
+
self.gen_training = False
|
460
|
+
self.audio_processor = audio_processor
|
461
|
+
|
462
|
+
def setup_training_mode(self, *args, **kwargs):
|
463
|
+
self.finish_training_setup()
|
464
|
+
self.update_schedulers_and_optimizer()
|
465
|
+
self.gen_training = True
|
466
|
+
return True
|
467
|
+
|
468
|
+
def update_schedulers_and_optimizer(self):
|
469
|
+
self.g_optim = optim.AdamW(
|
470
|
+
self.generator.parameters(),
|
471
|
+
lr=self.settings.lr,
|
472
|
+
betas=self.settings.adamw_betas,
|
473
|
+
)
|
474
|
+
self.g_scheduler = self.settings.scheduler_template(self.g_optim)
|
475
|
+
|
476
|
+
def set_lr(self, new_lr: float = 1e-4):
|
477
|
+
if self.g_optim is not None:
|
478
|
+
for groups in self.g_optim.param_groups:
|
479
|
+
groups["lr"] = new_lr
|
480
|
+
return self.get_lr()
|
481
|
+
|
482
|
+
def get_lr(self) -> Tuple[float, float]:
|
483
|
+
if self.g_optim is not None:
|
484
|
+
return self.g_optim.param_groups[0]["lr"]
|
485
|
+
return float("nan")
|
486
|
+
|
487
|
+
def save_weights(self, path, replace=True):
|
488
|
+
is_pathlike(path, check_if_empty=True, validate=True)
|
489
|
+
if str(path).endswith(".pt"):
|
490
|
+
path = Path(path).parent
|
491
|
+
else:
|
492
|
+
path = Path(path)
|
493
|
+
self.generator.save_weights(Path(path, "generator.pt"), replace)
|
494
|
+
|
495
|
+
def load_weights(
|
496
|
+
self,
|
497
|
+
path,
|
498
|
+
raise_if_not_exists=False,
|
499
|
+
strict=True,
|
500
|
+
assign=False,
|
501
|
+
weights_only=False,
|
502
|
+
mmap=None,
|
503
|
+
**torch_loader_kwargs
|
504
|
+
):
|
505
|
+
is_pathlike(path, check_if_empty=True, validate=True)
|
506
|
+
if str(path).endswith(".pt"):
|
507
|
+
path = Path(path)
|
508
|
+
else:
|
509
|
+
path = Path(path, "generator.pt")
|
510
|
+
|
511
|
+
self.generator.load_weights(
|
512
|
+
path,
|
513
|
+
raise_if_not_exists,
|
514
|
+
strict,
|
515
|
+
assign,
|
516
|
+
weights_only,
|
517
|
+
mmap,
|
518
|
+
**torch_loader_kwargs,
|
519
|
+
)
|
520
|
+
|
521
|
+
def finish_training_setup(self):
|
522
|
+
gc.collect()
|
523
|
+
clear_cache()
|
524
|
+
self.eval()
|
525
|
+
self.gen_training = False
|
526
|
+
|
527
|
+
def forward(self, mel_spec: Tensor) -> Tuple[Tensor, Tensor]:
|
528
|
+
"""Returns the generated spec and phase"""
|
529
|
+
return self.generator.forward(mel_spec)
|
530
|
+
|
531
|
+
def inference(
|
532
|
+
self,
|
533
|
+
mel_spec: Tensor,
|
534
|
+
return_dict: bool = False,
|
535
|
+
) -> Union[Dict[str, Tensor], Tensor]:
|
536
|
+
spec, phase = super().inference(mel_spec)
|
537
|
+
wave = self.audio_processor.inverse_transform(
|
538
|
+
spec,
|
539
|
+
phase,
|
540
|
+
self.settings.n_fft,
|
541
|
+
hop_length=4,
|
542
|
+
win_length=self.settings.n_fft,
|
543
|
+
)
|
544
|
+
if not return_dict:
|
545
|
+
return wave[:, : wave.shape[-1] - 256]
|
546
|
+
return {
|
547
|
+
"wave": wave[:, : wave.shape[-1] - 256],
|
548
|
+
"spec": spec,
|
549
|
+
"phase": phase,
|
550
|
+
}
|
551
|
+
|
552
|
+
def set_device(self, device: str):
|
553
|
+
self.to(device=device)
|
554
|
+
self.generator.to(device=device)
|
555
|
+
self.audio_processor.to(device=device)
|
556
|
+
self.msd.to(device=device)
|
557
|
+
self.mpd.to(device=device)
|
558
|
+
|
559
|
+
def train_step(
|
560
|
+
self,
|
561
|
+
mels: Tensor,
|
562
|
+
real_audio: Tensor,
|
563
|
+
stft_scale: float = 1.0,
|
564
|
+
mel_scale: float = 1.0,
|
565
|
+
ext_loss: Optional[Callable[[Tensor, Tensor], Tensor]] = None,
|
566
|
+
):
|
567
|
+
if not self.gen_training:
|
568
|
+
self.setup_training_mode()
|
569
|
+
|
570
|
+
self.g_optim.zero_grad()
|
571
|
+
spec, phase = self.generator.train_step(mels)
|
461
572
|
|
462
|
-
|
463
|
-
|
573
|
+
real_audio = real_audio.squeeze(1)
|
574
|
+
with torch.no_grad():
|
575
|
+
fake_audio = self.audio_processor.inverse_transform(
|
576
|
+
spec,
|
577
|
+
phase,
|
578
|
+
self.settings.n_fft,
|
579
|
+
hop_length=4,
|
580
|
+
win_length=self.settings.n_fft,
|
581
|
+
)[:, : real_audio.shape[-1]]
|
582
|
+
loss_stft = self.audio_processor.stft_loss(fake_audio, real_audio) * stft_scale
|
583
|
+
loss_mel = (
|
584
|
+
F.huber_loss(self.audio_processor.compute_mel(fake_audio), mels) * mel_scale
|
585
|
+
)
|
586
|
+
loss_g.backward()
|
587
|
+
loss_g = loss_stft + loss_mel
|
588
|
+
loss_ext = 0
|
589
|
+
|
590
|
+
if ext_loss is not None:
|
591
|
+
l_ext = ext_loss(fake_audio, real_audio)
|
592
|
+
loss_g = loss_g + l_ext
|
593
|
+
loss_ext = l_ext.item()
|
464
594
|
|
465
|
-
self.
|
595
|
+
self.g_optim.step()
|
596
|
+
return {
|
597
|
+
"loss": loss_g.item(),
|
598
|
+
"loss_stft": loss_stft.item(),
|
599
|
+
"loss_mel": loss_mel.item(),
|
600
|
+
"loss_ext": loss_ext,
|
601
|
+
"lr": self.get_lr(),
|
602
|
+
}
|
603
|
+
|
604
|
+
def step_scheduler(self):
|
466
605
|
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
606
|
+
if self.g_scheduler is not None:
|
607
|
+
self.g_scheduler.step()
|
608
|
+
|
609
|
+
def reset_schedulers(self, lr: Optional[float] = None):
|
610
|
+
"""
|
611
|
+
In case you have adopted another strategy, with this function,
|
612
|
+
it is possible restart the scheduler and set the lr to another value.
|
613
|
+
"""
|
614
|
+
if lr is not None:
|
615
|
+
self.set_lr(lr)
|
616
|
+
if self.g_optim is not None:
|
617
|
+
self.g_scheduler = None
|
618
|
+
self.g_scheduler = self.settings.scheduler_template(self.g_optim)
|
lt_tensor/model_zoo/residual.py
CHANGED
@@ -5,13 +5,14 @@ __all__ = [
|
|
5
5
|
"ResBlock2D",
|
6
6
|
"ResBlock1DShuffled",
|
7
7
|
"AdaResBlock1D",
|
8
|
+
"ResBlocks",
|
8
9
|
]
|
9
10
|
import math
|
10
11
|
from lt_utils.common import *
|
12
|
+
import torch.nn.functional as F
|
11
13
|
from lt_tensor.torch_commons import *
|
12
14
|
from lt_tensor.model_base import Model
|
13
15
|
from lt_tensor.misc_utils import log_tensor
|
14
|
-
import torch.nn.functional as F
|
15
16
|
from lt_tensor.model_zoo.fusion import AdaFusion1D, AdaIN1D
|
16
17
|
|
17
18
|
|
@@ -44,6 +45,40 @@ class ConvNets(Model):
|
|
44
45
|
m.weight.data.normal_(mean, std)
|
45
46
|
|
46
47
|
|
48
|
+
class ResBlocks(ConvNets):
|
49
|
+
def __init__(
|
50
|
+
self,
|
51
|
+
channels: int,
|
52
|
+
resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
|
53
|
+
resblock_dilation_sizes: List[Union[int, List[int]]] = [
|
54
|
+
[1, 3, 5],
|
55
|
+
[1, 3, 5],
|
56
|
+
[1, 3, 5],
|
57
|
+
],
|
58
|
+
activation: nn.Module = nn.LeakyReLU(0.1),
|
59
|
+
):
|
60
|
+
super().__init__()
|
61
|
+
self.num_kernels = len(resblock_kernel_sizes)
|
62
|
+
self.rb = nn.ModuleList()
|
63
|
+
self.activation = activation
|
64
|
+
|
65
|
+
for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
|
66
|
+
self.rb.append(ResBlock1D(channels, k, j, activation))
|
67
|
+
|
68
|
+
self.rb.apply(self.init_weights)
|
69
|
+
|
70
|
+
def forward(self, x: torch.Tensor):
|
71
|
+
xs = None
|
72
|
+
for i, block in enumerate(self.rb):
|
73
|
+
if i == 0:
|
74
|
+
xs = block(x)
|
75
|
+
else:
|
76
|
+
xs += block(x)
|
77
|
+
x = xs / self.num_kernels
|
78
|
+
return x
|
79
|
+
|
80
|
+
|
81
|
+
|
47
82
|
class ResBlock1D(ConvNets):
|
48
83
|
def __init__(
|
49
84
|
self,
|
lt_tensor/processors/audio.py
CHANGED
@@ -106,20 +106,13 @@ class AudioProcessor(Model):
|
|
106
106
|
return tensor.detach().to(DEFAULT_DEVICE).numpy(force=True)
|
107
107
|
|
108
108
|
def compute_rms(
|
109
|
-
self,
|
109
|
+
self,
|
110
|
+
audio: Union[Tensor, np.ndarray],
|
111
|
+
mel: Optional[Tensor] = None,
|
110
112
|
):
|
111
113
|
default_dtype = audio.dtype
|
112
114
|
default_device = audio.device
|
113
|
-
|
114
|
-
f"Audio should have 1D for unbatched and 2D for batched"
|
115
|
-
", received instead a: {audio.ndim}D"
|
116
|
-
)
|
117
|
-
if mel is not None:
|
118
|
-
assert mel.ndim in [2, 3], (
|
119
|
-
"Mel spectogram should have 2D dim for non-batched or 3D dim for both non-batched or batched"
|
120
|
-
f". Received instead {mel.ndim}D."
|
121
|
-
)
|
122
|
-
if audio.ndim == 2:
|
115
|
+
if audio.ndim > 1:
|
123
116
|
B = audio.shape[0]
|
124
117
|
else:
|
125
118
|
B = 1
|
@@ -163,11 +156,7 @@ class AudioProcessor(Model):
|
|
163
156
|
):
|
164
157
|
default_dtype = audio.dtype
|
165
158
|
default_device = audio.device
|
166
|
-
|
167
|
-
f"Audio should have 1D for unbatched and 2D for batched"
|
168
|
-
", received instead a: {audio.ndim}D"
|
169
|
-
)
|
170
|
-
if audio.ndim == 2:
|
159
|
+
if audio.ndim > 1:
|
171
160
|
B = audio.shape[0]
|
172
161
|
else:
|
173
162
|
B = 1
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: lt-tensor
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.1a14
|
4
4
|
Summary: General utilities for PyTorch and others. Built for general use.
|
5
5
|
Home-page: https://github.com/gr1336/lt-tensor/
|
6
6
|
Author: gr1336
|
@@ -17,7 +17,7 @@ Requires-Dist: numpy>=1.26.4
|
|
17
17
|
Requires-Dist: tokenizers
|
18
18
|
Requires-Dist: pyyaml>=6.0.0
|
19
19
|
Requires-Dist: numba>0.60.0
|
20
|
-
Requires-Dist: lt-utils
|
20
|
+
Requires-Dist: lt-utils==0.0.2a2
|
21
21
|
Requires-Dist: librosa==0.11.*
|
22
22
|
Requires-Dist: einops
|
23
23
|
Requires-Dist: plotly
|
@@ -3,7 +3,7 @@ lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss
|
|
3
3
|
lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
|
4
4
|
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
5
5
|
lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
|
6
|
-
lt_tensor/misc_utils.py,sha256=
|
6
|
+
lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
|
7
7
|
lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
|
8
8
|
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
9
9
|
lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
|
@@ -17,16 +17,16 @@ lt_tensor/model_zoo/discriminator.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lD
|
|
17
17
|
lt_tensor/model_zoo/features.py,sha256=CTFMidzza31pqQjwPfp_g0BNVfuQ8Dlo5JnxpYpKgag,13144
|
18
18
|
lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
|
19
19
|
lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
|
20
|
-
lt_tensor/model_zoo/residual.py,sha256=
|
20
|
+
lt_tensor/model_zoo/residual.py,sha256=3tc2fJaz6SxtKYAsxndahhwIxlN6oLk5tcdIXtUKaQc,7357
|
21
21
|
lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
|
22
22
|
lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
|
23
|
-
lt_tensor/model_zoo/istft/generator.py,sha256=
|
23
|
+
lt_tensor/model_zoo/istft/generator.py,sha256=wWHUfLFIItN-tB3pWkc1r9aTWpHYBFg7UfvLN4_cD78,3179
|
24
24
|
lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
|
25
|
-
lt_tensor/model_zoo/istft/trainer.py,sha256=
|
25
|
+
lt_tensor/model_zoo/istft/trainer.py,sha256=KZXsAptOJeLYlr6t-DPX1qxgN526-2EBKoQQlcsHp8Y,21054
|
26
26
|
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
27
|
-
lt_tensor/processors/audio.py,sha256=
|
28
|
-
lt_tensor-0.0.
|
29
|
-
lt_tensor-0.0.
|
30
|
-
lt_tensor-0.0.
|
31
|
-
lt_tensor-0.0.
|
32
|
-
lt_tensor-0.0.
|
27
|
+
lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
|
28
|
+
lt_tensor-0.0.1a14.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
29
|
+
lt_tensor-0.0.1a14.dist-info/METADATA,sha256=mxwJTAo51GfGEEW87lT-Tp1AHtoRvuKCmcPxAyqJxLQ,1033
|
30
|
+
lt_tensor-0.0.1a14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
31
|
+
lt_tensor-0.0.1a14.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
32
|
+
lt_tensor-0.0.1a14.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|