lt-tensor 0.0.1a11__py3-none-any.whl → 0.0.1a13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lt_tensor/__init__.py +2 -0
- lt_tensor/config_templates.py +97 -0
- lt_tensor/datasets/audio.py +149 -40
- lt_tensor/losses.py +1 -1
- lt_tensor/math_ops.py +1 -1
- lt_tensor/misc_utils.py +108 -2
- lt_tensor/model_base.py +157 -203
- lt_tensor/model_zoo/__init__.py +18 -9
- lt_tensor/model_zoo/{bsc.py → basic.py} +124 -8
- lt_tensor/model_zoo/{disc.py → discriminator.py} +1 -1
- lt_tensor/model_zoo/features.py +416 -0
- lt_tensor/model_zoo/fusion.py +164 -0
- lt_tensor/model_zoo/istft/__init__.py +5 -0
- lt_tensor/model_zoo/{istft.py → istft/generator.py} +67 -25
- lt_tensor/model_zoo/istft/sg.py +142 -0
- lt_tensor/model_zoo/istft/trainer.py +475 -0
- lt_tensor/model_zoo/{pos.py → pos_encoder.py} +2 -2
- lt_tensor/model_zoo/residual.py +217 -0
- lt_tensor/model_zoo/{tfrms.py → transformer.py} +4 -4
- lt_tensor/noise_tools.py +2 -2
- lt_tensor/processors/audio.py +299 -90
- lt_tensor/transform.py +32 -48
- {lt_tensor-0.0.1a11.dist-info → lt_tensor-0.0.1a13.dist-info}/METADATA +8 -5
- lt_tensor-0.0.1a13.dist-info/RECORD +32 -0
- lt_tensor/model_zoo/fsn.py +0 -67
- lt_tensor/model_zoo/gns.py +0 -185
- lt_tensor/model_zoo/rsd.py +0 -237
- lt_tensor-0.0.1a11.dist-info/RECORD +0 -28
- {lt_tensor-0.0.1a11.dist-info → lt_tensor-0.0.1a13.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a11.dist-info → lt_tensor-0.0.1a13.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a11.dist-info → lt_tensor-0.0.1a13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
lt_tensor/__init__.py,sha256=XxNCGcVL-haJyMpifr-GRaamo32R6jmqe3iOuS4ecfs,469
|
2
|
+
lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss,3515
|
3
|
+
lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
|
4
|
+
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
5
|
+
lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
|
6
|
+
lt_tensor/misc_utils.py,sha256=UNba6UEsAv1oZ60IAaKBNGbhXK2WPxRI9E4QcjP-_w0,28755
|
7
|
+
lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
|
8
|
+
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
9
|
+
lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
|
10
|
+
lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
|
11
|
+
lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
|
12
|
+
lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
lt_tensor/datasets/audio.py,sha256=j73oRyXt-AK4tWWYWjH-3c5RYouQBgDSCTuWHmyG8kQ,7450
|
14
|
+
lt_tensor/model_zoo/__init__.py,sha256=RzG7fltZLyiIU_Za4pgfBPli5uPITiJkq4sTCd4uA_0,319
|
15
|
+
lt_tensor/model_zoo/basic.py,sha256=_26H_jJk5Ld3DZiNpIhGosGfMxoFDZrI8bpDAYUOYno,10660
|
16
|
+
lt_tensor/model_zoo/discriminator.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lDGHsjJE8,5849
|
17
|
+
lt_tensor/model_zoo/features.py,sha256=CTFMidzza31pqQjwPfp_g0BNVfuQ8Dlo5JnxpYpKgag,13144
|
18
|
+
lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
|
19
|
+
lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
|
20
|
+
lt_tensor/model_zoo/residual.py,sha256=knVLxzrLUjNQ6vdBESTZOk3r86ldi5PHetoBuJmymcw,6388
|
21
|
+
lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
|
22
|
+
lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
|
23
|
+
lt_tensor/model_zoo/istft/generator.py,sha256=lotGkMu67fctzwa5FSwX_xtHILOuV95uP-djCz2N3C8,5261
|
24
|
+
lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
|
25
|
+
lt_tensor/model_zoo/istft/trainer.py,sha256=EPuGtvfgR8vCrVc72p5OwVy73nNVlx510VxnH3NeErY,16080
|
26
|
+
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
27
|
+
lt_tensor/processors/audio.py,sha256=uBvMls4u_B1M-pk3xAiOIRnwM2l_3LcdfESNkE0Ch30,15314
|
28
|
+
lt_tensor-0.0.1a13.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
29
|
+
lt_tensor-0.0.1a13.dist-info/METADATA,sha256=yzNtg91vOGZCoXi6XWpn1kWk7LgVD2mIWQXL-7tw_Uc,1033
|
30
|
+
lt_tensor-0.0.1a13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
31
|
+
lt_tensor-0.0.1a13.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
32
|
+
lt_tensor-0.0.1a13.dist-info/RECORD,,
|
lt_tensor/model_zoo/fsn.py
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
__all__ = [
|
2
|
-
"ConcatFusion",
|
3
|
-
"FiLMFusion",
|
4
|
-
"BilinearFusion",
|
5
|
-
"CrossAttentionFusion",
|
6
|
-
"GatedFusion",
|
7
|
-
]
|
8
|
-
|
9
|
-
from ..torch_commons import *
|
10
|
-
from ..model_base import Model
|
11
|
-
|
12
|
-
|
13
|
-
class ConcatFusion(Model):
    """Fuse two tensors by concatenating them and applying one linear layer.

    Args:
        in_dim_a: Size of the last dimension of the first input.
        in_dim_b: Size of the last dimension of the second input.
        out_dim: Size of the last dimension of the fused output.
    """

    def __init__(self, in_dim_a: int, in_dim_b: int, out_dim: int):
        super().__init__()
        joint_dim = in_dim_a + in_dim_b
        self.proj = nn.Linear(joint_dim, out_dim)

    def forward(self, a: Tensor, b: Tensor) -> Tensor:
        """Concatenate ``a`` and ``b`` on the last dimension and project the result."""
        return self.proj(torch.cat([a, b], dim=-1))
|
21
|
-
|
22
|
-
|
23
|
-
class FiLMFusion(Model):
    """Modulate features with a scale and a shift predicted from a condition.

    Args:
        cond_dim: Size of the last dimension of the conditioning input.
        feature_dim: Size of each of the two chunks (scale and shift)
            produced by the modulator.
    """

    def __init__(self, cond_dim: int, feature_dim: int):
        super().__init__()
        # One layer emits both modulation terms; forward splits them in two.
        self.modulator = nn.Linear(cond_dim, 2 * feature_dim)

    def forward(self, x: Tensor, cond: Tensor) -> Tensor:
        """Return ``x * scale + shift`` with both terms derived from ``cond``."""
        modulation = self.modulator(cond)
        scale, shift = torch.chunk(modulation, 2, dim=-1)
        return x * scale + shift
|
31
|
-
|
32
|
-
|
33
|
-
class BilinearFusion(Model):
    """Fuse two tensors through a single ``nn.Bilinear`` layer.

    Args:
        in_dim_a: Feature size of the first input.
        in_dim_b: Feature size of the second input.
        out_dim: Feature size of the fused output.
    """

    def __init__(self, in_dim_a: int, in_dim_b: int, out_dim: int):
        super().__init__()
        self.bilinear = nn.Bilinear(
            in1_features=in_dim_a,
            in2_features=in_dim_b,
            out_features=out_dim,
        )

    def forward(self, a: Tensor, b: Tensor) -> Tensor:
        """Apply the bilinear layer to the pair ``(a, b)``."""
        fused = self.bilinear(a, b)
        return fused
|
40
|
-
|
41
|
-
|
42
|
-
class CrossAttentionFusion(Model):
    """Fuse a query sequence with a context sequence via multi-head attention.

    The query and the context are first projected to ``d_model`` features by
    separate linear layers; keys and values are both derived from the context.

    Args:
        q_dim: Feature size of the query input.
        kv_dim: Feature size of the context input.
        n_heads: Number of attention heads.
        d_model: Embedding size used inside the attention layer.
    """

    def __init__(self, q_dim: int, kv_dim: int, n_heads: int = 4, d_model: int = 256):
        super().__init__()
        self.q_proj = nn.Linear(q_dim, d_model)
        self.k_proj = nn.Linear(kv_dim, d_model)
        self.v_proj = nn.Linear(kv_dim, d_model)
        # batch_first=True: the attention layer takes batch-leading inputs.
        self.attn = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=n_heads,
            batch_first=True,
        )

    def forward(self, query: Tensor, context: Tensor, mask: Tensor = None) -> Tensor:
        """Attend from ``query`` to ``context``.

        Args:
            query: Input projected into the attention queries.
            context: Input projected into the attention keys and values.
            mask: Optional tensor forwarded as ``key_padding_mask``.

        Returns:
            The attention output; the attention weights are discarded.
        """
        queries = self.q_proj(query)
        keys = self.k_proj(context)
        values = self.v_proj(context)
        attended, _ = self.attn(queries, keys, values, key_padding_mask=mask)
        return attended
|
58
|
-
|
59
|
-
|
60
|
-
class GatedFusion(Model):
    """Blend two equally sized tensors with a learned sigmoid gate.

    Args:
        in_dim: Feature size of each input (and of the output).
    """

    def __init__(self, in_dim: int):
        super().__init__()
        gate_layers = [nn.Linear(in_dim * 2, in_dim), nn.Sigmoid()]
        self.gate = nn.Sequential(*gate_layers)

    def forward(self, a: Tensor, b: Tensor) -> Tensor:
        """Return ``gate * a + (1 - gate) * b`` with the gate computed from both inputs."""
        stacked = torch.cat([a, b], dim=-1)
        gate = self.gate(stacked)
        return gate * a + (1 - gate) * b
|
lt_tensor/model_zoo/gns.py
DELETED
@@ -1,185 +0,0 @@
|
|
1
|
-
__all__ = [
|
2
|
-
"Downsample1D",
|
3
|
-
"Upsample1D",
|
4
|
-
"DiffusionUNet",
|
5
|
-
"UNetConvBlock1D",
|
6
|
-
"UNetUpBlock1D",
|
7
|
-
"NoisePredictor1D",
|
8
|
-
]
|
9
|
-
|
10
|
-
from ..torch_commons import *
|
11
|
-
from ..model_base import Model
|
12
|
-
from .rsd import ResBlock1D, ResBlocks
|
13
|
-
from ..misc_utils import log_tensor
|
14
|
-
|
15
|
-
import torch.nn.functional as F
|
16
|
-
|
17
|
-
|
18
|
-
class Downsample1D(Model):
    """Strided 1D convolution used as a learned downsampling step.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
    ):
        super().__init__()
        # kernel 4 / stride 2 / padding 1
        self.pool = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
        )

    def forward(self, x):
        """Apply the strided convolution to ``x``."""
        pooled = self.pool(x)
        return pooled
|
29
|
-
|
30
|
-
|
31
|
-
class Upsample1D(Model):
    """Transposed-convolution upsampling followed by batch norm and an activation.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        activation: Module appended after the batch normalisation.
            NOTE: the default instance is created once, when the class body is
            evaluated, so every block built with the default shares it.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        activation=nn.ReLU(inplace=True),
    ):
        super().__init__()
        # kernel 4 / stride 2 / padding 1
        stages = [
            nn.ConvTranspose1d(
                in_channels,
                out_channels,
                kernel_size=4,
                stride=2,
                padding=1,
            ),
            nn.BatchNorm1d(out_channels),
            activation,
        ]
        self.up = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the upsampling stack."""
        upsampled = self.up(x)
        return upsampled
|
49
|
-
|
50
|
-
|
51
|
-
class DiffusionUNet(Model):
    """1D U-Net made of residual stages with strided down/up-sampling.

    The encoder applies ``depth`` stages, each followed by a
    ``Downsample1D``; the decoder mirrors it with ``Upsample1D`` steps and
    concatenates the matching encoder output (skip connection) on the channel
    axis before each decoder stage.

    Args:
        in_channels: Channels of the input tensor.
        base_channels: Channels of the first encoder stage; stage ``i`` uses
            ``base_channels * 2**i``.
        out_channels: Channels produced by the final 1x1 convolution.
        depth: Number of encoder (and decoder) stages.
    """

    def __init__(self, in_channels=1, base_channels=64, out_channels=1, depth=4):
        super().__init__()

        self.depth = depth
        self.encoder_blocks = nn.ModuleList()
        self.downsamples = nn.ModuleList()
        self.upsamples = nn.ModuleList()
        self.decoder_blocks = nn.ModuleList()
        # Channel count after each encoder stage; index 0 is the raw input.
        self.channels = [in_channels]

        for i in range(depth):
            enc_in = self.channels[-1]
            enc_out = base_channels * (2**i)
            self.encoder_blocks.append(self._make_stage(enc_in, enc_out))
            # The downsampling conv keeps the channel count.
            self.downsamples.append(Downsample1D(enc_out, enc_out))
            self.channels.append(enc_out)

        bottleneck_ch = self.channels[-1]
        self.bottleneck = self._make_stage(bottleneck_ch, bottleneck_ch)

        # Decoder stages walk the encoder channel list backwards.
        for i in reversed(range(depth)):
            skip_ch = self.channels[i + 1]  # channels of the matching skip
            dec_out = self.channels[i]  # channels the next stage expects
            self.upsamples.append(Upsample1D(skip_ch, skip_ch))
            # Input is the upsampled tensor concatenated with the skip.
            self.decoder_blocks.append(self._make_stage(skip_ch * 2, dec_out))

        # After the last decoder stage the tensor is back at ``in_channels``.
        self.final = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    @staticmethod
    def _make_stage(in_ch: int, out_ch: int) -> nn.Module:
        """Build a residual stage that maps ``in_ch`` channels to ``out_ch``.

        ``ResBlock1D`` keeps the channel count and takes the kernel size as
        its second positional argument, so a channel change is done with a
        1x1 convolution placed in front of it.
        """
        block = ResBlock1D(out_ch)
        if in_ch == out_ch:
            return block
        return nn.Sequential(nn.Conv1d(in_ch, out_ch, kernel_size=1), block)

    @staticmethod
    def _match_length(x: Tensor, length: int) -> Tensor:
        """Trim or right-pad the last dimension of ``x`` to ``length``."""
        current = x.shape[-1]
        if current > length:
            return x[..., :length]
        if current < length:
            return F.pad(x, (0, length - current))
        return x

    def forward(self, x: Tensor):
        """Run the encoder, bottleneck and decoder and project to ``out_channels``."""
        skips = []

        # Encoder: keep every pre-downsampling output for the skip connections.
        for enc, down in zip(self.encoder_blocks, self.downsamples):
            x = enc(x)
            skips.append(x)
            x = down(x)

        x = self.bottleneck(x)

        # Decoder: the deepest skip is consumed first.
        for up, dec, skip in zip(self.upsamples, self.decoder_blocks, reversed(skips)):
            x = up(x)
            # Downsampling floors odd lengths, so realign before concatenating.
            x = self._match_length(x, skip.shape[-1])
            x = torch.cat([x, skip], dim=1)  # concat on channels
            x = dec(x)

        return self.final(x)
|
112
|
-
|
113
|
-
|
114
|
-
class UNetConvBlock1D(Model):
    """Two-convolution residual block with optional stride-2 downsampling.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        down: When true the first convolution (and the shortcut) use stride 2.
    """

    def __init__(self, in_channels: int, out_channels: int, down: bool = True):
        super().__init__()
        self.down = down
        stride = 2 if down else 1
        self.conv = nn.Sequential(
            nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
            ),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(0.2),
            nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(0.2),
        )
        # The shortcut must follow the main path in both channels and length.
        # An identity is only valid when neither changes; previously a strided
        # block with equal channel counts used an identity shortcut and the
        # residual addition failed on mismatched lengths.
        if down or in_channels != out_channels:
            self.downsample = nn.Conv1d(
                in_channels, out_channels, kernel_size=1, stride=stride
            )
        else:
            self.downsample = nn.Identity()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``conv(x)`` plus the (possibly projected) shortcut of ``x``."""
        residual = self.downsample(x)
        return self.conv(x) + residual
|
142
|
-
|
143
|
-
|
144
|
-
class UNetUpBlock1D(Model):
    """Nearest-neighbour x2 upsampling, skip concatenation and two convolutions.

    Args:
        in_channels: Channels of the concatenated tensor (upsampled input
            plus skip) fed to the first convolution.
        out_channels: Channels of the output tensor.
    """

    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(0.2),
            nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(0.2),
        )
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

    def forward(self, x: torch.Tensor, skip: torch.Tensor) -> torch.Tensor:
        """Upsample ``x``, align it with ``skip`` and convolve the concatenation.

        Doubling the length cannot reproduce an odd skip length, so the
        upsampled tensor is trimmed or right-padded with zeros to the skip's
        last dimension before the channel-wise concatenation.
        """
        x = self.upsample(x)

        target = skip.shape[-1]
        current = x.shape[-1]
        if current > target:
            x = x[..., :target]
        elif current < target:
            x = F.pad(x, (0, target - current))

        x = torch.cat([x, skip], dim=1)  # skip connection
        return self.conv(x)
|
161
|
-
|
162
|
-
|
163
|
-
class NoisePredictor1D(Model):
    """Small convolutional predictor with optional vector conditioning."""

    def __init__(self, in_channels: int, cond_dim: int = 0, hidden: int = 128):
        """
        Args:
            in_channels: channels of the noisy input [B, C, T]
            cond_dim: size of the optional condition vector [B, cond_dim];
                0 disables the conditioning projection
            hidden: channels between the two convolutions
        """
        super().__init__()
        self.proj = nn.Linear(cond_dim, hidden) if cond_dim > 0 else None
        self.net = nn.Sequential(
            nn.Conv1d(in_channels, hidden, kernel_size=3, padding=1),
            nn.SiLU(),
            nn.Conv1d(hidden, in_channels, kernel_size=3, padding=1),
        )

    def forward(self, x: torch.Tensor, cond: Optional[torch.Tensor] = None):
        """Predict a tensor shaped like ``x``.

        The projected condition has ``hidden`` channels, so it is added to the
        output of the first convolution (which also has ``hidden`` channels)
        rather than to the raw ``in_channels`` input, where the shapes only
        agree if ``hidden == in_channels``.

        Raises:
            ValueError: If ``cond`` is given but the module was built with
                ``cond_dim=0`` and therefore has no projection layer.
        """
        # x: [B, C, T], cond: [B, cond_dim]
        features = self.net[0](x)  # [B, hidden, T]
        if cond is not None:
            if self.proj is None:
                raise ValueError(
                    "cond was provided but the module was created with cond_dim=0"
                )
            # [B, hidden, 1] broadcasts over the time axis.
            features = features + self.proj(cond).unsqueeze(-1)
        return self.net[1:](features)  # [B, C, T]
|
184
|
-
|
185
|
-
|
lt_tensor/model_zoo/rsd.py
DELETED
@@ -1,237 +0,0 @@
|
|
1
|
-
__all__ = [
|
2
|
-
"spectral_norm_select",
|
3
|
-
"ResBlock1D_BT",
|
4
|
-
"ResBlock1D",
|
5
|
-
"ResBlock2D",
|
6
|
-
"ResBlocks",
|
7
|
-
]
|
8
|
-
from lt_utils.common import *
|
9
|
-
from ..torch_commons import *
|
10
|
-
from ..model_base import Model
|
11
|
-
import math
|
12
|
-
from ..misc_utils import log_tensor
|
13
|
-
|
14
|
-
|
15
|
-
def spectral_norm_select(module: nn.Module, enabled: bool):
    """Return ``module`` wrapped by ``spectral_norm`` when ``enabled``, else unchanged."""
    return spectral_norm(module) if enabled else module
|
19
|
-
|
20
|
-
|
21
|
-
class ResBlock1D_BT(Model):
    """Stack of residual, normalised 1D conv stages followed by a projection.

    One stage is built per dilation value. Each stage runs a dilated
    convolution and a plain convolution (both weight-normalised, both followed
    by ``activation``), adds the stage input back and normalises the sum.
    A final convolution maps ``in_channels`` to ``out_channels``.

    Args:
        in_channels: Channels used by every residual stage.
        out_channels: Channels produced by the final convolution.
        kernel_size: Kernel size of every convolution.
        dilation: One dilation or a sequence of dilations, one per stage.
        activation: Module inserted after every convolution. The same
            instance is reused at every position.
        num_groups: Number of groups for ``nn.GroupNorm`` (``batched`` only).
        batched: Selects ``nn.GroupNorm`` when true, ``nn.LayerNorm`` otherwise.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        dilation: Union[Sequence[int], int] = (1, 3, 5),
        activation: nn.Module = nn.LeakyReLU(0.1),
        num_groups: int = 1,
        batched: bool = True,
    ):
        super().__init__()
        self.conv = nn.ModuleList()
        rates = [dilation] if isinstance(dilation, int) else dilation

        def build_norm(features):
            if batched:
                return nn.GroupNorm(num_groups=num_groups, num_channels=features)
            return nn.LayerNorm(normalized_shape=features)

        for rate in rates:
            dilated_conv = self._get_conv_layer(
                in_channels, in_channels, kernel_size, rate
            )
            plain_conv = self._get_conv_layer(
                in_channels, in_channels, kernel_size, 1, True
            )
            stage = nn.ModuleDict(
                {
                    "net": nn.Sequential(
                        dilated_conv, activation, plain_conv, activation
                    ),
                    "l_norm": build_norm(in_channels),
                }
            )
            self.conv.append(stage)

        projection = self._get_conv_layer(
            in_channels, out_channels, kernel_size, 1, True
        )
        self.final = nn.Sequential(projection, activation)
        # Only the residual stages are passed through init_weights.
        self.conv.apply(self.init_weights)

    def _get_conv_layer(
        self,
        channels_in: int,
        channels_out: int,
        kernel_size: int,
        dilation: int,
        pad_gate: bool = False,
    ):
        """Create a weight-normalised stride-1 ``nn.Conv1d``.

        With ``pad_gate`` the padding is computed as for dilation 1,
        otherwise it is computed from ``dilation``.
        """
        if pad_gate:
            padding = int((kernel_size * 1 - 1) / 2)
        else:
            padding = int((kernel_size * dilation - dilation) / 2)
        conv = nn.Conv1d(
            in_channels=channels_in,
            out_channels=channels_out,
            kernel_size=kernel_size,
            stride=1,
            dilation=dilation,
            padding=padding,
        )
        return weight_norm(conv)

    def forward(self, x: Tensor):
        """Apply every residual stage in order, then the final projection."""
        for stage in self.conv:
            x = stage["net"](x) + x
            x = stage["l_norm"](x)
        return self.final(x)

    def remove_weight_norm(self):
        """Try to strip weight norm from every submodule, skipping those without it."""
        for submodule in self.modules():
            try:
                remove_weight_norm(submodule)
            except ValueError:
                # Not normed, skip
                continue

    @staticmethod
    def init_weights(m, mean=0.0, std=0.01):
        """Draw ``m.weight`` from a normal distribution if ``m`` is a conv module."""
        if "Conv" in m.__class__.__name__:
            m.weight.data.normal_(mean, std)
|
108
|
-
|
109
|
-
|
110
|
-
class ResBlock1D(Model):
    """Channel-preserving stack of residual dilated 1D convolution stages.

    One stage is built per dilation value. Each stage is
    ``activation -> dilated conv -> activation -> conv`` (both convolutions
    weight-normalised) and its output is added to its input.

    Args:
        channels: Number of input and output channels.
        kernel_size: Kernel size of every convolution.
        dilation: One dilation or a sequence of dilations. Earlier code always
            built exactly three stages from the first three entries; a
            three-element sequence therefore behaves exactly as before.
        activation: Module placed before every convolution. The same
            instance is reused at every position.
    """

    def __init__(
        self,
        channels,
        kernel_size=3,
        dilation=(1, 3, 5),
        activation: nn.Module = nn.LeakyReLU(0.1),
    ):
        super().__init__()
        # Accept a bare int the same way ResBlock1D_BT does.
        if isinstance(dilation, int):
            dilation = (dilation,)
        self.convs = nn.ModuleList(
            [
                self._get_conv_layer(
                    idx, channels, kernel_size, 1, dilation, activation
                )
                for idx in range(len(dilation))
            ]
        )
        self.convs.apply(self.init_weights)

    def _get_conv_layer(self, id, ch, k, stride, d, actv):
        """Build the stage that uses dilation ``d[id]`` for its first convolution."""

        def get_padding(ks, dil):
            return int((ks * dil - dil) / 2)

        rate = d[id]
        return nn.Sequential(
            actv,
            weight_norm(
                nn.Conv1d(
                    ch, ch, k, stride, dilation=rate, padding=get_padding(k, rate)
                )
            ),
            actv,
            weight_norm(
                nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
            ),
        )

    def forward(self, x: torch.Tensor):
        """Apply every stage with a residual connection around it."""
        for cnn in self.convs:
            x = cnn(x) + x
        return x

    def remove_weight_norm(self):
        """Try to strip weight norm from every submodule, skipping those without it."""
        for module in self.modules():
            try:
                remove_weight_norm(module)
            except ValueError:
                pass  # Not normed, skip

    @staticmethod
    def init_weights(m, mean=0.0, std=0.01):
        """Draw ``m.weight`` from a normal distribution if ``m`` is a conv module."""
        classname = m.__class__.__name__
        if "Conv" in classname:
            m.weight.data.normal_(mean, std)
|
159
|
-
|
160
|
-
|
161
|
-
class ResBlocks(Model):
    """Average of several ``ResBlock1D`` branches applied to the same input.

    Args:
        channels: Number of channels of every branch.
        resblock_kernel_sizes: Kernel size of each branch.
        resblock_dilation_sizes: Dilations of each branch, paired with
            ``resblock_kernel_sizes`` by position. If the two lists differ in
            length, only the pairs formed by ``zip`` are built.
        activation: Module given to every branch and applied to the average.

    Raises:
        ValueError: If the configuration yields no branch at all.
    """

    def __init__(
        self,
        channels: int,
        resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
        resblock_dilation_sizes: List[Union[int, List[int]]] = [
            [1, 3, 5],
            [1, 3, 5],
            [1, 3, 5],
        ],
        activation: nn.Module = nn.LeakyReLU(0.1),
    ):
        super().__init__()
        self.rb = nn.ModuleList()
        self.activation = activation

        for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
            self.rb.append(ResBlock1D(channels, k, j, activation))

        if len(self.rb) == 0:
            raise ValueError(
                "ResBlocks needs at least one (kernel size, dilation) pair"
            )
        # Divide by the number of branches actually built: zip() stops at the
        # shorter list, so len(resblock_kernel_sizes) can overcount.
        self.num_kernels = len(self.rb)

        self.rb.apply(self.init_weights)

    def forward(self, x: torch.Tensor):
        """Return ``activation(mean of every branch applied to x)``."""
        total = None
        for block in self.rb:
            out = block(x)
            total = out if total is None else total + out
        return self.activation(total / self.num_kernels)

    def remove_weight_norm(self):
        """Try to strip weight norm from every submodule, skipping those without it."""
        for module in self.modules():
            try:
                remove_weight_norm(module)
            except ValueError:
                pass  # Not normed, skip

    @staticmethod
    def init_weights(m, mean=0.0, std=0.01):
        """Draw ``m.weight`` from a normal distribution if ``m`` is a conv module."""
        classname = m.__class__.__name__
        if "Conv" in classname:
            m.weight.data.normal_(mean, std)
|
205
|
-
|
206
|
-
|
207
|
-
class ResBlock2D(Model):
    """Two-convolution 2D residual block with optional stride-2 downsampling.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        downsample: When true the first convolution and the shortcut use stride 2.
        spec_norm: When true every convolution is passed through
            ``spectral_norm_select`` with normalisation enabled.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        downsample=False,
        spec_norm: bool = False,
    ):
        super().__init__()
        stride = 2 if downsample else 1

        first_conv = spectral_norm_select(
            nn.Conv2d(in_channels, out_channels, 3, stride, 1), spec_norm
        )
        second_conv = spectral_norm_select(
            nn.Conv2d(out_channels, out_channels, 3, 1, 1), spec_norm
        )
        self.block = nn.Sequential(first_conv, nn.LeakyReLU(0.2), second_conv)

        # A 1x1 shortcut is needed whenever the shape changes on the main path.
        if downsample or in_channels != out_channels:
            self.skip = spectral_norm_select(
                nn.Conv2d(in_channels, out_channels, 1, stride), spec_norm
            )
        else:
            self.skip = nn.Identity()

        # Computed once here: one less thing to be handled every cycle.
        self.sqrt_2 = math.sqrt(2)

    def forward(self, x):
        """Return the sum of the main path and the shortcut, divided by sqrt(2)."""
        main = self.block(x)
        shortcut = self.skip(x)
        return (main + shortcut) / self.sqrt_2
|
@@ -1,28 +0,0 @@
|
|
1
|
-
lt_tensor/__init__.py,sha256=uwJ7uiO18VYj8Z1V4KSOQ3ZrnowSgJWKCIiFBrzLMOI,429
|
2
|
-
lt_tensor/losses.py,sha256=1wrke1e68hUBNAoPdJgKni0pJvXKcieza_R8nwBzMW4,4937
|
3
|
-
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
4
|
-
lt_tensor/math_ops.py,sha256=ewIYkvxIy_Lab_9ExjFUgLs-oYLOu8IRRDo7f1pn3i8,2248
|
5
|
-
lt_tensor/misc_utils.py,sha256=8LqtpmLKqCo79NdH160ByQojG8YTDcw8aHKFgOFGVLI,25425
|
6
|
-
lt_tensor/model_base.py,sha256=a2ogixC2fUyOLqz15TzCRcGXvBam--TdmpG83jw9Of8,21543
|
7
|
-
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
8
|
-
lt_tensor/noise_tools.py,sha256=rfFbPsrsycWVuH9G4zZCQC9Vgi9r8hDaECcB0TZYSYQ,11345
|
9
|
-
lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
|
10
|
-
lt_tensor/transform.py,sha256=LZZ9G7ud1cojERC7N7hMAbH9GC3ImY1hBIY00kVMs-I,13492
|
11
|
-
lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
lt_tensor/datasets/audio.py,sha256=YREyRsCvy-KS5tE0JNMWEdlIJogE1khLqhiq4wOWXVg,3777
|
13
|
-
lt_tensor/model_zoo/__init__.py,sha256=jipEk50_DTMQbGg8FnDDukxmh7Bcwvl_QVRS3rkb7aY,283
|
14
|
-
lt_tensor/model_zoo/bsc.py,sha256=OQqsQDRBf6gWqoeGeEuIaTh96AqcDyTIbO8MAMNTtI4,7045
|
15
|
-
lt_tensor/model_zoo/disc.py,sha256=9RxyHYH2nGhxLs_yoEFVgerBfH4-qdaL2Mu9akyG0_M,5841
|
16
|
-
lt_tensor/model_zoo/fsn.py,sha256=5ySsg2OHjvTV_coPAdZQ0f7bz4ugJB8mDYsItmd61qA,2102
|
17
|
-
lt_tensor/model_zoo/gns.py,sha256=Tirr_grONp_FFQ_L7K-zV2lvkaC39h8mMl4QDpx9vLQ,6028
|
18
|
-
lt_tensor/model_zoo/istft.py,sha256=RV7KVY7q4CYzzsWXH4NGJQwSqrYWwHh-16Q62lKoA2k,3594
|
19
|
-
lt_tensor/model_zoo/pos.py,sha256=N28v-rF8CELouYxQ9r45Jbd4ri5DNydwDgg7nzmQ4Ig,4471
|
20
|
-
lt_tensor/model_zoo/rsd.py,sha256=5bba50g1Hm5kMexuJ4SwOIJuyQ1qJd8Acrq-Ax6CqE8,6958
|
21
|
-
lt_tensor/model_zoo/tfrms.py,sha256=kauh-A13pk08SZ5OspEE5a-gPKD4rZr6tqMKWu3KGhk,4237
|
22
|
-
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
23
|
-
lt_tensor/processors/audio.py,sha256=2Sta_KytTqGZh-ZeHpcCbqP6O8VT6QQVkx-7szA3Itc,8830
|
24
|
-
lt_tensor-0.0.1a11.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
25
|
-
lt_tensor-0.0.1a11.dist-info/METADATA,sha256=DNs5JZfr_mjve_GHy13Auics3BI_f1pNYBth-dQW04M,966
|
26
|
-
lt_tensor-0.0.1a11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
27
|
-
lt_tensor-0.0.1a11.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
28
|
-
lt_tensor-0.0.1a11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|