lt-tensor 0.0.1a13__py3-none-any.whl → 0.0.1a15__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- lt_tensor/datasets/audio.py +23 -6
- lt_tensor/misc_utils.py +1 -1
- lt_tensor/model_base.py +163 -123
- lt_tensor/model_zoo/diffwave/__init__.py +0 -0
- lt_tensor/model_zoo/diffwave/model.py +200 -0
- lt_tensor/model_zoo/diffwave/params.py +58 -0
- lt_tensor/model_zoo/discriminator.py +269 -151
- lt_tensor/model_zoo/features.py +102 -11
- lt_tensor/model_zoo/istft/generator.py +10 -66
- lt_tensor/model_zoo/istft/trainer.py +224 -72
- lt_tensor/model_zoo/residual.py +136 -32
- lt_tensor/processors/audio.py +5 -16
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/METADATA +2 -2
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/RECORD +17 -14
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/top_level.txt +0 -0
lt_tensor/model_zoo/residual.py
CHANGED
@@ -5,13 +5,16 @@ __all__ = [
|
|
5
5
|
"ResBlock2D",
|
6
6
|
"ResBlock1DShuffled",
|
7
7
|
"AdaResBlock1D",
|
8
|
+
"ResBlocks1D",
|
9
|
+
"ResBlock1D2",
|
10
|
+
"ShuffleBlock2D",
|
8
11
|
]
|
9
12
|
import math
|
10
13
|
from lt_utils.common import *
|
14
|
+
import torch.nn.functional as F
|
11
15
|
from lt_tensor.torch_commons import *
|
12
16
|
from lt_tensor.model_base import Model
|
13
17
|
from lt_tensor.misc_utils import log_tensor
|
14
|
-
import torch.nn.functional as F
|
15
18
|
from lt_tensor.model_zoo.fusion import AdaFusion1D, AdaIN1D
|
16
19
|
|
17
20
|
|
@@ -44,6 +47,10 @@ class ConvNets(Model):
|
|
44
47
|
m.weight.data.normal_(mean, std)
|
45
48
|
|
46
49
|
|
50
|
+
def get_padding(ks, d):
|
51
|
+
return int((ks * d - d) / 2)
|
52
|
+
|
53
|
+
|
47
54
|
class ResBlock1D(ConvNets):
|
48
55
|
def __init__(
|
49
56
|
self,
|
@@ -57,14 +64,13 @@ class ResBlock1D(ConvNets):
|
|
57
64
|
self.conv_nets = nn.ModuleList(
|
58
65
|
[
|
59
66
|
self._get_conv_layer(i, channels, kernel_size, 1, dilation, activation)
|
60
|
-
for i in range(
|
67
|
+
for i in range(len(dilation))
|
61
68
|
]
|
62
69
|
)
|
63
70
|
self.conv_nets.apply(self.init_weights)
|
64
71
|
self.last_index = len(self.conv_nets) - 1
|
65
72
|
|
66
73
|
def _get_conv_layer(self, id, ch, k, stride, d, actv):
|
67
|
-
get_padding = lambda ks, d: int((ks * d - d) / 2)
|
68
74
|
return nn.Sequential(
|
69
75
|
actv, # 1
|
70
76
|
weight_norm(
|
@@ -91,16 +97,11 @@ class ResBlock1DShuffled(ConvNets):
|
|
91
97
|
kernel_size=3,
|
92
98
|
dilation=(1, 3, 5),
|
93
99
|
activation: nn.Module = nn.LeakyReLU(0.1),
|
94
|
-
add_channel_shuffle: bool = False, # requires pytorch 2.7.0 +
|
95
100
|
channel_shuffle_groups=1,
|
96
101
|
):
|
97
102
|
super().__init__()
|
98
103
|
|
99
|
-
self.channel_shuffle = (
|
100
|
-
nn.ChannelShuffle(channel_shuffle_groups)
|
101
|
-
if add_channel_shuffle
|
102
|
-
else nn.Identity()
|
103
|
-
)
|
104
|
+
self.channel_shuffle = nn.ChannelShuffle(channel_shuffle_groups)
|
104
105
|
|
105
106
|
self.conv_nets = nn.ModuleList(
|
106
107
|
[
|
@@ -136,29 +137,67 @@ class ResBlock1DShuffled(ConvNets):
|
|
136
137
|
class ResBlock2D(Model):
|
137
138
|
def __init__(
|
138
139
|
self,
|
139
|
-
in_channels,
|
140
|
-
out_channels,
|
141
|
-
|
140
|
+
in_channels: int,
|
141
|
+
out_channels: Optional[int] = None,
|
142
|
+
hidden_dim: int = 32,
|
143
|
+
downscale: bool = False,
|
144
|
+
activation: nn.Module = nn.LeakyReLU(0.2),
|
142
145
|
):
|
143
146
|
super().__init__()
|
144
|
-
stride = 2 if
|
147
|
+
stride = 2 if downscale else 1
|
148
|
+
if out_channels is None:
|
149
|
+
out_channels = in_channels
|
145
150
|
|
146
151
|
self.block = nn.Sequential(
|
147
|
-
nn.Conv2d(in_channels,
|
148
|
-
|
149
|
-
nn.Conv2d(
|
152
|
+
nn.Conv2d(in_channels, hidden_dim, 3, stride, 1),
|
153
|
+
activation,
|
154
|
+
nn.Conv2d(hidden_dim, hidden_dim, 7, 1, 3),
|
155
|
+
activation,
|
156
|
+
nn.Conv2d(hidden_dim, out_channels, 3, 1, 1),
|
150
157
|
)
|
151
158
|
|
152
159
|
self.skip = nn.Identity()
|
153
|
-
if
|
160
|
+
if downscale or in_channels != out_channels:
|
154
161
|
self.skip = spectral_norm_select(
|
155
162
|
nn.Conv2d(in_channels, out_channels, 1, stride)
|
156
163
|
)
|
157
|
-
# on less to be handled every
|
164
|
+
# on less to be handled every cycle
|
158
165
|
self.sqrt_2 = math.sqrt(2)
|
159
166
|
|
160
167
|
def forward(self, x: Tensor):
|
161
|
-
return (self.block(x) + self.skip(x)) / self.sqrt_2
|
168
|
+
return x + ((self.block(x) + self.skip(x)) / self.sqrt_2)
|
169
|
+
|
170
|
+
|
171
|
+
class ShuffleBlock2D(ConvNets):
|
172
|
+
def __init__(
|
173
|
+
self,
|
174
|
+
channels: int,
|
175
|
+
out_channels: Optional[int] = None,
|
176
|
+
hidden_dim: int = 32,
|
177
|
+
downscale: bool = False,
|
178
|
+
activation: nn.Module = nn.LeakyReLU(0.1),
|
179
|
+
):
|
180
|
+
super().__init__()
|
181
|
+
if out_channels is None:
|
182
|
+
out_channels = channels
|
183
|
+
self.shuffle = nn.ChannelShuffle(groups=2)
|
184
|
+
self.ch_split = lambda tensor: torch.split(tensor, 1, dim=1)
|
185
|
+
self.activation = activation
|
186
|
+
self.resblock_2d = ResBlock2D(
|
187
|
+
channels, out_channels, hidden_dim, downscale, activation
|
188
|
+
)
|
189
|
+
|
190
|
+
def shuffle_channels(self, tensor: torch.Tensor):
|
191
|
+
with torch.no_grad():
|
192
|
+
x = F.channel_shuffle(tensor.transpose(1, -1), tensor.shape[1]).transpose(
|
193
|
+
-1, 1
|
194
|
+
)
|
195
|
+
return self.ch_split(x)
|
196
|
+
|
197
|
+
def forward(self, x: torch.Tensor):
|
198
|
+
ch1, ch2 = self.shuffle_channels(x)
|
199
|
+
ch2 = self.resblock_2d(ch2)
|
200
|
+
return torch.cat((ch1, ch2), dim=1)
|
162
201
|
|
163
202
|
|
164
203
|
class AdaResBlock1D(ConvNets):
|
@@ -172,46 +211,111 @@ class AdaResBlock1D(ConvNets):
|
|
172
211
|
):
|
173
212
|
super().__init__()
|
174
213
|
|
214
|
+
self.alpha1 = nn.ModuleList()
|
215
|
+
self.alpha2 = nn.ModuleList()
|
175
216
|
self.conv_nets = nn.ModuleList(
|
176
217
|
[
|
177
218
|
self._get_conv_layer(
|
178
|
-
|
219
|
+
d,
|
179
220
|
res_block_channels,
|
180
221
|
ada_channel_in,
|
181
222
|
kernel_size,
|
182
|
-
1,
|
183
|
-
dilation,
|
184
223
|
)
|
185
|
-
for
|
224
|
+
for d in dilation
|
186
225
|
]
|
187
226
|
)
|
188
227
|
self.conv_nets.apply(self.init_weights)
|
189
228
|
self.last_index = len(self.conv_nets) - 1
|
190
229
|
self.activation = activation
|
191
230
|
|
192
|
-
def _get_conv_layer(self,
|
193
|
-
|
231
|
+
def _get_conv_layer(self, d, ch, ada_ch, k):
|
232
|
+
self.alpha1.append(nn.Parameter(torch.ones(1, ada_ch, 1)))
|
233
|
+
self.alpha2.append(nn.Parameter(torch.ones(1, ada_ch, 1)))
|
194
234
|
return nn.ModuleDict(
|
195
235
|
dict(
|
196
236
|
norm1=AdaFusion1D(ada_ch, ch),
|
197
237
|
norm2=AdaFusion1D(ada_ch, ch),
|
198
|
-
alpha1=nn.Parameter(torch.ones(1, ada_ch, 1)),
|
199
|
-
alpha2=nn.Parameter(torch.ones(1, ada_ch, 1)),
|
200
238
|
conv1=weight_norm(
|
201
239
|
nn.Conv1d(
|
202
|
-
ch, ch, k,
|
240
|
+
ch, ch, k, 1, dilation=d, padding=get_padding(k, d)
|
203
241
|
)
|
204
242
|
), # 2
|
205
243
|
conv2=weight_norm(
|
206
|
-
nn.Conv1d(ch, ch, k,
|
244
|
+
nn.Conv1d(ch, ch, k, 1, dilation=1, padding=get_padding(k, 1))
|
207
245
|
), # 4
|
208
246
|
)
|
209
247
|
)
|
210
248
|
|
211
249
|
def forward(self, x: torch.Tensor, y: torch.Tensor):
|
212
|
-
for cnn in self.conv_nets:
|
213
|
-
xt = self.activation(cnn["norm1"](x, y,
|
250
|
+
for i, cnn in enumerate(self.conv_nets):
|
251
|
+
xt = self.activation(cnn["norm1"](x, y, self.alpha1[i]))
|
214
252
|
xt = cnn["conv1"](xt)
|
215
|
-
xt = self.activation(cnn["norm2"](xt, y,
|
253
|
+
xt = self.activation(cnn["norm2"](xt, y, self.alpha2[i]))
|
216
254
|
x = cnn["conv2"](xt) + x
|
217
255
|
return x
|
256
|
+
|
257
|
+
|
258
|
+
class ResBlock1D2(ConvNets):
|
259
|
+
def __init__(
|
260
|
+
self,
|
261
|
+
channels,
|
262
|
+
kernel_size=3,
|
263
|
+
dilation=(1, 3, 5),
|
264
|
+
activation: nn.Module = nn.LeakyReLU(0.1),
|
265
|
+
):
|
266
|
+
super().__init__()
|
267
|
+
self.convs = nn.ModuleList(
|
268
|
+
[
|
269
|
+
weight_norm(
|
270
|
+
nn.Conv1d(
|
271
|
+
channels,
|
272
|
+
channels,
|
273
|
+
kernel_size,
|
274
|
+
dilation=d,
|
275
|
+
padding=get_padding(kernel_size, d),
|
276
|
+
)
|
277
|
+
)
|
278
|
+
for d in range(dilation)
|
279
|
+
]
|
280
|
+
)
|
281
|
+
self.convs.apply(self.init_weights)
|
282
|
+
self.activation = activation
|
283
|
+
|
284
|
+
def forward(self, x):
|
285
|
+
for c in self.convs:
|
286
|
+
xt = c(self.activation(x))
|
287
|
+
x = xt + x
|
288
|
+
return x
|
289
|
+
|
290
|
+
|
291
|
+
class ResBlocks1D(ConvNets):
|
292
|
+
def __init__(
|
293
|
+
self,
|
294
|
+
channels: int,
|
295
|
+
resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
|
296
|
+
resblock_dilation_sizes: List[Union[int, List[int]]] = [
|
297
|
+
[1, 3, 5],
|
298
|
+
[1, 3, 5],
|
299
|
+
[1, 3, 5],
|
300
|
+
],
|
301
|
+
activation: nn.Module = nn.LeakyReLU(0.1),
|
302
|
+
block: Union[ResBlock1D, ResBlock1D2] = ResBlock1D,
|
303
|
+
):
|
304
|
+
super().__init__()
|
305
|
+
self.num_kernels = len(resblock_kernel_sizes)
|
306
|
+
self.rb = nn.ModuleList()
|
307
|
+
self.activation = activation
|
308
|
+
|
309
|
+
for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
|
310
|
+
self.rb.append(block(channels, k, j, activation))
|
311
|
+
|
312
|
+
self.rb.apply(self.init_weights)
|
313
|
+
|
314
|
+
def forward(self, x: torch.Tensor):
|
315
|
+
xs = None
|
316
|
+
for i, block in enumerate(self.rb):
|
317
|
+
if i == 0:
|
318
|
+
xs = block(x)
|
319
|
+
else:
|
320
|
+
xs += block(x)
|
321
|
+
return xs / self.num_kernels
|
lt_tensor/processors/audio.py
CHANGED
@@ -106,20 +106,13 @@ class AudioProcessor(Model):
|
|
106
106
|
return tensor.detach().to(DEFAULT_DEVICE).numpy(force=True)
|
107
107
|
|
108
108
|
def compute_rms(
|
109
|
-
self,
|
109
|
+
self,
|
110
|
+
audio: Union[Tensor, np.ndarray],
|
111
|
+
mel: Optional[Tensor] = None,
|
110
112
|
):
|
111
113
|
default_dtype = audio.dtype
|
112
114
|
default_device = audio.device
|
113
|
-
|
114
|
-
f"Audio should have 1D for unbatched and 2D for batched"
|
115
|
-
", received instead a: {audio.ndim}D"
|
116
|
-
)
|
117
|
-
if mel is not None:
|
118
|
-
assert mel.ndim in [2, 3], (
|
119
|
-
"Mel spectogram should have 2D dim for non-batched or 3D dim for both non-batched or batched"
|
120
|
-
f". Received instead {mel.ndim}D."
|
121
|
-
)
|
122
|
-
if audio.ndim == 2:
|
115
|
+
if audio.ndim > 1:
|
123
116
|
B = audio.shape[0]
|
124
117
|
else:
|
125
118
|
B = 1
|
@@ -163,11 +156,7 @@ class AudioProcessor(Model):
|
|
163
156
|
):
|
164
157
|
default_dtype = audio.dtype
|
165
158
|
default_device = audio.device
|
166
|
-
|
167
|
-
f"Audio should have 1D for unbatched and 2D for batched"
|
168
|
-
", received instead a: {audio.ndim}D"
|
169
|
-
)
|
170
|
-
if audio.ndim == 2:
|
159
|
+
if audio.ndim > 1:
|
171
160
|
B = audio.shape[0]
|
172
161
|
else:
|
173
162
|
B = 1
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: lt-tensor
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.1a15
|
4
4
|
Summary: General utilities for PyTorch and others. Built for general use.
|
5
5
|
Home-page: https://github.com/gr1336/lt-tensor/
|
6
6
|
Author: gr1336
|
@@ -17,7 +17,7 @@ Requires-Dist: numpy>=1.26.4
|
|
17
17
|
Requires-Dist: tokenizers
|
18
18
|
Requires-Dist: pyyaml>=6.0.0
|
19
19
|
Requires-Dist: numba>0.60.0
|
20
|
-
Requires-Dist: lt-utils
|
20
|
+
Requires-Dist: lt-utils==0.0.2a2
|
21
21
|
Requires-Dist: librosa==0.11.*
|
22
22
|
Requires-Dist: einops
|
23
23
|
Requires-Dist: plotly
|
@@ -3,30 +3,33 @@ lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss
|
|
3
3
|
lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
|
4
4
|
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
5
5
|
lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
|
6
|
-
lt_tensor/misc_utils.py,sha256=
|
7
|
-
lt_tensor/model_base.py,sha256=
|
6
|
+
lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
|
7
|
+
lt_tensor/model_base.py,sha256=qqqIVpYz6nv01MnZuuAj1dxq4_NN-zSivP1GaegA9TI,21597
|
8
8
|
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
9
9
|
lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
|
10
10
|
lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
|
11
11
|
lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
|
12
12
|
lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
lt_tensor/datasets/audio.py,sha256=
|
13
|
+
lt_tensor/datasets/audio.py,sha256=5Wvz1BJ7xXkLYpVLLw9RY3X3RgMdPPeGiN0-MmJDQy0,8045
|
14
14
|
lt_tensor/model_zoo/__init__.py,sha256=RzG7fltZLyiIU_Za4pgfBPli5uPITiJkq4sTCd4uA_0,319
|
15
15
|
lt_tensor/model_zoo/basic.py,sha256=_26H_jJk5Ld3DZiNpIhGosGfMxoFDZrI8bpDAYUOYno,10660
|
16
|
-
lt_tensor/model_zoo/discriminator.py,sha256=
|
17
|
-
lt_tensor/model_zoo/features.py,sha256=
|
16
|
+
lt_tensor/model_zoo/discriminator.py,sha256=_HrgseU3KO_6ONNjISxkp6-9pRseVZr43x8NYxIq1Xg,9989
|
17
|
+
lt_tensor/model_zoo/features.py,sha256=DO8dlE0kmPKTNC1Xkv9wKegOOYkQa_rkxM4hhcNwJWA,15655
|
18
18
|
lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
|
19
19
|
lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
|
20
|
-
lt_tensor/model_zoo/residual.py,sha256=
|
20
|
+
lt_tensor/model_zoo/residual.py,sha256=i5V4ju7DB3WesKBVm6KH_LyPoKGDUOyo2Usfs-PyP58,9394
|
21
21
|
lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
|
22
|
+
lt_tensor/model_zoo/diffwave/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
+
lt_tensor/model_zoo/diffwave/model.py,sha256=RwrJd7ZZ2uQdLid_m8-wbwEJ7l2gqukq2MSjbquN_Pg,6832
|
24
|
+
lt_tensor/model_zoo/diffwave/params.py,sha256=91aaBWNfWU-q3POS3TbNgdmhw5RAayoLudVNblM8ixU,1719
|
22
25
|
lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
|
23
|
-
lt_tensor/model_zoo/istft/generator.py,sha256=
|
26
|
+
lt_tensor/model_zoo/istft/generator.py,sha256=R5Wym4Bocx1T5ijyETQe1thx4uY9ulMwcHqgsGG3h-0,3364
|
24
27
|
lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
|
25
|
-
lt_tensor/model_zoo/istft/trainer.py,sha256=
|
28
|
+
lt_tensor/model_zoo/istft/trainer.py,sha256=WAoySxxuyJtMDt2q0kGbaJT19vAduHyxYwBo4TTU_LM,21302
|
26
29
|
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
27
|
-
lt_tensor/processors/audio.py,sha256=
|
28
|
-
lt_tensor-0.0.
|
29
|
-
lt_tensor-0.0.
|
30
|
-
lt_tensor-0.0.
|
31
|
-
lt_tensor-0.0.
|
32
|
-
lt_tensor-0.0.
|
30
|
+
lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
|
31
|
+
lt_tensor-0.0.1a15.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
32
|
+
lt_tensor-0.0.1a15.dist-info/METADATA,sha256=RKfh13pzXJQtBwVMoXyqizQfshD7gFyC1491UCfSFP8,1033
|
33
|
+
lt_tensor-0.0.1a15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
34
|
+
lt_tensor-0.0.1a15.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
35
|
+
lt_tensor-0.0.1a15.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|