lt-tensor 0.0.1a13__py3-none-any.whl → 0.0.1a15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,13 +5,16 @@ __all__ = [
5
5
  "ResBlock2D",
6
6
  "ResBlock1DShuffled",
7
7
  "AdaResBlock1D",
8
+ "ResBlocks1D",
9
+ "ResBlock1D2",
10
+ "ShuffleBlock2D",
8
11
  ]
9
12
  import math
10
13
  from lt_utils.common import *
14
+ import torch.nn.functional as F
11
15
  from lt_tensor.torch_commons import *
12
16
  from lt_tensor.model_base import Model
13
17
  from lt_tensor.misc_utils import log_tensor
14
- import torch.nn.functional as F
15
18
  from lt_tensor.model_zoo.fusion import AdaFusion1D, AdaIN1D
16
19
 
17
20
 
@@ -44,6 +47,10 @@ class ConvNets(Model):
44
47
  m.weight.data.normal_(mean, std)
45
48
 
46
49
 
50
+ def get_padding(ks, d):
51
+ return int((ks * d - d) / 2)
52
+
53
+
47
54
  class ResBlock1D(ConvNets):
48
55
  def __init__(
49
56
  self,
@@ -57,14 +64,13 @@ class ResBlock1D(ConvNets):
57
64
  self.conv_nets = nn.ModuleList(
58
65
  [
59
66
  self._get_conv_layer(i, channels, kernel_size, 1, dilation, activation)
60
- for i in range(3)
67
+ for i in range(len(dilation))
61
68
  ]
62
69
  )
63
70
  self.conv_nets.apply(self.init_weights)
64
71
  self.last_index = len(self.conv_nets) - 1
65
72
 
66
73
  def _get_conv_layer(self, id, ch, k, stride, d, actv):
67
- get_padding = lambda ks, d: int((ks * d - d) / 2)
68
74
  return nn.Sequential(
69
75
  actv, # 1
70
76
  weight_norm(
@@ -91,16 +97,11 @@ class ResBlock1DShuffled(ConvNets):
91
97
  kernel_size=3,
92
98
  dilation=(1, 3, 5),
93
99
  activation: nn.Module = nn.LeakyReLU(0.1),
94
- add_channel_shuffle: bool = False, # requires pytorch 2.7.0 +
95
100
  channel_shuffle_groups=1,
96
101
  ):
97
102
  super().__init__()
98
103
 
99
- self.channel_shuffle = (
100
- nn.ChannelShuffle(channel_shuffle_groups)
101
- if add_channel_shuffle
102
- else nn.Identity()
103
- )
104
+ self.channel_shuffle = nn.ChannelShuffle(channel_shuffle_groups)
104
105
 
105
106
  self.conv_nets = nn.ModuleList(
106
107
  [
@@ -136,29 +137,67 @@ class ResBlock1DShuffled(ConvNets):
136
137
  class ResBlock2D(Model):
137
138
  def __init__(
138
139
  self,
139
- in_channels,
140
- out_channels,
141
- downsample=False,
140
+ in_channels: int,
141
+ out_channels: Optional[int] = None,
142
+ hidden_dim: int = 32,
143
+ downscale: bool = False,
144
+ activation: nn.Module = nn.LeakyReLU(0.2),
142
145
  ):
143
146
  super().__init__()
144
- stride = 2 if downsample else 1
147
+ stride = 2 if downscale else 1
148
+ if out_channels is None:
149
+ out_channels = in_channels
145
150
 
146
151
  self.block = nn.Sequential(
147
- nn.Conv2d(in_channels, out_channels, 3, stride, 1),
148
- nn.LeakyReLU(0.2),
149
- nn.Conv2d(out_channels, out_channels, 3, 1, 1),
152
+ nn.Conv2d(in_channels, hidden_dim, 3, stride, 1),
153
+ activation,
154
+ nn.Conv2d(hidden_dim, hidden_dim, 7, 1, 3),
155
+ activation,
156
+ nn.Conv2d(hidden_dim, out_channels, 3, 1, 1),
150
157
  )
151
158
 
152
159
  self.skip = nn.Identity()
153
- if downsample or in_channels != out_channels:
160
+ if downscale or in_channels != out_channels:
154
161
  self.skip = spectral_norm_select(
155
162
  nn.Conv2d(in_channels, out_channels, 1, stride)
156
163
  )
157
- # on less to be handled every cicle
164
+ # on less to be handled every cycle
158
165
  self.sqrt_2 = math.sqrt(2)
159
166
 
160
167
  def forward(self, x: Tensor):
161
- return (self.block(x) + self.skip(x)) / self.sqrt_2
168
+ return x + ((self.block(x) + self.skip(x)) / self.sqrt_2)
169
+
170
+
171
+ class ShuffleBlock2D(ConvNets):
172
+ def __init__(
173
+ self,
174
+ channels: int,
175
+ out_channels: Optional[int] = None,
176
+ hidden_dim: int = 32,
177
+ downscale: bool = False,
178
+ activation: nn.Module = nn.LeakyReLU(0.1),
179
+ ):
180
+ super().__init__()
181
+ if out_channels is None:
182
+ out_channels = channels
183
+ self.shuffle = nn.ChannelShuffle(groups=2)
184
+ self.ch_split = lambda tensor: torch.split(tensor, 1, dim=1)
185
+ self.activation = activation
186
+ self.resblock_2d = ResBlock2D(
187
+ channels, out_channels, hidden_dim, downscale, activation
188
+ )
189
+
190
+ def shuffle_channels(self, tensor: torch.Tensor):
191
+ with torch.no_grad():
192
+ x = F.channel_shuffle(tensor.transpose(1, -1), tensor.shape[1]).transpose(
193
+ -1, 1
194
+ )
195
+ return self.ch_split(x)
196
+
197
+ def forward(self, x: torch.Tensor):
198
+ ch1, ch2 = self.shuffle_channels(x)
199
+ ch2 = self.resblock_2d(ch2)
200
+ return torch.cat((ch1, ch2), dim=1)
162
201
 
163
202
 
164
203
  class AdaResBlock1D(ConvNets):
@@ -172,46 +211,111 @@ class AdaResBlock1D(ConvNets):
172
211
  ):
173
212
  super().__init__()
174
213
 
214
+ self.alpha1 = nn.ModuleList()
215
+ self.alpha2 = nn.ModuleList()
175
216
  self.conv_nets = nn.ModuleList(
176
217
  [
177
218
  self._get_conv_layer(
178
- i,
219
+ d,
179
220
  res_block_channels,
180
221
  ada_channel_in,
181
222
  kernel_size,
182
- 1,
183
- dilation,
184
223
  )
185
- for i in range(3)
224
+ for d in dilation
186
225
  ]
187
226
  )
188
227
  self.conv_nets.apply(self.init_weights)
189
228
  self.last_index = len(self.conv_nets) - 1
190
229
  self.activation = activation
191
230
 
192
- def _get_conv_layer(self, id, ch, ada_ch, k, stride, d):
193
- get_padding = lambda ks, d: int((ks * d - d) / 2)
231
+ def _get_conv_layer(self, d, ch, ada_ch, k):
232
+ self.alpha1.append(nn.Parameter(torch.ones(1, ada_ch, 1)))
233
+ self.alpha2.append(nn.Parameter(torch.ones(1, ada_ch, 1)))
194
234
  return nn.ModuleDict(
195
235
  dict(
196
236
  norm1=AdaFusion1D(ada_ch, ch),
197
237
  norm2=AdaFusion1D(ada_ch, ch),
198
- alpha1=nn.Parameter(torch.ones(1, ada_ch, 1)),
199
- alpha2=nn.Parameter(torch.ones(1, ada_ch, 1)),
200
238
  conv1=weight_norm(
201
239
  nn.Conv1d(
202
- ch, ch, k, stride, dilation=d[id], padding=get_padding(k, d[id])
240
+ ch, ch, k, 1, dilation=d, padding=get_padding(k, d)
203
241
  )
204
242
  ), # 2
205
243
  conv2=weight_norm(
206
- nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
244
+ nn.Conv1d(ch, ch, k, 1, dilation=1, padding=get_padding(k, 1))
207
245
  ), # 4
208
246
  )
209
247
  )
210
248
 
211
249
  def forward(self, x: torch.Tensor, y: torch.Tensor):
212
- for cnn in self.conv_nets:
213
- xt = self.activation(cnn["norm1"](x, y, cnn["alpha1"]))
250
+ for i, cnn in enumerate(self.conv_nets):
251
+ xt = self.activation(cnn["norm1"](x, y, self.alpha1[i]))
214
252
  xt = cnn["conv1"](xt)
215
- xt = self.activation(cnn["norm2"](xt, y, cnn["alpha2"]))
253
+ xt = self.activation(cnn["norm2"](xt, y, self.alpha2[i]))
216
254
  x = cnn["conv2"](xt) + x
217
255
  return x
256
+
257
+
258
+ class ResBlock1D2(ConvNets):
259
+ def __init__(
260
+ self,
261
+ channels,
262
+ kernel_size=3,
263
+ dilation=(1, 3, 5),
264
+ activation: nn.Module = nn.LeakyReLU(0.1),
265
+ ):
266
+ super().__init__()
267
+ self.convs = nn.ModuleList(
268
+ [
269
+ weight_norm(
270
+ nn.Conv1d(
271
+ channels,
272
+ channels,
273
+ kernel_size,
274
+ dilation=d,
275
+ padding=get_padding(kernel_size, d),
276
+ )
277
+ )
278
+ for d in range(dilation)
279
+ ]
280
+ )
281
+ self.convs.apply(self.init_weights)
282
+ self.activation = activation
283
+
284
+ def forward(self, x):
285
+ for c in self.convs:
286
+ xt = c(self.activation(x))
287
+ x = xt + x
288
+ return x
289
+
290
+
291
+ class ResBlocks1D(ConvNets):
292
+ def __init__(
293
+ self,
294
+ channels: int,
295
+ resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
296
+ resblock_dilation_sizes: List[Union[int, List[int]]] = [
297
+ [1, 3, 5],
298
+ [1, 3, 5],
299
+ [1, 3, 5],
300
+ ],
301
+ activation: nn.Module = nn.LeakyReLU(0.1),
302
+ block: Union[ResBlock1D, ResBlock1D2] = ResBlock1D,
303
+ ):
304
+ super().__init__()
305
+ self.num_kernels = len(resblock_kernel_sizes)
306
+ self.rb = nn.ModuleList()
307
+ self.activation = activation
308
+
309
+ for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
310
+ self.rb.append(block(channels, k, j, activation))
311
+
312
+ self.rb.apply(self.init_weights)
313
+
314
+ def forward(self, x: torch.Tensor):
315
+ xs = None
316
+ for i, block in enumerate(self.rb):
317
+ if i == 0:
318
+ xs = block(x)
319
+ else:
320
+ xs += block(x)
321
+ return xs / self.num_kernels
@@ -106,20 +106,13 @@ class AudioProcessor(Model):
106
106
  return tensor.detach().to(DEFAULT_DEVICE).numpy(force=True)
107
107
 
108
108
  def compute_rms(
109
- self, audio: Union[Tensor, np.ndarray], mel: Optional[Tensor] = None
109
+ self,
110
+ audio: Union[Tensor, np.ndarray],
111
+ mel: Optional[Tensor] = None,
110
112
  ):
111
113
  default_dtype = audio.dtype
112
114
  default_device = audio.device
113
- assert audio.ndim in [1, 2], (
114
- f"Audio should have 1D for unbatched and 2D for batched"
115
- ", received instead a: {audio.ndim}D"
116
- )
117
- if mel is not None:
118
- assert mel.ndim in [2, 3], (
119
- "Mel spectogram should have 2D dim for non-batched or 3D dim for both non-batched or batched"
120
- f". Received instead {mel.ndim}D."
121
- )
122
- if audio.ndim == 2:
115
+ if audio.ndim > 1:
123
116
  B = audio.shape[0]
124
117
  else:
125
118
  B = 1
@@ -163,11 +156,7 @@ class AudioProcessor(Model):
163
156
  ):
164
157
  default_dtype = audio.dtype
165
158
  default_device = audio.device
166
- assert audio.ndim in [1, 2], (
167
- f"Audio should have 1D for unbatched and 2D for batched"
168
- ", received instead a: {audio.ndim}D"
169
- )
170
- if audio.ndim == 2:
159
+ if audio.ndim > 1:
171
160
  B = audio.shape[0]
172
161
  else:
173
162
  B = 1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lt-tensor
3
- Version: 0.0.1a13
3
+ Version: 0.0.1a15
4
4
  Summary: General utilities for PyTorch and others. Built for general use.
5
5
  Home-page: https://github.com/gr1336/lt-tensor/
6
6
  Author: gr1336
@@ -17,7 +17,7 @@ Requires-Dist: numpy>=1.26.4
17
17
  Requires-Dist: tokenizers
18
18
  Requires-Dist: pyyaml>=6.0.0
19
19
  Requires-Dist: numba>0.60.0
20
- Requires-Dist: lt-utils>=0.0.2a1
20
+ Requires-Dist: lt-utils==0.0.2a2
21
21
  Requires-Dist: librosa==0.11.*
22
22
  Requires-Dist: einops
23
23
  Requires-Dist: plotly
@@ -3,30 +3,33 @@ lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss
3
3
  lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
4
4
  lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
5
5
  lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
6
- lt_tensor/misc_utils.py,sha256=UNba6UEsAv1oZ60IAaKBNGbhXK2WPxRI9E4QcjP-_w0,28755
7
- lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
6
+ lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
7
+ lt_tensor/model_base.py,sha256=qqqIVpYz6nv01MnZuuAj1dxq4_NN-zSivP1GaegA9TI,21597
8
8
  lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
9
9
  lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
10
10
  lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
11
11
  lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
12
12
  lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- lt_tensor/datasets/audio.py,sha256=j73oRyXt-AK4tWWYWjH-3c5RYouQBgDSCTuWHmyG8kQ,7450
13
+ lt_tensor/datasets/audio.py,sha256=5Wvz1BJ7xXkLYpVLLw9RY3X3RgMdPPeGiN0-MmJDQy0,8045
14
14
  lt_tensor/model_zoo/__init__.py,sha256=RzG7fltZLyiIU_Za4pgfBPli5uPITiJkq4sTCd4uA_0,319
15
15
  lt_tensor/model_zoo/basic.py,sha256=_26H_jJk5Ld3DZiNpIhGosGfMxoFDZrI8bpDAYUOYno,10660
16
- lt_tensor/model_zoo/discriminator.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lDGHsjJE8,5849
17
- lt_tensor/model_zoo/features.py,sha256=CTFMidzza31pqQjwPfp_g0BNVfuQ8Dlo5JnxpYpKgag,13144
16
+ lt_tensor/model_zoo/discriminator.py,sha256=_HrgseU3KO_6ONNjISxkp6-9pRseVZr43x8NYxIq1Xg,9989
17
+ lt_tensor/model_zoo/features.py,sha256=DO8dlE0kmPKTNC1Xkv9wKegOOYkQa_rkxM4hhcNwJWA,15655
18
18
  lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
19
19
  lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
20
- lt_tensor/model_zoo/residual.py,sha256=knVLxzrLUjNQ6vdBESTZOk3r86ldi5PHetoBuJmymcw,6388
20
+ lt_tensor/model_zoo/residual.py,sha256=i5V4ju7DB3WesKBVm6KH_LyPoKGDUOyo2Usfs-PyP58,9394
21
21
  lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
22
+ lt_tensor/model_zoo/diffwave/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ lt_tensor/model_zoo/diffwave/model.py,sha256=RwrJd7ZZ2uQdLid_m8-wbwEJ7l2gqukq2MSjbquN_Pg,6832
24
+ lt_tensor/model_zoo/diffwave/params.py,sha256=91aaBWNfWU-q3POS3TbNgdmhw5RAayoLudVNblM8ixU,1719
22
25
  lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
23
- lt_tensor/model_zoo/istft/generator.py,sha256=lotGkMu67fctzwa5FSwX_xtHILOuV95uP-djCz2N3C8,5261
26
+ lt_tensor/model_zoo/istft/generator.py,sha256=R5Wym4Bocx1T5ijyETQe1thx4uY9ulMwcHqgsGG3h-0,3364
24
27
  lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
25
- lt_tensor/model_zoo/istft/trainer.py,sha256=EPuGtvfgR8vCrVc72p5OwVy73nNVlx510VxnH3NeErY,16080
28
+ lt_tensor/model_zoo/istft/trainer.py,sha256=WAoySxxuyJtMDt2q0kGbaJT19vAduHyxYwBo4TTU_LM,21302
26
29
  lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
27
- lt_tensor/processors/audio.py,sha256=uBvMls4u_B1M-pk3xAiOIRnwM2l_3LcdfESNkE0Ch30,15314
28
- lt_tensor-0.0.1a13.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
29
- lt_tensor-0.0.1a13.dist-info/METADATA,sha256=yzNtg91vOGZCoXi6XWpn1kWk7LgVD2mIWQXL-7tw_Uc,1033
30
- lt_tensor-0.0.1a13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- lt_tensor-0.0.1a13.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
32
- lt_tensor-0.0.1a13.dist-info/RECORD,,
30
+ lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
31
+ lt_tensor-0.0.1a15.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
32
+ lt_tensor-0.0.1a15.dist-info/METADATA,sha256=RKfh13pzXJQtBwVMoXyqizQfshD7gFyC1491UCfSFP8,1033
33
+ lt_tensor-0.0.1a15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ lt_tensor-0.0.1a15.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
35
+ lt_tensor-0.0.1a15.dist-info/RECORD,,