lt-tensor 0.0.1a15__py3-none-any.whl → 0.0.1a16__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
@@ -2,18 +2,20 @@ __all__ = [
     "basic", # basic
     "residual", # residual
     "transformer", # transformer
-    "pos_encoder",
-    "fusion",
-    "features",
-    "discriminator",
+    "pos_encoder",
+    "fusion",
+    "features",
+    "discriminator",
+    "audio_models",
+    "hifigan",
     "istft",
 ]
+from .audio_models import hifigan, istft
 from . import (
     basic,
-    discriminator,
     features,
     fusion,
-    istft,
+    audio_models,
     pos_encoder,
     residual,
     transformer,
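
The net effect of this hunk is a reorganized public surface: `istft` and the new `hifigan` now live under an `audio_models` subpackage but are re-exported at the top level, while the eager `from . import discriminator` is dropped even though `"discriminator"` stays listed in `__all__`. A quick sanity check of the new layout; the package path `lt_tensor.model_zoo` is inferred from the imports elsewhere in this diff, not stated in the hunk itself:

```python
# inferred layout; `lt_tensor.model_zoo` is assumed from the other hunks in this diff
from lt_tensor.model_zoo import audio_models, hifigan, istft
from lt_tensor.model_zoo.audio_models import diffwave, hifigan, istft
```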
@@ -0,0 +1 @@
+from . import diffwave, istft, hifigan
@@ -0,0 +1,3 @@
+__all__ = ["DiffWave", "SpectrogramUpsampler", "DiffusionEmbedding"]
+
+from .model import DiffWave, SpectrogramUpsampler, DiffusionEmbedding
@@ -1,3 +1,4 @@
+__all__ = ["DiffWave", "SpectrogramUpsampler", "DiffusionEmbedding"]
 import numpy as np
 import torch
 import torch.nn as nn
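
The same `__all__` triple is declared both in the new `diffwave/__init__.py` above and in the model module itself, so the DiffWave classes can be imported from the subpackage directly rather than from its `model` module. A hypothetical usage line, assuming the subpackage path implied by the `audio_models` hunks:

```python
# hypothetical path, inferred from the audio_models re-exports above
from lt_tensor.model_zoo.audio_models.diffwave import DiffWave, SpectrogramUpsampler, DiffusionEmbedding
```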
@@ -0,0 +1,392 @@
+__all__ = ["HifiganGenerator"]
+from lt_utils.common import *
+from lt_tensor.torch_commons import *
+from lt_tensor.model_zoo.residual import ConvNets
+
+import torch
+import torch.nn.functional as F
+import torch.nn as nn
+
+
+def get_padding(kernel_size, dilation=1):
+    return (kernel_size * dilation - dilation) // 2
+
+
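
For a stride-1 convolution the effective kernel span is `dilation * (kernel_size - 1) + 1`, so this padding is exactly half of the span minus one and the time dimension is preserved: with `kernel_size=3, dilation=5` the span is 11 and `get_padding` returns `(15 - 5) // 2 = 5`, i.e. 5 samples of padding on each side.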
+class ResBlock1(ConvNets):
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
+        super().__init__()
+
+        self.convs1 = nn.ModuleList(
+            [
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[0],
+                        padding=get_padding(kernel_size, dilation[0]),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[1],
+                        padding=get_padding(kernel_size, dilation[1]),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[2],
+                        padding=get_padding(kernel_size, dilation[2]),
+                    )
+                ),
+            ]
+        )
+        self.convs1.apply(self.init_weights)
+
+        self.convs2 = nn.ModuleList(
+            [
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=get_padding(kernel_size, 1),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=get_padding(kernel_size, 1),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=get_padding(kernel_size, 1),
+                    )
+                ),
+            ]
+        )
+        self.convs2.apply(self.init_weights)
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        for c1, c2 in zip(self.convs1, self.convs2):
+            xt = c1(self.activation(x))
+            xt = c2(self.activation(xt))
+            x = xt + x
+        return x
+
+    def remove_weight_norm(self):
+        for l in self.convs1:
+            remove_weight_norm(l)
+        for l in self.convs2:
+            remove_weight_norm(l)
+
+
+class ResBlock2(ConvNets):
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
+        super().__init__()
+        self.convs = nn.ModuleList(
+            [
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[0],
+                        padding=get_padding(kernel_size, dilation[0]),
+                    )
+                ),
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[1],
+                        padding=get_padding(kernel_size, dilation[1]),
+                    )
+                ),
+            ]
+        )
+        self.convs.apply(self.init_weights)
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        for c in self.convs:
+            xt = c(self.activation(x))
+            x = xt + x
+        return x
+
+
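
Both residual blocks keep the `(batch, channels, time)` shape, so they can be stacked freely between upsampling stages; `ResBlock1` is the heavier variant (three dilated convolutions, each paired with a dilation-1 convolution), `ResBlock2` the lighter one (two dilated convolutions). A minimal shape check, assuming `ConvNets` supplies the `init_weights` helper used above:

```python
import torch

block = ResBlock1(channels=64)    # defaults: kernel_size=3, dilation=(1, 3, 5)
x = torch.randn(2, 64, 100)       # (batch, channels, time)
assert block(x).shape == x.shape  # dilated convs are padded to be shape-preserving
```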
+class HifiganGenerator(ConvNets):
+    def __init__(self, h):
+        super().__init__()
+        self.h = h
+        self.num_kernels = len(h.resblock_kernel_sizes)
+        self.num_upsamples = len(h.upsample_rates)
+        self.conv_pre = weight_norm(
+            nn.Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3)
+        )
+        resblock = ResBlock1 if h.resblock == "1" else ResBlock2
+        self.activation = nn.LeakyReLU(0.1)
+        self.ups = nn.ModuleList()
+        for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
+            self.ups.append(
+                weight_norm(
+                    nn.ConvTranspose1d(
+                        h.upsample_initial_channel // (2**i),
+                        h.upsample_initial_channel // (2 ** (i + 1)),
+                        k,
+                        u,
+                        padding=(k - u) // 2,
+                    )
+                )
+            )
+
+        self.resblocks = nn.ModuleList()
+        for i in range(len(self.ups)):
+            ch = h.upsample_initial_channel // (2 ** (i + 1))
+            for j, (k, d) in enumerate(
+                zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)
+            ):
+                self.resblocks.append(resblock(ch, k, d))
+
+        self.conv_post = weight_norm(nn.Conv1d(ch, 1, 7, 1, padding=3))
+        self.ups.apply(self.init_weights)
+        self.conv_post.apply(self.init_weights)
+
+    def forward(self, x):
+        x = self.conv_pre(x)
+        for i in range(self.num_upsamples):
+            x = self.ups[i](self.activation(x))
+            xs = None
+            for j in range(self.num_kernels):
+                if xs is None:
+                    xs = self.resblocks[i * self.num_kernels + j](x)
+                else:
+                    xs += self.resblocks[i * self.num_kernels + j](x)
+            x = xs / self.num_kernels
+        x = self.conv_post(self.activation(x))
+        x = torch.tanh(x)
+
+        return x
+
+
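
The generator follows the HiFi-GAN design: a pre-convolution from 80 mel bins, alternating transposed-convolution upsampling stages each followed by an averaged bank of `num_kernels` residual blocks (the multi-receptive-field fusion), and a tanh post-convolution producing one waveform channel. Note that the ResBlock classes above take `(channels, kernel_size, dilation)`. Each stage multiplies the time axis by its rate, so output length is the input frame count times the product of `upsample_rates`. A minimal smoke-test sketch; the config values below are the common HiFi-GAN v1 hyperparameters, not values shipped with this package:

```python
from types import SimpleNamespace
import torch

# hypothetical config; only the attribute names read by HifiganGenerator matter
h = SimpleNamespace(
    resblock="1",
    upsample_rates=[8, 8, 2, 2],            # product = 256 (hop size)
    upsample_kernel_sizes=[16, 16, 4, 4],
    upsample_initial_channel=512,
    resblock_kernel_sizes=[3, 7, 11],
    resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
)
gen = HifiganGenerator(h)
mel = torch.randn(1, 80, 32)  # (batch, 80 mel bins, frames)
wav = gen(mel)                # (1, 1, 32 * 256) == (1, 1, 8192)
```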
+class DiscriminatorP(ConvNets):
+    def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
+        super().__init__()
+        self.period = period
+        norm_f = spectral_norm if use_spectral_norm else weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(
+                    nn.Conv2d(
+                        1,
+                        32,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(5, 1), 0),
+                    )
+                ),
+                norm_f(
+                    nn.Conv2d(
+                        32,
+                        128,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(5, 1), 0),
+                    )
+                ),
+                norm_f(
+                    nn.Conv2d(
+                        128,
+                        512,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(5, 1), 0),
+                    )
+                ),
+                norm_f(
+                    nn.Conv2d(
+                        512,
+                        1024,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(5, 1), 0),
+                    )
+                ),
+                norm_f(nn.Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))),
+            ]
+        )
+        self.conv_post = norm_f(nn.Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        fmap = []
+
+        # 1d to 2d
+        b, c, t = x.shape
+        if t % self.period != 0:  # pad first
+            n_pad = self.period - (t % self.period)
+            x = F.pad(x, (0, n_pad), "reflect")
+            t = t + n_pad
+        x = x.view(b, c, t // self.period, self.period)
+
+        for l in self.convs:
+            x = l(x)
+            x = self.activation(x)
+            fmap.append(x)
+        x = self.conv_post(x)
+        fmap.append(x)
+        x = torch.flatten(x, 1, -1)
+
+        return x, fmap
+
+
+class MultiPeriodDiscriminator(ConvNets):
+    def __init__(self):
+        super().__init__()
+        self.discriminators = nn.ModuleList(
+            [
+                DiscriminatorP(2),
+                DiscriminatorP(3),
+                DiscriminatorP(5),
+                DiscriminatorP(7),
+                DiscriminatorP(11),
+            ]
+        )
+
+    def forward(self, y, y_hat):
+        y_d_rs = []
+        y_d_gs = []
+        fmap_rs = []
+        fmap_gs = []
+        for i, d in enumerate(self.discriminators):
+            y_d_r, fmap_r = d(y)
+            y_d_g, fmap_g = d(y_hat)
+            y_d_rs.append(y_d_r)
+            fmap_rs.append(fmap_r)
+            y_d_gs.append(y_d_g)
+            fmap_gs.append(fmap_g)
+
+        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
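
`DiscriminatorP` reflect-pads the waveform up to a multiple of its period and folds it into 2D so that samples one period apart line up vertically; the `(kernel_size, 1)` kernels then convolve within a single phase without mixing adjacent samples. The multi-period wrapper simply runs five such discriminators with coprime periods (2, 3, 5, 7, 11) over the real and generated waveforms. A small sketch of the fold itself, with worked numbers:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 1, 100)             # (batch, channels, time), t = 100
period = 3
n_pad = period - x.shape[-1] % period  # 2 samples to reach a multiple of 3
x = F.pad(x, (0, n_pad), "reflect")
x = x.view(1, 1, -1, period)           # (1, 1, 34, 3); rows step by one period
print(x.shape)
```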
+class DiscriminatorS(ConvNets):
+    def __init__(self, use_spectral_norm=False):
+        super().__init__()
+        norm_f = spectral_norm if use_spectral_norm else weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(nn.Conv1d(1, 128, 15, 1, padding=7)),
+                norm_f(nn.Conv1d(128, 128, 41, 2, groups=4, padding=20)),
+                norm_f(nn.Conv1d(128, 256, 41, 2, groups=16, padding=20)),
+                norm_f(nn.Conv1d(256, 512, 41, 4, groups=16, padding=20)),
+                norm_f(nn.Conv1d(512, 1024, 41, 4, groups=16, padding=20)),
+                norm_f(nn.Conv1d(1024, 1024, 41, 1, groups=16, padding=20)),
+                norm_f(nn.Conv1d(1024, 1024, 5, 1, padding=2)),
+            ]
+        )
+        self.conv_post = norm_f(nn.Conv1d(1024, 1, 3, 1, padding=1))
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        fmap = []
+        for l in self.convs:
+            x = l(x)
+            x = self.activation(x)
+            fmap.append(x)
+        x = self.conv_post(x)
+        fmap.append(x)
+        x = torch.flatten(x, 1, -1)
+
+        return x, fmap
+
+
+class MultiScaleDiscriminator(ConvNets):
+    def __init__(self):
+        super().__init__()
+        self.discriminators = nn.ModuleList(
+            [
+                DiscriminatorS(use_spectral_norm=True),
+                DiscriminatorS(),
+                DiscriminatorS(),
+            ]
+        )
+        self.meanpools = nn.ModuleList(
+            [nn.AvgPool1d(4, 2, padding=2), nn.AvgPool1d(4, 2, padding=2)]
+        )
+
+    def forward(self, y, y_hat):
+        y_d_rs = []
+        y_d_gs = []
+        fmap_rs = []
+        fmap_gs = []
+        for i, d in enumerate(self.discriminators):
+            if i != 0:
+                y = self.meanpools[i - 1](y)
+                y_hat = self.meanpools[i - 1](y_hat)
+            y_d_r, fmap_r = d(y)
+            y_d_g, fmap_g = d(y_hat)
+            y_d_rs.append(y_d_r)
+            fmap_rs.append(fmap_r)
+            y_d_gs.append(y_d_g)
+            fmap_gs.append(fmap_g)
+
+        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
+def feature_loss(fmap_r, fmap_g):
+    loss = 0
+    for dr, dg in zip(fmap_r, fmap_g):
+        for rl, gl in zip(dr, dg):
+            loss += torch.mean(torch.abs(rl - gl))
+
+    return loss * 2
+
+
+def discriminator_loss(disc_real_outputs, disc_generated_outputs):
+    loss = 0
+    r_losses = []
+    g_losses = []
+    for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
+        r_loss = torch.mean((1 - dr) ** 2)
+        g_loss = torch.mean(dg**2)
+        loss += r_loss + g_loss
+        r_losses.append(r_loss.item())
+        g_losses.append(g_loss.item())
+
+    return loss, r_losses, g_losses
+
+
+def generator_loss(disc_outputs):
+    loss = 0
+    gen_losses = []
+    for dg in disc_outputs:
+        l = torch.mean((1 - dg) ** 2)
+        gen_losses.append(l)
+        loss += l
+
+    return loss, gen_losses
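
These are the LSGAN-style HiFi-GAN objectives: `discriminator_loss` pushes real outputs toward 1 and generated outputs toward 0, `generator_loss` pushes generated outputs toward 1, and `feature_loss` is an L1 feature-matching term over the discriminators' intermediate activations, scaled by 2. A hypothetical end-to-end training step wiring the pieces together; the optimizer settings and hop size are illustrative, and the full HiFi-GAN recipe additionally uses a mel-spectrogram L1 term, omitted here:

```python
import itertools
import torch

# hypothetical wiring; `h` is the config sketched after the generator above
gen = HifiganGenerator(h)
mpd, msd = MultiPeriodDiscriminator(), MultiScaleDiscriminator()
opt_g = torch.optim.AdamW(gen.parameters(), lr=2e-4, betas=(0.8, 0.99))
opt_d = torch.optim.AdamW(
    itertools.chain(mpd.parameters(), msd.parameters()), lr=2e-4, betas=(0.8, 0.99)
)

mel = torch.randn(4, 80, 32)       # (batch, mel bins, frames)
wav = torch.randn(4, 1, 32 * 256)  # matching waveforms, hop size 256 assumed

# discriminator step: real outputs toward 1, generated toward 0
fake = gen(mel)
opt_d.zero_grad()
for disc in (mpd, msd):
    real_out, fake_out, _, _ = disc(wav, fake.detach())
    d_loss, _, _ = discriminator_loss(real_out, fake_out)
    d_loss.backward()
opt_d.step()

# generator step: adversarial loss plus feature matching on both discriminators
opt_g.zero_grad()
g_loss = 0.0
for disc in (mpd, msd):
    _, fake_out, fmap_real, fmap_fake = disc(wav, fake)
    adv, _ = generator_loss(fake_out)
    g_loss = g_loss + adv + feature_loss(fmap_real, fmap_fake)
g_loss.backward()
opt_g.step()
```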