lt-tensor 0.0.1a17__py3-none-any.whl → 0.0.1a18__py3-none-any.whl
This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- lt_tensor/model_zoo/audio_models/__init__.py +1 -0
- lt_tensor/model_zoo/audio_models/diffwave/__init__.py +3 -6
- lt_tensor/model_zoo/audio_models/hifigan/__init__.py +42 -15
- lt_tensor/model_zoo/audio_models/istft/__init__.py +49 -23
- {lt_tensor-0.0.1a17.dist-info → lt_tensor-0.0.1a18.dist-info}/METADATA +1 -1
- {lt_tensor-0.0.1a17.dist-info → lt_tensor-0.0.1a18.dist-info}/RECORD +9 -9
- {lt_tensor-0.0.1a17.dist-info → lt_tensor-0.0.1a18.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a17.dist-info → lt_tensor-0.0.1a18.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a17.dist-info → lt_tensor-0.0.1a18.dist-info}/top_level.txt +0 -0
lt_tensor/model_zoo/audio_models/diffwave/__init__.py

@@ -1,4 +1,4 @@
-__all__ = ["DiffWave", "
+__all__ = ["DiffWave", "DiffWaveConfig", "SpectrogramUpsample", "DiffusionEmbedding"]
 
 import numpy as np
 import torch
@@ -10,8 +10,6 @@ from lt_tensor.model_base import Model
 from math import sqrt
 from lt_utils.common import *
 
-F.t
-
 
 class DiffWaveConfig(ModelConfig):
     # Training params
@@ -40,7 +38,6 @@ class DiffWaveConfig(ModelConfig):
         settings: Dict[str, Any] = {},
         path_name: Optional[Union[str, PathLike]] = None,
     ):
-        self._forbidden_list.extend()
         super().__init__(settings, path_name)
 
 
@@ -86,7 +83,7 @@ class DiffusionEmbedding(Model):
         return table
 
 
-class 
+class SpectrogramUpsample(Model):
     def __init__(self):
         super().__init__()
         self.conv1 = nn.ConvTranspose2d(1, 1, [3, 32], stride=[1, 16], padding=[1, 8])
@@ -162,7 +159,7 @@ class DiffWave(Model):
         if self.params.unconditional:  # use unconditional model
             self.spectrogram_upsample = None
         else:
-            self.spectrogram_upsample = 
+            self.spectrogram_upsample = SpectrogramUpsample()
 
         self.residual_layers = nn.ModuleList(
             [
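For reference, the renamed SpectrogramUpsample module is built around a 2-D transposed convolution that stretches the mel spectrogram along the time axis. A minimal standalone sketch of the layer configured in the diff above; the 80-mel, 100-frame input is an assumed example, not taken from the diff:

import torch
import torch.nn as nn

# Same layer as self.conv1 above: stride 16 on the time axis stretches each
# mel frame roughly 16x (the reference DiffWave stacks two such layers to
# match its hop length).
conv1 = nn.ConvTranspose2d(1, 1, [3, 32], stride=[1, 16], padding=[1, 8])
mel = torch.randn(1, 1, 80, 100)   # (batch, 1, n_mels, frames), assumed shape
print(conv1(mel).shape)            # torch.Size([1, 1, 80, 1600])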
lt_tensor/model_zoo/audio_models/hifigan/__init__.py

@@ -1,4 +1,4 @@
-__all__ = ["HifiganGenerator"]
+__all__ = ["HifiganGenerator", "HifiganConfig"]
 from lt_utils.common import *
 from lt_tensor.torch_commons import *
 from lt_tensor.model_zoo.residual import ConvNets
@@ -13,6 +13,33 @@ def get_padding(kernel_size, dilation=1):
     return int((kernel_size * dilation - dilation) / 2)
 
 
+from lt_tensor.config_templates import ModelConfig
+
+
+class HifiganConfig(ModelConfig):
+    # Training params
+    in_channels: int = 80
+    upsample_rates: List[Union[int, List[int]]] = [8, 8]
+    upsample_kernel_sizes: List[Union[int, List[int]]] = [16, 16]
+    upsample_initial_channel: int = (512,)
+    resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11]
+    resblock_dilation_sizes: List[Union[int, List[int]]] = [
+        [1, 3, 5],
+        [1, 3, 5],
+        [1, 3, 5],
+    ]
+
+    activation: nn.Module = nn.LeakyReLU(0.1)
+    resblock: int = 0
+
+    def __init__(
+        self,
+        settings: Dict[str, Any] = {},
+        path_name: Optional[Union[str, PathLike]] = None,
+    ):
+        super().__init__(settings, path_name)
+
+
 class ResBlock1(ConvNets):
     def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
         super().__init__()
@@ -142,23 +169,23 @@ class ResBlock2(ConvNets):
 
 
 class HifiganGenerator(ConvNets):
-    def __init__(self, 
+    def __init__(self, cfg: HifiganConfig = HifiganConfig()):
         super().__init__()
-        self.
-        self.num_kernels = len(
-        self.num_upsamples = len(
+        self.cfg = cfg
+        self.num_kernels = len(cfg.resblock_kernel_sizes)
+        self.num_upsamples = len(cfg.upsample_rates)
         self.conv_pre = weight_norm(
-            nn.Conv1d(
+            nn.Conv1d(cfg.in_channels, cfg.upsample_initial_channel, 7, 1, padding=3)
         )
-        resblock = ResBlock1 if 
-        self.activation = 
+        resblock = ResBlock1 if resblock == 0 else ResBlock2
+        self.activation = cfg.activation
         self.ups = nn.ModuleList()
-        for i, (u, k) in enumerate(zip(
+        for i, (u, k) in enumerate(zip(cfg.psample_rates, cfg.upsample_kernel_sizes)):
             self.ups.append(
                 weight_norm(
                     nn.ConvTranspose1d(
-
-
+                        cfg.upsample_initial_channel // (2**i),
+                        cfg.upsample_initial_channel // (2 ** (i + 1)),
                         k,
                         u,
                         padding=(k - u) // 2,
@@ -168,17 +195,17 @@ class HifiganGenerator(ConvNets):
 
         self.resblocks = nn.ModuleList()
        for i in range(len(self.ups)):
-            ch = 
+            ch = cfg.upsample_initial_channel // (2 ** (i + 1))
             for j, (k, d) in enumerate(
-                zip(
+                zip(cfg.resblock_kernel_sizes, cfg.resblock_dilation_sizes)
             ):
-                self.resblocks.append(resblock(
+                self.resblocks.append(resblock(ch, k, d))
 
         self.conv_post = weight_norm(nn.Conv1d(ch, 1, 7, 1, padding=3))
         self.ups.apply(self.init_weights)
         self.conv_post.apply(self.init_weights)
 
-    def forward(self, x):
+    def forward(self, x: Tensor):
         x = self.conv_pre(x)
         for i in range(self.num_upsamples):
             x = self.ups[i](self.activation(x))
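With this change, HifiganGenerator no longer receives its hyperparameters individually; they travel in a HifiganConfig object. A rough sketch of the call pattern implied by the new signature; how ModelConfig consumes the settings dict, and the mel input shape, are assumptions not confirmed by this diff:

# Hypothetical usage implied by the new constructor; the settings-dict
# handling inside ModelConfig and the mel shape are assumptions.
import torch
from lt_tensor.model_zoo.audio_models.hifigan import HifiganGenerator, HifiganConfig

cfg = HifiganConfig(settings={"upsample_rates": [8, 8, 2, 2],
                              "upsample_kernel_sizes": [16, 16, 4, 4]})
gen = HifiganGenerator(cfg)                 # generator now reads everything from cfg
mel = torch.randn(1, cfg.in_channels, 120)  # (batch, n_mels, frames), assumed shape
wav = gen(mel)                              # (batch, 1, frames * prod(upsample_rates))

Centralizing the hyperparameters in a config object also lets the same settings be saved and reloaded through ModelConfig's settings/path_name machinery instead of being re-passed at every construction site.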
lt_tensor/model_zoo/audio_models/istft/__init__.py

@@ -1,8 +1,35 @@
-__all__ = ["
+__all__ = ["iSTFTNetGenerator", "iSTFTNetConfig"]
 from lt_utils.common import *
 from lt_tensor.torch_commons import *
 from lt_tensor.model_zoo.residual import ConvNets
 from torch.nn import functional as F
+from lt_tensor.config_templates import ModelConfig
+
+
+class iSTFTNetConfig(ModelConfig):
+    # Training params
+    in_channels: int = 80
+    upsample_rates: List[Union[int, List[int]]] = [8, 8]
+    upsample_kernel_sizes: List[Union[int, List[int]]] = [16, 16]
+    upsample_initial_channel: int = (512,)
+    resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11]
+    resblock_dilation_sizes: List[Union[int, List[int]]] = [
+        [1, 3, 5],
+        [1, 3, 5],
+        [1, 3, 5],
+    ]
+
+    activation: nn.Module = nn.LeakyReLU(0.1)
+    resblock: int = 0
+    gen_istft_n_fft: int = 16
+    sampling_rate: Number = 24000
+
+    def __init__(
+        self,
+        settings: Dict[str, Any] = {},
+        path_name: Optional[Union[str, PathLike]] = None,
+    ):
+        super().__init__(settings, path_name)
 
 
 def get_padding(ks, d):
@@ -10,9 +37,8 @@ def get_padding(ks, d):
 
 
 class ResBlock1(ConvNets):
-    def __init__(self, 
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
         super().__init__()
-        self.h = h
         self.convs1 = nn.ModuleList(
             [
                 weight_norm(
@@ -95,10 +121,10 @@ class ResBlock1(ConvNets):
         x = xt + x
         return x
 
+
 class ResBlock2(ConvNets):
-    def __init__(self, 
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
         super().__init__()
-        self.h = h
         self.convs = nn.ModuleList(
             [
                 weight_norm(
@@ -134,25 +160,25 @@ class ResBlock2(ConvNets):
         return x
 
 
-class 
-    def __init__(self, 
+class iSTFTNetGenerator(ConvNets):
+    def __init__(self, cfg: iSTFTNetConfig = iSTFTNetConfig()):
         super().__init__()
-        self.
-        self.num_kernels = len(
-        self.num_upsamples = len(
+        self.cfg = cfg
+        self.num_kernels = len(cfg.resblock_kernel_sizes)
+        self.num_upsamples = len(cfg.upsample_rates)
         self.conv_pre = weight_norm(
-            nn.Conv1d(
+            nn.Conv1d(cfg.in_channels, cfg.upsample_initial_channel, 7, 1, padding=3)
         )
-        resblock = ResBlock1 if 
+        resblock = ResBlock1 if resblock == 0 else ResBlock2
 
         self.ups = nn.ModuleList()
-        for i, (u, k) in enumerate(zip(
-            if 
+        for i, (u, k) in enumerate(zip(cfg.upsample_rates, cfg.upsample_kernel_sizes)):
+            if cfg.sampling_rate % 16000:
                 self.ups.append(
                     weight_norm(
                         nn.ConvTranspose1d(
-
-
+                            cfg.upsample_initial_channel // (2**i),
+                            cfg.upsample_initial_channel // (2 ** (i + 1)),
                             k,
                             u,
                             padding=(k - u) // 2,
@@ -163,8 +189,8 @@ class iSTFTGenerator(ConvNets):
                 self.ups.append(
                     weight_norm(
                         nn.ConvTranspose1d(
-
-
+                            cfg.upsample_initial_channel // (2**i),
+                            cfg.upsample_initial_channel // (2 ** (i + 1)),
                             k,
                             u,
                             padding=(u // 2 + u % 2),
@@ -175,19 +201,19 @@ class iSTFTGenerator(ConvNets):
 
         self.resblocks = nn.ModuleList()
         for i in range(len(self.ups)):
-            ch = 
+            ch = cfg.upsample_initial_channel // (2 ** (i + 1))
             for j, (k, d) in enumerate(
-                zip(
+                zip(cfg.resblock_kernel_sizes, cfg.resblock_dilation_sizes)
             ):
-                self.resblocks.append(resblock(
+                self.resblocks.append(resblock(ch, k, d))
 
-        self.post_n_fft = 
+        self.post_n_fft = cfg.gen_istft_n_fft
         self.conv_post = weight_norm(
             nn.Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3)
         )
         self.ups.apply(self.init_weights)
         self.conv_post.apply(self.init_weights)
-        self.activation = 
+        self.activation = cfg.activation
         self.reflection_pad = torch.nn.ReflectionPad1d((1, 0))
 
     def forward(self, x):
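iSTFTNetConfig follows the same pattern as HifiganConfig, adding gen_istft_n_fft and sampling_rate. The conv_post above emits post_n_fft + 2 channels, which iSTFTNet-style vocoders typically split into magnitude and phase before an inverse STFT; the sketch below illustrates that final step under those assumptions. The hop length and frame count are arbitrary, and the actual forward pass of iSTFTNetGenerator is not part of this diff:

import torch

# Illustration of the usual iSTFTNet output head: split the (n_fft + 2)-channel
# feature map into log-magnitude and phase, rebuild a complex spectrogram, and
# invert it with torch.istft. Values here are example placeholders.
n_fft, hop, frames = 16, 4, 200          # gen_istft_n_fft defaults to 16 in the config above
spec = torch.randn(1, n_fft + 2, frames)             # shaped like conv_post's output
log_mag, phase = spec.split(n_fft // 2 + 1, dim=1)   # two chunks of n_fft/2 + 1 bins
complex_spec = torch.exp(log_mag) * torch.exp(1j * phase)
wav = torch.istft(complex_spec, n_fft=n_fft, hop_length=hop,
                  window=torch.hann_window(n_fft))
print(wav.shape)                         # roughly frames * hop samples of audio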
{lt_tensor-0.0.1a17.dist-info → lt_tensor-0.0.1a18.dist-info}/RECORD

@@ -18,14 +18,14 @@ lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI
 lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
 lt_tensor/model_zoo/residual.py,sha256=i5V4ju7DB3WesKBVm6KH_LyPoKGDUOyo2Usfs-PyP58,9394
 lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
-lt_tensor/model_zoo/audio_models/__init__.py,sha256=
-lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=
-lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=
-lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=
+lt_tensor/model_zoo/audio_models/__init__.py,sha256=MoG9YjxLyvscq_6njK1ljGBletK9iedBXt66bplzW-s,83
+lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=R14hY-nCbCO-T3ox9f4MXCPgQQogFUKAJ2WtntLz09w,7393
+lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=6ZGYyNiTMGHnOjGU0gq_TSM8Y9LtYlP3neGwa01Ghyk,13135
+lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=noi4GLGZQ_qg5H-ipe5d7j8rvt4Hic_sXiME-TE-B2c,13783
 lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
 lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
-lt_tensor-0.0.
-lt_tensor-0.0.
-lt_tensor-0.0.
-lt_tensor-0.0.
-lt_tensor-0.0.
+lt_tensor-0.0.1a18.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
+lt_tensor-0.0.1a18.dist-info/METADATA,sha256=fgRzOiw5tMmkaEY9HrGEKNL2v9mN5JVbf9r-bf18Am0,1033
+lt_tensor-0.0.1a18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lt_tensor-0.0.1a18.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+lt_tensor-0.0.1a18.dist-info/RECORD,,
WHEEL, licenses/LICENSE, and top_level.txt: files without changes.