lt-tensor 0.0.1a10__py3-none-any.whl → 0.0.1a12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lt_tensor/__init__.py +2 -0
- lt_tensor/config_templates.py +97 -0
- lt_tensor/datasets/audio.py +21 -7
- lt_tensor/losses.py +98 -84
- lt_tensor/math_ops.py +1 -1
- lt_tensor/misc_utils.py +94 -7
- lt_tensor/model_base.py +298 -128
- lt_tensor/model_zoo/__init__.py +2 -2
- lt_tensor/model_zoo/bsc.py +25 -3
- lt_tensor/model_zoo/disc.py +55 -51
- lt_tensor/model_zoo/fsn.py +2 -2
- lt_tensor/model_zoo/gns.py +4 -4
- lt_tensor/model_zoo/istft/__init__.py +5 -0
- lt_tensor/model_zoo/istft/generator.py +150 -0
- lt_tensor/model_zoo/istft/trainer.py +450 -0
- lt_tensor/model_zoo/istft.py +508 -66
- lt_tensor/model_zoo/pos.py +2 -2
- lt_tensor/model_zoo/rsd.py +16 -146
- lt_tensor/model_zoo/tfrms.py +4 -4
- lt_tensor/noise_tools.py +3 -4
- lt_tensor/processors/audio.py +87 -16
- lt_tensor/transform.py +30 -61
- {lt_tensor-0.0.1a10.dist-info → lt_tensor-0.0.1a12.dist-info}/METADATA +3 -2
- lt_tensor-0.0.1a12.dist-info/RECORD +32 -0
- lt_tensor-0.0.1a10.dist-info/RECORD +0 -28
- {lt_tensor-0.0.1a10.dist-info → lt_tensor-0.0.1a12.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a10.dist-info → lt_tensor-0.0.1a12.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a10.dist-info → lt_tensor-0.0.1a12.dist-info}/top_level.txt +0 -0
lt_tensor/model_zoo/rsd.py
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
__all__ = [
|
2
2
|
"spectral_norm_select",
|
3
|
-
"
|
3
|
+
"get_weight_norm",
|
4
4
|
"ResBlock1D",
|
5
5
|
"ResBlock2D",
|
6
|
-
"ResBlocks",
|
7
6
|
]
|
8
7
|
from lt_utils.common import *
|
9
|
-
from
|
10
|
-
from
|
8
|
+
from lt_tensor.torch_commons import *
|
9
|
+
from lt_tensor.model_base import Model
|
10
|
+
from lt_tensor.misc_utils import log_tensor
|
11
11
|
import math
|
12
|
-
from ..misc_utils import log_tensor
|
13
12
|
|
14
13
|
|
15
14
|
def spectral_norm_select(module: nn.Module, enabled: bool):
|
@@ -18,87 +17,22 @@ def spectral_norm_select(module: nn.Module, enabled: bool):
|
|
18
17
|
return module
|
19
18
|
|
20
19
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
dilation: Union[Sequence[int], int] = (1, 3, 5),
|
28
|
-
activation: nn.Module = nn.LeakyReLU(0.1),
|
29
|
-
num_groups: int = 1,
|
30
|
-
batched: bool = True,
|
31
|
-
):
|
32
|
-
super().__init__()
|
33
|
-
self.conv = nn.ModuleList()
|
34
|
-
if isinstance(dilation, int):
|
35
|
-
dilation = [dilation]
|
36
|
-
|
37
|
-
if batched:
|
38
|
-
layernorm_fn = lambda x: nn.GroupNorm(num_groups=num_groups, num_channels=x)
|
39
|
-
else:
|
40
|
-
layernorm_fn = lambda x: nn.LayerNorm(normalized_shape=x)
|
41
|
-
for i, dil in enumerate(dilation):
|
42
|
-
|
43
|
-
self.conv.append(
|
44
|
-
nn.ModuleDict(
|
45
|
-
dict(
|
46
|
-
net=nn.Sequential(
|
47
|
-
self._get_conv_layer(
|
48
|
-
in_channels, in_channels, kernel_size, dil
|
49
|
-
),
|
50
|
-
activation,
|
51
|
-
self._get_conv_layer(
|
52
|
-
in_channels, in_channels, kernel_size, 1, True
|
53
|
-
),
|
54
|
-
activation,
|
55
|
-
),
|
56
|
-
l_norm=layernorm_fn(in_channels),
|
57
|
-
)
|
58
|
-
)
|
59
|
-
)
|
60
|
-
self.final = nn.Sequential(
|
61
|
-
self._get_conv_layer(in_channels, out_channels, kernel_size, 1, True),
|
62
|
-
activation,
|
63
|
-
)
|
64
|
-
self.conv.apply(self.init_weights)
|
20
|
+
def get_weight_norm(norm_type: Optional[Literal["weight", "spectral"]] = None):
|
21
|
+
if not norm_type:
|
22
|
+
return lambda x: x
|
23
|
+
if norm_type == "weight":
|
24
|
+
return lambda x: weight_norm(x)
|
25
|
+
return lambda x: spectral_norm(x)
|
65
26
|
|
66
|
-
def _get_conv_layer(
|
67
|
-
self,
|
68
|
-
channels_in: int,
|
69
|
-
channels_out: int,
|
70
|
-
kernel_size: int,
|
71
|
-
dilation: int,
|
72
|
-
pad_gate: bool = False,
|
73
|
-
):
|
74
|
-
return weight_norm(
|
75
|
-
nn.Conv1d(
|
76
|
-
in_channels=channels_in,
|
77
|
-
out_channels=channels_out,
|
78
|
-
kernel_size=kernel_size,
|
79
|
-
stride=1,
|
80
|
-
dilation=dilation,
|
81
|
-
padding=(
|
82
|
-
int((kernel_size * dilation - dilation) / 2)
|
83
|
-
if not pad_gate
|
84
|
-
else int((kernel_size * 1 - 1) / 2)
|
85
|
-
),
|
86
|
-
)
|
87
|
-
)
|
88
27
|
|
89
|
-
|
90
|
-
for i, layer in enumerate(self.conv):
|
91
|
-
xt = layer["net"](x)
|
92
|
-
x = xt + x
|
93
|
-
x = layer["l_norm"](x)
|
94
|
-
return self.final(x)
|
28
|
+
class ConvNets(Model):
|
95
29
|
|
96
30
|
def remove_weight_norm(self):
|
97
31
|
for module in self.modules():
|
98
32
|
try:
|
99
33
|
remove_weight_norm(module)
|
100
34
|
except ValueError:
|
101
|
-
pass
|
35
|
+
pass
|
102
36
|
|
103
37
|
@staticmethod
|
104
38
|
def init_weights(m, mean=0.0, std=0.01):
|
@@ -107,7 +41,7 @@ class ResBlock1D_BT(Model):
|
|
107
41
|
m.weight.data.normal_(mean, std)
|
108
42
|
|
109
43
|
|
110
|
-
class ResBlock1D(
|
44
|
+
class ResBlock1D(ConvNets):
|
111
45
|
def __init__(
|
112
46
|
self,
|
113
47
|
channels,
|
@@ -144,65 +78,6 @@ class ResBlock1D(Model):
|
|
144
78
|
x = cnn(x) + x
|
145
79
|
return x
|
146
80
|
|
147
|
-
def remove_weight_norm(self):
|
148
|
-
for module in self.modules():
|
149
|
-
try:
|
150
|
-
remove_weight_norm(module)
|
151
|
-
except ValueError:
|
152
|
-
pass # Not normed, skip
|
153
|
-
|
154
|
-
@staticmethod
|
155
|
-
def init_weights(m, mean=0.0, std=0.01):
|
156
|
-
classname = m.__class__.__name__
|
157
|
-
if "Conv" in classname:
|
158
|
-
m.weight.data.normal_(mean, std)
|
159
|
-
|
160
|
-
|
161
|
-
class ResBlocks(Model):
|
162
|
-
def __init__(
|
163
|
-
self,
|
164
|
-
channels: int,
|
165
|
-
resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
|
166
|
-
resblock_dilation_sizes: List[Union[int, List[int]]] = [
|
167
|
-
[1, 3, 5],
|
168
|
-
[1, 3, 5],
|
169
|
-
[1, 3, 5],
|
170
|
-
],
|
171
|
-
activation: nn.Module = nn.LeakyReLU(0.1),
|
172
|
-
):
|
173
|
-
super().__init__()
|
174
|
-
self.num_kernels = len(resblock_kernel_sizes)
|
175
|
-
self.rb = nn.ModuleList()
|
176
|
-
self.activation = activation
|
177
|
-
|
178
|
-
for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
|
179
|
-
self.rb.append(ResBlock1D(channels, k, j, activation))
|
180
|
-
|
181
|
-
self.rb.apply(self.init_weights)
|
182
|
-
|
183
|
-
def forward(self, x: torch.Tensor):
|
184
|
-
xs = None
|
185
|
-
for i, block in enumerate(self.rb):
|
186
|
-
if i == 0:
|
187
|
-
xs = block(x)
|
188
|
-
else:
|
189
|
-
xs += block(x)
|
190
|
-
x = xs / self.num_kernels
|
191
|
-
return self.activation(x)
|
192
|
-
|
193
|
-
def remove_weight_norm(self):
|
194
|
-
for module in self.modules():
|
195
|
-
try:
|
196
|
-
remove_weight_norm(module)
|
197
|
-
except ValueError:
|
198
|
-
pass # Not normed, skip
|
199
|
-
|
200
|
-
@staticmethod
|
201
|
-
def init_weights(m, mean=0.0, std=0.01):
|
202
|
-
classname = m.__class__.__name__
|
203
|
-
if "Conv" in classname:
|
204
|
-
m.weight.data.normal_(mean, std)
|
205
|
-
|
206
81
|
|
207
82
|
class ResBlock2D(Model):
|
208
83
|
def __init__(
|
@@ -210,25 +85,20 @@ class ResBlock2D(Model):
|
|
210
85
|
in_channels,
|
211
86
|
out_channels,
|
212
87
|
downsample=False,
|
213
|
-
spec_norm: bool = False,
|
214
88
|
):
|
215
89
|
super().__init__()
|
216
90
|
stride = 2 if downsample else 1
|
217
91
|
|
218
92
|
self.block = nn.Sequential(
|
219
|
-
|
220
|
-
nn.Conv2d(in_channels, out_channels, 3, stride, 1), spec_norm
|
221
|
-
),
|
93
|
+
nn.Conv2d(in_channels, out_channels, 3, stride, 1),
|
222
94
|
nn.LeakyReLU(0.2),
|
223
|
-
|
224
|
-
nn.Conv2d(out_channels, out_channels, 3, 1, 1), spec_norm
|
225
|
-
),
|
95
|
+
nn.Conv2d(out_channels, out_channels, 3, 1, 1),
|
226
96
|
)
|
227
97
|
|
228
98
|
self.skip = nn.Identity()
|
229
99
|
if downsample or in_channels != out_channels:
|
230
100
|
self.skip = spectral_norm_select(
|
231
|
-
nn.Conv2d(in_channels, out_channels, 1, stride)
|
101
|
+
nn.Conv2d(in_channels, out_channels, 1, stride)
|
232
102
|
)
|
233
103
|
# on less to be handled every cicle
|
234
104
|
self.sqrt_2 = math.sqrt(2)
|
lt_tensor/model_zoo/tfrms.py
CHANGED
@@ -7,12 +7,12 @@ __all__ = [
|
|
7
7
|
]
|
8
8
|
|
9
9
|
import math
|
10
|
-
from
|
11
|
-
from
|
10
|
+
from lt_tensor.torch_commons import *
|
11
|
+
from lt_tensor.model_base import Model
|
12
12
|
from lt_utils.misc_utils import default
|
13
13
|
from typing import Optional
|
14
|
-
from .pos import *
|
15
|
-
from .bsc import FeedForward
|
14
|
+
from lt_tensor.model_zoo.pos import *
|
15
|
+
from lt_tensor.model_zoo.bsc import FeedForward
|
16
16
|
|
17
17
|
|
18
18
|
def init_weights(module):
|
lt_tensor/noise_tools.py
CHANGED
@@ -14,10 +14,10 @@ __all__ = [
|
|
14
14
|
|
15
15
|
from lt_utils.common import *
|
16
16
|
import torch.nn.functional as F
|
17
|
-
from .torch_commons import *
|
17
|
+
from lt_tensor.torch_commons import *
|
18
18
|
import math
|
19
19
|
import random
|
20
|
-
from .misc_utils import set_seed
|
20
|
+
from lt_tensor.misc_utils import set_seed
|
21
21
|
|
22
22
|
|
23
23
|
def add_gaussian_noise(x: Tensor, noise_level=0.025):
|
@@ -271,9 +271,8 @@ class NoiseSchedulerB(nn.Module):
|
|
271
271
|
def forward(
|
272
272
|
self, x_0: Tensor, t: int, noise: Optional[Union[Tensor, float]] = None
|
273
273
|
) -> Tensor:
|
274
|
-
apply_noise()
|
275
274
|
assert (
|
276
|
-
|
275
|
+
0 <= t < self.timesteps
|
277
276
|
), f"Time step t={t} is out of bounds for scheduler with {self.timesteps} steps."
|
278
277
|
|
279
278
|
if noise is None:
|
lt_tensor/processors/audio.py
CHANGED
@@ -26,15 +26,12 @@ class AudioProcessor(Model):
|
|
26
26
|
mel_scale: Literal["htk", "slaney"] = "htk",
|
27
27
|
std: int = 4,
|
28
28
|
mean: int = -4,
|
29
|
-
inverse_transform_config: Union[
|
30
|
-
Dict[str, Union[Number, Tensor, bool]], InverseTransformConfig
|
31
|
-
] = dict(n_fft=16, hop_length=4, win_length=16, center=True),
|
32
29
|
n_iter: int = 32,
|
33
|
-
|
34
|
-
|
30
|
+
window: Optional[Tensor] = None,
|
31
|
+
normalized: bool =False,
|
32
|
+
onesided: Optional[bool] = None,
|
35
33
|
):
|
36
34
|
super().__init__()
|
37
|
-
assert isinstance(inverse_transform_config, (InverseTransformConfig, dict))
|
38
35
|
self.mean = mean
|
39
36
|
self.std = std
|
40
37
|
self.n_mels = n_mels
|
@@ -46,7 +43,8 @@ class AudioProcessor(Model):
|
|
46
43
|
self.hop_length = hop_length or n_fft // 4
|
47
44
|
self.win_length = win_length or n_fft
|
48
45
|
self.sample_rate = sample_rate
|
49
|
-
self.
|
46
|
+
self.center = center
|
47
|
+
self._mel_spec = torchaudio.transforms.MelSpectrogram(
|
50
48
|
sample_rate=sample_rate,
|
51
49
|
n_mels=n_mels,
|
52
50
|
n_fft=n_fft,
|
@@ -71,14 +69,87 @@ class AudioProcessor(Model):
|
|
71
69
|
win_length=win_length,
|
72
70
|
hop_length=hop_length,
|
73
71
|
)
|
74
|
-
|
75
|
-
|
76
|
-
|
72
|
+
self.normalized = normalized
|
73
|
+
self.onesided = onesided
|
74
|
+
|
75
|
+
self.register_buffer(
|
76
|
+
"window",
|
77
|
+
(torch.hann_window(self.win_length) if window is None else window),
|
78
|
+
)
|
79
|
+
# self._inv_transform = InverseTransform(**inverse_transform_config.to_dict())
|
80
|
+
|
81
|
+
def inverse_transform(
|
82
|
+
self,
|
83
|
+
spec: Tensor,
|
84
|
+
phase: Tensor,
|
85
|
+
n_fft: Optional[int] = None,
|
86
|
+
hop_length: Optional[int] = None,
|
87
|
+
win_length: Optional[int] = None,
|
88
|
+
length: Optional[int] = None,
|
89
|
+
*,
|
90
|
+
_recall: bool = False,
|
91
|
+
):
|
92
|
+
try:
|
93
|
+
return torch.istft(
|
94
|
+
spec * torch.exp(phase * 1j),
|
95
|
+
n_fft=n_fft or self.n_fft,
|
96
|
+
hop_length=hop_length or self.hop_length,
|
97
|
+
win_length=win_length or self.win_length,
|
98
|
+
window=torch.hann_window(win_length or self.win_length, device=spec.device),
|
99
|
+
center=self.center,
|
100
|
+
normalized=self.normalized,
|
101
|
+
onesided=self.onesided,
|
102
|
+
length=length,
|
103
|
+
return_complex=False,
|
77
104
|
)
|
78
|
-
|
105
|
+
except RuntimeError as e:
|
106
|
+
if not _recall and spec.device != self.window.device:
|
107
|
+
self.window = self.window.to(spec.device)
|
108
|
+
return self.inverse_transform(spec, phase, n_fft, hop_length, win_length, length, _recall=True)
|
109
|
+
raise e
|
79
110
|
|
80
|
-
def
|
81
|
-
|
111
|
+
def rebuild_spectrogram(
|
112
|
+
self,
|
113
|
+
wave: Tensor,
|
114
|
+
length: Optional[int] = None,
|
115
|
+
*,
|
116
|
+
_recall: bool = False,
|
117
|
+
):
|
118
|
+
try:
|
119
|
+
spectrogram = torch.stft(
|
120
|
+
input=wave,
|
121
|
+
n_fft=self.n_fft,
|
122
|
+
hop_length=self.hop_length,
|
123
|
+
win_length=self.win_length,
|
124
|
+
window=self.window,
|
125
|
+
center=self.center,
|
126
|
+
pad_mode="reflect",
|
127
|
+
normalized=self.normalized,
|
128
|
+
onesided=self.onesided,
|
129
|
+
return_complex=True, # needed for the istft
|
130
|
+
)
|
131
|
+
return torch.istft(
|
132
|
+
spectrogram
|
133
|
+
* torch.full(
|
134
|
+
spectrogram.size(),
|
135
|
+
fill_value=1,
|
136
|
+
device=spectrogram.device,
|
137
|
+
),
|
138
|
+
n_fft=self.n_fft,
|
139
|
+
hop_length=self.hop_length,
|
140
|
+
win_length=self.win_length,
|
141
|
+
window=self.window,
|
142
|
+
length=length,
|
143
|
+
center=self.center,
|
144
|
+
normalized=self.normalized,
|
145
|
+
onesided=self.onesided,
|
146
|
+
return_complex=False,
|
147
|
+
)
|
148
|
+
except RuntimeError as e:
|
149
|
+
if not _recall and wave.device != self.window.device:
|
150
|
+
self.window = self.window.to(wave.device)
|
151
|
+
return self.rebuild_spectrogram(wave, length, _recall=True)
|
152
|
+
raise e
|
82
153
|
|
83
154
|
def compute_mel(
|
84
155
|
self,
|
@@ -87,14 +158,14 @@ class AudioProcessor(Model):
|
|
87
158
|
add_base: bool = True,
|
88
159
|
) -> Tensor:
|
89
160
|
"""Returns: [B, M, ML]"""
|
90
|
-
mel_tensor = self.
|
161
|
+
mel_tensor = self._mel_spec(wave.to(self.device)) # [M, ML]
|
91
162
|
if not add_base:
|
92
163
|
return (mel_tensor - self.mean) / self.std
|
93
164
|
return (
|
94
165
|
(torch.log(base + mel_tensor.unsqueeze(0)) - self.mean) / self.std
|
95
166
|
).squeeze()
|
96
167
|
|
97
|
-
def
|
168
|
+
def inverse_mel_spectogram(self, mel: Tensor, n_iter: Optional[int] = None):
|
98
169
|
if isinstance(n_iter, int) and n_iter != self.n_iter:
|
99
170
|
self.giffin_lim = torchaudio.transforms.GriffinLim(
|
100
171
|
n_fft=self.n_fft,
|
@@ -251,6 +322,6 @@ class AudioProcessor(Model):
|
|
251
322
|
elif ap_task == "inv_transform":
|
252
323
|
return self.inverse_transform(*inputs, **inputs_kwargs)
|
253
324
|
elif ap_task == "revert_mel":
|
254
|
-
return self.
|
325
|
+
return self.inverse_mel_spectogram(*inputs, **inputs_kwargs)
|
255
326
|
else:
|
256
327
|
raise ValueError(f"Invalid task '{ap_task}'")
|
lt_tensor/transform.py
CHANGED
@@ -20,14 +20,14 @@ __all__ = [
|
|
20
20
|
"stft_istft_rebuild",
|
21
21
|
]
|
22
22
|
|
23
|
-
from .torch_commons import *
|
23
|
+
from lt_tensor.torch_commons import *
|
24
24
|
import torchaudio
|
25
25
|
import math
|
26
|
-
from .misc_utils import log_tensor
|
26
|
+
from lt_tensor.misc_utils import log_tensor
|
27
27
|
from lt_utils.common import *
|
28
28
|
from lt_utils.misc_utils import cache_wrapper, default
|
29
29
|
import torch.nn.functional as F
|
30
|
-
from .model_base import Model
|
30
|
+
from lt_tensor.model_base import Model
|
31
31
|
import warnings
|
32
32
|
|
33
33
|
|
@@ -332,8 +332,8 @@ class InverseTransformConfig:
|
|
332
332
|
def __init__(
|
333
333
|
self,
|
334
334
|
n_fft: int = 1024,
|
335
|
-
hop_length: Optional[int] = None,
|
336
335
|
win_length: Optional[int] = None,
|
336
|
+
hop_length: Optional[int] = None,
|
337
337
|
length: Optional[int] = None,
|
338
338
|
window: Optional[Tensor] = None,
|
339
339
|
onesided: Optional[bool] = None,
|
@@ -342,8 +342,8 @@ class InverseTransformConfig:
|
|
342
342
|
center: bool = True,
|
343
343
|
):
|
344
344
|
self.n_fft = n_fft
|
345
|
-
self.hop_length = hop_length
|
346
345
|
self.win_length = win_length
|
346
|
+
self.hop_length = hop_length
|
347
347
|
self.length = length
|
348
348
|
self.onesided = onesided
|
349
349
|
self.return_complex = return_complex
|
@@ -359,8 +359,8 @@ class InverseTransform(Model):
|
|
359
359
|
def __init__(
|
360
360
|
self,
|
361
361
|
n_fft: int = 1024,
|
362
|
-
hop_length: Optional[int] = None,
|
363
362
|
win_length: Optional[int] = None,
|
363
|
+
hop_length: Optional[int] = None,
|
364
364
|
length: Optional[int] = None,
|
365
365
|
window: Optional[Tensor] = None,
|
366
366
|
onesided: Optional[bool] = None,
|
@@ -378,10 +378,10 @@ class InverseTransform(Model):
|
|
378
378
|
----------
|
379
379
|
n_fft : int, optional
|
380
380
|
Size of FFT to use during inversion. Default is 1024.
|
381
|
-
hop_length : int, optional
|
382
|
-
Number of audio samples between STFT columns. Defaults to `n_fft`.
|
383
381
|
win_length : int, optional
|
384
382
|
Size of the window function. Defaults to `n_fft // 4`.
|
383
|
+
hop_length : int, optional
|
384
|
+
Number of audio samples between STFT columns. Defaults to `n_fft`.
|
385
385
|
length : int, optional
|
386
386
|
Output waveform length. If not provided, length will be inferred.
|
387
387
|
window : Tensor, optional
|
@@ -403,14 +403,8 @@ class InverseTransform(Model):
|
|
403
403
|
Updates ISTFT parameters dynamically (used internally during forward).
|
404
404
|
"""
|
405
405
|
super().__init__()
|
406
|
-
assert window is None or isinstance(window, Tensor)
|
407
|
-
assert
|
408
|
-
(
|
409
|
-
(not return_complex and not onesided),
|
410
|
-
(not onesided and return_complex),
|
411
|
-
(not return_complex and onesided),
|
412
|
-
)
|
413
|
-
)
|
406
|
+
assert window is None or isinstance(window, (Tensor, nn.Module))
|
407
|
+
assert not bool(return_complex and onesided)
|
414
408
|
self.n_fft = n_fft
|
415
409
|
self.length = length
|
416
410
|
self.win_length = win_length or n_fft // 4
|
@@ -419,46 +413,9 @@ class InverseTransform(Model):
|
|
419
413
|
self.return_complex = return_complex
|
420
414
|
self.onesided = onesided
|
421
415
|
self.normalized = normalized
|
422
|
-
self.window
|
423
|
-
self.update_settings()
|
424
|
-
|
425
|
-
def _apply_device_to(self):
|
426
|
-
"""Applies to device while used with module `Model`"""
|
427
|
-
self.window = self.window.to(device=self.device)
|
428
|
-
|
429
|
-
def update_settings(
|
430
|
-
self,
|
431
|
-
*,
|
432
|
-
n_fft: Optional[int] = None,
|
433
|
-
hop_length: Optional[int] = None,
|
434
|
-
win_length: Optional[int] = None,
|
435
|
-
length: Optional[int] = None,
|
436
|
-
window: Optional[Tensor] = None,
|
437
|
-
onesided: Optional[bool] = None,
|
438
|
-
return_complex: Optional[bool] = None,
|
439
|
-
center: Optional[bool] = None,
|
440
|
-
normalized: Optional[bool] = None,
|
441
|
-
**_,
|
442
|
-
):
|
443
|
-
|
444
|
-
self.kwargs = dict(
|
445
|
-
n_fft=default(n_fft, self.n_fft),
|
446
|
-
hop_length=default(hop_length, self.hop_length),
|
447
|
-
win_length=default(win_length, self.win_length),
|
448
|
-
length=default(length, self.length),
|
449
|
-
window=default(window, self.window),
|
450
|
-
onesided=default(onesided, self.onesided),
|
451
|
-
return_complex=default(return_complex, self.return_complex),
|
452
|
-
center=default(center, self.center),
|
453
|
-
normalized=default(normalized, self.normalized),
|
454
|
-
)
|
455
|
-
if self.kwargs["onesided"] and self.kwargs["return_complex"]:
|
456
|
-
warnings.warn(
|
457
|
-
"You cannot use return_complex with `onesided` enabled. `return_complex` is set to False."
|
458
|
-
)
|
459
|
-
self.kwargs["return_complex"] = False
|
416
|
+
self.register_buffer('window', torch.hann_window(self.win_length) if window is None else window)
|
460
417
|
|
461
|
-
def forward(self, spec: Tensor, phase: Tensor,
|
418
|
+
def forward(self, spec: Tensor, phase: Tensor, *, _recall:bool = False):
|
462
419
|
"""
|
463
420
|
Perform the inverse short-time Fourier transform.
|
464
421
|
|
@@ -468,15 +425,27 @@ class InverseTransform(Model):
|
|
468
425
|
Magnitude spectrogram of shape (batch, freq, time).
|
469
426
|
phase : Tensor
|
470
427
|
Phase angles tensor, same shape as `spec`, in radians.
|
471
|
-
**kwargs : dict, optional
|
472
|
-
Optional ISTFT override parameters (same as in `update_settings`).
|
473
428
|
|
474
429
|
Returns
|
475
430
|
-------
|
476
431
|
Tensor
|
477
432
|
Time-domain waveform reconstructed from `spec` and `phase`.
|
478
433
|
"""
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
434
|
+
try:
|
435
|
+
return torch.istft(
|
436
|
+
spec * torch.exp(phase * 1j),
|
437
|
+
n_fft = self.n_fft,
|
438
|
+
hop_length=self.hop_length,
|
439
|
+
win_length=self.win_length,
|
440
|
+
window=self.window,
|
441
|
+
center=self.center,
|
442
|
+
normalized=self.normalized,
|
443
|
+
onesided=self.onesided,
|
444
|
+
length=self.length,
|
445
|
+
return_complex=self.return_complex,
|
446
|
+
)
|
447
|
+
except RuntimeError as e:
|
448
|
+
if not _recall and spec.device != self.window.device:
|
449
|
+
self.window = self.window.to(spec.device)
|
450
|
+
return self.forward(spec, phase, _recall=True)
|
451
|
+
raise e
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: lt-tensor
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.1a12
|
4
4
|
Summary: General utilities for PyTorch and others. Built for general use.
|
5
5
|
Home-page: https://github.com/gr1336/lt-tensor/
|
6
6
|
Author: gr1336
|
@@ -17,8 +17,9 @@ Requires-Dist: numpy>=1.26.4
|
|
17
17
|
Requires-Dist: tokenizers
|
18
18
|
Requires-Dist: pyyaml>=6.0.0
|
19
19
|
Requires-Dist: numba>0.60.0
|
20
|
-
Requires-Dist: lt-utils
|
20
|
+
Requires-Dist: lt-utils>=0.0.2a1
|
21
21
|
Requires-Dist: librosa>=0.11.0
|
22
|
+
Requires-Dist: plotly
|
22
23
|
Dynamic: author
|
23
24
|
Dynamic: classifier
|
24
25
|
Dynamic: description
|
@@ -0,0 +1,32 @@
|
|
1
|
+
lt_tensor/__init__.py,sha256=XxNCGcVL-haJyMpifr-GRaamo32R6jmqe3iOuS4ecfs,469
|
2
|
+
lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss,3515
|
3
|
+
lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
|
4
|
+
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
5
|
+
lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
|
6
|
+
lt_tensor/misc_utils.py,sha256=kdZZrNpFLm5nxAao0j4YYI6rlUet_ZTuIeUJ-1wYvj0,27804
|
7
|
+
lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
|
8
|
+
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
9
|
+
lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
|
10
|
+
lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
|
11
|
+
lt_tensor/transform.py,sha256=FGlpNrIZWn57zeqEKw6zKttOnE3di3bm9zk3zPAoHy8,13442
|
12
|
+
lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
lt_tensor/datasets/audio.py,sha256=o7qTBUznYwUlJtU5tsUY6uD7PkhqmK6VsFf0eF4k3Ww,4316
|
14
|
+
lt_tensor/model_zoo/__init__.py,sha256=ZG2mFsDyR75Y5o2q2O4Vn1txI2DqBy-ZVrtyhu2zYfs,286
|
15
|
+
lt_tensor/model_zoo/bsc.py,sha256=UvKHdO4KvDlI2820cFcbTnUka8WSmn0N_qs_U3Gu5JY,7064
|
16
|
+
lt_tensor/model_zoo/disc.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lDGHsjJE8,5849
|
17
|
+
lt_tensor/model_zoo/fsn.py,sha256=S1U-70AXpHdYEggaI_WWWZSGlp9qBebYzRaDXHhECFk,2118
|
18
|
+
lt_tensor/model_zoo/gns.py,sha256=lNFmNP-It5JIhMDISUknD4HWwZCqX0Q0dIw9DhuUjps,6060
|
19
|
+
lt_tensor/model_zoo/istft.py,sha256=AaojkrBR3v9VGwiLSbbObiYXnGuU7hD636Z0wjtNmxY,20234
|
20
|
+
lt_tensor/model_zoo/pos.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
|
21
|
+
lt_tensor/model_zoo/rsd.py,sha256=R2ZyNOzSI6kNKUHMqMURJ7jqNRQMlVzaXr4S1K4fvws,2907
|
22
|
+
lt_tensor/model_zoo/tfrms.py,sha256=b2rRfD8ZFquD5wZKcZ_dGJ2enZ3exxdTrQE4GuPYc_o,4291
|
23
|
+
lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
|
24
|
+
lt_tensor/model_zoo/istft/generator.py,sha256=rcA7pRyoflbvC__cbYpDFup3k4YyKFu3Ldi2F0rateM,5247
|
25
|
+
lt_tensor/model_zoo/istft/trainer.py,sha256=cc5-YGT_GJ7py3aNnXuSHxAcXcDU6AaMh8hRAfYqvSs,15606
|
26
|
+
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
27
|
+
lt_tensor/processors/audio.py,sha256=kQEiyVuiMXQ_zmOd_7XDW_xRwBnFnO9GpPU-FKP9B7Y,11204
|
28
|
+
lt_tensor-0.0.1a12.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
29
|
+
lt_tensor-0.0.1a12.dist-info/METADATA,sha256=lFa5SuofIHXBqyT-4wj0m3XPmsYrXe4NEUOJjM0fjZw,990
|
30
|
+
lt_tensor-0.0.1a12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
31
|
+
lt_tensor-0.0.1a12.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
32
|
+
lt_tensor-0.0.1a12.dist-info/RECORD,,
|
@@ -1,28 +0,0 @@
|
|
1
|
-
lt_tensor/__init__.py,sha256=uwJ7uiO18VYj8Z1V4KSOQ3ZrnowSgJWKCIiFBrzLMOI,429
|
2
|
-
lt_tensor/losses.py,sha256=TinZJP2ypZ7Tdg6d9nnFWFkPyormfgQ0Z9P2ER3sqzE,4341
|
3
|
-
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
4
|
-
lt_tensor/math_ops.py,sha256=ewIYkvxIy_Lab_9ExjFUgLs-oYLOu8IRRDo7f1pn3i8,2248
|
5
|
-
lt_tensor/misc_utils.py,sha256=N9Rf-i6m51Q3YYdmI5tI5Rb3wPz8OAJrTrLlqfCwWrk,24792
|
6
|
-
lt_tensor/model_base.py,sha256=8qN7oklALFanOz-eqVzdnB9RD2kN_3ltynSMAPOl-TI,13413
|
7
|
-
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
8
|
-
lt_tensor/noise_tools.py,sha256=JkWw0-bCMRNNMShwXKKt5KbO3104tvNiBePt-ThPkEo,11366
|
9
|
-
lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
|
10
|
-
lt_tensor/transform.py,sha256=Bxh87vFRKuZay_g1Alf_ZtEo89CzmV3XUQDINwHB7iA,14505
|
11
|
-
lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
lt_tensor/datasets/audio.py,sha256=YREyRsCvy-KS5tE0JNMWEdlIJogE1khLqhiq4wOWXVg,3777
|
13
|
-
lt_tensor/model_zoo/__init__.py,sha256=jipEk50_DTMQbGg8FnDDukxmh7Bcwvl_QVRS3rkb7aY,283
|
14
|
-
lt_tensor/model_zoo/bsc.py,sha256=muxIR7dU-Pvf-HFE-iy3zmRb1sTJlcs1vqdlnbU1Hss,6307
|
15
|
-
lt_tensor/model_zoo/disc.py,sha256=SphFVVPZLP96-mZPEvWD_up2aT63rqSPjnps1-j9D6w,5707
|
16
|
-
lt_tensor/model_zoo/fsn.py,sha256=5ySsg2OHjvTV_coPAdZQ0f7bz4ugJB8mDYsItmd61qA,2102
|
17
|
-
lt_tensor/model_zoo/gns.py,sha256=Tirr_grONp_FFQ_L7K-zV2lvkaC39h8mMl4QDpx9vLQ,6028
|
18
|
-
lt_tensor/model_zoo/istft.py,sha256=0Xms2QNPAgz_ib8XTfaWl1SCHgS53oKC6-EkDkl_qe4,4863
|
19
|
-
lt_tensor/model_zoo/pos.py,sha256=N28v-rF8CELouYxQ9r45Jbd4ri5DNydwDgg7nzmQ4Ig,4471
|
20
|
-
lt_tensor/model_zoo/rsd.py,sha256=5bba50g1Hm5kMexuJ4SwOIJuyQ1qJd8Acrq-Ax6CqE8,6958
|
21
|
-
lt_tensor/model_zoo/tfrms.py,sha256=kauh-A13pk08SZ5OspEE5a-gPKD4rZr6tqMKWu3KGhk,4237
|
22
|
-
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
23
|
-
lt_tensor/processors/audio.py,sha256=2Sta_KytTqGZh-ZeHpcCbqP6O8VT6QQVkx-7szA3Itc,8830
|
24
|
-
lt_tensor-0.0.1a10.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
25
|
-
lt_tensor-0.0.1a10.dist-info/METADATA,sha256=-VDQmGfkd5uW4_8B_TbwH-xvRivsGn3jWEtXTyeCT0s,966
|
26
|
-
lt_tensor-0.0.1a10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
27
|
-
lt_tensor-0.0.1a10.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
28
|
-
lt_tensor-0.0.1a10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|