lt-tensor 0.0.1a12__py3-none-any.whl → 0.0.1a14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lt_tensor/datasets/audio.py +141 -46
- lt_tensor/misc_utils.py +38 -1
- lt_tensor/model_zoo/__init__.py +18 -9
- lt_tensor/model_zoo/{bsc.py → basic.py} +118 -2
- lt_tensor/model_zoo/features.py +416 -0
- lt_tensor/model_zoo/fusion.py +164 -0
- lt_tensor/model_zoo/istft/generator.py +5 -65
- lt_tensor/model_zoo/istft/sg.py +142 -0
- lt_tensor/model_zoo/istft/trainer.py +227 -59
- lt_tensor/model_zoo/residual.py +252 -0
- lt_tensor/model_zoo/{tfrms.py → transformer.py} +2 -2
- lt_tensor/processors/audio.py +207 -80
- lt_tensor/transform.py +7 -16
- {lt_tensor-0.0.1a12.dist-info → lt_tensor-0.0.1a14.dist-info}/METADATA +7 -5
- lt_tensor-0.0.1a14.dist-info/RECORD +32 -0
- lt_tensor/model_zoo/fsn.py +0 -67
- lt_tensor/model_zoo/gns.py +0 -185
- lt_tensor/model_zoo/istft.py +0 -591
- lt_tensor/model_zoo/rsd.py +0 -107
- lt_tensor-0.0.1a12.dist-info/RECORD +0 -32
- /lt_tensor/model_zoo/{disc.py → discriminator.py} +0 -0
- /lt_tensor/model_zoo/{pos.py → pos_encoder.py} +0 -0
- {lt_tensor-0.0.1a12.dist-info → lt_tensor-0.0.1a14.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a12.dist-info → lt_tensor-0.0.1a14.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a12.dist-info → lt_tensor-0.0.1a14.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: lt-tensor
|
3
|
-
Version: 0.0.1a12
|
3
|
+
Version: 0.0.1a14
|
4
4
|
Summary: General utilities for PyTorch and others. Built for general use.
|
5
5
|
Home-page: https://github.com/gr1336/lt-tensor/
|
6
6
|
Author: gr1336
|
@@ -11,15 +11,17 @@ Classifier: Topic :: Software Development :: Libraries
|
|
11
11
|
Classifier: Topic :: Utilities
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
License-File: LICENSE
|
14
|
-
Requires-Dist: torch>=2.
|
15
|
-
Requires-Dist: torchaudio>=2.
|
14
|
+
Requires-Dist: torch>=2.7.0
|
15
|
+
Requires-Dist: torchaudio>=2.7.0
|
16
16
|
Requires-Dist: numpy>=1.26.4
|
17
17
|
Requires-Dist: tokenizers
|
18
18
|
Requires-Dist: pyyaml>=6.0.0
|
19
19
|
Requires-Dist: numba>0.60.0
|
20
|
-
Requires-Dist: lt-utils
|
21
|
-
Requires-Dist: librosa
|
20
|
+
Requires-Dist: lt-utils==0.0.2a2
|
21
|
+
Requires-Dist: librosa==0.11.*
|
22
|
+
Requires-Dist: einops
|
22
23
|
Requires-Dist: plotly
|
24
|
+
Requires-Dist: scipy
|
23
25
|
Dynamic: author
|
24
26
|
Dynamic: classifier
|
25
27
|
Dynamic: description
|
@@ -0,0 +1,32 @@
|
|
1
|
+
lt_tensor/__init__.py,sha256=XxNCGcVL-haJyMpifr-GRaamo32R6jmqe3iOuS4ecfs,469
|
2
|
+
lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss,3515
|
3
|
+
lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
|
4
|
+
lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
|
5
|
+
lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
|
6
|
+
lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
|
7
|
+
lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
|
8
|
+
lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
|
9
|
+
lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
|
10
|
+
lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
|
11
|
+
lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
|
12
|
+
lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
lt_tensor/datasets/audio.py,sha256=j73oRyXt-AK4tWWYWjH-3c5RYouQBgDSCTuWHmyG8kQ,7450
|
14
|
+
lt_tensor/model_zoo/__init__.py,sha256=RzG7fltZLyiIU_Za4pgfBPli5uPITiJkq4sTCd4uA_0,319
|
15
|
+
lt_tensor/model_zoo/basic.py,sha256=_26H_jJk5Ld3DZiNpIhGosGfMxoFDZrI8bpDAYUOYno,10660
|
16
|
+
lt_tensor/model_zoo/discriminator.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lDGHsjJE8,5849
|
17
|
+
lt_tensor/model_zoo/features.py,sha256=CTFMidzza31pqQjwPfp_g0BNVfuQ8Dlo5JnxpYpKgag,13144
|
18
|
+
lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
|
19
|
+
lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
|
20
|
+
lt_tensor/model_zoo/residual.py,sha256=3tc2fJaz6SxtKYAsxndahhwIxlN6oLk5tcdIXtUKaQc,7357
|
21
|
+
lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
|
22
|
+
lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
|
23
|
+
lt_tensor/model_zoo/istft/generator.py,sha256=wWHUfLFIItN-tB3pWkc1r9aTWpHYBFg7UfvLN4_cD78,3179
|
24
|
+
lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
|
25
|
+
lt_tensor/model_zoo/istft/trainer.py,sha256=KZXsAptOJeLYlr6t-DPX1qxgN526-2EBKoQQlcsHp8Y,21054
|
26
|
+
lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
|
27
|
+
lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
|
28
|
+
lt_tensor-0.0.1a14.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
|
29
|
+
lt_tensor-0.0.1a14.dist-info/METADATA,sha256=mxwJTAo51GfGEEW87lT-Tp1AHtoRvuKCmcPxAyqJxLQ,1033
|
30
|
+
lt_tensor-0.0.1a14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
31
|
+
lt_tensor-0.0.1a14.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
|
32
|
+
lt_tensor-0.0.1a14.dist-info/RECORD,,
|
lt_tensor/model_zoo/fsn.py
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
__all__ = [
|
2
|
-
"ConcatFusion",
|
3
|
-
"FiLMFusion",
|
4
|
-
"BilinearFusion",
|
5
|
-
"CrossAttentionFusion",
|
6
|
-
"GatedFusion",
|
7
|
-
]
|
8
|
-
|
9
|
-
from lt_tensor.torch_commons import *
|
10
|
-
from lt_tensor.model_base import Model
|
11
|
-
|
12
|
-
|
13
|
-
class ConcatFusion(Model):
|
14
|
-
def __init__(self, in_dim_a: int, in_dim_b: int, out_dim: int):
|
15
|
-
super().__init__()
|
16
|
-
self.proj = nn.Linear(in_dim_a + in_dim_b, out_dim)
|
17
|
-
|
18
|
-
def forward(self, a: Tensor, b: Tensor) -> Tensor:
|
19
|
-
x = torch.cat([a, b], dim=-1)
|
20
|
-
return self.proj(x)
|
21
|
-
|
22
|
-
|
23
|
-
class FiLMFusion(Model):
|
24
|
-
def __init__(self, cond_dim: int, feature_dim: int):
|
25
|
-
super().__init__()
|
26
|
-
self.modulator = nn.Linear(cond_dim, 2 * feature_dim)
|
27
|
-
|
28
|
-
def forward(self, x: Tensor, cond: Tensor) -> Tensor:
|
29
|
-
scale, shift = self.modulator(cond).chunk(2, dim=-1)
|
30
|
-
return x * scale + shift
|
31
|
-
|
32
|
-
|
33
|
-
class BilinearFusion(Model):
|
34
|
-
def __init__(self, in_dim_a: int, in_dim_b: int, out_dim: int):
|
35
|
-
super().__init__()
|
36
|
-
self.bilinear = nn.Bilinear(in_dim_a, in_dim_b, out_dim)
|
37
|
-
|
38
|
-
def forward(self, a: Tensor, b: Tensor) -> Tensor:
|
39
|
-
return self.bilinear(a, b)
|
40
|
-
|
41
|
-
|
42
|
-
class CrossAttentionFusion(Model):
|
43
|
-
def __init__(self, q_dim: int, kv_dim: int, n_heads: int = 4, d_model: int = 256):
|
44
|
-
super().__init__()
|
45
|
-
self.q_proj = nn.Linear(q_dim, d_model)
|
46
|
-
self.k_proj = nn.Linear(kv_dim, d_model)
|
47
|
-
self.v_proj = nn.Linear(kv_dim, d_model)
|
48
|
-
self.attn = nn.MultiheadAttention(
|
49
|
-
embed_dim=d_model, num_heads=n_heads, batch_first=True
|
50
|
-
)
|
51
|
-
|
52
|
-
def forward(self, query: Tensor, context: Tensor, mask: Tensor = None) -> Tensor:
|
53
|
-
Q = self.q_proj(query)
|
54
|
-
K = self.k_proj(context)
|
55
|
-
V = self.v_proj(context)
|
56
|
-
output, _ = self.attn(Q, K, V, key_padding_mask=mask)
|
57
|
-
return output
|
58
|
-
|
59
|
-
|
60
|
-
class GatedFusion(Model):
|
61
|
-
def __init__(self, in_dim: int):
|
62
|
-
super().__init__()
|
63
|
-
self.gate = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.Sigmoid())
|
64
|
-
|
65
|
-
def forward(self, a: Tensor, b: Tensor) -> Tensor:
|
66
|
-
gate = self.gate(torch.cat([a, b], dim=-1))
|
67
|
-
return gate * a + (1 - gate) * b
|
lt_tensor/model_zoo/gns.py
DELETED
@@ -1,185 +0,0 @@
|
|
1
|
-
__all__ = [
|
2
|
-
"Downsample1D",
|
3
|
-
"Upsample1D",
|
4
|
-
"DiffusionUNet",
|
5
|
-
"UNetConvBlock1D",
|
6
|
-
"UNetUpBlock1D",
|
7
|
-
"NoisePredictor1D",
|
8
|
-
]
|
9
|
-
|
10
|
-
from lt_tensor.torch_commons import *
|
11
|
-
from lt_tensor.model_base import Model
|
12
|
-
from lt_tensor.model_zoo.rsd import ResBlock1D
|
13
|
-
from lt_tensor.misc_utils import log_tensor
|
14
|
-
|
15
|
-
import torch.nn.functional as F
|
16
|
-
|
17
|
-
|
18
|
-
class Downsample1D(Model):
|
19
|
-
def __init__(
|
20
|
-
self,
|
21
|
-
in_channels: int,
|
22
|
-
out_channels: int,
|
23
|
-
):
|
24
|
-
super().__init__()
|
25
|
-
self.pool = nn.Conv1d(in_channels, out_channels, 4, stride=2, padding=1)
|
26
|
-
|
27
|
-
def forward(self, x):
|
28
|
-
return self.pool(x)
|
29
|
-
|
30
|
-
|
31
|
-
class Upsample1D(Model):
|
32
|
-
def __init__(
|
33
|
-
self,
|
34
|
-
in_channels: int,
|
35
|
-
out_channels: int,
|
36
|
-
activation=nn.ReLU(inplace=True),
|
37
|
-
):
|
38
|
-
super().__init__()
|
39
|
-
self.up = nn.Sequential(
|
40
|
-
nn.ConvTranspose1d(
|
41
|
-
in_channels, out_channels, kernel_size=4, stride=2, padding=1
|
42
|
-
),
|
43
|
-
nn.BatchNorm1d(out_channels),
|
44
|
-
activation,
|
45
|
-
)
|
46
|
-
|
47
|
-
def forward(self, x):
|
48
|
-
return self.up(x)
|
49
|
-
|
50
|
-
|
51
|
-
class DiffusionUNet(Model):
|
52
|
-
def __init__(self, in_channels=1, base_channels=64, out_channels=1, depth=4):
|
53
|
-
super().__init__()
|
54
|
-
|
55
|
-
self.depth = depth
|
56
|
-
self.encoder_blocks = nn.ModuleList()
|
57
|
-
self.downsamples = nn.ModuleList()
|
58
|
-
self.upsamples = nn.ModuleList()
|
59
|
-
self.decoder_blocks = nn.ModuleList()
|
60
|
-
# Keep track of channel sizes per layer for skip connections
|
61
|
-
self.channels = [in_channels] # starting input channel
|
62
|
-
for i in range(depth):
|
63
|
-
enc_in = self.channels[-1]
|
64
|
-
enc_out = base_channels * (2**i)
|
65
|
-
# Encoder block and downsample
|
66
|
-
self.encoder_blocks.append(ResBlock1D(enc_in, enc_out))
|
67
|
-
self.downsamples.append(
|
68
|
-
Downsample1D(enc_out, enc_out)
|
69
|
-
) # halve time, keep channels
|
70
|
-
self.channels.append(enc_out)
|
71
|
-
# Bottleneck
|
72
|
-
bottleneck_ch = self.channels[-1]
|
73
|
-
self.bottleneck = ResBlock1D(bottleneck_ch, bottleneck_ch)
|
74
|
-
# Decoder blocks (reverse channel flow)
|
75
|
-
for i in reversed(range(depth)):
|
76
|
-
skip_ch = self.channels[i + 1] # from encoder
|
77
|
-
dec_out = self.channels[i] # match earlier stage's output
|
78
|
-
self.upsamples.append(Upsample1D(skip_ch, skip_ch))
|
79
|
-
self.decoder_blocks.append(ResBlock1D(skip_ch * 2, dec_out))
|
80
|
-
# Final output projection (out_channels)
|
81
|
-
self.final = nn.Conv1d(in_channels, out_channels, kernel_size=1)
|
82
|
-
|
83
|
-
def forward(self, x: Tensor):
|
84
|
-
skips = []
|
85
|
-
|
86
|
-
# Encoder
|
87
|
-
for enc, down in zip(self.encoder_blocks, self.downsamples):
|
88
|
-
# log_tensor(x, "before enc")
|
89
|
-
x = enc(x)
|
90
|
-
skips.append(x)
|
91
|
-
x = down(x)
|
92
|
-
|
93
|
-
# Bottleneck
|
94
|
-
x = self.bottleneck(x)
|
95
|
-
|
96
|
-
# Decoder
|
97
|
-
for up, dec, skip in zip(self.upsamples, self.decoder_blocks, reversed(skips)):
|
98
|
-
x = up(x)
|
99
|
-
|
100
|
-
# Match lengths via trimming or padding
|
101
|
-
if x.shape[-1] > skip.shape[-1]:
|
102
|
-
x = x[..., : skip.shape[-1]]
|
103
|
-
elif x.shape[-1] < skip.shape[-1]:
|
104
|
-
diff = skip.shape[-1] - x.shape[-1]
|
105
|
-
x = F.pad(x, (0, diff))
|
106
|
-
|
107
|
-
x = torch.cat([x, skip], dim=1) # concat on channels
|
108
|
-
x = dec(x)
|
109
|
-
|
110
|
-
# Final 1x1 conv
|
111
|
-
return self.final(x)
|
112
|
-
|
113
|
-
|
114
|
-
class UNetConvBlock1D(Model):
|
115
|
-
def __init__(self, in_channels: int, out_channels: int, down: bool = True):
|
116
|
-
super().__init__()
|
117
|
-
self.down = down
|
118
|
-
self.conv = nn.Sequential(
|
119
|
-
nn.Conv1d(
|
120
|
-
in_channels,
|
121
|
-
out_channels,
|
122
|
-
kernel_size=3,
|
123
|
-
stride=2 if down else 1,
|
124
|
-
padding=1,
|
125
|
-
),
|
126
|
-
nn.BatchNorm1d(out_channels),
|
127
|
-
nn.LeakyReLU(0.2),
|
128
|
-
nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
|
129
|
-
nn.BatchNorm1d(out_channels),
|
130
|
-
nn.LeakyReLU(0.2),
|
131
|
-
)
|
132
|
-
self.downsample = (
|
133
|
-
nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=2 if down else 1)
|
134
|
-
if in_channels != out_channels
|
135
|
-
else nn.Identity()
|
136
|
-
)
|
137
|
-
|
138
|
-
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
139
|
-
# x: [B, C, T]
|
140
|
-
residual = self.downsample(x)
|
141
|
-
return self.conv(x) + residual
|
142
|
-
|
143
|
-
|
144
|
-
class UNetUpBlock1D(Model):
|
145
|
-
def __init__(self, in_channels: int, out_channels: int):
|
146
|
-
super().__init__()
|
147
|
-
self.conv = nn.Sequential(
|
148
|
-
nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
|
149
|
-
nn.BatchNorm1d(out_channels),
|
150
|
-
nn.LeakyReLU(0.2),
|
151
|
-
nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
|
152
|
-
nn.BatchNorm1d(out_channels),
|
153
|
-
nn.LeakyReLU(0.2),
|
154
|
-
)
|
155
|
-
self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
|
156
|
-
|
157
|
-
def forward(self, x: torch.Tensor, skip: torch.Tensor) -> torch.Tensor:
|
158
|
-
x = self.upsample(x)
|
159
|
-
x = torch.cat([x, skip], dim=1) # skip connection
|
160
|
-
return self.conv(x)
|
161
|
-
|
162
|
-
|
163
|
-
class NoisePredictor1D(Model):
|
164
|
-
def __init__(self, in_channels: int, cond_dim: int = 0, hidden: int = 128):
|
165
|
-
"""
|
166
|
-
Args:
|
167
|
-
in_channels: channels of the noisy input [B, C, T]
|
168
|
-
cond_dim: optional condition vector [B, cond_dim]
|
169
|
-
"""
|
170
|
-
super().__init__()
|
171
|
-
self.proj = nn.Linear(cond_dim, hidden) if cond_dim > 0 else None
|
172
|
-
self.net = nn.Sequential(
|
173
|
-
nn.Conv1d(in_channels, hidden, kernel_size=3, padding=1),
|
174
|
-
nn.SiLU(),
|
175
|
-
nn.Conv1d(hidden, in_channels, kernel_size=3, padding=1),
|
176
|
-
)
|
177
|
-
|
178
|
-
def forward(self, x: torch.Tensor, cond: Optional[torch.Tensor] = None):
|
179
|
-
# x: [B, C, T], cond: [B, cond_dim]
|
180
|
-
if cond is not None:
|
181
|
-
cond_proj = self.proj(cond).unsqueeze(-1) # [B, hidden, 1]
|
182
|
-
x = x + cond_proj # simple conditioning
|
183
|
-
return self.net(x) # [B, C, T]
|
184
|
-
|
185
|
-
|