lt-tensor 0.0.1a12__py3-none-any.whl → 0.0.1a14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lt-tensor
- Version: 0.0.1a12
+ Version: 0.0.1a14
  Summary: General utilities for PyTorch and others. Built for general use.
  Home-page: https://github.com/gr1336/lt-tensor/
  Author: gr1336
@@ -11,15 +11,17 @@ Classifier: Topic :: Software Development :: Libraries
  Classifier: Topic :: Utilities
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: torch>=2.2.0
- Requires-Dist: torchaudio>=2.2.0
+ Requires-Dist: torch>=2.7.0
+ Requires-Dist: torchaudio>=2.7.0
  Requires-Dist: numpy>=1.26.4
  Requires-Dist: tokenizers
  Requires-Dist: pyyaml>=6.0.0
  Requires-Dist: numba>0.60.0
- Requires-Dist: lt-utils>=0.0.2a1
- Requires-Dist: librosa>=0.11.0
+ Requires-Dist: lt-utils==0.0.2a2
+ Requires-Dist: librosa==0.11.*
+ Requires-Dist: einops
  Requires-Dist: plotly
+ Requires-Dist: scipy
  Dynamic: author
  Dynamic: classifier
  Dynamic: description
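
The metadata changes above bump torch and torchaudio from >=2.2.0 to >=2.7.0, pin lt-utils exactly to 0.0.2a2, restrict librosa to the 0.11 series, and add einops and scipy as new requirements. A minimal sketch for checking an installed environment against the new pins — the requirement strings are copied from the METADATA above, but the script itself and its use of the third-party `packaging` library are illustrative, not part of lt-tensor:

```python
from importlib.metadata import PackageNotFoundError, version

from packaging.requirements import Requirement

# Requirement strings copied from the 0.0.1a14 METADATA above.
NEW_PINS = [
    "torch>=2.7.0",
    "torchaudio>=2.7.0",
    "lt-utils==0.0.2a2",
    "librosa==0.11.*",
    "einops",
    "scipy",
]

for spec in NEW_PINS:
    req = Requirement(spec)
    try:
        installed = version(req.name)
    except PackageNotFoundError:
        print(f"{req.name}: not installed")
        continue
    # contains() needs prereleases=True for pre-release pins like 0.0.2a2.
    ok = req.specifier.contains(installed, prereleases=True)
    print(f"{req.name} {installed}: {'ok' if ok else 'violates ' + str(req.specifier)}")
```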
@@ -0,0 +1,32 @@
+ lt_tensor/__init__.py,sha256=XxNCGcVL-haJyMpifr-GRaamo32R6jmqe3iOuS4ecfs,469
+ lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss,3515
+ lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
+ lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
+ lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
+ lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
+ lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
+ lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
+ lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
+ lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
+ lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
+ lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ lt_tensor/datasets/audio.py,sha256=j73oRyXt-AK4tWWYWjH-3c5RYouQBgDSCTuWHmyG8kQ,7450
+ lt_tensor/model_zoo/__init__.py,sha256=RzG7fltZLyiIU_Za4pgfBPli5uPITiJkq4sTCd4uA_0,319
+ lt_tensor/model_zoo/basic.py,sha256=_26H_jJk5Ld3DZiNpIhGosGfMxoFDZrI8bpDAYUOYno,10660
+ lt_tensor/model_zoo/discriminator.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lDGHsjJE8,5849
+ lt_tensor/model_zoo/features.py,sha256=CTFMidzza31pqQjwPfp_g0BNVfuQ8Dlo5JnxpYpKgag,13144
+ lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
+ lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
+ lt_tensor/model_zoo/residual.py,sha256=3tc2fJaz6SxtKYAsxndahhwIxlN6oLk5tcdIXtUKaQc,7357
+ lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
+ lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
+ lt_tensor/model_zoo/istft/generator.py,sha256=wWHUfLFIItN-tB3pWkc1r9aTWpHYBFg7UfvLN4_cD78,3179
+ lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
+ lt_tensor/model_zoo/istft/trainer.py,sha256=KZXsAptOJeLYlr6t-DPX1qxgN526-2EBKoQQlcsHp8Y,21054
+ lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
+ lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
+ lt_tensor-0.0.1a14.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
+ lt_tensor-0.0.1a14.dist-info/METADATA,sha256=mxwJTAo51GfGEEW87lT-Tp1AHtoRvuKCmcPxAyqJxLQ,1033
+ lt_tensor-0.0.1a14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lt_tensor-0.0.1a14.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+ lt_tensor-0.0.1a14.dist-info/RECORD,,
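
Each added RECORD line above is a `path,hash,size` triple per the wheel spec: the hash field is the urlsafe-base64-encoded SHA-256 digest of the file with trailing `=` padding stripped (the RECORD file itself carries an empty hash and size, since it cannot hash itself). A sketch of recomputing one entry, assuming a locally unpacked copy of the wheel:

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    """Recompute one wheel RECORD line: "path,sha256=<digest>,<size>".

    The digest is the urlsafe base64 of the raw SHA-256 bytes, "=" stripped.
    """
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}"


# e.g. record_entry("lt_tensor/torch_commons.py") should reproduce the
# "...fntsEU8l...,718" line above when run inside the unpacked wheel.
```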
@@ -1,67 +0,0 @@
- __all__ = [
-     "ConcatFusion",
-     "FiLMFusion",
-     "BilinearFusion",
-     "CrossAttentionFusion",
-     "GatedFusion",
- ]
-
- from lt_tensor.torch_commons import *
- from lt_tensor.model_base import Model
-
-
- class ConcatFusion(Model):
-     def __init__(self, in_dim_a: int, in_dim_b: int, out_dim: int):
-         super().__init__()
-         self.proj = nn.Linear(in_dim_a + in_dim_b, out_dim)
-
-     def forward(self, a: Tensor, b: Tensor) -> Tensor:
-         x = torch.cat([a, b], dim=-1)
-         return self.proj(x)
-
-
- class FiLMFusion(Model):
-     def __init__(self, cond_dim: int, feature_dim: int):
-         super().__init__()
-         self.modulator = nn.Linear(cond_dim, 2 * feature_dim)
-
-     def forward(self, x: Tensor, cond: Tensor) -> Tensor:
-         scale, shift = self.modulator(cond).chunk(2, dim=-1)
-         return x * scale + shift
-
-
- class BilinearFusion(Model):
-     def __init__(self, in_dim_a: int, in_dim_b: int, out_dim: int):
-         super().__init__()
-         self.bilinear = nn.Bilinear(in_dim_a, in_dim_b, out_dim)
-
-     def forward(self, a: Tensor, b: Tensor) -> Tensor:
-         return self.bilinear(a, b)
-
-
- class CrossAttentionFusion(Model):
-     def __init__(self, q_dim: int, kv_dim: int, n_heads: int = 4, d_model: int = 256):
-         super().__init__()
-         self.q_proj = nn.Linear(q_dim, d_model)
-         self.k_proj = nn.Linear(kv_dim, d_model)
-         self.v_proj = nn.Linear(kv_dim, d_model)
-         self.attn = nn.MultiheadAttention(
-             embed_dim=d_model, num_heads=n_heads, batch_first=True
-         )
-
-     def forward(self, query: Tensor, context: Tensor, mask: Tensor = None) -> Tensor:
-         Q = self.q_proj(query)
-         K = self.k_proj(context)
-         V = self.v_proj(context)
-         output, _ = self.attn(Q, K, V, key_padding_mask=mask)
-         return output
-
-
- class GatedFusion(Model):
-     def __init__(self, in_dim: int):
-         super().__init__()
-         self.gate = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.Sigmoid())
-
-     def forward(self, a: Tensor, b: Tensor) -> Tensor:
-         gate = self.gate(torch.cat([a, b], dim=-1))
-         return gate * a + (1 - gate) * b
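
The 67 removed lines above match `lt_tensor/model_zoo/fusion.py`, which the a14 RECORD still lists, so the module was likely rewritten rather than dropped. For context, a hypothetical usage sketch of the a12 classes shown above — the import path is inferred from the RECORD, and the batch/time/feature shapes are assumptions, not documented API:

```python
import torch

# Hypothetical usage against lt-tensor 0.0.1a12; whether a14 still exports
# the same names is not visible in this diff.
from lt_tensor.model_zoo.fusion import FiLMFusion, GatedFusion

x = torch.randn(2, 50, 128)    # features      [B, T, feature_dim]
cond = torch.randn(2, 50, 32)  # conditioning  [B, T, cond_dim]

film = FiLMFusion(cond_dim=32, feature_dim=128)
y = film(x, cond)              # per-feature scale/shift -> [2, 50, 128]

gate = GatedFusion(in_dim=128)
z = gate(y, x)                 # sigmoid-gated mix of y and x -> [2, 50, 128]
```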
@@ -1,185 +0,0 @@
- __all__ = [
-     "Downsample1D",
-     "Upsample1D",
-     "DiffusionUNet",
-     "UNetConvBlock1D",
-     "UNetUpBlock1D",
-     "NoisePredictor1D",
- ]
-
- from lt_tensor.torch_commons import *
- from lt_tensor.model_base import Model
- from lt_tensor.model_zoo.rsd import ResBlock1D
- from lt_tensor.misc_utils import log_tensor
-
- import torch.nn.functional as F
-
-
- class Downsample1D(Model):
-     def __init__(
-         self,
-         in_channels: int,
-         out_channels: int,
-     ):
-         super().__init__()
-         self.pool = nn.Conv1d(in_channels, out_channels, 4, stride=2, padding=1)
-
-     def forward(self, x):
-         return self.pool(x)
-
-
- class Upsample1D(Model):
-     def __init__(
-         self,
-         in_channels: int,
-         out_channels: int,
-         activation=nn.ReLU(inplace=True),
-     ):
-         super().__init__()
-         self.up = nn.Sequential(
-             nn.ConvTranspose1d(
-                 in_channels, out_channels, kernel_size=4, stride=2, padding=1
-             ),
-             nn.BatchNorm1d(out_channels),
-             activation,
-         )
-
-     def forward(self, x):
-         return self.up(x)
-
-
- class DiffusionUNet(Model):
-     def __init__(self, in_channels=1, base_channels=64, out_channels=1, depth=4):
-         super().__init__()
-
-         self.depth = depth
-         self.encoder_blocks = nn.ModuleList()
-         self.downsamples = nn.ModuleList()
-         self.upsamples = nn.ModuleList()
-         self.decoder_blocks = nn.ModuleList()
-         # Keep track of channel sizes per layer for skip connections
-         self.channels = [in_channels]  # starting input channel
-         for i in range(depth):
-             enc_in = self.channels[-1]
-             enc_out = base_channels * (2**i)
-             # Encoder block and downsample
-             self.encoder_blocks.append(ResBlock1D(enc_in, enc_out))
-             self.downsamples.append(
-                 Downsample1D(enc_out, enc_out)
-             )  # halve time, keep channels
-             self.channels.append(enc_out)
-         # Bottleneck
-         bottleneck_ch = self.channels[-1]
-         self.bottleneck = ResBlock1D(bottleneck_ch, bottleneck_ch)
-         # Decoder blocks (reverse channel flow)
-         for i in reversed(range(depth)):
-             skip_ch = self.channels[i + 1]  # from encoder
-             dec_out = self.channels[i]  # match earlier stage's output
-             self.upsamples.append(Upsample1D(skip_ch, skip_ch))
-             self.decoder_blocks.append(ResBlock1D(skip_ch * 2, dec_out))
-         # Final output projection (out_channels)
-         self.final = nn.Conv1d(in_channels, out_channels, kernel_size=1)
-
-     def forward(self, x: Tensor):
-         skips = []
-
-         # Encoder
-         for enc, down in zip(self.encoder_blocks, self.downsamples):
-             # log_tensor(x, "before enc")
-             x = enc(x)
-             skips.append(x)
-             x = down(x)
-
-         # Bottleneck
-         x = self.bottleneck(x)
-
-         # Decoder
-         for up, dec, skip in zip(self.upsamples, self.decoder_blocks, reversed(skips)):
-             x = up(x)
-
-             # Match lengths via trimming or padding
-             if x.shape[-1] > skip.shape[-1]:
-                 x = x[..., : skip.shape[-1]]
-             elif x.shape[-1] < skip.shape[-1]:
-                 diff = skip.shape[-1] - x.shape[-1]
-                 x = F.pad(x, (0, diff))
-
-             x = torch.cat([x, skip], dim=1)  # concat on channels
-             x = dec(x)
-
-         # Final 1x1 conv
-         return self.final(x)
-
-
- class UNetConvBlock1D(Model):
-     def __init__(self, in_channels: int, out_channels: int, down: bool = True):
-         super().__init__()
-         self.down = down
-         self.conv = nn.Sequential(
-             nn.Conv1d(
-                 in_channels,
-                 out_channels,
-                 kernel_size=3,
-                 stride=2 if down else 1,
-                 padding=1,
-             ),
-             nn.BatchNorm1d(out_channels),
-             nn.LeakyReLU(0.2),
-             nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
-             nn.BatchNorm1d(out_channels),
-             nn.LeakyReLU(0.2),
-         )
-         self.downsample = (
-             nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=2 if down else 1)
-             if in_channels != out_channels
-             else nn.Identity()
-         )
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         # x: [B, C, T]
-         residual = self.downsample(x)
-         return self.conv(x) + residual
-
-
- class UNetUpBlock1D(Model):
-     def __init__(self, in_channels: int, out_channels: int):
-         super().__init__()
-         self.conv = nn.Sequential(
-             nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
-             nn.BatchNorm1d(out_channels),
-             nn.LeakyReLU(0.2),
-             nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
-             nn.BatchNorm1d(out_channels),
-             nn.LeakyReLU(0.2),
-         )
-         self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
-
-     def forward(self, x: torch.Tensor, skip: torch.Tensor) -> torch.Tensor:
-         x = self.upsample(x)
-         x = torch.cat([x, skip], dim=1)  # skip connection
-         return self.conv(x)
-
-
- class NoisePredictor1D(Model):
-     def __init__(self, in_channels: int, cond_dim: int = 0, hidden: int = 128):
-         """
-         Args:
-             in_channels: channels of the noisy input [B, C, T]
-             cond_dim: optional condition vector [B, cond_dim]
-         """
-         super().__init__()
-         self.proj = nn.Linear(cond_dim, hidden) if cond_dim > 0 else None
-         self.net = nn.Sequential(
-             nn.Conv1d(in_channels, hidden, kernel_size=3, padding=1),
-             nn.SiLU(),
-             nn.Conv1d(hidden, in_channels, kernel_size=3, padding=1),
-         )
-
-     def forward(self, x: torch.Tensor, cond: Optional[torch.Tensor] = None):
-         # x: [B, C, T], cond: [B, cond_dim]
-         if cond is not None:
-             cond_proj = self.proj(cond).unsqueeze(-1)  # [B, hidden, 1]
-             x = x + cond_proj  # simple conditioning
-         return self.net(x)  # [B, C, T]
-
-
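
The 185 removed lines above are a 1D diffusion U-Net; the file's name is not visible in this diff, and it imports `ResBlock1D` from `lt_tensor.model_zoo.rsd`, which is also absent from the a14 RECORD (only `residual.py` remains). A hypothetical smoke test against a12 — the module path below is a guess, labeled as such:

```python
import torch

# Hypothetical smoke test for the removed DiffusionUNet, runnable only
# against lt-tensor 0.0.1a12. The import path is an assumption; this diff
# does not show the removed file's name.
from lt_tensor.model_zoo.diffusion import DiffusionUNet

model = DiffusionUNet(in_channels=1, base_channels=64, out_channels=1, depth=4)

# depth=4 halves the time axis four times, so pick T divisible by 2**4.
x = torch.randn(2, 1, 256)  # [B, C, T]
with torch.no_grad():
    y = model(x)
print(y.shape)  # skip trimming/padding restores T -> torch.Size([2, 1, 256])
```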