PyPI - lt-tensor - Versions diffs - 0.0.1a13__py3-none-any.whl → 0.0.1a15__py3-none-any.whl - Mend

lt-tensor 0.0.1a13__py3-none-any.whl → 0.0.1a15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

lt_tensor/datasets/audio.py +23 -6
lt_tensor/misc_utils.py +1 -1
lt_tensor/model_base.py +163 -123
lt_tensor/model_zoo/diffwave/__init__.py +0 -0
lt_tensor/model_zoo/diffwave/model.py +200 -0
lt_tensor/model_zoo/diffwave/params.py +58 -0
lt_tensor/model_zoo/discriminator.py +269 -151
lt_tensor/model_zoo/features.py +102 -11
lt_tensor/model_zoo/istft/generator.py +10 -66
lt_tensor/model_zoo/istft/trainer.py +224 -72
lt_tensor/model_zoo/residual.py +136 -32
lt_tensor/processors/audio.py +5 -16
{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/METADATA +2 -2
{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/RECORD +17 -14
{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/WHEEL +0 -0
{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/licenses/LICENSE +0 -0
{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/top_level.txt +0 -0

lt_tensor/model_zoo/residual.py CHANGED Viewed

@@ -5,13 +5,16 @@ __all__ = [
     "ResBlock2D",
     "ResBlock1DShuffled",
     "AdaResBlock1D",
+    "ResBlocks1D",
+    "ResBlock1D2",
+    "ShuffleBlock2D",
 ]
 import math
 from lt_utils.common import *
+import torch.nn.functional as F
 from lt_tensor.torch_commons import *
 from lt_tensor.model_base import Model
 from lt_tensor.misc_utils import log_tensor
-import torch.nn.functional as F
 from lt_tensor.model_zoo.fusion import AdaFusion1D, AdaIN1D
@@ -44,6 +47,10 @@ class ConvNets(Model):
             m.weight.data.normal_(mean, std)
+def get_padding(ks, d):
+    return int((ks * d - d) / 2)
 class ResBlock1D(ConvNets):
     def __init__(
         self,
@@ -57,14 +64,13 @@ class ResBlock1D(ConvNets):
         self.conv_nets = nn.ModuleList(
             [
                 self._get_conv_layer(i, channels, kernel_size, 1, dilation, activation)
-                for i in range(3)
+                for i in range(len(dilation))
             ]
         )
         self.conv_nets.apply(self.init_weights)
         self.last_index = len(self.conv_nets) - 1
     def _get_conv_layer(self, id, ch, k, stride, d, actv):
-        get_padding = lambda ks, d: int((ks * d - d) / 2)
         return nn.Sequential(
             actv,  # 1
             weight_norm(
@@ -91,16 +97,11 @@ class ResBlock1DShuffled(ConvNets):
         kernel_size=3,
         dilation=(1, 3, 5),
         activation: nn.Module = nn.LeakyReLU(0.1),
-        add_channel_shuffle: bool = False,  # requires pytorch 2.7.0 +
         channel_shuffle_groups=1,
     ):
         super().__init__()
-        self.channel_shuffle = (
-            nn.ChannelShuffle(channel_shuffle_groups)
-            if add_channel_shuffle
-            else nn.Identity()
-        )
+        self.channel_shuffle = nn.ChannelShuffle(channel_shuffle_groups)
         self.conv_nets = nn.ModuleList(
             [
@@ -136,29 +137,67 @@ class ResBlock1DShuffled(ConvNets):
 class ResBlock2D(Model):
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        downsample=False,
+        in_channels: int,
+        out_channels: Optional[int] = None,
+        hidden_dim: int = 32,
+        downscale: bool = False,
+        activation: nn.Module = nn.LeakyReLU(0.2),
     ):
         super().__init__()
-        stride = 2 if downsample else 1
+        stride = 2 if downscale else 1
+        if out_channels is None:
+            out_channels = in_channels
         self.block = nn.Sequential(
-            nn.Conv2d(in_channels, out_channels, 3, stride, 1),
-            nn.LeakyReLU(0.2),
-            nn.Conv2d(out_channels, out_channels, 3, 1, 1),
+            nn.Conv2d(in_channels, hidden_dim, 3, stride, 1),
+            activation,
+            nn.Conv2d(hidden_dim, hidden_dim, 7, 1, 3),
+            activation,
+            nn.Conv2d(hidden_dim, out_channels, 3, 1, 1),
         )
         self.skip = nn.Identity()
-        if downsample or in_channels != out_channels:
+        if downscale or in_channels != out_channels:
             self.skip = spectral_norm_select(
                 nn.Conv2d(in_channels, out_channels, 1, stride)
             )
-        # on less to be handled every cicle
+        # on less to be handled every cycle
         self.sqrt_2 = math.sqrt(2)
     def forward(self, x: Tensor):
-        return (self.block(x) + self.skip(x)) / self.sqrt_2
+        return x + ((self.block(x) + self.skip(x)) / self.sqrt_2)
+class ShuffleBlock2D(ConvNets):
+    def __init__(
+        self,
+        channels: int,
+        out_channels: Optional[int] = None,
+        hidden_dim: int = 32,
+        downscale: bool = False,
+        activation: nn.Module = nn.LeakyReLU(0.1),
+    ):
+        super().__init__()
+        if out_channels is None:
+            out_channels = channels
+        self.shuffle = nn.ChannelShuffle(groups=2)
+        self.ch_split = lambda tensor: torch.split(tensor, 1, dim=1)
+        self.activation = activation
+        self.resblock_2d = ResBlock2D(
+            channels, out_channels, hidden_dim, downscale, activation
+        )
+    def shuffle_channels(self, tensor: torch.Tensor):
+        with torch.no_grad():
+            x = F.channel_shuffle(tensor.transpose(1, -1), tensor.shape[1]).transpose(
+                -1, 1
+            )
+        return self.ch_split(x)
+    def forward(self, x: torch.Tensor):
+        ch1, ch2 = self.shuffle_channels(x)
+        ch2 = self.resblock_2d(ch2)
+        return torch.cat((ch1, ch2), dim=1)
 class AdaResBlock1D(ConvNets):
@@ -172,46 +211,111 @@ class AdaResBlock1D(ConvNets):
     ):
         super().__init__()
+        self.alpha1 = nn.ModuleList()
+        self.alpha2 = nn.ModuleList()
         self.conv_nets = nn.ModuleList(
             [
                 self._get_conv_layer(
-                    i,
+                    d,
                     res_block_channels,
                     ada_channel_in,
                     kernel_size,
-                    1,
-                    dilation,
                 )
-                for i in range(3)
+                for d in dilation
             ]
         )
         self.conv_nets.apply(self.init_weights)
         self.last_index = len(self.conv_nets) - 1
         self.activation = activation
-    def _get_conv_layer(self, id, ch, ada_ch, k, stride, d):
-        get_padding = lambda ks, d: int((ks * d - d) / 2)
+    def _get_conv_layer(self, d, ch, ada_ch, k):
+        self.alpha1.append(nn.Parameter(torch.ones(1, ada_ch, 1)))
+        self.alpha2.append(nn.Parameter(torch.ones(1, ada_ch, 1)))
         return nn.ModuleDict(
             dict(
                 norm1=AdaFusion1D(ada_ch, ch),
                 norm2=AdaFusion1D(ada_ch, ch),
-                alpha1=nn.Parameter(torch.ones(1, ada_ch, 1)),
-                alpha2=nn.Parameter(torch.ones(1, ada_ch, 1)),
                 conv1=weight_norm(
                     nn.Conv1d(
-                        ch, ch, k, stride, dilation=d[id], padding=get_padding(k, d[id])
+                        ch, ch, k, 1, dilation=d, padding=get_padding(k, d)
                     )
                 ),  # 2
                 conv2=weight_norm(
-                    nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
+                    nn.Conv1d(ch, ch, k, 1, dilation=1, padding=get_padding(k, 1))
                 ),  # 4
             )
         )
     def forward(self, x: torch.Tensor, y: torch.Tensor):
-        for cnn in self.conv_nets:
-            xt = self.activation(cnn["norm1"](x, y, cnn["alpha1"]))
+        for i, cnn in enumerate(self.conv_nets):
+            xt = self.activation(cnn["norm1"](x, y, self.alpha1[i]))
             xt = cnn["conv1"](xt)
-            xt = self.activation(cnn["norm2"](xt, y, cnn["alpha2"]))
+            xt = self.activation(cnn["norm2"](xt, y, self.alpha2[i]))
             x = cnn["conv2"](xt) + x
         return x
+class ResBlock1D2(ConvNets):
+    def __init__(
+        self,
+        channels,
+        kernel_size=3,
+        dilation=(1, 3, 5),
+        activation: nn.Module = nn.LeakyReLU(0.1),
+    ):
+        super().__init__()
+        self.convs = nn.ModuleList(
+            [
+                weight_norm(
+                    nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        dilation=d,
+                        padding=get_padding(kernel_size, d),
+                    )
+                )
+                for d in range(dilation)
+            ]
+        )
+        self.convs.apply(self.init_weights)
+        self.activation = activation
+    def forward(self, x):
+        for c in self.convs:
+            xt = c(self.activation(x))
+            x = xt + x
+        return x
+class ResBlocks1D(ConvNets):
+    def __init__(
+        self,
+        channels: int,
+        resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
+        resblock_dilation_sizes: List[Union[int, List[int]]] = [
+            [1, 3, 5],
+            [1, 3, 5],
+            [1, 3, 5],
+        ],
+        activation: nn.Module = nn.LeakyReLU(0.1),
+        block: Union[ResBlock1D, ResBlock1D2] = ResBlock1D,
+    ):
+        super().__init__()
+        self.num_kernels = len(resblock_kernel_sizes)
+        self.rb = nn.ModuleList()
+        self.activation = activation
+        for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
+            self.rb.append(block(channels, k, j, activation))
+        self.rb.apply(self.init_weights)
+    def forward(self, x: torch.Tensor):
+        xs = None
+        for i, block in enumerate(self.rb):
+            if i == 0:
+                xs = block(x)
+            else:
+                xs += block(x)
+        return xs / self.num_kernels

lt_tensor/processors/audio.py CHANGED Viewed

@@ -106,20 +106,13 @@ class AudioProcessor(Model):
         return tensor.detach().to(DEFAULT_DEVICE).numpy(force=True)
     def compute_rms(
-        self, audio: Union[Tensor, np.ndarray], mel: Optional[Tensor] = None
+        self,
+        audio: Union[Tensor, np.ndarray],
+        mel: Optional[Tensor] = None,
     ):
         default_dtype = audio.dtype
         default_device = audio.device
-        assert audio.ndim in [1, 2], (
-            f"Audio should have 1D for unbatched and 2D for batched"
-            ", received instead a: {audio.ndim}D"
-        )
-        if mel is not None:
-            assert mel.ndim in [2, 3], (
-                "Mel spectogram should have 2D dim for non-batched or 3D dim for both non-batched or batched"
-                f". Received instead {mel.ndim}D."
-            )
-        if audio.ndim == 2:
+        if audio.ndim > 1:
             B = audio.shape[0]
         else:
             B = 1
@@ -163,11 +156,7 @@ class AudioProcessor(Model):
     ):
         default_dtype = audio.dtype
         default_device = audio.device
-        assert audio.ndim in [1, 2], (
-            f"Audio should have 1D for unbatched and 2D for batched"
-            ", received instead a: {audio.ndim}D"
-        )
-        if audio.ndim == 2:
+        if audio.ndim > 1:
             B = audio.shape[0]
         else:
             B = 1

{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lt-tensor
-Version: 0.0.1a13
+Version: 0.0.1a15
 Summary: General utilities for PyTorch and others. Built for general use.
 Home-page: https://github.com/gr1336/lt-tensor/
 Author: gr1336
@@ -17,7 +17,7 @@ Requires-Dist: numpy>=1.26.4
 Requires-Dist: tokenizers
 Requires-Dist: pyyaml>=6.0.0
 Requires-Dist: numba>0.60.0
-Requires-Dist: lt-utils>=0.0.2a1
+Requires-Dist: lt-utils==0.0.2a2
 Requires-Dist: librosa==0.11.*
 Requires-Dist: einops
 Requires-Dist: plotly

{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/RECORD RENAMED Viewed

@@ -3,30 +3,33 @@ lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss
 lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
 lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
 lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
-lt_tensor/misc_utils.py,sha256=UNba6UEsAv1oZ60IAaKBNGbhXK2WPxRI9E4QcjP-_w0,28755
-lt_tensor/model_base.py,sha256=lxzRXfPlR_t_6LfgRw2dct55evrtmwTiDqZGAe3jLro,20026
+lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
+lt_tensor/model_base.py,sha256=qqqIVpYz6nv01MnZuuAj1dxq4_NN-zSivP1GaegA9TI,21597
 lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
 lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
 lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
 lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
 lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lt_tensor/datasets/audio.py,sha256=j73oRyXt-AK4tWWYWjH-3c5RYouQBgDSCTuWHmyG8kQ,7450
+lt_tensor/datasets/audio.py,sha256=5Wvz1BJ7xXkLYpVLLw9RY3X3RgMdPPeGiN0-MmJDQy0,8045
 lt_tensor/model_zoo/__init__.py,sha256=RzG7fltZLyiIU_Za4pgfBPli5uPITiJkq4sTCd4uA_0,319
 lt_tensor/model_zoo/basic.py,sha256=_26H_jJk5Ld3DZiNpIhGosGfMxoFDZrI8bpDAYUOYno,10660
-lt_tensor/model_zoo/discriminator.py,sha256=dS5UmJZV5MxIFiaBlIXfgGLDdUT3y0Vuv9lDGHsjJE8,5849
-lt_tensor/model_zoo/features.py,sha256=CTFMidzza31pqQjwPfp_g0BNVfuQ8Dlo5JnxpYpKgag,13144
+lt_tensor/model_zoo/discriminator.py,sha256=_HrgseU3KO_6ONNjISxkp6-9pRseVZr43x8NYxIq1Xg,9989
+lt_tensor/model_zoo/features.py,sha256=DO8dlE0kmPKTNC1Xkv9wKegOOYkQa_rkxM4hhcNwJWA,15655
 lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI,5422
 lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
-lt_tensor/model_zoo/residual.py,sha256=knVLxzrLUjNQ6vdBESTZOk3r86ldi5PHetoBuJmymcw,6388
+lt_tensor/model_zoo/residual.py,sha256=i5V4ju7DB3WesKBVm6KH_LyPoKGDUOyo2Usfs-PyP58,9394
 lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
+lt_tensor/model_zoo/diffwave/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lt_tensor/model_zoo/diffwave/model.py,sha256=RwrJd7ZZ2uQdLid_m8-wbwEJ7l2gqukq2MSjbquN_Pg,6832
+lt_tensor/model_zoo/diffwave/params.py,sha256=91aaBWNfWU-q3POS3TbNgdmhw5RAayoLudVNblM8ixU,1719
 lt_tensor/model_zoo/istft/__init__.py,sha256=SV96w9WUWfHMee8Vjgn2MP0igKft7_mLTju9rFVYGHY,102
-lt_tensor/model_zoo/istft/generator.py,sha256=lotGkMu67fctzwa5FSwX_xtHILOuV95uP-djCz2N3C8,5261
+lt_tensor/model_zoo/istft/generator.py,sha256=R5Wym4Bocx1T5ijyETQe1thx4uY9ulMwcHqgsGG3h-0,3364
 lt_tensor/model_zoo/istft/sg.py,sha256=EaEi3otw_uY5QfqDBNIWBWTJSg3KnwzzR4FBr0u09C0,4838
-lt_tensor/model_zoo/istft/trainer.py,sha256=EPuGtvfgR8vCrVc72p5OwVy73nNVlx510VxnH3NeErY,16080
+lt_tensor/model_zoo/istft/trainer.py,sha256=WAoySxxuyJtMDt2q0kGbaJT19vAduHyxYwBo4TTU_LM,21302
 lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
-lt_tensor/processors/audio.py,sha256=uBvMls4u_B1M-pk3xAiOIRnwM2l_3LcdfESNkE0Ch30,15314
-lt_tensor-0.0.1a13.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
-lt_tensor-0.0.1a13.dist-info/METADATA,sha256=yzNtg91vOGZCoXi6XWpn1kWk7LgVD2mIWQXL-7tw_Uc,1033
-lt_tensor-0.0.1a13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lt_tensor-0.0.1a13.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
-lt_tensor-0.0.1a13.dist-info/RECORD,,
+lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
+lt_tensor-0.0.1a15.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
+lt_tensor-0.0.1a15.dist-info/METADATA,sha256=RKfh13pzXJQtBwVMoXyqizQfshD7gFyC1491UCfSFP8,1033
+lt_tensor-0.0.1a15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lt_tensor-0.0.1a15.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+lt_tensor-0.0.1a15.dist-info/RECORD,,

{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/WHEEL RENAMED Viewed

File without changes

{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{lt_tensor-0.0.1a13.dist-info → lt_tensor-0.0.1a15.dist-info}/top_level.txt RENAMED Viewed

File without changes

lt-tensor 0.0.1a13__py3-none-any.whl → 0.0.1a15__py3-none-any.whl

lt-tensor 0.0.1a13__py3-none-any.whl → 0.0.1a15__py3-none-any.whl