lt-tensor 0.0.1a0__py3-none-any.whl → 0.0.1a3__py3-none-any.whl

This diff shows the content of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
lt_tensor/__init__.py CHANGED
@@ -1 +1,21 @@
-__version__ = "0.0.1dev3"
+__version__ = "0.0.1a"
+
+from . import (
+    model_zoo,
+    model_base,
+    math_ops,
+    misc_utils,
+    monotonic_align,
+    transform,
+    lr_schedulers,
+)
+
+__all__ = [
+    "model_zoo",
+    "model_base",
+    "math_ops",
+    "misc_utils",
+    "monotonic_align",
+    "transform",
+    "lr_schedulers",
+]
lt_tensor/datasets/audio.py ADDED
@@ -0,0 +1,111 @@
+__all__ = ["AudioProcessor"]
+from ..torch_commons import *
+import torchaudio
+from typing import TypeAlias, Union, Optional
+from lt_utils.common import PathLike
+import librosa
+from lt_utils.type_utils import is_file
+from torchaudio.functional import resample
+from ..transform import inverse_transform
+
+
+class AudioProcessor:
+
+    def __init__(
+        self,
+        sample_rate: int = 24000,
+        n_mels: int = 80,
+        n_fft: int = 2048,
+        win_length: int = 2048,
+        hop_length: int = 256,
+        f_min: float = 0,
+        f_max: float | None = None,
+        mean: int = -4,
+        std: int = 4,
+        n_iter: int = 32,
+        center: bool = True,
+        mel_scale: str = "htk",
+        inv_n_fft: int = 16,
+        inv_hop: int = 4,
+    ):
+        self.mean = mean
+        self.std = std
+        self.n_mels = n_mels
+        self.n_fft = n_fft
+        self.n_stft = n_fft // 2 + 1
+        self.f_min = f_min
+        self.f_max = f_max
+        self.n_iter = n_iter
+        self.hop_length = hop_length
+        self.sample_rate = sample_rate
+        self.mel_spec = torchaudio.transforms.MelSpectrogram(
+            sample_rate=sample_rate,
+            n_mels=n_mels,
+            n_fft=n_fft,
+            win_length=win_length,
+            hop_length=hop_length,
+            center=center,
+            f_min=f_min,
+            f_max=f_max,
+            mel_scale=mel_scale,
+        )
+        self.mel_rscale = torchaudio.transforms.InverseMelScale(
+            n_stft=self.n_stft,
+            m_mels=n_mels,
+            sample_rate=sample_rate,
+            f_min=f_min,
+            f_max=f_max,
+            mel_scale=mel_scale,
+        )
+        self.giffin_lim = torchaudio.transforms.GriffinLim(
+            n_fft=n_fft,
+            n_iter=n_iter,
+            win_length=win_length,
+            hop_length=hop_length,
+        )
+        self._inverse_transform = lambda x, y: inverse_transform(
+            x, y, inv_n_fft, inv_hop, inv_n_fft
+        )
+
+    def inverse_transform(self, spec: Tensor, phase: Tensor):
+        return self._inverse_transform(spec, phase)
+
+    def compute_mel(
+        self,
+        wave: Tensor,
+    ) -> Tensor:
+        """Returns: [B, M, ML]"""
+        mel_tensor = self.mel_spec(wave)  # [M, ML]
+        mel_tensor = (mel_tensor - self.mean) / self.std
+        return mel_tensor  # [B, M, ML]
+
+    def reverse_mel(self, mel: Tensor, n_iter: Optional[int] = None):
+        if isinstance(n_iter, int) and n_iter != self.n_iter:
+            self.giffin_lim = torchaudio.transforms.GriffinLim(
+                n_fft=self.n_fft,
+                n_iter=n_iter,
+                win_length=self.win_length,
+                hop_length=self.hop_length,
+            )
+            self.n_iter = n_iter
+        return self.giffin_lim.forward(
+            self.mel_rscale(mel),
+        )
+
+    def load_audio(
+        self,
+        path: PathLike,
+        top_db: float = 30,
+    ) -> Tensor:
+        is_file(path, True)
+        wave, sr = librosa.load(str(path), sr=self.sample_rate)
+        wave, _ = librosa.effects.trim(wave, top_db=top_db)
+        return (
+            torch.from_numpy(
+                librosa.resample(wave, orig_sr=sr, target_sr=self.sample_rate)
+                if sr != self.sample_rate
+                else wave
+            )
+            .float()
+            .unsqueeze(0)
+        )
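For orientation, a minimal usage sketch of the new `AudioProcessor` (the path and parameters are illustrative, and `lt_utils` plus `librosa` must be installed). Note that `InverseMelScale` is constructed with `m_mels=`, a keyword torchaudio does not accept (it expects `n_mels=`), so instantiation may raise a `TypeError` as published:

```python
# Hypothetical usage sketch; "sample.wav" is a stand-in path.
from lt_tensor.datasets.audio import AudioProcessor

ap = AudioProcessor(sample_rate=24000, n_mels=80)  # may hit the m_mels typo above
wave = ap.load_audio("sample.wav")  # [1, T], trimmed and resampled via librosa
mel = ap.compute_mel(wave)          # normalized mel: (mel - mean) / std
wave_est = ap.reverse_mel(mel)      # Griffin-Lim estimate of the waveform
```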
lt_tensor/math_ops.py CHANGED
@@ -11,7 +11,7 @@ __all__ = [
     "phase",
 ]
 
-from ._torch_commons import *
+from .torch_commons import *
 
 
 def sin_tensor(x: Tensor, freq: float = 1.0) -> Tensor:
lt_tensor/misc_utils.py CHANGED
@@ -27,11 +27,12 @@ import gc
 import random
 import numpy as np
 from lt_utils.type_utils import is_str
-from ._torch_commons import *
+from .torch_commons import *
 from lt_utils.misc_utils import log_traceback, cache_wrapper
 from lt_utils.file_ops import load_json, load_yaml, save_json, save_yaml
 import math
-
+from lt_utils.common import *
+import torch.nn.functional as F
 
 def log_tensor(
     item: Union[Tensor, np.ndarray],
@@ -83,12 +84,12 @@ def set_seed(seed: int):
         torch.xpu.manual_seed_all(seed)
 
 
-def count_parameters(model: Module) -> int:
+def count_parameters(model: nn.Module) -> int:
     """Returns total number of trainable parameters."""
     return sum(p.numel() for p in model.parameters() if p.requires_grad)
 
 
-def freeze_all_except(model: Module, except_layers: Optional[list[str]] = None):
+def freeze_all_except(model: nn.Module, except_layers: Optional[list[str]] = None):
     """Freezes all model parameters except specified layers."""
     no_exceptions = not except_layers
     for name, param in model.named_parameters():
@@ -98,14 +99,14 @@ def freeze_all_except(model: Module, except_layers: Optional[list[str]] = None):
             param.requires_grad_(False)
 
 
-def freeze_selected_weights(model: Module, target_layers: list[str]):
+def freeze_selected_weights(model: nn.Module, target_layers: list[str]):
     """Freezes only parameters on specified layers."""
     for name, param in model.named_parameters():
         if any(layer in name for layer in target_layers):
             param.requires_grad_(False)
 
 
-def unfreeze_all_except(model: Module, except_layers: Optional[list[str]] = None):
+def unfreeze_all_except(model: nn.Module, except_layers: Optional[list[str]] = None):
     """Unfreezes all model parameters except specified layers."""
     no_exceptions = not except_layers
     for name, param in model.named_parameters():
@@ -115,14 +116,14 @@ def unfreeze_all_except(model: Module, except_layers: Optional[list[str]] = None
             param.requires_grad_(True)
 
 
-def unfreeze_selected_weights(model: Module, target_layers: list[str]):
+def unfreeze_selected_weights(model: nn.Module, target_layers: list[str]):
     """Unfreezes only parameters on specified layers."""
     for name, param in model.named_parameters():
         if not any(layer in name for layer in target_layers):
             param.requires_grad_(True)
 
 
-def clip_gradients(model: Module, max_norm: float = 1.0):
+def clip_gradients(model: nn.Module, max_norm: float = 1.0):
     """Applies gradient clipping."""
     return nn.utils.clip_grad_norm_(model.parameters(), max_norm)
 
@@ -576,7 +577,7 @@ def masked_cross_entropy(
     return loss
 
 
-class NoiseScheduler(Module):
+class NoiseScheduler(nn.Module):
     def __init__(self, timesteps: int = 512):
         super().__init__()
 
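With the helpers now typed against `nn.Module`, a short sketch of the freeze/count utilities on a stand-in model (the `Sequential` below is illustrative, not part of the package):

```python
import torch.nn as nn
from lt_tensor.misc_utils import count_parameters, freeze_all_except

model = nn.Sequential(nn.Linear(8, 16), nn.Linear(16, 4))
freeze_all_except(model, except_layers=["1"])  # freeze everything but the second Linear
print(count_parameters(model))  # trainable params only: 16 * 4 + 4 = 68
```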
lt_tensor/_basics.py → lt_tensor/model_base.py RENAMED
@@ -2,17 +2,35 @@ __all__ = ["Model"]
 
 
 import warnings
-from ._torch_commons import *
+from .torch_commons import *
+from lt_utils.common import *
 
-ROOT_DEVICE = torch.device(torch.zeros(1).device)
+T = TypeVar("T")
 
+ROOT_DEVICE = torch.zeros(1).device
 
-class _ModelDevice(nn.Module):
+POSSIBLE_OUTPUT_TYPES: TypeAlias = Union[
+    Tensor,
+    Sequence[Tensor],
+    Dict[Union[str, Tensor, Any], Union[Sequence[Tensor], Tensor, Any]],
+]
+
+
+class Model(nn.Module, ABC):
     """
     This makes it easier to assign a device and retrieves it later
     """
 
     _device: torch.device = ROOT_DEVICE
+    _autocast: bool = False
+
+    @property
+    def autocast(self):
+        return self._autocast
+
+    @autocast.setter
+    def autocast(self, value: bool):
+        self._autocast = value
 
     @property
     def device(self):
@@ -127,18 +145,6 @@ class _ModelDevice(nn.Module):
         self.device = "cpu"
         return self
 
-
-class Model(_ModelDevice, ABC):
-    _autocast: bool = False
-
-    @property
-    def autocast(self):
-        return self._autocast
-
-    @autocast.setter
-    def autocast(self, value: bool):
-        self._autocast = value
-
     def count_trainable_parameters(self, module_name: Optional[str] = None):
         """Gets the number of trainable parameters from either the entire model or from a specific module."""
         if module_name is not None:
@@ -263,6 +269,13 @@ class Model(_ModelDevice, ABC):
         self.train()
         return self(*args, **kwargs)
 
+    @torch.autocast(device_type=_device.type)
+    def ac_forward(self, *args, **kwargs):
+        return
+
+    def __call__(self, *args, **kwds) -> POSSIBLE_OUTPUT_TYPES:
+        return super().__call__(*args, **kwds)
+
     @abstractmethod
     def forward(
         self, *args, **kwargs
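A minimal sketch of subclassing the consolidated `Model` base (the layer size is illustrative); `forward` must be implemented since the class is abstract:

```python
from lt_tensor.model_base import Model
from lt_tensor.torch_commons import Tensor, nn

class TinyNet(Model):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(16, 8)

    def forward(self, x: Tensor) -> Tensor:  # satisfies the abstract method
        return self.proj(x)

net = TinyNet()
net.autocast = True  # property merged in from the removed subclass
print(net.device)    # device tracking inherited from the old _ModelDevice logic
```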
lt_tensor/model_zoo/__init__.py CHANGED
@@ -1,9 +1,11 @@
 __all__ = [
     "bsc",  # basic
     "rsd",  # residual
-    "tfr",  # transformer
+    "tfrms",  # transformer
     "pos",  # positional encoders
     "fsn",  # fusion
-    "dfs",  # diffusion
+    "gns",  # generators
+    "disc",  # discriminators
+    "istft"  # self-explanatory
 ]
-from . import bsc, dfs, fsn, pos, rsd, tfr
+from . import bsc, fsn, gns, istft, pos, rsd, tfrms, disc
lt_tensor/model_zoo/bsc.py CHANGED
@@ -10,8 +10,8 @@ __all__ = [
     "MultiScaleEncoder1D",
 ]
 
-from .._torch_commons import *
-from .._basics import Model
+from ..torch_commons import *
+from ..model_base import Model
 from ..transform import get_sinusoidal_embedding
 
 
lt_tensor/model_zoo/disc.py ADDED
@@ -0,0 +1,155 @@
+from ..torch_commons import *
+import torch.nn.functional as F
+from lt_tensor.model_base import Model
+from lt_utils.common import *
+
+
+class PeriodDiscriminator(Model):
+    def __init__(
+        self,
+        period: int,
+        use_spectral_norm=False,
+        kernel_size: int = 5,
+        stride: int = 3,
+        initial_s: int = 32,
+    ):
+        super().__init__()
+        self.period = period
+        self.norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        self.first_pass = nn.Sequential(
+            self.norm_f(
+                nn.Conv2d(
+                    1, initial_s * 4, (kernel_size, 1), (stride, 1), padding=(2, 0)
+                )
+            ),
+            nn.LeakyReLU(0.1),
+        )
+        self._last_sz = initial_s * 4
+
+        self.convs = nn.ModuleList([self._get_next(i == 3) for i in range(4)])
+
+        self.post_conv = nn.Conv2d(1024, 1, (stride, 1), 1, padding=(1, 0))
+        self.kernel_size = kernel_size
+        self.stride = stride
+
+    def _get_next(self, is_last: bool = False):
+        in_dim = self._last_sz
+        self._last_sz *= 4
+        print(self._last_sz, "-----------------------")
+        stride = (self.stride, 1) if not is_last else 1
+        return nn.Sequential(
+            self.norm_f(
+                nn.Conv2d(
+                    in_dim,
+                    self._last_sz,
+                    (self.kernel_size, 1),
+                    stride,
+                    padding=(2, 0),
+                )
+            ),
+            nn.LeakyReLU(0.1),
+        )
+
+    def forward(self, x: torch.Tensor):
+        """
+        x: (B, T)
+        """
+        b, t = x.shape
+        if t % self.period != 0:
+            pad_len = self.period - (t % self.period)
+            x = F.pad(x, (0, pad_len), mode="reflect")
+            t = t + pad_len
+
+        x = x.view(b, 1, t // self.period, self.period)  # (B, 1, T//P, P)
+
+        f_map = []
+        x = self.first_pass(x)
+        f_map.append(x)
+        for conv in self.convs:
+            x = conv(x)
+            f_map.append(x)
+        x = self.post_conv(x)
+        f_map.append(x)
+        return x.flatten(1, -1), f_map
+
+
+class MultiPeriodDiscriminator(Model):
+    def __init__(self, periods=[2, 3, 5, 7, 11]):
+        super().__init__()
+
+        self.discriminators = nn.ModuleList([PeriodDiscriminator(p) for p in periods])
+
+    def forward(self, x: torch.Tensor):
+        """
+        x: (B, T)
+        Returns: list of tuples of outputs from each period discriminator and the f_map.
+        """
+        return [d(x) for d in self.discriminators]
+
+
+class ScaleDiscriminator(nn.Module):
+    def __init__(self, use_spectral_norm=False):
+        super().__init__()
+        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(nn.Conv1d(1, 128, 15, 1, padding=7)),
+                norm_f(nn.Conv1d(128, 128, 41, 2, groups=4, padding=20)),
+                norm_f(nn.Conv1d(128, 256, 41, 2, groups=16, padding=20)),
+                norm_f(nn.Conv1d(256, 512, 41, 4, groups=16, padding=20)),
+                norm_f(nn.Conv1d(512, 1024, 41, 4, groups=16, padding=20)),
+                norm_f(nn.Conv1d(1024, 1024, 41, 1, groups=16, padding=20)),
+                norm_f(nn.Conv1d(1024, 1024, 5, 1, padding=2)),
+            ]
+        )
+        self.post_conv = norm_f(nn.Conv1d(1024, 1, 3, 1, padding=1))
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x: torch.Tensor):
+        """
+        x: (B, T)
+        """
+        f_map = []
+        x = x.unsqueeze(1)  # (B, 1, T)
+        for conv in self.convs:
+            x = self.activation(conv(x))
+            f_map.append(x)
+        x = self.post_conv(x)
+        f_map.append(x)
+        return x.flatten(1, -1), f_map
+
+
+class MultiScaleDiscriminator(Model):
+    def __init__(self):
+        super().__init__()
+        self.pooling = nn.AvgPool1d(4, 2, padding=2)
+        self.discriminators = nn.ModuleList(
+            [ScaleDiscriminator(i == 0) for i in range(3)]
+        )
+
+    def forward(self, x: torch.Tensor):
+        """
+        x: (B, T)
+        Returns: list of outputs from each scale discriminator
+        """
+        outputs = []
+        for i, d in enumerate(self.discriminators):
+            if i != 0:
+                x = self.pooling(x)
+            outputs.append(d(x))
+        return outputs
+
+
+class GeneralLossDescriminator(Model):
+    """TODO: build an unified loss for both mpd and msd here."""
+
+    def __init__(self):
+        super().__init__()
+        self.mpd = MultiPeriodDiscriminator()
+        self.msd = MultiScaleDiscriminator()
+
+    def _get_group_(self):
+        pass
+
+    def forward(self, x: Tensor, y_hat: Tensor):
+        return
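A hedged sketch of the new multi-scale discriminator on raw audio (batch size and length are illustrative). Note that `PeriodDiscriminator._get_next` quadruples its channel count on every call (and leaves a debug `print` behind), which does not match the fixed 1024-channel `post_conv`, so the multi-period path may not run as published:

```python
import torch
from lt_tensor.model_zoo.disc import MultiScaleDiscriminator

wave = torch.randn(2, 8192)  # (B, T)
msd = MultiScaleDiscriminator()
outputs = msd(wave)          # one (logits, feature_map) pair per scale
logits, f_map = outputs[0]   # logits: (B, N); f_map: per-layer activations
```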
lt_tensor/model_zoo/fsn.py CHANGED
@@ -6,8 +6,8 @@ __all__ = [
     "GatedFusion",
 ]
 
-from .._torch_commons import *
-from .._basics import Model
+from ..torch_commons import *
+from ..model_base import Model
 
 
 class ConcatFusion(Model):
@@ -39,7 +39,7 @@ class BilinearFusion(Model):
         return self.bilinear(a, b)
 
 
-class CrossAttentionFusion(nn.Module):
+class CrossAttentionFusion(Model):
     def __init__(self, q_dim: int, kv_dim: int, n_heads: int = 4, d_model: int = 256):
         super().__init__()
         self.q_proj = nn.Linear(q_dim, d_model)
@@ -57,7 +57,7 @@ class CrossAttentionFusion(nn.Module):
         return output
 
 
-class GatedFusion(nn.Module):
+class GatedFusion(Model):
     def __init__(self, in_dim: int):
         super().__init__()
         self.gate = nn.Sequential(nn.Linear(in_dim * 2, in_dim), nn.Sigmoid())
lt_tensor/model_zoo/dfs.py → lt_tensor/model_zoo/gns.py RENAMED
@@ -7,11 +7,13 @@ __all__ = [
     "NoisePredictor1D",
 ]
 
-from .._torch_commons import *
-from .._basics import Model
-from .rsd import ResBlock1D
+from ..torch_commons import *
+from ..model_base import Model
+from .rsd import ResBlock1D, ResBlocks
 from ..misc_utils import log_tensor
 
+import torch.nn.functional as F
+
 
 class Downsample1D(Model):
     def __init__(
@@ -179,3 +181,5 @@ class NoisePredictor1D(Model):
         cond_proj = self.proj(cond).unsqueeze(-1)  # [B, hidden, 1]
         x = x + cond_proj  # simple conditioning
         return self.net(x)  # [B, C, T]
+
+
lt_tensor/model_zoo/istft.py ADDED
@@ -0,0 +1,108 @@
+from ..torch_commons import *
+from ..model_base import Model
+from .rsd import ResBlocks
+from ..misc_utils import log_tensor
+
+import torch.nn.functional as F
+
+
+class Generator(Model):
+    """Based on the adaptation made by from Rishikesh
+    A Generator for audio processing, can be usd for tother things."""
+
+    def __init__(
+        self,
+        in_channels: int = 80,
+        upsample_rates: List[Union[int, List[int]]] = [8, 8],
+        upsample_kernel_sizes: List[Union[int, List[int]]] = [16, 16],
+        upsample_initial_channel: int = 512,
+        resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
+        resblock_dilation_sizes: List[Union[int, List[int]]] = [
+            [1, 3, 5],
+            [1, 3, 5],
+            [1, 3, 5],
+        ],
+        n_fft: int = 16,
+        activation: nn.Module = nn.LeakyReLU(0.1),
+    ):
+        super().__init__()
+        self.num_kernels = len(resblock_kernel_sizes)
+        self.num_upsamples = len(upsample_rates)
+        self.conv_pre = weight_norm(
+            nn.Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3)
+        )
+        self.blocks = nn.ModuleList()
+        self.activation = activation
+        for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
+            self.blocks.append(
+                self._make_blocks(
+                    (i, k, u),
+                    upsample_initial_channel,
+                    resblock_kernel_sizes,
+                    resblock_dilation_sizes,
+                )
+            )
+
+        ch = upsample_initial_channel // (2 ** (i + 1))
+        self.post_n_fft = n_fft // 2 + 1
+        self.conv_post = weight_norm(nn.Conv1d(ch, n_fft + 2, 7, 1, padding=3))
+        self.conv_post.apply(self.init_weights)
+        self.reflection_pad = torch.nn.ReflectionPad1d((1, 0))
+
+    def _make_blocks(
+        self,
+        state: Tuple[int, int, int],
+        upsample_initial_channel: int,
+        resblock_kernel_sizes: List[Union[int, List[int]]],
+        resblock_dilation_sizes: List[int | List[int]],
+    ):
+        i, k, u = state
+        channels = upsample_initial_channel // (2 ** (i + 1))
+        return nn.ModuleDict(
+            dict(
+                up=nn.Sequential(
+                    self.activation,
+                    weight_norm(
+                        nn.ConvTranspose1d(
+                            upsample_initial_channel // (2**i),
+                            channels,
+                            k,
+                            u,
+                            padding=(k - u) // 2,
+                        )
+                    ),
+                ),
+                residual=ResBlocks(
+                    channels,
+                    resblock_kernel_sizes,
+                    resblock_dilation_sizes,
+                    self.activation,
+                ),
+            )
+        )
+
+    def forward(self, x):
+        x = self.conv_pre(x)
+        for block in self.blocks:
+            x = block["up"](x)
+            x = block["residual"](x)
+
+        x = self.reflection_pad(x)
+        x = self.conv_post(x)
+        spec = torch.exp(x[:, : self.post_n_fft, :])
+        phase = torch.sin(x[:, self.post_n_fft :, :])
+
+        return spec, phase
+
+    def remove_weight_norm(self):
+        for module in self.modules():
+            try:
+                remove_weight_norm(module)
+            except ValueError:
+                pass  # Not normed, skip
+
+    @staticmethod
+    def init_weights(m, mean=0.0, std=0.01):
+        classname = m.__class__.__name__
+        if "Conv" in classname:
+            m.weight.data.normal_(mean, std)
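A hedged sketch tying the new `Generator` to `inverse_transform`: the network emits a magnitude/phase pair sized for `n_fft // 2 + 1` frequency bins, which `torch.istft` can invert (shapes and `n_fft` are illustrative):

```python
import torch
from lt_tensor.model_zoo.istft import Generator
from lt_tensor.transform import inverse_transform

gen = Generator(in_channels=80, n_fft=16)  # default upsample rates: 8 * 8 = 64x
mel = torch.randn(1, 80, 32)   # (B, mels, frames)
spec, phase = gen(mel)         # each (B, n_fft // 2 + 1, 32 * 64 + 1)
wave = inverse_transform(spec, phase, n_fft=16, hop_length=4, win_length=16)
```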
lt_tensor/model_zoo/pos.py CHANGED
@@ -5,11 +5,11 @@ __all__ = [
 ]
 
 import math
-from .._torch_commons import *
-from .._basics import Model
+from ..torch_commons import *
+from ..model_base import Model
 
 
-class RotaryEmbedding(Module):
+class RotaryEmbedding(nn.Module):
     def __init__(self, dim: int, base: int = 10000):
         """
         Rotary Positional Embedding Module.
@@ -76,7 +76,7 @@ class RotaryEmbedding(Module):
         return x_rotated.view(b, s, d)  # Back to [b, s, d]
 
 
-class PositionalEncoding(Module):
+class PositionalEncoding(nn.Module):
     def __init__(self, d_model: int, max_len: int = 8192):
         super().__init__()
         # create a matrix of [seq_len, hidden_dim] representing positional encoding for each token in sequence
@@ -100,7 +100,7 @@ class PositionalEncoding(Module):
         return x
 
 
-class LearnedPositionalEncoding(Module):
+class LearnedPositionalEncoding(nn.Module):
     def __init__(self, max_len: int, dim_model: int, dropout: float = 0.1):
         super().__init__()
         self.embedding = nn.Embedding(max_len, dim_model)
lt_tensor/model_zoo/rsd.py CHANGED
@@ -1,23 +1,24 @@
 __all__ = [
     "spectral_norm_select",
+    "ResBlock1D_BT",
     "ResBlock1D",
     "ResBlock2D",
-    "ResBlock1D_S",
+    "ResBlocks",
 ]
-
-from .._torch_commons import *
-from .._basics import Model
+from lt_utils.common import *
+from ..torch_commons import *
+from ..model_base import Model
 import math
 from ..misc_utils import log_tensor
 
 
-def spectral_norm_select(module: Module, enabled: bool):
+def spectral_norm_select(module: nn.Module, enabled: bool):
     if enabled:
         return spectral_norm(module)
     return module
 
 
-class ResBlock1D(Model):
+class ResBlock1D_BT(Model):
     def __init__(
         self,
         in_channels: int,
@@ -106,6 +107,103 @@ class ResBlock1D(Model):
             m.weight.data.normal_(mean, std)
 
 
+class ResBlock1D(Model):
+    def __init__(
+        self,
+        channels,
+        kernel_size=3,
+        dilation=(1, 3, 5),
+        activation: nn.Module = nn.LeakyReLU(0.1),
+    ):
+        super(ResBlock1D, self).__init__()
+        self.convs = nn.ModuleList(
+            [
+                self._get_conv_layer(i, channels, kernel_size, 1, dilation, activation)
+                for i in range(3)
+            ]
+        )
+        self.convs.apply(self.init_weights)
+
+    def _get_conv_layer(self, id, ch, k, stride, d, actv):
+        get_padding = lambda ks, d: int((ks * d - d) / 2)
+        return nn.Sequential(
+            actv,  # 1
+            weight_norm(
+                nn.Conv1d(
+                    ch, ch, k, stride, dilation=d[id], padding=get_padding(k, d[id])
+                )
+            ),  # 2
+            actv,  # 3
+            weight_norm(
+                nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
+            ),  # 4
+        )
+
+    def forward(self, x: torch.Tensor):
+        for cnn in self.convs:
+            x = cnn(x) + x
+        return x
+
+    def remove_weight_norm(self):
+        for module in self.modules():
+            try:
+                remove_weight_norm(module)
+            except ValueError:
+                pass  # Not normed, skip
+
+    @staticmethod
+    def init_weights(m, mean=0.0, std=0.01):
+        classname = m.__class__.__name__
+        if "Conv" in classname:
+            m.weight.data.normal_(mean, std)
+
+
+class ResBlocks(Model):
+    def __init__(
+        self,
+        channels: int,
+        resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11],
+        resblock_dilation_sizes: List[Union[int, List[int]]] = [
+            [1, 3, 5],
+            [1, 3, 5],
+            [1, 3, 5],
+        ],
+        activation: nn.Module = nn.LeakyReLU(0.1),
+    ):
+        super().__init__()
+        self.num_kernels = len(resblock_kernel_sizes)
+        self.rb = nn.ModuleList()
+        self.activation = activation
+
+        for k, j in zip(resblock_kernel_sizes, resblock_dilation_sizes):
+            self.rb.append(ResBlock1D(channels, k, j, activation))
+
+        self.rb.apply(self.init_weights)
+
+    def forward(self, x: torch.Tensor):
+        xs = None
+        for i, block in enumerate(self.rb):
+            if i == 0:
+                xs = block(x)
+            else:
+                xs += block(x)
+        x = xs / self.num_kernels
+        return self.activation(x)
+
+    def remove_weight_norm(self):
+        for module in self.modules():
+            try:
+                remove_weight_norm(module)
+            except ValueError:
+                pass  # Not normed, skip
+
+    @staticmethod
+    def init_weights(m, mean=0.0, std=0.01):
+        classname = m.__class__.__name__
+        if "Conv" in classname:
+            m.weight.data.normal_(mean, std)
+
+
 class ResBlock2D(Model):
     def __init__(
         self,
@@ -137,22 +235,3 @@ class ResBlock2D(Model):
 
     def forward(self, x):
         return (self.block(x) + self.skip(x)) / self.sqrt_2
-
-
-class ResBlock1D_S(Model):
-    """Simplified version"""
-
-    def __init__(self, channels: int, kernel_size: int = 3, dilation: int = 1):
-        super().__init__()
-        padding = (kernel_size - 1) // 2 * dilation
-        self.net = nn.Sequential(
-            nn.Conv1d(
-                channels, channels, kernel_size, padding=padding, dilation=dilation
-            ),
-            nn.LeakyReLU(0.1),
-            nn.Conv1d(channels, channels, kernel_size, padding=padding, dilation=1),
-        )
-        self.activation = nn.LeakyReLU(0.1)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.activation(x + self.net(x))
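A short sketch of the HiFi-GAN-style blocks added above (channel count and length are illustrative): `ResBlock1D` preserves the input shape through dilated residual convolutions, and `ResBlocks` averages three such branches:

```python
import torch
from lt_tensor.model_zoo.rsd import ResBlock1D, ResBlocks

x = torch.randn(1, 64, 256)  # (B, C, T)
y = ResBlock1D(64, kernel_size=3, dilation=(1, 3, 5))(x)  # same shape as x
z = ResBlocks(64)(x)  # kernel sizes (3, 7, 11), branches summed, averaged, activated
```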
lt_tensor/model_zoo/tfr.py → lt_tensor/model_zoo/tfrms.py RENAMED
@@ -7,10 +7,10 @@ __all__ = [
 ]
 
 import math
-from .._torch_commons import *
-from .._basics import Model
+from ..torch_commons import *
+from ..model_base import Model
 from lt_utils.misc_utils import default
-
+from typing import Optional
 from .pos import *
 from .bsc import FeedForward
 
lt_tensor/torch_commons.py ADDED
@@ -0,0 +1,30 @@
+__all__ = [
+    "nn",
+    "torch",
+    "optim",
+    "Tensor",
+    "FloatTensor",
+    "LongTensor",
+    "HalfTensor",
+    "remove_weight_norm",
+    "remove_spectral_norm",
+    "weight_norm",
+    "spectral_norm",
+    "DeviceType",
+    # frequent typing
+    "Optional",
+    "List",
+    "Dict",
+    "Tuple",
+    "Union",
+    "TypeAlias",
+    "Sequence",
+    "Any",
+]
+import torch
+from torch.nn.utils import remove_weight_norm, remove_spectral_norm
+from torch.nn.utils.parametrizations import weight_norm, spectral_norm
+from torch import nn, optim, Tensor, FloatTensor, LongTensor, HalfTensor
+from typing import TypeAlias, Union, Optional, List, Dict, Tuple, Sequence, Any
+
+DeviceType: TypeAlias = Union[torch.device, str]
lt_tensor/transform.py CHANGED
@@ -20,10 +20,12 @@ __all__ = [
     "stft_istft_rebuild",
 ]
 
-from ._torch_commons import *
+from .torch_commons import *
 import torchaudio
 import math
 from .misc_utils import log_tensor
+from lt_utils.common import *
+import torch.nn.functional as F
 
 
 def to_mel_spectrogram(
@@ -196,7 +198,7 @@ def get_sinusoidal_embedding(timesteps: torch.Tensor, dim: int) -> torch.Tensor:
     return emb
 
 
-def _generate_window(
+def generate_window(
     M: int, alpha: float = 0.5, device: Optional[DeviceType] = None
 ) -> Tensor:
     if M < 1:
@@ -281,7 +283,7 @@ def window_sumsquare(
     x = torch.zeros(total_length, dtype=dtype, device=device)
 
     # Get the window (from scipy for now)
-    win = _generate_window(window_spec, win_length, fftbins=True)
+    win = generate_window(window_spec, win_length, fftbins=True)
     win = torch.tensor(win, dtype=dtype, device=device)
 
     # Normalize and square
@@ -301,14 +303,14 @@ def window_sumsquare(
 def inverse_transform(
     spec: Tensor,
     phase: Tensor,
-    window: Optional[Tensor] = None,
     n_fft: int = 2048,
     hop_length: int = 300,
     win_length: int = 1200,
     length: Optional[Any] = None,
+    window: Optional[Tensor] = None,
 ):
     if window is None:
-        window = _generate_window(win_length)
+        window = torch.hann_window(win_length or n_fft).to(spec.device)
     return torch.istft(
         spec * torch.exp(phase * 1j),
         n_fft,
@@ -317,33 +319,3 @@ def inverse_transform(
         window=window,
         length=length,
     )
-
-
-def stft_istft_rebuild(
-    input_data: Tensor,
-    window: Optional[Tensor] = None,
-    n_fft: int = 2048,
-    hop_length: int = 300,
-    win_length: int = 1200,
-):
-    """
-    Perform STFT followed by ISTFT reconstruction using magnitude and phase.
-    """
-    if window is None:
-        window = _generate_window(win_length)
-    st = torch.stft(
-        input_data,
-        n_fft,
-        hop_length,
-        win_length,
-        window=window,
-        return_complex=True,
-    )
-    return torch.istft(
-        torch.abs(st) * torch.exp(1j * torch.angle(st)),
-        n_fft,
-        hop_length,
-        win_length,
-        window=window,
-        length=input_data.shape[-1],
-    ).squeeze(0)
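With `_generate_window` made public and the keyword order changed, `inverse_transform` now falls back to a Hann window on the spectrogram's device; a minimal sketch (values are illustrative):

```python
import torch
from lt_tensor.transform import inverse_transform

spec = torch.rand(1, 1025, 40)   # magnitude, (B, n_fft // 2 + 1, frames)
phase = torch.rand(1, 1025, 40)  # phase in radians
wave = inverse_transform(spec, phase, n_fft=2048, hop_length=300, win_length=1200)
```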
lt_tensor-0.0.1a3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lt-tensor
-Version: 0.0.1a0
+Version: 0.0.1a3
 Summary: General utilities for PyTorch and others. Built for general use.
 Home-page: https://github.com/gr1336/lt-tensor/
 Author: gr1336
@@ -17,7 +17,8 @@ Requires-Dist: numpy>=1.26.4
 Requires-Dist: tokenizers
 Requires-Dist: pyyaml>=6.0.0
 Requires-Dist: numba>0.60.0
-Requires-Dist: lt-utils==0.0.1a0
+Requires-Dist: lt-utils==0.0.1.a3
+Requires-Dist: librosa>=0.11.0
 Dynamic: author
 Dynamic: classifier
 Dynamic: description
lt_tensor-0.0.1a3.dist-info/RECORD ADDED
@@ -0,0 +1,24 @@
+lt_tensor/__init__.py,sha256=bvCjaIsYjbGFbR5MNezgLyRgN4_CsyrjmVEvuClsgOU,303
+lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
+lt_tensor/math_ops.py,sha256=ZtnJ9WB-pbFQLsXuNfQl2dAaeob5BWfxmhkwpxITUZ4,2066
+lt_tensor/misc_utils.py,sha256=e44FCQbjNHP-4WOHIbtqqH0x590DzUE6CrD_4Vl_d38,19880
+lt_tensor/model_base.py,sha256=tmRu5pTcELKMFcybOiZ1thJPuJWRSPkbUUtp9Y1NJWw,9555
+lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
+lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
+lt_tensor/transform.py,sha256=IVAaQlq12OvMVhX3lX4lgsTCJYJce5n5MtMy7IK_AU4,8892
+lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lt_tensor/datasets/audio.py,sha256=5Bn9Apb3K5QnRah2EfhztcatBRsnpQsdItm_jTaDrUs,3350
+lt_tensor/model_zoo/__init__.py,sha256=jipEk50_DTMQbGg8FnDDukxmh7Bcwvl_QVRS3rkb7aY,283
+lt_tensor/model_zoo/bsc.py,sha256=muxIR7dU-Pvf-HFE-iy3zmRb1sTJlcs1vqdlnbU1Hss,6307
+lt_tensor/model_zoo/disc.py,sha256=ND6JR_x6b2Y1VqxZejalv8Cz5_TO3H_Z-0x6UnACbBM,4740
+lt_tensor/model_zoo/fsn.py,sha256=5ySsg2OHjvTV_coPAdZQ0f7bz4ugJB8mDYsItmd61qA,2102
+lt_tensor/model_zoo/gns.py,sha256=Tirr_grONp_FFQ_L7K-zV2lvkaC39h8mMl4QDpx9vLQ,6028
+lt_tensor/model_zoo/istft.py,sha256=RV7KVY7q4CYzzsWXH4NGJQwSqrYWwHh-16Q62lKoA2k,3594
+lt_tensor/model_zoo/pos.py,sha256=N28v-rF8CELouYxQ9r45Jbd4ri5DNydwDgg7nzmQ4Ig,4471
+lt_tensor/model_zoo/rsd.py,sha256=5bba50g1Hm5kMexuJ4SwOIJuyQ1qJd8Acrq-Ax6CqE8,6958
+lt_tensor/model_zoo/tfrms.py,sha256=kauh-A13pk08SZ5OspEE5a-gPKD4rZr6tqMKWu3KGhk,4237
+lt_tensor-0.0.1a3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lt_tensor-0.0.1a3.dist-info/METADATA,sha256=T5Gya3J6YebHzwR0gyvJ8lr5Rj9EJWtLSoo7--CSado,968
+lt_tensor-0.0.1a3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lt_tensor-0.0.1a3.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+lt_tensor-0.0.1a3.dist-info/RECORD,,
lt_tensor/_torch_commons.py DELETED
@@ -1,12 +0,0 @@
-import torch
-from torch import nn, optim
-import torch.nn.functional as F
-from torch.optim import Optimizer
-from torch.nn import Module, L1Loss, MSELoss
-from torch.nn.utils import remove_weight_norm
-from torch import Tensor, FloatTensor, device, LongTensor
-from torch.nn.utils.parametrizations import weight_norm, spectral_norm
-
-from lt_utils.common import *
-
-DeviceType: TypeAlias = Union[device, str]
lt_tensor-0.0.1a0.dist-info/RECORD DELETED
@@ -1,20 +0,0 @@
-lt_tensor/__init__.py,sha256=pUB05ZkgkpP10ivzwoWdbq_HCxw-iOsbf6m8eFtx-YM,26
-lt_tensor/_basics.py,sha256=Zty5XZ5qeVFoZJRhtpGvOH7rg9hbAS7mIULOdrOKBDQ,9189
-lt_tensor/_torch_commons.py,sha256=_2Eck-MsQ46PxW5ku7NJvNSL5vg54_4GkLCqdzFevwA,402
-lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
-lt_tensor/math_ops.py,sha256=j4Arst-kOdm0bcZbXD4rzcVdiyYOJ59ZQQIyH7r0Wug,2067
-lt_tensor/misc_utils.py,sha256=3r6ikrBCj2IjSWZMRU1Lif0OgYTF3HExANG_IqhPtic,19799
-lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
-lt_tensor/transform.py,sha256=IYPT2YHT9NDvHrdtJvTLmxL9Cm26Ck2Uc9zE0k6l2aI,9504
-lt_tensor/model_zoo/__init__.py,sha256=ybyd3St8wiswnBGKFcy6FqRo5NlfGPJPC7jbRJlTlv8,205
-lt_tensor/model_zoo/bsc.py,sha256=6jBICcy8FT81EUiN9g1eZuHhPF4xA7gzS5kaVT3RngU,6305
-lt_tensor/model_zoo/dfs.py,sha256=0dTA1aveZT5OZu8eI6Cb8q8IGSjZyFYDcfc2FpDH5S8,5980
-lt_tensor/model_zoo/fsn.py,sha256=YDu1sbLwJwSKCPlmPlqQujivlgfNvwpwGa5q4SY9MYk,2108
-lt_tensor/model_zoo/pos.py,sha256=L2j6zYkdBWjrgROJt4cFOwdnne6j94m2lGi9m_QC7oc,4460
-lt_tensor/model_zoo/rsd.py,sha256=QGfkhoP7BVCGlCyBkIxHE7eWUp71JFkK6bM4dgBw1Hw,4720
-lt_tensor/model_zoo/tfr.py,sha256=mIwu6WqDxcLGlBfofIIspzGpUe2jsR0hrzT9mEW-MHE,4208
-lt_tensor-0.0.1a0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lt_tensor-0.0.1a0.dist-info/METADATA,sha256=hQVkxd4J5C7KX1DRVVYkIVKK0MIlGf-0kSLQ--HkTdY,936
-lt_tensor-0.0.1a0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lt_tensor-0.0.1a0.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
-lt_tensor-0.0.1a0.dist-info/RECORD,,