lt-tensor 0.0.1a34__py3-none-any.whl → 0.0.1a35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lt_tensor/__init__.py +1 -1
- lt_tensor/losses.py +11 -7
- lt_tensor/lr_schedulers.py +147 -21
- lt_tensor/misc_utils.py +35 -42
- lt_tensor/model_zoo/activations/__init__.py +3 -0
- lt_tensor/model_zoo/activations/alias_free/__init__.py +3 -0
- lt_tensor/model_zoo/activations/{alias_free_torch → alias_free}/act.py +8 -6
- lt_tensor/model_zoo/activations/snake/__init__.py +41 -43
- lt_tensor/model_zoo/audio_models/__init__.py +2 -2
- lt_tensor/model_zoo/audio_models/bigvgan/__init__.py +243 -0
- lt_tensor/model_zoo/audio_models/hifigan/__init__.py +16 -347
- lt_tensor/model_zoo/audio_models/istft/__init__.py +14 -349
- lt_tensor/model_zoo/audio_models/resblocks.py +248 -0
- lt_tensor/model_zoo/convs.py +21 -32
- lt_tensor/model_zoo/losses/discriminators.py +143 -37
- {lt_tensor-0.0.1a34.dist-info → lt_tensor-0.0.1a35.dist-info}/METADATA +1 -1
- lt_tensor-0.0.1a35.dist-info/RECORD +40 -0
- lt_tensor/model_zoo/activations/alias_free_torch/__init__.py +0 -1
- lt_tensor-0.0.1a34.dist-info/RECORD +0 -37
- /lt_tensor/model_zoo/activations/{alias_free_torch → alias_free}/filter.py +0 -0
- /lt_tensor/model_zoo/activations/{alias_free_torch → alias_free}/resample.py +0 -0
- {lt_tensor-0.0.1a34.dist-info → lt_tensor-0.0.1a35.dist-info}/WHEEL +0 -0
- {lt_tensor-0.0.1a34.dist-info → lt_tensor-0.0.1a35.dist-info}/licenses/LICENSE +0 -0
- {lt_tensor-0.0.1a34.dist-info → lt_tensor-0.0.1a35.dist-info}/top_level.txt +0 -0
lt_tensor/__init__.py
CHANGED
lt_tensor/losses.py
CHANGED
@@ -130,7 +130,9 @@ class MultiMelScaleLoss(Model):
         hops: List[int] = [8, 16, 32, 64, 128, 256, 512],
         f_min: float = [0, 0, 0, 0, 0, 0, 0],
         f_max: Optional[float] = [None, None, None, None, None, None, None],
-
+        loss_mel_fn: Callable[[Tensor, Tensor], Tensor] = nn.L1Loss(),
+        loss_pitch_fn: Callable[[Tensor, Tensor], Tensor] = nn.L1Loss(),
+        loss_rms_fn: Callable[[Tensor, Tensor], Tensor] = nn.L1Loss(),
         center: bool = True,
         power: float = 1.0,
         normalized: bool = False,
@@ -141,8 +143,8 @@ class MultiMelScaleLoss(Model):
         use_istft_norm: bool = True,
         use_pitch_loss: bool = True,
         use_rms_loss: bool = True,
-        norm_pitch_fn: Callable[[Tensor], Tensor] =
-        norm_rms_fn: Callable[[Tensor], Tensor] =
+        norm_pitch_fn: Callable[[Tensor], Tensor] = normalize_minmax,
+        norm_rms_fn: Callable[[Tensor], Tensor] = normalize_zscore,
         lambda_mel: float = 1.0,
         lambda_rms: float = 1.0,
         lambda_pitch: float = 1.0,
@@ -157,7 +159,9 @@ class MultiMelScaleLoss(Model):
             == len(f_min)
            == len(f_max)
        )
-        self.
+        self.loss_mel_fn = loss_mel_fn
+        self.loss_pitch_fn = loss_pitch_fn
+        self.loss_rms_fn = loss_rms_fn
         self.lambda_mel = lambda_mel
         self.weight = weight
         self.use_istft_norm = use_istft_norm
@@ -257,21 +261,21 @@ class MultiMelScaleLoss(Model):
             x_mels = M(input_proc)
             y_mels = M(target_proc)

-            loss = self.
+            loss = self.loss_mel_fn(x_mels.squeeze(), y_mels.squeeze())
             losses += loss * self.lambda_mel

             # pitch/f0 loss
             if self.use_pitch_loss:
                 x_pitch = self.norm_pitch_fn(M.compute_pitch(input_proc))
                 y_pitch = self.norm_pitch_fn(M.compute_pitch(target_proc))
-                f0_loss = self.
+                f0_loss = self.loss_pitch_fn(x_pitch, y_pitch)
                 losses += f0_loss * self.lambda_pitch

             # energy/rms loss
             if self.use_rms_loss:
                 x_rms = self.norm_rms(M.compute_rms(input_proc, x_mels))
                 y_rms = self.norm_rms(M.compute_rms(target_proc, y_mels))
-                rms_loss = self.
+                rms_loss = self.loss_rms_fn(x_rms, y_rms)
                 losses += rms_loss * self.lambda_rms

         return losses * self.weight
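The practical effect of this hunk is that the mel, pitch, and RMS terms each take their own injectable criterion instead of a single hard-coded loss. A minimal usage sketch, assuming the remaining constructor arguments keep their defaults and that the loss is called on a (generated, reference) waveform pair; the tensor shapes are illustrative only:

```python
import torch
import torch.nn as nn
from lt_tensor.losses import MultiMelScaleLoss

# Per-term criteria are now injectable; anything Callable[[Tensor, Tensor], Tensor] works.
criterion = MultiMelScaleLoss(
    loss_mel_fn=nn.L1Loss(),
    loss_pitch_fn=nn.SmoothL1Loss(),
    loss_rms_fn=nn.MSELoss(),
    lambda_mel=1.0,
    lambda_pitch=0.5,
    lambda_rms=0.5,
)

fake = torch.randn(2, 16_000)   # hypothetical generated waveform batch
real = torch.randn(2, 16_000)   # hypothetical reference waveform batch
total = criterion(fake, real)   # weighted sum of mel + pitch + RMS terms
```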
lt_tensor/lr_schedulers.py
CHANGED
@@ -1,15 +1,20 @@
 __all__ = [
     "WarmupDecayScheduler",
     "AdaptiveDropScheduler",
-    "
+    "SinusoidalDecayLR",
+    "GuidedWaveringLR",
+    "FloorExponentialLR",
 ]

 import math
 from torch.optim import Optimizer
-from torch.optim.lr_scheduler import
+from torch.optim.lr_scheduler import LRScheduler
+from typing import Optional
+from numbers import Number
+from lt_tensor.misc_utils import update_lr


-class WarmupDecayScheduler(
+class WarmupDecayScheduler(LRScheduler):
     def __init__(
         self,
         optimizer: Optimizer,
@@ -49,7 +54,7 @@ class WarmupDecayScheduler(_LRScheduler):
         return lrs


-class AdaptiveDropScheduler(
+class AdaptiveDropScheduler(LRScheduler):
     def __init__(
         self,
         optimizer,
@@ -89,26 +94,147 @@ class AdaptiveDropScheduler(_LRScheduler):
         return [group["lr"] for group in self.optimizer.param_groups]


-class
+class SinusoidalDecayLR(LRScheduler):
     def __init__(
-        self,
+        self,
+        optimizer: Optimizer,
+        initial_lr: float = 1e-3,
+        target_lr: float = 1e-5,
+        floor_lr: float = 1e-7,
+        decay_rate: float = 1e-6,  # decay per period
+        wave_amplitude: float = 1e-5,
+        period: int = 256,
+        last_epoch: int = -1,
     ):
-        ""
-
-
-
-
-
-        self.
-        self.
+        assert decay_rate != 0.0, "decay_rate must be different from 0.0"
+        assert (
+            initial_lr >= target_lr >= floor_lr
+        ), "Must satisfy: initial_lr ≥ target_lr ≥ floor_lr"
+
+        self.initial_lr = initial_lr
+        self.target_lr = target_lr
+        self.floor_lr = floor_lr
+        self.decay_rate = decay_rate
+        self.wave_amplitude = wave_amplitude
         self.period = period
-
+
         super().__init__(optimizer, last_epoch)

     def get_lr(self):
-
-
-
-
-
+        step = self.last_epoch + 1
+        cycles = step // self.period
+        t = step % self.period
+        # Decay center down to target_lr, then freeze
+        center_decay = math.exp(-self.decay_rate * cycles)
+        center = max(self.target_lr, self.initial_lr * center_decay)
+        # Decay amplitude in sync with center (relative to initial)
+        amplitude_decay = math.exp(-self.decay_rate * cycles)
+        current_amplitude = self.wave_amplitude * self.initial_lr * amplitude_decay
+        sin_offset = math.sin(2 * math.pi * t / self.period)
+        lr = max(center + current_amplitude * sin_offset, self.floor_lr)
+        return [lr for _ in self.optimizer.param_groups]
+
+
+class GuidedWaveringLR(LRScheduler):
+    def __init__(
+        self,
+        optimizer: Optimizer,
+        initial_lr: float = 1e-3,
+        target_lr: float = 1e-5,
+        floor_lr: float = 1e-7,
+        decay_rate: float = 0.01,
+        wave_amplitude: float = 0.02,
+        period: int = 256,
+        stop_decay_after: int = None,
+        last_epoch: int = -1,
+    ):
+        assert decay_rate != 0.0, "decay_rate must be non-zero"
+        assert (
+            initial_lr >= target_lr >= floor_lr
+        ), "Must satisfy: initial ≥ target ≥ floor"
+
+        self.initial_lr = initial_lr
+        self.target_lr = target_lr
+        self.floor_lr = floor_lr
+        self.decay_rate = decay_rate
+        self.wave_amplitude = wave_amplitude
+        self.period = period
+        self.stop_decay_after = stop_decay_after
+
+        super().__init__(optimizer, last_epoch)
+
+    def get_lr(self):
+        step = self.last_epoch + 1
+        cycles = step // self.period
+        t = step % self.period
+
+        decay_cycles = (
+            min(cycles, self.stop_decay_after) if self.stop_decay_after else cycles
+        )
+        center = max(
+            self.target_lr, self.initial_lr * math.exp(-self.decay_rate * decay_cycles)
+        )
+        amp = (
+            self.wave_amplitude
+            * self.initial_lr
+            * math.exp(-self.decay_rate * decay_cycles)
+        )
+        phase = 2 * math.pi * t / self.period
+        wave = math.sin(phase) * math.cos(phase)
+        lr = max(center + amp * wave, self.floor_lr)
+        return [lr for _ in self.optimizer.param_groups]
+
+
+class FloorExponentialLR(LRScheduler):
+    """Modified version from exponential lr, to have a minimum and reset functions.
+
+    Decays the learning rate of each parameter group by gamma every epoch.
+
+    When last_epoch=-1, sets initial lr as lr.
+
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        gamma (float): Multiplicative factor of learning rate decay.
+        last_epoch (int): The index of last epoch. Default: -1.
+    """
+
+    def __init__(
+        self,
+        optimizer: Optimizer,
+        initial_lr: float = 1e-4,
+        gamma: float = 0.99998,
+        last_epoch: int = -1,
+        floor_lr: float = 1e-6,
+    ):
+        self.gamma = gamma
+        self.floor_lr = floor_lr
+        self.initial_lr = initial_lr
+
+        super().__init__(optimizer, last_epoch)
+
+    def set_floor(self, new_value: float):
+        assert isinstance(new_value, Number)
+        self.floor_lr = new_value
+
+    def reset_lr(self, new_value: Optional[float] = None):
+        new_lr = new_value if isinstance(new_value, Number) else self.initial_lr
+        self.initial_lr = new_lr
+        update_lr(self.optimizer, new_lr)
+
+    def get_lr(self):
+
+        if self.last_epoch == 0:
+            return [
+                max(group["lr"], self.floor_lr) for group in self.optimizer.param_groups
+            ]
+
+        return [
+            max(group["lr"] * self.gamma, self.floor_lr)
+            for group in self.optimizer.param_groups
+        ]
+
+    def _get_closed_form_lr(self):
+        return [
+            max(base_lr * self.gamma**self.last_epoch, self.floor_lr)
+            for base_lr in self.base_lrs
+        ]
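All three new classes subclass `torch.optim.lr_scheduler.LRScheduler`, so they plug into an ordinary training loop through `scheduler.step()`. A small sketch under that assumption (the model and loop body are placeholders):

```python
import torch.nn as nn
from torch import optim
from lt_tensor.lr_schedulers import FloorExponentialLR, SinusoidalDecayLR

model = nn.Linear(16, 16)
opt = optim.AdamW(model.parameters(), lr=1e-4)

# Multiplies the lr by `gamma` each step but never lets it fall below `floor_lr`.
sched = FloorExponentialLR(opt, initial_lr=1e-4, gamma=0.99998, floor_lr=1e-6)

for step in range(1_000):
    # forward / backward elided
    opt.step()
    sched.step()

print(opt.param_groups[0]["lr"])  # decayed, clamped at floor_lr
sched.reset_lr()                  # writes initial_lr back into the optimizer

# SinusoidalDecayLR instead oscillates around a center that decays toward target_lr:
wave_opt = optim.AdamW(model.parameters(), lr=1e-3)
wave_sched = SinusoidalDecayLR(
    wave_opt, initial_lr=1e-3, target_lr=1e-5, wave_amplitude=1e-5, period=256
)
```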
lt_tensor/misc_utils.py
CHANGED
@@ -24,6 +24,7 @@ __all__ = [
     "plot_view",
     "get_weights",
     "get_activated_conv",
+    "update_lr",
 ]

 import re
@@ -77,6 +78,33 @@ def get_activated_conv(
     )


+def get_loss_average(losses: List[float]):
+    """A little helper for training, for example:
+    ```python
+    losses = []
+    for epoch in range(100):
+        for inp, label in dataloader:
+            optimizer.zero_grad()
+            out = model(inp)
+            loss = loss_fn(out, label)
+            optimizer.step()
+            losses.append(loss.item())
+        print(f"Epoch {epoch+1} | Loss: {get_loss_average(losses):.4f}")
+    """
+    if not losses:
+        return float("nan")
+    return sum(losses) / len(losses)
+
+
+def update_lr(optimizer: optim.Optimizer, new_value: float = 1e-4):
+    for param_group in optimizer.param_groups:
+        if isinstance(param_group["lr"], Tensor):
+            param_group["lr"].fill_(new_value)
+        else:
+            param_group["lr"] = new_value
+    return optimizer
+
+
 def plot_view(
     data: Dict[str, List[Any]],
     title: str = "Loss",
@@ -520,49 +548,14 @@ def sample_tensor(tensor: torch.Tensor, num_samples: int = 5):
     return flat[idx]


-class TorchCacheUtils:
-    cached_shortcuts: dict[str, Callable[[None], None]] = {}
-
-    has_cuda: bool = torch.cuda.is_available()
-    has_xpu: bool = torch.xpu.is_available()
-    has_mps: bool = torch.mps.is_available()
-
-    _ignore: list[str] = []
-
-    def __init__(self):
-        pass
-
-    def _apply_clear(self, device: str):
-        if device in self._ignore:
-            gc.collect()
-            return
-        try:
-            clear_fn = self.cached_shortcuts.get(
-                device, getattr(torch, device).empty_cache
-            )
-            if device not in self.cached_shortcuts:
-                self.cached_shortcuts.update({device: clear_fn})
-
-        except Exception as e:
-            print(e)
-            self._ignore.append(device)
-
-    def clear(self):
-        gc.collect()
-        if self.has_xpu:
-            self._apply_clear("xpu")
-        if self.has_cuda:
-            self._apply_clear("cuda")
-        if self.has_mps:
-            self._apply_clear("mps")
-        gc.collect()
-
-
-_clear_cache_cls = TorchCacheUtils()
-
-
 def clear_cache():
-
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    if torch.mps.is_available():
+        torch.mps.empty_cache()
+    if torch.xpu.is_available():
+        torch.xpu.empty_cache()
+    gc.collect()


 @cache_wrapper
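The removal of `TorchCacheUtils` leaves three small, stateless helpers. A quick sketch of how they might be used together, assuming the import path shown in this diff:

```python
import torch.nn as nn
from torch import optim
from lt_tensor.misc_utils import update_lr, get_loss_average, clear_cache

model = nn.Linear(4, 4)
opt = optim.SGD(model.parameters(), lr=1e-3)

# Overwrites the lr of every param group (tensor-valued lrs are filled in place).
update_lr(opt, 5e-4)

# Plain mean of the collected floats; returns NaN for an empty list.
running_losses = [0.91, 0.74, 0.66]
print(f"avg loss: {get_loss_average(running_losses):.4f}")

# Empties whichever backend caches (CUDA / MPS / XPU) are available, then runs gc.
clear_cache()
```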
lt_tensor/model_zoo/activations/{alias_free_torch → alias_free}/act.py
CHANGED
@@ -1,15 +1,17 @@
-import torch
 import torch.nn as nn
-
-
-
+from lt_tensor.model_zoo.activations.alias_free.resample import (
+    UpSample2d,
+    DownSample2d,
+    UpSample1d,
+    DownSample1d,
+)


 class Activation1d(nn.Module):

     def __init__(
         self,
-        activation,
+        activation: nn.Module,
         up_ratio: int = 2,
         down_ratio: int = 2,
         up_kernel_size: int = 12,
@@ -34,7 +36,7 @@ class Activation2d(nn.Module):

     def __init__(
         self,
-        activation,
+        activation: nn.Module,
         up_ratio: int = 2,
         down_ratio: int = 2,
         up_kernel_size: int = 12,
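For context, `Activation1d` sandwiches the wrapped activation between the alias-free up- and down-sampling layers imported above; this hunk only moves the import to the renamed package and annotates `activation` as an `nn.Module`. A hypothetical usage sketch, assuming the usual `[batch, channels, time]` layout of the upstream alias-free implementation:

```python
import torch
from lt_tensor.model_zoo.activations.alias_free.act import Activation1d
from lt_tensor.model_zoo.activations.snake import Snake

channels = 64
# Upsample 2x, apply Snake pointwise, then downsample 2x to suppress aliasing.
act = Activation1d(activation=Snake(channels), up_ratio=2, down_ratio=2)

x = torch.randn(1, channels, 4096)  # [batch, channels, time]
y = act(x)                          # assumed to keep the input shape
```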
lt_tensor/model_zoo/activations/snake/__init__.py
CHANGED
@@ -1,8 +1,7 @@
-# Implementation adapted from https://github.com/EdwardDixon/snake under the MIT license.
+# Implementation adapted and modified from https://github.com/EdwardDixon/snake under the MIT license.

 import torch
 from torch import nn, sin, pow
-from torch.nn import Parameter


 class Snake(nn.Module):
@@ -24,10 +23,11 @@ class Snake(nn.Module):

     def __init__(
         self,
-        in_features,
-        alpha=1.0,
-
-        alpha_logscale=False,
+        in_features: int,
+        alpha: float = 1.0,
+        requires_grad: bool = True,
+        alpha_logscale: bool = False,
+        batched: bool = True,
     ):
         """
         Initialization.
@@ -37,31 +37,27 @@ class Snake(nn.Module):
         alpha is initialized to 1 by default, higher values = higher-frequency.
         alpha will be trained along with the rest of your model.
         """
-        super(
+        super().__init__()
         self.in_features = in_features
-
-        # initialize alpha
         self.alpha_logscale = alpha_logscale
-        if self.alpha_logscale
-
-
-
+        param_fn = torch.zeros if self.alpha_logscale else torch.ones
+        _shape = (1, in_features, 1) if batched else (in_features, 1)
+        self.alpha = nn.Parameter(param_fn(_shape) * alpha, requires_grad=requires_grad)
+        self.eps = 1e-8

-
-
-
+    def _log_scale(self):
+        if self.alpha_logscale:
+            return self.alpha.exp()
+        return self.alpha

-    def forward(self, x):
+    def forward(self, x: torch.Tensor):
         """
         Forward pass of the function.
         Applies the function to the input elementwise.
         Snake ∶= x + 1/a * sin^2 (xa)
         """
-        alpha = self.
-
-        alpha = torch.exp(alpha)
-        x = x + (1.0 / (alpha + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
-
+        alpha = self._log_scale()
+        x = x + (1.0 / (alpha + self.eps)) * pow(sin(x * alpha), 2)
         return x
@@ -84,7 +80,12 @@ class SnakeBeta(nn.Module):
     """

     def __init__(
-        self,
+        self,
+        in_features: int,
+        alpha: float = 1.0,
+        requires_grad: bool = True,
+        alpha_logscale: bool = False,
+        batched: bool = True,
     ):
         """
         Initialization.
@@ -96,34 +97,31 @@ class SnakeBeta(nn.Module):
         beta is initialized to 1 by default, higher values = higher-magnitude.
         alpha will be trained along with the rest of your model.
         """
-        super(
+        super().__init__()
         self.in_features = in_features

         # initialize alpha
         self.alpha_logscale = alpha_logscale
-
-
-
-
-
-
-
-        self.alpha
-        self.
+        """
+        if log scale alphas initialized to zeros
+        else linear scale alphas is initialized to ones
+        """
+        param_fn = torch.zeros if alpha_logscale else torch.ones
+        _shape = (1, in_features, 1) if batched else (in_features, 1)
+        self.alpha = nn.Parameter(param_fn(_shape) * alpha, requires_grad=requires_grad)
+        self.beta = nn.Parameter(param_fn(_shape) * alpha, requires_grad=requires_grad)
+        self.eps = 1e-8

-
+    def _log_scale(self):
+        if self.alpha_logscale:
+            return self.alpha.exp(), self.beta.exp()
+        return self.alpha, self.beta

-    def forward(self, x):
+    def forward(self, x: torch.Tensor):
         """
         Forward pass of the function.
         Applies the function to the input elementwise.
         SnakeBeta ∶= x + 1/b * sin^2 (xa)
         """
-        alpha = self.
-        beta
-        if self.alpha_logscale:
-            alpha = torch.exp(alpha)
-            beta = torch.exp(beta)
-        x = x + (1.0 / (beta + self.no_div_by_zero)) * pow(sin(x * alpha), 2)
-
-        return x
+        alpha, beta = self._log_scale()
+        return x + (1.0 / (beta + self.eps)) * pow(sin(x * alpha), 2)
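With the reworked initialization, `alpha` (and `beta` for `SnakeBeta`) becomes a `(1, in_features, 1)` parameter when `batched=True`, so the activation broadcasts per channel over `[batch, channels, time]` inputs, and `requires_grad=False` freezes it. A short sketch based on the signatures above:

```python
import torch
from lt_tensor.model_zoo.activations.snake import Snake, SnakeBeta

x = torch.randn(8, 32, 1024)  # [batch, channels, time]

snake = Snake(in_features=32, alpha=1.0)           # x + (1/alpha) * sin^2(alpha * x)
snake_beta = SnakeBeta(in_features=32, alpha=1.0)  # x + (1/beta)  * sin^2(alpha * x)

y1 = snake(x)       # same shape as x
y2 = snake_beta(x)  # same shape as x

# Frozen variant: the per-channel frequencies stay at their initial value.
frozen = Snake(in_features=32, requires_grad=False)
```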
lt_tensor/model_zoo/audio_models/__init__.py
CHANGED
@@ -1,3 +1,3 @@
-from . import diffwave, istft, hifigan
+from . import diffwave, istft, hifigan, bigvgan

-__all__ = ["diffwave", "istft", "hifigan"]
+__all__ = ["diffwave", "istft", "hifigan", "bigvgan"]