PyPI - lt-tensor - Versions diffs - 0.0.1a16__py3-none-any.whl → 0.0.1a18__py3-none-any.whl - Mend

lt-tensor 0.0.1a16 (py3-none-any.whl) → 0.0.1a18 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

lt_tensor/config_templates.py CHANGED Viewed

@@ -7,16 +7,14 @@ from lt_tensor.misc_utils import updateDict
 class ModelConfig(ABC, OrderedDict):
     _default_settings: Dict[str, Any] = {}
-    _forbidden_list: List[str] = [
-        "_settings",
-    ]
+    _forbidden_list: List[str] = ["_default_settings", "_forbidden_list" "path_name"]
     def __init__(
         self,
-        settings: Dict[str, Any] = None,
+        settings: Dict[str, Any] = {},
         path_name: Optional[Union[str, PathLike]] = None,
     ):
-        assert is_dict(settings)
+        assert is_dict(settings, False)
         self._default_settings = settings
         if path_name is not None and is_pathlike(path_name):
             if not str(path_name).endswith(".json"):
@@ -37,31 +35,41 @@ class ModelConfig(ABC, OrderedDict):
                 self.path_name += ".json"
     def reset_settings(self):
+        dk_keys = self.__dict__.keys()
         for s_name, setting in self._default_settings.items():
-            if s_name in self._forbidden_list:
+            if s_name in self._forbidden_list or s_name not in dk_keys:
                 continue
             updateDict(self, {s_name: setting})
     def save_config(
         self,
-        path_name: Union[PathLike, str],
+        path_name: Optional[Union[PathLike, str]] = None,
     ):
-        assert is_pathlike(
-            path_name, True
-        ), f"path_name should be a non-empty string or pathlike object! received instead: {path_name}"
-        self._setup_path_name(path_name)
-        base = {k: y for k, y in self.__dict__.items() if k not in self._forbidden_list}
-        save_json(self.path_name, base, indent=2)
+        if not is_pathlike(path_name, True):
+            assert (
+                path_name is None
+            ), f"path_name should be a non-empty string or pathlike object! received instead: {path_name}."
+            path_name = self.path_name
+        else:
+            self._setup_path_name(path_name)
-    def to_dict(self):
-        return {k: y for k, y in self.__dict__.items() if k not in self._forbidden_list}
+        base = self.get_state_dict()
+        save_json(self.path_name, base, indent=2)
     def set_value(self, var_name: str, value: str) -> None:
+        assert var_name in self.__dict__, "Value not registered!"
+        assert var_name not in self._forbidden_list, "Not allowed!"
         updateDict(self, {var_name: value})
     def get_value(self, var_name: str) -> Any:
         return self.__dict__.get(var_name)
+    def __getattribute__(self, name):
+        return self.__dict__.get(name)
+    def get_state_dict(self):
+        return {k: y for k, y in self.__dict__.items() if k not in self._forbidden_list}
     @classmethod
     def from_dict(
         cls, dictionary: Dict[str, Any], path: Optional[Union[str, PathLike]] = None

lt_tensor/model_base.py CHANGED Viewed

@@ -70,16 +70,6 @@ class LossTracker:
 class _Devices_Base(nn.Module):
     _device: torch.device = ROOT_DEVICE
-    _autocast: bool = False
-    _loss_history: LossTracker = LossTracker(100_000)
-    @property
-    def autocast(self):
-        return self._autocast
-    @autocast.setter
-    def autocast(self, value: bool):
-        self._autocast = value
     @property
     def device(self):
@@ -90,6 +80,30 @@ class _Devices_Base(nn.Module):
         assert isinstance(device, (str, torch.device))
         self._device = torch.device(device) if isinstance(device, str) else device
+    def _apply_device(self):
+        """Add here components that are needed to have device applied to them,
+        that usually the '.to()' function fails to apply
+        example:
+        ```
+        def _apply_device_to(self):
+            self.my_tensor = self.my_tensor.to(device=self.device)
+        ```
+        """
+        pass
+    def _to_dvc(
+        self, device_name: str, device_id: Optional[Union[int, torch.device]] = None
+    ):
+        device = device_name
+        if device_id is not None:
+            if isinstance(device_id, Number):
+                device += ":" + str(int(device_id))
+            elif hasattr(device_id, "index"):
+                device += ":" + str(device_id.index)
+        self.device = device
+        self._apply_device()
     def to(self, *args, **kwargs):
         device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(
             *args, **kwargs
@@ -133,20 +147,9 @@ class _Devices_Base(nn.Module):
                     raise
         self._apply(convert)
-        self.device = device
+        self._to_dvc(device)
         return self
-    def _to_dvc(
-        self, device_name: str, device_id: Optional[Union[int, torch.device]] = None
-    ):
-        device = device_name
-        if device_id is not None:
-            if isinstance(device_id, Number):
-                device += ":" + str(int(device_id))
-            elif hasattr(device_id, "index"):
-                device += ":" + str(device_id.index)
-        self.device = device
     def ipu(self, device: Optional[Union[int, torch.device]] = None) -> T:
         super().ipu(device)
         self._to_dvc("ipu", device)
@@ -178,11 +181,12 @@ class Model(_Devices_Base, ABC):
     This makes it easier to assign a device and retrieves it later
     """
+    _autocast: bool = False
     _is_unfrozen: bool = False
     # list with modules that can be frozen or unfrozen
     registered_freezable_modules: List[str] = []
     is_frozen: bool = False
-    _is_gradient_freezable: bool = (
+    _can_be_frozen: bool = (
         False  # to control if the module can or cannot be freezed by other modules from 'Model' class
     )
     # this is to be used on the case of they module requires low-rank adapters
@@ -193,18 +197,15 @@ class Model(_Devices_Base, ABC):
     # dont save list:
     _dont_save_items: List[str] = []
+    _loss_history: LossTracker = LossTracker(20_000)
-    def _apply_device_to(self):
-        """Add here components that are needed to have device applied to them,
-        that usually the '.to()' function fails to apply
+    @property
+    def autocast(self):
+        return self._autocast
-        example:
-        ```
-        def _apply_device_to(self):
-            self.my_tensor = self.my_tensor.to(device=self.device)
-        ```
-        """
-        pass
+    @autocast.setter
+    def autocast(self, value: bool):
+        self._autocast = value
     def freeze_all(self, exclude: Optional[List[str]] = None):
         no_exclusions = not exclude
@@ -251,7 +252,7 @@ class Model(_Devices_Base, ABC):
     def change_frozen_state(self, freeze: bool, module: nn.Module):
         try:
             if isinstance(module, Model):
-                if module._is_gradient_freezable:
+                if module._can_be_frozen:
                     if freeze:
                         return module.freeze_all()
                     return module.unfreeze_all()
@@ -496,10 +497,7 @@ class Model(_Devices_Base, ABC):
         return self(*inputs, **kwargs)
     def __call__(self, *args, **kwds) -> POSSIBLE_OUTPUT_TYPES:
-        if self.autocast and not self.training:
-            with torch.autocast(device_type=self.device.type):
-                return super().__call__(*args, **kwds)
-        else:
+        with torch.autocast(device_type=self.device.type, enabled=self.autocast):
             return super().__call__(*args, **kwds)
     @abstractmethod
@@ -541,52 +539,3 @@ class Model(_Devices_Base, ABC):
         if quantity > 0:
             t_list = t_list[-quantity:]
         return sum(t_list) / len(t_list)
-    def freeze_unfreeze_loss(
-        self,
-        losses: Optional[Union[float, List[float]]] = None,
-        trigger_loss: Union[float, bool] = 0.1,
-        excluded_modules: Optional[List[str]] = None,
-        max_items: int = 1000,
-        loss_name: str = "train",
-    ):
-        """If a certain threshold is reached the weights will freeze or unfreeze the modules.
-        the biggest use-case for this function is when training GANs where the balance
-        from the discriminator and generator must be kept.
-        Args:
-            losses (Union[float, List[float]], Optional): The loss value or a list of losses that will be used to determine if it has reached or not the threshold. Defaults to None.
-            trigger_loss (float, bool, optional): The value where the weights will be either freeze or unfreeze. If set to a boolean it will freeze or unfreeze immediately according to the value (True = Freeze, False = Unfreeze). Defaults to 0.1.
-            excluded_modules (list[str], optional): The list of modules (names) that is not to be changed by either freezing nor unfreezing. Defaults to None.
-            max_items (float, optional): The number of previous losses to be locked behind to calculate the current average. Default to 1000.
-            loss_name (str, optional): Responsible to define with key to recover the loss.
-        returns:
-            bool: True when its frozen and false when its trainable.
-        """
-        if losses is not None:
-            self.add_loss(losses, "train")
-        if isinstance(trigger_loss, bool):
-            if trigger_loss:
-                if self._is_unfrozen:
-                    self.freeze_all(excluded_modules)
-                    self._is_unfrozen = False
-                return True
-            # else
-            if not self._is_unfrozen:
-                self.unfreeze_all(excluded_modules)
-                self._is_unfrozen = True
-            return False
-        value = self.get_loss_avg(loss_name, max_items)
-        if value <= trigger_loss:
-            if self._is_unfrozen:
-                self.freeze_all(excluded_modules)
-                self._is_unfrozen = False
-            return True
-        else:
-            if not self._is_unfrozen:
-                self.unfreeze_all(excluded_modules)
-                self._is_unfrozen = True
-            return False

lt_tensor/model_zoo/audio_models/__init__.py CHANGED Viewed

@@ -1 +1,3 @@
 from . import diffwave, istft, hifigan
+__all__ = ["diffwave", "istft", "hifigan"]

lt_tensor/model_zoo/audio_models/diffwave/__init__.py CHANGED Viewed

@@ -1,3 +1,216 @@
-__all__ = ["DiffWave", "SpectrogramUpsampler", "DiffusionEmbedding"]
+__all__ = ["DiffWave", "DiffWaveConfig", "SpectrogramUpsample", "DiffusionEmbedding"]
-from .model import DiffWave, SpectrogramUpsampler, DiffusionEmbedding
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from lt_tensor.config_templates import ModelConfig
+from lt_tensor.torch_commons import *
+from lt_tensor.model_base import Model
+from math import sqrt
+from lt_utils.common import *
+class DiffWaveConfig(ModelConfig):
+    # Training params
+    batch_size = 16
+    learning_rate = 2e-4
+    max_grad_norm = None
+    # Data params
+    sample_rate = 24000
+    n_mels = 80
+    n_fft = 1024
+    hop_samples = 256
+    # Model params
+    residual_layers = 30
+    residual_channels = 64
+    dilation_cycle_length = 10
+    unconditional = False
+    noise_schedule: list[int] = np.linspace(1e-4, 0.05, 50).tolist()
+    # settings for auto-fixes
+    interpolate = False
+    interpolation_mode: Literal[
+        "nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"
+    ] = "nearest"
+    def __init__(
+        self,
+        settings: Dict[str, Any] = {},
+        path_name: Optional[Union[str, PathLike]] = None,
+    ):
+        super().__init__(settings, path_name)
+def Conv1d(*args, **kwargs):
+    layer = nn.Conv1d(*args, **kwargs)
+    nn.init.kaiming_normal_(layer.weight)
+    return layer
+class DiffusionEmbedding(Model):
+    def __init__(self, max_steps: int):
+        super().__init__()
+        self.register_buffer(
+            "embedding", self._build_embedding(max_steps), persistent=False
+        )
+        self.projection1 = nn.Linear(128, 512)
+        self.projection2 = nn.Linear(512, 512)
+        self.activation = nn.SiLU()
+    def forward(self, diffusion_step):
+        if diffusion_step.dtype in [torch.int32, torch.int64]:
+            x = self.embedding[diffusion_step]
+        else:
+            x = self._lerp_embedding(diffusion_step)
+        x = self.projection1(x)
+        x = self.activation(x)
+        x = self.projection2(x)
+        x = self.activation(x)
+        return x
+    def _lerp_embedding(self, t):
+        low_idx = torch.floor(t).long()
+        high_idx = torch.ceil(t).long()
+        low = self.embedding[low_idx]
+        high = self.embedding[high_idx]
+        return low + (high - low) * (t - low_idx)
+    def _build_embedding(self, max_steps):
+        steps = torch.arange(max_steps).unsqueeze(1)  # [T,1]
+        dims = torch.arange(64).unsqueeze(0)  # [1,64]
+        table = steps * 10.0 ** (dims * 4.0 / 63.0)  # [T,64]
+        table = torch.cat([torch.sin(table), torch.cos(table)], dim=1)
+        return table
+class SpectrogramUpsample(Model):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.ConvTranspose2d(1, 1, [3, 32], stride=[1, 16], padding=[1, 8])
+        self.conv2 = nn.ConvTranspose2d(1, 1, [3, 32], stride=[1, 16], padding=[1, 8])
+        self.activation = nn.LeakyReLU(0.4)
+    def forward(self, x):
+        x = torch.unsqueeze(x, 1)
+        x = self.activation(self.conv1(x))
+        x = self.activation(self.conv2(x))
+        x = torch.squeeze(x, 1)
+        return x
+class ResidualBlock(Model):
+    def __init__(self, n_mels, residual_channels, dilation, uncond=False):
+        """
+        :param n_mels: inplanes of conv1x1 for spectrogram conditional
+        :param residual_channels: audio conv
+        :param dilation: audio conv dilation
+        :param uncond: disable spectrogram conditional
+        """
+        super().__init__()
+        self.dilated_conv = Conv1d(
+            residual_channels,
+            2 * residual_channels,
+            3,
+            padding=dilation,
+            dilation=dilation,
+        )
+        self.diffusion_projection = nn.Linear(512, residual_channels)
+        if not uncond:  # conditional model
+            self.conditioner_projection = Conv1d(n_mels, 2 * residual_channels, 1)
+        else:  # unconditional model
+            self.conditioner_projection = None
+        self.output_projection = Conv1d(residual_channels, 2 * residual_channels, 1)
+    def forward(
+        self,
+        x: Tensor,
+        diffusion_step: Tensor,
+        conditioner: Optional[Tensor] = None,
+    ):
+        diffusion_step = self.diffusion_projection(diffusion_step).unsqueeze(-1)
+        y = x + diffusion_step
+        if (
+            conditioner is None or self.conditioner_projection is None
+        ):  # using a unconditional model
+            y = self.dilated_conv(y)
+        else:
+            conditioner = self.conditioner_projection(conditioner)
+            y = self.dilated_conv(y) + conditioner
+        gate, filter = torch.chunk(y, 2, dim=1)
+        y = torch.sigmoid(gate) * torch.tanh(filter)
+        y = self.output_projection(y)
+        residual, skip = torch.chunk(y, 2, dim=1)
+        return (x + residual) / sqrt(2.0), skip
+class DiffWave(Model):
+    def __init__(self, params: DiffWaveConfig = DiffWaveConfig()):
+        super().__init__()
+        self.params = params
+        self.n_hop = self.params.hop_samples
+        self.interpolate = self.params.interpolate
+        self.interpolate_mode = self.params.interpolation_mode
+        self.input_projection = Conv1d(1, params.residual_channels, 1)
+        self.diffusion_embedding = DiffusionEmbedding(len(params.noise_schedule))
+        if self.params.unconditional:  # use unconditional model
+            self.spectrogram_upsample = None
+        else:
+            self.spectrogram_upsample = SpectrogramUpsample()
+        self.residual_layers = nn.ModuleList(
+            [
+                ResidualBlock(
+                    params.n_mels,
+                    params.residual_channels,
+                    2 ** (i % params.dilation_cycle_length),
+                    uncond=params.unconditional,
+                )
+                for i in range(params.residual_layers)
+            ]
+        )
+        self.skip_projection = Conv1d(
+            params.residual_channels, params.residual_channels, 1
+        )
+        self.output_projection = Conv1d(params.residual_channels, 1, 1)
+        self.activation = nn.LeakyReLU(0.1)
+        self.r_sqrt = sqrt(len(self.residual_layers))
+        nn.init.zeros_(self.output_projection.weight)
+    def forward(
+        self,
+        audio: Tensor,
+        diffusion_step: Tensor,
+        spectrogram: Optional[Tensor] = None,
+    ):
+        T = x.shape[-1]
+        if x.ndim == 2:
+            x = audio.unsqueeze(1)
+        x = self.activation(self.input_projection(x))
+        diffusion_step = self.diffusion_embedding(diffusion_step)
+        if spectrogram is not None and self.spectrogram_upsample is not None:
+            if self.auto_interpolate:
+                # a little heavy, but helps a lot to fix mismatched shapes,
+                # not always recommended due to data loss
+                spectrogram = F.interpolate(
+                    input=spectrogram,
+                    size=int(T * self.n_hop),
+                    mode=self.interpolate_mode,
+                )
+            spectrogram = self.spectrogram_upsample(spectrogram)
+        skip = None
+        for i, layer in enumerate(self.residual_layers):
+            x, skip_connection = layer(x, diffusion_step, spectrogram)
+            if i == 0:
+                skip = skip_connection
+            else:
+                skip = skip_connection + skip
+        x = skip / self.r_sqrt
+        x = self.activation(self.skip_projection(x))
+        x = self.output_projection(x)
+        return x

lt_tensor/model_zoo/audio_models/hifigan/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__all__ = ["HifiganGenerator"]
+__all__ = ["HifiganGenerator", "HifiganConfig"]
 from lt_utils.common import *
 from lt_tensor.torch_commons import *
 from lt_tensor.model_zoo.residual import ConvNets
@@ -13,6 +13,33 @@ def get_padding(kernel_size, dilation=1):
     return int((kernel_size * dilation - dilation) / 2)
+from lt_tensor.config_templates import ModelConfig
+class HifiganConfig(ModelConfig):
+    # Training params
+    in_channels: int = 80
+    upsample_rates: List[Union[int, List[int]]] = [8, 8]
+    upsample_kernel_sizes: List[Union[int, List[int]]] = [16, 16]
+    upsample_initial_channel: int = (512,)
+    resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11]
+    resblock_dilation_sizes: List[Union[int, List[int]]] = [
+        [1, 3, 5],
+        [1, 3, 5],
+        [1, 3, 5],
+    ]
+    activation: nn.Module = nn.LeakyReLU(0.1)
+    resblock: int = 0
+    def __init__(
+        self,
+        settings: Dict[str, Any] = {},
+        path_name: Optional[Union[str, PathLike]] = None,
+    ):
+        super().__init__(settings, path_name)
 class ResBlock1(ConvNets):
     def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
         super().__init__()
@@ -142,23 +169,23 @@ class ResBlock2(ConvNets):
 class HifiganGenerator(ConvNets):
-    def __init__(self, h):
+    def __init__(self, cfg: HifiganConfig = HifiganConfig()):
         super().__init__()
-        self.h = h
-        self.num_kernels = len(h.resblock_kernel_sizes)
-        self.num_upsamples = len(h.upsample_rates)
+        self.cfg = cfg
+        self.num_kernels = len(cfg.resblock_kernel_sizes)
+        self.num_upsamples = len(cfg.upsample_rates)
         self.conv_pre = weight_norm(
-            nn.Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3)
+            nn.Conv1d(cfg.in_channels, cfg.upsample_initial_channel, 7, 1, padding=3)
         )
-        resblock = ResBlock1 if h.resblock == "1" else ResBlock2
-        self.activation = nn.LeakyReLU(0.1)
+        resblock = ResBlock1 if resblock == 0 else ResBlock2
+        self.activation = cfg.activation
         self.ups = nn.ModuleList()
-        for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
+        for i, (u, k) in enumerate(zip(cfg.psample_rates, cfg.upsample_kernel_sizes)):
             self.ups.append(
                 weight_norm(
                     nn.ConvTranspose1d(
-                        h.upsample_initial_channel // (2**i),
-                        h.upsample_initial_channel // (2 ** (i + 1)),
+                        cfg.upsample_initial_channel // (2**i),
+                        cfg.upsample_initial_channel // (2 ** (i + 1)),
                         k,
                         u,
                         padding=(k - u) // 2,
@@ -168,17 +195,17 @@ class HifiganGenerator(ConvNets):
         self.resblocks = nn.ModuleList()
         for i in range(len(self.ups)):
-            ch = h.upsample_initial_channel // (2 ** (i + 1))
+            ch = cfg.upsample_initial_channel // (2 ** (i + 1))
             for j, (k, d) in enumerate(
-                zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)
+                zip(cfg.resblock_kernel_sizes, cfg.resblock_dilation_sizes)
             ):
-                self.resblocks.append(resblock(h, ch, k, d))
+                self.resblocks.append(resblock(ch, k, d))
         self.conv_post = weight_norm(nn.Conv1d(ch, 1, 7, 1, padding=3))
         self.ups.apply(self.init_weights)
         self.conv_post.apply(self.init_weights)
-    def forward(self, x):
+    def forward(self, x: Tensor):
         x = self.conv_pre(x)
         for i in range(self.num_upsamples):
             x = self.ups[i](self.activation(x))

lt_tensor/model_zoo/audio_models/istft/__init__.py CHANGED Viewed

@@ -1,8 +1,35 @@
-__all__ = ["iSTFTGenerator"]
+__all__ = ["iSTFTNetGenerator", "iSTFTNetConfig"]
 from lt_utils.common import *
 from lt_tensor.torch_commons import *
 from lt_tensor.model_zoo.residual import ConvNets
 from torch.nn import functional as F
+from lt_tensor.config_templates import ModelConfig
+class iSTFTNetConfig(ModelConfig):
+    # Training params
+    in_channels: int = 80
+    upsample_rates: List[Union[int, List[int]]] = [8, 8]
+    upsample_kernel_sizes: List[Union[int, List[int]]] = [16, 16]
+    upsample_initial_channel: int = (512,)
+    resblock_kernel_sizes: List[Union[int, List[int]]] = [3, 7, 11]
+    resblock_dilation_sizes: List[Union[int, List[int]]] = [
+        [1, 3, 5],
+        [1, 3, 5],
+        [1, 3, 5],
+    ]
+    activation: nn.Module = nn.LeakyReLU(0.1)
+    resblock: int = 0
+    gen_istft_n_fft: int = 16
+    sampling_rate: Number = 24000
+    def __init__(
+        self,
+        settings: Dict[str, Any] = {},
+        path_name: Optional[Union[str, PathLike]] = None,
+    ):
+        super().__init__(settings, path_name)
 def get_padding(ks, d):
@@ -10,9 +37,8 @@ def get_padding(ks, d):
 class ResBlock1(ConvNets):
-    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
         super().__init__()
-        self.h = h
         self.convs1 = nn.ModuleList(
             [
                 weight_norm(
@@ -95,10 +121,10 @@ class ResBlock1(ConvNets):
             x = xt + x
         return x
 class ResBlock2(ConvNets):
-    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
         super().__init__()
-        self.h = h
         self.convs = nn.ModuleList(
             [
                 weight_norm(
@@ -134,25 +160,25 @@ class ResBlock2(ConvNets):
         return x
-class iSTFTGenerator(ConvNets):
-    def __init__(self, h):
+class iSTFTNetGenerator(ConvNets):
+    def __init__(self, cfg: iSTFTNetConfig = iSTFTNetConfig()):
         super().__init__()
-        self.h = h
-        self.num_kernels = len(h.resblock_kernel_sizes)
-        self.num_upsamples = len(h.upsample_rates)
+        self.cfg = cfg
+        self.num_kernels = len(cfg.resblock_kernel_sizes)
+        self.num_upsamples = len(cfg.upsample_rates)
         self.conv_pre = weight_norm(
-            nn.Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3)
+            nn.Conv1d(cfg.in_channels, cfg.upsample_initial_channel, 7, 1, padding=3)
         )
-        resblock = ResBlock1 if h.resblock == "1" else ResBlock2
+        resblock = ResBlock1 if resblock == 0 else ResBlock2
         self.ups = nn.ModuleList()
-        for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
-            if h.sampling_rate % 16000:
+        for i, (u, k) in enumerate(zip(cfg.upsample_rates, cfg.upsample_kernel_sizes)):
+            if cfg.sampling_rate % 16000:
                 self.ups.append(
                     weight_norm(
                         nn.ConvTranspose1d(
-                            h.upsample_initial_channel // (2**i),
-                            h.upsample_initial_channel // (2 ** (i + 1)),
+                            cfg.upsample_initial_channel // (2**i),
+                            cfg.upsample_initial_channel // (2 ** (i + 1)),
                             k,
                             u,
                             padding=(k - u) // 2,
@@ -163,8 +189,8 @@ class iSTFTGenerator(ConvNets):
                 self.ups.append(
                     weight_norm(
                         nn.ConvTranspose1d(
-                            h.upsample_initial_channel // (2**i),
-                            h.upsample_initial_channel // (2 ** (i + 1)),
+                            cfg.upsample_initial_channel // (2**i),
+                            cfg.upsample_initial_channel // (2 ** (i + 1)),
                             k,
                             u,
                             padding=(u // 2 + u % 2),
@@ -175,19 +201,19 @@ class iSTFTGenerator(ConvNets):
         self.resblocks = nn.ModuleList()
         for i in range(len(self.ups)):
-            ch = h.upsample_initial_channel // (2 ** (i + 1))
+            ch = cfg.upsample_initial_channel // (2 ** (i + 1))
             for j, (k, d) in enumerate(
-                zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)
+                zip(cfg.resblock_kernel_sizes, cfg.resblock_dilation_sizes)
             ):
-                self.resblocks.append(resblock(h, ch, k, d))
+                self.resblocks.append(resblock(ch, k, d))
-        self.post_n_fft = h.gen_istft_n_fft
+        self.post_n_fft = cfg.gen_istft_n_fft
         self.conv_post = weight_norm(
             nn.Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3)
         )
         self.ups.apply(self.init_weights)
         self.conv_post.apply(self.init_weights)
-        self.activation = nn.LeakyReLU(0.1)
+        self.activation = cfg.activation
         self.reflection_pad = torch.nn.ReflectionPad1d((1, 0))
     def forward(self, x):

{lt_tensor-0.0.1a16.dist-info → lt_tensor-0.0.1a18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lt-tensor
-Version: 0.0.1a16
+Version: 0.0.1a18
 Summary: General utilities for PyTorch and others. Built for general use.
 Home-page: https://github.com/gr1336/lt-tensor/
 Author: gr1336

{lt_tensor-0.0.1a16.dist-info → lt_tensor-0.0.1a18.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
 lt_tensor/__init__.py,sha256=XxNCGcVL-haJyMpifr-GRaamo32R6jmqe3iOuS4ecfs,469
-lt_tensor/config_templates.py,sha256=FRN4-i1amoqMh_wyp4gNsw61ABWTIhGC62Uc3l3SNss,3515
+lt_tensor/config_templates.py,sha256=xWZhktYVlkwvJVreqyACpWo-lJ5htG9vTZyqZ6OexzA,3899
 lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
 lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
 lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
 lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
-lt_tensor/model_base.py,sha256=qqqIVpYz6nv01MnZuuAj1dxq4_NN-zSivP1GaegA9TI,21597
+lt_tensor/model_base.py,sha256=J-f-iQ9qGyYD4NkLljyAEkwtHKKbUKIrIpunMiSmh90,19155
 lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
 lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
 lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
@@ -18,15 +18,14 @@ lt_tensor/model_zoo/fusion.py,sha256=usC1bcjQRNivDc8xzkIS5T1glm78OLcs2V_tPqfp-eI
 lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmhw_UYM,4487
 lt_tensor/model_zoo/residual.py,sha256=i5V4ju7DB3WesKBVm6KH_LyPoKGDUOyo2Usfs-PyP58,9394
 lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
-lt_tensor/model_zoo/audio_models/__init__.py,sha256=CLoLqvbA_ltqm3KOg5AH3A0co0HtsLfFPUBsxxLSCgI,39
-lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=aFSmr8PYpmOfbe15lhNoj-ZzP5ChrZcikovKLZKg7nw,140
-lt_tensor/model_zoo/audio_models/diffwave/model.py,sha256=kHo76bxLJtTBn1m0gq5KKrUsjm9ASsCCwf8MvWaB1R8,6901
-lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=BOBZSK2HFOdMcFyjrzwZi_TeAtBGIcpb8pQxiGlwLEE,12302
-lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=o7Ie1qI22u_g9t1252PX4vl4uF6JHynAJryuz2lAZE0,12920
+lt_tensor/model_zoo/audio_models/__init__.py,sha256=MoG9YjxLyvscq_6njK1ljGBletK9iedBXt66bplzW-s,83
+lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=R14hY-nCbCO-T3ox9f4MXCPgQQogFUKAJ2WtntLz09w,7393
+lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=6ZGYyNiTMGHnOjGU0gq_TSM8Y9LtYlP3neGwa01Ghyk,13135
+lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=noi4GLGZQ_qg5H-ipe5d7j8rvt4Hic_sXiME-TE-B2c,13783
 lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
 lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
-lt_tensor-0.0.1a16.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
-lt_tensor-0.0.1a16.dist-info/METADATA,sha256=uxk1cMeQkLniYUIgEjHD2eJ8_JGwAKS2minrCmAJfMo,1033
-lt_tensor-0.0.1a16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lt_tensor-0.0.1a16.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
-lt_tensor-0.0.1a16.dist-info/RECORD,,
+lt_tensor-0.0.1a18.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
+lt_tensor-0.0.1a18.dist-info/METADATA,sha256=fgRzOiw5tMmkaEY9HrGEKNL2v9mN5JVbf9r-bf18Am0,1033
+lt_tensor-0.0.1a18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lt_tensor-0.0.1a18.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+lt_tensor-0.0.1a18.dist-info/RECORD,,

lt_tensor/model_zoo/audio_models/diffwave/model.py DELETED Viewed

@@ -1,201 +0,0 @@
-__all__ = ["DiffWave", "SpectrogramUpsampler", "DiffusionEmbedding"]
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from math import sqrt
-class AttrDict(dict):
-    def __init__(self, *args, **kwargs):
-        super(AttrDict, self).__init__(*args, **kwargs)
-        self.__dict__ = self
-    def override(self, attrs):
-        if isinstance(attrs, dict):
-            self.__dict__.update(**attrs)
-        elif isinstance(attrs, (list, tuple, set)):
-            for attr in attrs:
-                self.override(attr)
-        elif attrs is not None:
-            raise NotImplementedError
-        return self
-params = AttrDict(
-    # Training params
-    batch_size=16,
-    learning_rate=2e-4,
-    max_grad_norm=None,
-    # Data params
-    sample_rate=22050,
-    n_mels=80,
-    n_fft=1024,
-    hop_samples=256,
-    crop_mel_frames=62,  # Probably an error in paper.
-    # Model params
-    residual_layers=30,
-    residual_channels=64,
-    dilation_cycle_length=10,
-    unconditional=False,
-    noise_schedule=np.linspace(1e-4, 0.05, 50).tolist(),
-    inference_noise_schedule=[0.0001, 0.001, 0.01, 0.05, 0.2, 0.5],
-    # unconditional sample len
-    audio_len=22050 * 5,  # unconditional_synthesis_samples
-)
-def Conv1d(*args, **kwargs):
-    layer = nn.Conv1d(*args, **kwargs)
-    nn.init.kaiming_normal_(layer.weight)
-    return layer
-class DiffusionEmbedding(nn.Module):
-    def __init__(self, max_steps):
-        super().__init__()
-        self.register_buffer(
-            "embedding", self._build_embedding(max_steps), persistent=False
-        )
-        self.projection1 = nn.Linear(128, 512)
-        self.projection2 = nn.Linear(512, 512)
-        self.activation = nn.SiLU()
-    def forward(self, diffusion_step):
-        if diffusion_step.dtype in [torch.int32, torch.int64]:
-            x = self.embedding[diffusion_step]
-        else:
-            x = self._lerp_embedding(diffusion_step)
-        x = self.projection1(x)
-        x = self.activation(x)
-        x = self.projection2(x)
-        x = self.activation(x)
-        return x
-    def _lerp_embedding(self, t):
-        low_idx = torch.floor(t).long()
-        high_idx = torch.ceil(t).long()
-        low = self.embedding[low_idx]
-        high = self.embedding[high_idx]
-        return low + (high - low) * (t - low_idx)
-    def _build_embedding(self, max_steps):
-        steps = torch.arange(max_steps).unsqueeze(1)  # [T,1]
-        dims = torch.arange(64).unsqueeze(0)  # [1,64]
-        table = steps * 10.0 ** (dims * 4.0 / 63.0)  # [T,64]
-        table = torch.cat([torch.sin(table), torch.cos(table)], dim=1)
-        return table
-class SpectrogramUpsampler(nn.Module):
-    def __init__(self, n_mels):
-        super().__init__()
-        self.conv1 = nn.ConvTranspose2d(1, 1, [3, 32], stride=[1, 16], padding=[1, 8])
-        self.conv2 = nn.ConvTranspose2d(1, 1, [3, 32], stride=[1, 16], padding=[1, 8])
-    def forward(self, x):
-        x = torch.unsqueeze(x, 1)
-        x = self.conv1(x)
-        x = F.leaky_relu(x, 0.4)
-        x = self.conv2(x)
-        x = F.leaky_relu(x, 0.4)
-        x = torch.squeeze(x, 1)
-        return x
-class ResidualBlock(nn.Module):
-    def __init__(self, n_mels, residual_channels, dilation, uncond=False):
-        """
-        :param n_mels: inplanes of conv1x1 for spectrogram conditional
-        :param residual_channels: audio conv
-        :param dilation: audio conv dilation
-        :param uncond: disable spectrogram conditional
-        """
-        super().__init__()
-        self.dilated_conv = Conv1d(
-            residual_channels,
-            2 * residual_channels,
-            3,
-            padding=dilation,
-            dilation=dilation,
-        )
-        self.diffusion_projection = nn.Linear(512, residual_channels)
-        if not uncond:  # conditional model
-            self.conditioner_projection = Conv1d(n_mels, 2 * residual_channels, 1)
-        else:  # unconditional model
-            self.conditioner_projection = None
-        self.output_projection = Conv1d(residual_channels, 2 * residual_channels, 1)
-    def forward(self, x, diffusion_step, conditioner=None):
-        assert (conditioner is None and self.conditioner_projection is None) or (
-            conditioner is not None and self.conditioner_projection is not None
-        )
-        diffusion_step = self.diffusion_projection(diffusion_step).unsqueeze(-1)
-        y = x + diffusion_step
-        if self.conditioner_projection is None:  # using a unconditional model
-            y = self.dilated_conv(y)
-        else:
-            conditioner = self.conditioner_projection(conditioner)
-            y = self.dilated_conv(y) + conditioner
-        gate, filter = torch.chunk(y, 2, dim=1)
-        y = torch.sigmoid(gate) * torch.tanh(filter)
-        y = self.output_projection(y)
-        residual, skip = torch.chunk(y, 2, dim=1)
-        return (x + residual) / sqrt(2.0), skip
-class DiffWave(nn.Module):
-    def __init__(self, params):
-        super().__init__()
-        self.params = params
-        self.input_projection = Conv1d(1, params.residual_channels, 1)
-        self.diffusion_embedding = DiffusionEmbedding(len(params.noise_schedule))
-        if self.params.unconditional:  # use unconditional model
-            self.spectrogram_upsampler = None
-        else:
-            self.spectrogram_upsampler = SpectrogramUpsampler(params.n_mels)
-        self.residual_layers = nn.ModuleList(
-            [
-                ResidualBlock(
-                    params.n_mels,
-                    params.residual_channels,
-                    2 ** (i % params.dilation_cycle_length),
-                    uncond=params.unconditional,
-                )
-                for i in range(params.residual_layers)
-            ]
-        )
-        self.skip_projection = Conv1d(
-            params.residual_channels, params.residual_channels, 1
-        )
-        self.output_projection = Conv1d(params.residual_channels, 1, 1)
-        nn.init.zeros_(self.output_projection.weight)
-    def forward(self, audio, diffusion_step, spectrogram=None):
-        assert (spectrogram is None and self.spectrogram_upsampler is None) or (
-            spectrogram is not None and self.spectrogram_upsampler is not None
-        )
-        x = audio.unsqueeze(1)
-        x = self.input_projection(x)
-        x = F.relu(x)
-        diffusion_step = self.diffusion_embedding(diffusion_step)
-        if self.spectrogram_upsampler:  # use conditional model
-            spectrogram = self.spectrogram_upsampler(spectrogram)
-        skip = None
-        for layer in self.residual_layers:
-            x, skip_connection = layer(x, diffusion_step, spectrogram)
-            skip = skip_connection if skip is None else skip_connection + skip
-        x = skip / sqrt(len(self.residual_layers))
-        x = self.skip_projection(x)
-        x = F.relu(x)
-        x = self.output_projection(x)
-        return x

{lt_tensor-0.0.1a16.dist-info → lt_tensor-0.0.1a18.dist-info}/WHEEL RENAMED Viewed

File without changes

{lt_tensor-0.0.1a16.dist-info → lt_tensor-0.0.1a18.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{lt_tensor-0.0.1a16.dist-info → lt_tensor-0.0.1a18.dist-info}/top_level.txt RENAMED Viewed

File without changes

lt-tensor 0.0.1a16__py3-none-any.whl → 0.0.1a18__py3-none-any.whl

lt-tensor 0.0.1a16py3-none-any.whl → 0.0.1a18py3-none-any.whl