lt-tensor 0.0.1a12__py3-none-any.whl → 0.0.1a13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
+ import torch
+ import torch.nn as nn
+ import math
+ from einops import repeat
+
+
+ class SineGen(nn.Module):
+     def __init__(
+         self,
+         samp_rate,
+         upsample_scale,
+         harmonic_num=0,
+         sine_amp=0.1,
+         noise_std=0.003,
+         voiced_threshold=0,
+         flag_for_pulse=False,
+     ):
+         super().__init__()
+         self.sampling_rate = samp_rate
+         self.upsample_scale = upsample_scale
+         self.harmonic_num = harmonic_num
+         self.sine_amp = sine_amp
+         self.noise_std = noise_std
+         self.voiced_threshold = voiced_threshold
+         self.flag_for_pulse = flag_for_pulse
+         self.dim = self.harmonic_num + 1  # fundamental + harmonics
+
+     def _f02uv_b(self, f0):
+         return (f0 > self.voiced_threshold).float()  # same shape as f0
+
+     def _f02uv(self, f0):
+         return (f0 > self.voiced_threshold).float().unsqueeze(-1)  # -> (B, T, 1)
+
+     @torch.no_grad()
+     def _f02sine(self, f0_values):
+         """
+         f0_values: (B, T, 1)
+         Output: sine waves (B, T * upsample, dim)
+         """
+         B, T, _ = f0_values.size()
+         f0_upsampled = repeat(
+             f0_values, "b t d -> b (t r) d", r=self.upsample_scale
+         )  # (B, T_up, 1)
+
+         # Create harmonics
+         harmonics = (
+             torch.arange(1, self.dim + 1, device=f0_values.device)
+             .float()
+             .view(1, 1, -1)
+         )
+         f0_harm = f0_upsampled * harmonics  # (B, T_up, dim)
+
+         # Convert Hz to normalized frequency (f / sr), then integrate to get phase
+         rad_values = f0_harm / self.sampling_rate  # cycles per sample
+         rad_values = rad_values % 1.0  # drop whole cycles (multiples of 2π after scaling)
+
+         # Random initial phase per harmonic (all zeros in pulse mode)
+         if self.flag_for_pulse:
+             rand_ini = torch.zeros((B, 1, self.dim), device=f0_values.device)
+         else:
+             rand_ini = torch.rand((B, 1, self.dim), device=f0_values.device)
+
+         rand_ini = rand_ini * 2 * math.pi
+
+         # Compute cumulative phase
+         rad_values = rad_values * 2 * math.pi
+         phase = torch.cumsum(rad_values, dim=1) + rand_ini  # (B, T_up, dim)
+
+         sine_waves = torch.sin(phase)  # (B, T_up, dim)
+         return sine_waves
+
+     def _forward(self, f0):
+         """
+         f0: (B, T, 1)
+         returns: sine signal with harmonics and noise added
+         """
+         sine_waves = self._f02sine(f0)  # (B, T_up, dim)
+         uv = self._f02uv_b(f0)  # (B, T, 1)
+         uv = repeat(uv, "b t d -> b (t r) d", r=self.upsample_scale)  # (B, T_up, 1)
+
+         # voiced sine + unvoiced noise
+         sine_signal = self.sine_amp * sine_waves * uv  # (B, T_up, dim)
+         noise = torch.randn_like(sine_signal) * self.noise_std
+         output = sine_signal + noise * (1.0 - uv)  # noise added only on unvoiced
+
+         return output  # (B, T_up, dim)
+
+     def forward(self, f0):
+         """
+         Args:
+             f0: (B, T) in Hz (before upsampling)
+         Returns:
+             sine_waves: (B, T_up, dim)
+             uv: (B, T_up, 1)
+             noise: (B, T_up, 1)
+         """
+         B, T = f0.shape
+         device = f0.device
+
+         # Get uv mask (before upsampling)
+         uv = self._f02uv(f0)  # (B, T, 1)
+
+         # Expand f0 to include harmonics: (B, T, dim)
+         f0 = f0.unsqueeze(-1)  # (B, T, 1)
+         harmonics = (
+             torch.arange(1, self.dim + 1, device=device).float().view(1, 1, -1)
+         )  # (1, 1, dim)
+         f0_harm = f0 * harmonics  # (B, T, dim)
+
+         # Upsample
+         f0_harm_up = repeat(
+             f0_harm, "b t d -> b (t r) d", r=self.upsample_scale
+         )  # (B, T_up, dim)
+         uv_up = repeat(uv, "b t d -> b (t r) d", r=self.upsample_scale)  # (B, T_up, 1)
+
+         # Convert to radians
+         rad_per_sample = f0_harm_up / self.sampling_rate  # Hz -> cycles/sample
+         rad_per_sample = rad_per_sample * 2 * math.pi  # cycles -> radians/sample
+
+         # Random initial phase per harmonic
+         B, T_up, D = rad_per_sample.shape
+         rand_phase = torch.rand(B, D, device=device) * 2 * math.pi  # (B, D)
+
+         # Compute cumulative phase
+         phase = torch.cumsum(rad_per_sample, dim=1) + rand_phase.unsqueeze(
+             1
+         )  # (B, T_up, D)
+
+         # Apply sine
+         sine_waves = torch.sin(phase) * self.sine_amp  # (B, T_up, D)
+
+         # Handle unvoiced: create noise only for the fundamental
+         noise = torch.randn(B, T_up, 1, device=device) * self.noise_std
+         if self.flag_for_pulse:
+             # If pulse mode is on, align phase at the start of voiced segments.
+             # Optional and tricky to implement; may require segmenting uv.
+             pass
+
+         # Replace sine by noise for unvoiced frames (fundamental only)
+         sine_waves[:, :, 0:1] = sine_waves[:, :, 0:1] * uv_up + noise * (1 - uv_up)
+
+         return sine_waves, uv_up, noise
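This new module is an NSF-style sine source: frame-level F0 is expanded to harmonics, upsampled to sample rate, converted to per-sample phase increments, and integrated with cumsum, while unvoiced samples fall back to Gaussian noise on the fundamental. A minimal usage sketch, assuming the SineGen definition above (the sample rate, upsample scale, and harmonic count here are illustrative, not taken from the package):

# Minimal usage sketch for SineGen; all hyperparameter values are assumptions.
import torch

sine_gen = SineGen(samp_rate=22050, upsample_scale=256, harmonic_num=8)
f0 = torch.full((1, 50), 220.0)  # (B, T): frame-level F0 in Hz
f0[:, 25:] = 0.0                 # mark the second half as unvoiced
sine_waves, uv, noise = sine_gen(f0)
print(sine_waves.shape)  # torch.Size([1, 12800, 9]) = (B, T * upsample_scale, harmonic_num + 1)
print(uv.shape)          # torch.Size([1, 12800, 1]): voiced/unvoiced mask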
@@ -13,18 +13,45 @@ from lt_tensor.misc_utils import set_seed, clear_cache
  from lt_utils.type_utils import is_dir, is_pathlike, is_file
  from lt_tensor.config_templates import updateDict, ModelConfig
  from lt_tensor.model_zoo.istft.generator import iSTFTGenerator
- from lt_tensor.model_zoo.rsd import ResBlock1D, ConvNets, get_weight_norm
- from lt_tensor.model_zoo.disc import MultiPeriodDiscriminator, MultiScaleDiscriminator
+ from lt_tensor.model_zoo.residual import ResBlock1D, ConvNets, get_weight_norm
+ from lt_tensor.model_zoo.discriminator import MultiPeriodDiscriminator, MultiScaleDiscriminator


- def feature_loss(real_feats, fake_feats):
-     loss = 0.0
-     for r, f in zip(real_feats, fake_feats):
-         for ri, fi in zip(r, f):
-             loss += F.l1_loss(ri, fi)
+ def feature_loss(fmap_r, fmap_g):
+     loss = 0
+     for dr, dg in zip(fmap_r, fmap_g):
+         for rl, gl in zip(dr, dg):
+             loss += torch.mean(torch.abs(rl - gl))
+     return loss * 2
+
+
+ def generator_adv_loss(disc_outputs):
+     loss = 0
+     for dg in disc_outputs:
+         loss += torch.mean((1 - dg) ** 2)
+     return loss
+
+
+ def discriminator_loss(disc_real_outputs, disc_generated_outputs):
+     loss = 0
+     for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
+         r_loss = torch.mean((1 - dr) ** 2)
+         g_loss = torch.mean(dg**2)
+         loss += r_loss + g_loss
      return loss


+ """def feature_loss(fmap_r, fmap_g):
+     loss = 0
+     for dr, dg in zip(fmap_r, fmap_g):
+         for rl, gl in zip(dr, dg):
+             loss += torch.mean(torch.abs(rl - gl))
+     return loss * 2
+
+
  def generator_adv_loss(fake_preds):
      loss = 0.0
      for f in fake_preds:
@@ -37,6 +64,7 @@ def discriminator_loss(real_preds, fake_preds):
      for r, f in zip(real_preds, fake_preds):
          loss += torch.mean((r - 1.0) ** 2) + torch.mean(f**2)
      return loss
+ """


  class AudioSettings(ModelConfig):
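The replacement losses are the standard LSGAN objectives used by HiFi-GAN-style vocoders: the discriminator is pushed toward 1 on real audio and 0 on generated audio, the generator is pushed toward 1, and feature matching takes an L1 distance over every intermediate discriminator feature map, doubled as in the HiFi-GAN reference. A quick sanity-check sketch with dummy discriminator outputs (values are illustrative only):

# Sanity check of the loss functions above using dummy tensors.
import torch

real_out = [torch.ones(2, 10)]   # discriminator output on real audio
fake_out = [torch.zeros(2, 10)]  # discriminator output on generated audio
print(discriminator_loss(real_out, fake_out))  # tensor(0.) -> ideal discriminator
print(generator_adv_loss(fake_out))            # tensor(1.) -> generator fooled nothing

fmap_r = [[torch.ones(2, 4)], [torch.ones(2, 4)]]
fmap_g = [[torch.zeros(2, 4)], [torch.ones(2, 4)]]
print(feature_loss(fmap_r, fmap_g))  # tensor(2.) = (mean|1-0| + mean|1-1|) * 2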
@@ -284,9 +312,6 @@ class AudioDecoder(Model):
              win_length=self.settings.n_fft,
              # length=real_audio.shape[-1]
          )[:, : real_audio.shape[-1]]
-         # smallest = min(real_audio.shape[-1], fake_audio.shape[-1])
-         # real_audio = real_audio[:, :, :smallest].squeeze(1)
-         # fake_audio = fake_audio[:, :smallest]

          disc_kwargs = dict(
              real_audio=real_audio,
@@ -372,13 +397,13 @@ class AudioDecoder(Model):

          loss_stft = self.audio_processor.stft_loss(fake_audio, real_audio) * stft_scale
          loss_mel = (
-             F.l1_loss(self.audio_processor.compute_mel(fake_audio), mels) * mel_scale
+             F.huber_loss(self.audio_processor.compute_mel(fake_audio), mels) * mel_scale
          )
          loss_fm = ((loss_fm_mpd + loss_fm_msd) * fm_scale) + fm_add

          loss_adv = (loss_adv_mpd + loss_adv_msd) * adv_scale

-         loss_g = loss_adv + loss_fm + loss_stft + loss_mel
+         loss_g = loss_adv + loss_fm + loss_stft  # + loss_mel
          if not am_i_frozen:
              self.g_optim.zero_grad()
              loss_g.backward()
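Two training changes land in this hunk: the mel reconstruction term switches from F.l1_loss to F.huber_loss, and loss_mel is dropped from the generator objective (it is still computed, just commented out of the sum). Huber is quadratic for small residuals and linear for large ones, so stray mel bins stop dominating the gradient. A small illustration with hypothetical values:

# L1 vs. Huber on the same residual; tensors are hypothetical.
import torch
import torch.nn.functional as F

pred = torch.tensor([0.1, 0.2, 5.0])  # one outlier component
target = torch.zeros(3)
print(F.l1_loss(pred, target))     # tensor(1.7667): the outlier contributes 5.0/3
print(F.huber_loss(pred, target))  # tensor(1.5083): above delta=1.0 it grows only linearly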
@@ -0,0 +1,217 @@
+ __all__ = [
+     "spectral_norm_select",
+     "get_weight_norm",
+     "ResBlock1D",
+     "ResBlock2D",
+     "ResBlock1DShuffled",
+     "AdaResBlock1D",
+ ]
+ import math
+ from lt_utils.common import *
+ from lt_tensor.torch_commons import *
+ from lt_tensor.model_base import Model
+ from lt_tensor.misc_utils import log_tensor
+ import torch.nn.functional as F
+ from lt_tensor.model_zoo.fusion import AdaFusion1D, AdaIN1D
+
+
+ def spectral_norm_select(module: nn.Module, enabled: bool):
+     if enabled:
+         return spectral_norm(module)
+     return module
+
+
+ def get_weight_norm(norm_type: Optional[Literal["weight", "spectral"]] = None):
+     if not norm_type:
+         return lambda x: x
+     if norm_type == "weight":
+         return lambda x: weight_norm(x)
+     return lambda x: spectral_norm(x)
+
+
+ class ConvNets(Model):
+     def remove_weight_norm(self):
+         for module in self.modules():
+             try:
+                 remove_weight_norm(module)
+             except ValueError:
+                 pass
+
+     @staticmethod
+     def init_weights(m, mean=0.0, std=0.01):
+         classname = m.__class__.__name__
+         if "Conv" in classname:
+             m.weight.data.normal_(mean, std)
+
+
+ class ResBlock1D(ConvNets):
+     def __init__(
+         self,
+         channels,
+         kernel_size=3,
+         dilation=(1, 3, 5),
+         activation: nn.Module = nn.LeakyReLU(0.1),
+     ):
+         super().__init__()
+
+         self.conv_nets = nn.ModuleList(
+             [
+                 self._get_conv_layer(i, channels, kernel_size, 1, dilation, activation)
+                 for i in range(3)
+             ]
+         )
+         self.conv_nets.apply(self.init_weights)
+         self.last_index = len(self.conv_nets) - 1
+
+     def _get_conv_layer(self, id, ch, k, stride, d, actv):
+         get_padding = lambda ks, d: int((ks * d - d) / 2)  # "same" padding for dilated convs
+         return nn.Sequential(
+             actv,  # 1
+             weight_norm(
+                 nn.Conv1d(
+                     ch, ch, k, stride, dilation=d[id], padding=get_padding(k, d[id])
+                 )
+             ),  # 2
+             actv,  # 3
+             weight_norm(
+                 nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
+             ),  # 4
+         )
+
+     def forward(self, x: Tensor):
+         for cnn in self.conv_nets:
+             x = cnn(x) + x
+         return x
+
+
+ class ResBlock1DShuffled(ConvNets):
+     def __init__(
+         self,
+         channels,
+         kernel_size=3,
+         dilation=(1, 3, 5),
+         activation: nn.Module = nn.LeakyReLU(0.1),
+         add_channel_shuffle: bool = False,  # requires pytorch 2.7.0+
+         channel_shuffle_groups=1,
+     ):
+         super().__init__()
+
+         self.channel_shuffle = (
+             nn.ChannelShuffle(channel_shuffle_groups)
+             if add_channel_shuffle
+             else nn.Identity()
+         )
+
+         self.conv_nets = nn.ModuleList(
+             [
+                 self._get_conv_layer(i, channels, kernel_size, 1, dilation, activation)
+                 for i in range(3)
+             ]
+         )
+         self.conv_nets.apply(self.init_weights)
+         self.last_index = len(self.conv_nets) - 1
+
+     def _get_conv_layer(self, id, ch, k, stride, d, actv):
+         get_padding = lambda ks, d: int((ks * d - d) / 2)
+         return nn.Sequential(
+             actv,  # 1
+             weight_norm(
+                 nn.Conv1d(
+                     ch, ch, k, stride, dilation=d[id], padding=get_padding(k, d[id])
+                 )
+             ),  # 2
+             actv,  # 3
+             weight_norm(
+                 nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
+             ),  # 4
+         )
+
+     def forward(self, x: Tensor):
+         b = x.clone() * 0.5  # scaled copy of the input, reused as every stage's residual
+         for cnn in self.conv_nets:
+             x = cnn(self.channel_shuffle(x)) + b
+         return x
+
+
+ class ResBlock2D(Model):
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         downsample=False,
+     ):
+         super().__init__()
+         stride = 2 if downsample else 1
+
+         self.block = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, 3, stride, 1),
+             nn.LeakyReLU(0.2),
+             nn.Conv2d(out_channels, out_channels, 3, 1, 1),
+         )
+
+         self.skip = nn.Identity()
+         if downsample or in_channels != out_channels:
+             self.skip = spectral_norm_select(
+                 nn.Conv2d(in_channels, out_channels, 1, stride), True
+             )
+         # precomputed: one less value to compute every forward pass
+         self.sqrt_2 = math.sqrt(2)
+
+     def forward(self, x: Tensor):
+         return (self.block(x) + self.skip(x)) / self.sqrt_2
+
+
+ class AdaResBlock1D(ConvNets):
+     def __init__(
+         self,
+         res_block_channels: int,
+         ada_channel_in: int,
+         kernel_size=3,
+         dilation=(1, 3, 5),
+         activation: nn.Module = nn.LeakyReLU(0.1),
+     ):
+         super().__init__()
+
+         self.conv_nets = nn.ModuleList(
+             [
+                 self._get_conv_layer(
+                     i,
+                     res_block_channels,
+                     ada_channel_in,
+                     kernel_size,
+                     1,
+                     dilation,
+                 )
+                 for i in range(3)
+             ]
+         )
+         self.conv_nets.apply(self.init_weights)
+         self.last_index = len(self.conv_nets) - 1
+         self.activation = activation
+
+     def _get_conv_layer(self, id, ch, ada_ch, k, stride, d):
+         get_padding = lambda ks, d: int((ks * d - d) / 2)
+         layer = nn.ModuleDict(
+             dict(
+                 norm1=AdaFusion1D(ada_ch, ch),
+                 norm2=AdaFusion1D(ada_ch, ch),
+                 conv1=weight_norm(
+                     nn.Conv1d(
+                         ch, ch, k, stride, dilation=d[id], padding=get_padding(k, d[id])
+                     )
+                 ),  # 2
+                 conv2=weight_norm(
+                     nn.Conv1d(ch, ch, k, stride, dilation=1, padding=get_padding(k, 1))
+                 ),  # 4
+             )
+         )
+         # nn.Parameter is not a Module, so the alphas cannot live inside the
+         # ModuleDict mapping; register them as attributes on the container instead.
+         layer.alpha1 = nn.Parameter(torch.ones(1, ada_ch, 1))
+         layer.alpha2 = nn.Parameter(torch.ones(1, ada_ch, 1))
+         return layer
+
+     def forward(self, x: torch.Tensor, y: torch.Tensor):
+         for cnn in self.conv_nets:
+             xt = self.activation(cnn["norm1"](x, y, cnn.alpha1))
+             xt = cnn["conv1"](xt)
+             xt = self.activation(cnn["norm2"](xt, y, cnn.alpha2))
+             x = cnn["conv2"](xt) + x
+         return x
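ResBlock1D mirrors the HiFi-GAN residual stack: each of its three stages runs a dilated convolution followed by a dilation-1 convolution, with padding (k * d - d) / 2 chosen so the time dimension is preserved and the residual addition lines up. A shape-check sketch, assuming the definitions above (channel and length values are illustrative):

# Shape-check sketch for ResBlock1D; parameter values are assumptions.
import torch

block = ResBlock1D(channels=64, kernel_size=3, dilation=(1, 3, 5))
x = torch.randn(2, 64, 1000)  # (batch, channels, time)
print(block(x).shape)  # torch.Size([2, 64, 1000]): length preserved by "same" padding
# e.g. kernel_size=3, dilation=5 -> padding = (3*5 - 5) // 2 = 5

block.remove_weight_norm()  # strip weight_norm from every conv before inference export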
@@ -11,8 +11,8 @@ from lt_tensor.torch_commons import *
  from lt_tensor.model_base import Model
  from lt_utils.misc_utils import default
  from typing import Optional
- from lt_tensor.model_zoo.pos import *
- from lt_tensor.model_zoo.bsc import FeedForward
+ from lt_tensor.model_zoo.pos_encoder import *
+ from lt_tensor.model_zoo.basic import FeedForward


  def init_weights(module):