lt-tensor 0.0.1a38__py3-none-any.whl → 0.0.1a39__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
lt_tensor/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.0.1a38"
+ __version__ = "0.0.1a39"

  from . import (
      lr_schedulers,
lt_tensor/model_zoo/losses/_envelope_disc/__init__.py ADDED
@@ -0,0 +1,116 @@
+ """ Modified from: https://github.com/dinhoitt/BemaGANv2/blob/9560ae9df153c956f259c261c57c4f84f89e3d72/envelope.py
+ MIT License
+
+ Copyright (c) 2025 Taseoo Park
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ """
+ from lt_utils.common import *
+ from lt_tensor.torch_commons import *
+ from lt_tensor.model_base import Model
+
+
+ class Envelope(Model):
+     def __init__(self, max_freq: int, sample_rate: Number = 24000, cut_off: int = 0):
+         super().__init__()
+         self.sr = sample_rate
+         self.max_freq = max_freq
+         self.setup_low_pass_fn(max_freq, cut_off)
+
+     def forward(self, x: torch.Tensor):
+         if not self.max_freq:
+             return x
+         return self.lp_fn(x)
+
+     def _ft_signal(self, signal: torch.Tensor):
+         filtered_signal = self.butterwort_lowpass_filter(signal)
+         return torch.abs(self.hilbert(filtered_signal))
+
+     def setup_low_pass_fn(self, max_freq: int, cutoff: int = 0):
+         self.max_freq = int(max_freq)
+         cutoff = self.max_freq if cutoff == 0 else cutoff
+         self.lp_fn = self.hilbert if self.max_freq in [-1, 1] else self._ft_signal
+         self.setup_butterwort_lowpass_coefficients(cutoff)
+
+     def hilbert(self, signal: Tensor) -> Tensor:
+         """Implementing the Hilbert transform manually"""
+         N = signal.shape[2]  # Signal length
+         FFT_signal = torch.fft.fft(signal, axis=2)
+         h = torch.zeros_like(
+             signal
+         )  # Generate an array with the same shape as the signal
+
+         if N % 2 == 0:
+             h[:, 0, 0] = 1
+             h[:, 0, N // 2] = 1
+             h[:, 0, 1 : N // 2] = 2
+         else:
+             h[:, 0, 0] = 1
+             h[:, 0, 1 : (N + 1) // 2] = 2
+
+         out: Tensor = torch.fft.ifft(FFT_signal * h, axis=2)
+         if self.max_freq == -1:
+             return -out.abs()
+         return -out.abs()
+
+     def butterwort_lowpass_filter(self, signal):
+         filtered_signal = torch.zeros_like(signal)
+         # Applying the filter to the signal
+         for n in range(len(signal)):
+             if n < 2:
+                 filtered_signal[n] = self.lp_coef_a[0] * signal[n]
+             else:
+                 filtered_signal[n] = (
+                     self.lp_coef_b[0] * signal[n]
+                     + self.lp_coef_b[1] * signal[n - 1]
+                     + self.lp_coef_b[2] * signal[n - 2]
+                     - self.lp_coef_a[1] * filtered_signal[n - 1]
+                     - self.lp_coef_a[2] * filtered_signal[n - 2]
+                 )
+
+         return filtered_signal
+
+     def setup_butterwort_lowpass_coefficients(self, cutoff: int):
+         cutoff = torch.tensor([cutoff], dtype=torch.float64)
+         fs = torch.tensor([self.sr], dtype=torch.float64)
+
+         omega = torch.tan(torch.pi * cutoff / fs)
+         # Convert float 2 to tensor
+         sqrt2 = torch.tensor(2.0, dtype=torch.float64).sqrt()
+
+         sq_omega = sqrt2 * omega + omega**2
+         # Transfer function coefficients using the bilinear transform
+         a = 2 * (omega**2 - 1) / (1 + sq_omega)
+         self.register_buffer(
+             "lp_coef_a",
+             torch.tensor(
+                 [1.0, a.item(), ((1 - sq_omega) / (1 + sq_omega)).item()],
+                 dtype=torch.float64,
+                 device=self.device,
+             ),
+         )
+         b = omega**2 / (1 + sq_omega)
+         self.register_buffer(
+             "lp_coef_b",
+             torch.tensor(
+                 [b.item(), (2 * b).item(), b.item()],
+                 dtype=torch.float64,
+                 device=self.device,
+             ),
+         )
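The new Envelope module chains an optional second-order recursive low-pass (coefficients derived via the bilinear transform and registered as buffers) into an FFT-based Hilbert transform, and returns the transform's negated magnitude. A minimal usage sketch, not part of the package itself; the (batch, 1, time) shape assumption follows the indexing inside hilbert():

    import torch
    from lt_tensor.model_zoo.losses._envelope_disc import Envelope

    # max_freq in {-1, 1} selects the Hilbert transform alone; any other
    # non-zero value low-passes first (cutoff defaults to max_freq), and
    # max_freq=0 turns forward() into an identity.
    env = Envelope(max_freq=1, sample_rate=24000)

    wave = torch.randn(2, 1, 24000)  # (batch, channel=1, time)
    envelope = env(wave)             # same shape as the input
    print(envelope.shape)            # torch.Size([2, 1, 24000])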
lt_tensor/model_zoo/losses/discriminators.py CHANGED
@@ -7,6 +7,7 @@ from lt_tensor.model_base import Model
  from lt_tensor.model_zoo.convs import ConvNets
  from torch.nn import functional as F
  from torchaudio import transforms as T
+ from lt_tensor.model_zoo.losses._envelope_disc import Envelope

  MULTI_DISC_OUT_TYPE: TypeAlias = Tuple[
      List[Tensor],
@@ -313,7 +314,7 @@ class DiscriminatorS(ConvNets):
          return x.flatten(1, -1), fmap


- class MultiScaleDiscriminator(ConvNets):
+ class MultiScaleDiscriminator(_MultiDiscriminatorT):
      def __init__(
          self,
          discriminator_channel_multi: Number = 1,
@@ -352,102 +353,71 @@ class MultiScaleDiscriminator(ConvNets):
          return y_d_rs, y_d_gs, fmap_rs, fmap_gs


- class EnvelopeExtractor(Model):
-     """Extracts the amplitude envelope of the audio signal."""
+ class DiscriminatorE(ConvNets):
+     """Modified from: https://github.com/dinhoitt/BemaGANv2/blob/9560ae9df153c956f259c261c57c4f84f89e3d72/models.py"""

-     def __init__(self, kernel_size=101):
-         super().__init__()
-         # Lowpass filter for smoothing envelope (moving average)
-         self.kernel_size = kernel_size
-         self.register_buffer("kernel", torch.ones(1, 1, kernel_size) / kernel_size)
-
-     def forward(self, x: Tensor):
-         # x: (B, 1, T) -> abs(x)
-         envelope = torch.abs(x)
-         # Apply low-pass smoothing (via conv1d)
-         envelope = F.pad(
-             envelope, (self.kernel_size // 2, self.kernel_size // 2), mode="reflect"
-         )
-         envelope = F.conv1d(envelope, self.kernel)
-         return envelope
-
-
- class DiscriminatorEnvelope(ConvNets):
      def __init__(
          self,
-         use_spectral_norm=False,
+         max_freq: int,
          discriminator_channel_multi: Number = 1,
-         kernel_size: int = 101,
+         sample_rate: int = 24000,
+         use_spectral_norm: bool = False,
      ):
+
          super().__init__()
-         norm_f = weight_norm if not use_spectral_norm else spectral_norm
-         self.extractor = EnvelopeExtractor(kernel_size=kernel_size)
+         self.max_freq = max_freq
+         norm_f = spectral_norm if use_spectral_norm else weight_norm
          dsc = lambda x: int(x * discriminator_channel_multi)
          self.convs = nn.ModuleList(
              [
-                 norm_f(nn.Conv1d(1, dsc(64), 15, stride=1, padding=7)),
-                 norm_f(
-                     nn.Conv1d(dsc(64), dsc(128), 41, stride=2, groups=4, padding=20)
-                 ),
-                 norm_f(
-                     nn.Conv1d(dsc(128), dsc(256), 41, stride=2, groups=16, padding=20)
-                 ),
-                 norm_f(
-                     nn.Conv1d(dsc(256), dsc(512), 41, stride=4, groups=16, padding=20)
-                 ),
-                 norm_f(
-                     nn.Conv1d(dsc(512), dsc(512), 41, stride=4, groups=16, padding=20)
-                 ),
-                 norm_f(nn.Conv1d(dsc(512), dsc(512), 5, stride=1, padding=2)),
+                 norm_f(nn.Conv1d(1, dsc(128), 15, 1, padding=7)),
+                 norm_f(nn.Conv1d(dsc(128), dsc(128), 41, 2, groups=4, padding=20)),
+                 norm_f(nn.Conv1d(dsc(128), dsc(256), 41, 2, groups=16, padding=20)),
+                 norm_f(nn.Conv1d(dsc(256), dsc(512), 41, 4, groups=16, padding=20)),
+                 norm_f(nn.Conv1d(dsc(512), dsc(1024), 41, 4, groups=16, padding=20)),
+                 norm_f(nn.Conv1d(dsc(1024), dsc(1024), 41, 1, groups=16, padding=20)),
+                 norm_f(nn.Conv1d(dsc(1024), dsc(1024), 5, 1, padding=2)),
              ]
          )
-         self.conv_post = norm_f(nn.Conv1d(dsc(512), 1, 3, stride=1, padding=1))
+         self.conv_post = norm_f(nn.Conv1d(dsc(1024), 1, 3, 1, padding=1))
+         self.envelope = Envelope(max_freq=self.max_freq, sample_rate=sample_rate)
          self.activation = nn.LeakyReLU(0.1)

-     def forward(self, x):
-         # Input: raw audio (B, 1, T)
-         x = self.extractor(x)
+     def forward(self, x: Tensor):
          fmap = []
-         for layer in self.convs:
-             x = self.activation(layer(x))
+         for l in self.convs:
+             x = self.envelope(x)
+             x = self.activation(l(x))
              fmap.append(x)
          x = self.conv_post(x)
          fmap.append(x)
-         return x.flatten(1), fmap
+         return x.flatten(start_dim=1, end_dim=-1), fmap


  class MultiEnvelopeDiscriminator(_MultiDiscriminatorT):
+     """Modified from: https://github.com/dinhoitt/BemaGANv2/blob/9560ae9df153c956f259c261c57c4f84f89e3d72/models.py"""
+
      def __init__(
          self,
-         use_spectral_norm: bool = False,
          discriminator_channel_multi: Number = 1,
      ):
          super().__init__()
+         f_times_values = [-1, 0, 1, 300, 500]
          self.discriminators = nn.ModuleList(
-             [
-                 DiscriminatorEnvelope(
-                     use_spectral_norm, discriminator_channel_multi
-                 ),  # raw envelope
-                 DiscriminatorEnvelope(use_spectral_norm),  # downsampled once
-                 DiscriminatorEnvelope(use_spectral_norm),  # downsampled twice
-             ]
-         )
-         self.meanpools = nn.ModuleList(
-             [nn.AvgPool1d(4, 2, padding=2), nn.AvgPool1d(4, 2, padding=2)]
+             [DiscriminatorE(f, discriminator_channel_multi) for f in f_times_values]
          )

      def forward(self, y, y_hat):
-         y_d_rs, y_d_gs = [], []
-         fmap_rs, fmap_gs = [], []
-         for i, d in enumerate(self.discriminators):
-             if i != 0:
-                 y = self.meanpools[i - 1](y)
-                 y_hat = self.meanpools[i - 1](y_hat)
+         y_d_rs = []
+         y_d_gs = []
+         fmap_rs = []
+         fmap_gs = []
+         for d in self.discriminators:
              y_d_r, fmap_r = d(y)
              y_d_g, fmap_g = d(y_hat)
              y_d_rs.append(y_d_r)
-             y_d_gs.append(y_d_g)
              fmap_rs.append(fmap_r)
+             y_d_gs.append(y_d_g)
              fmap_gs.append(fmap_g)

          return y_d_rs, y_d_gs, fmap_rs, fmap_gs
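MultiEnvelopeDiscriminator now builds five DiscriminatorE heads over f_times_values = [-1, 0, 1, 300, 500] (the raw waveform for 0, Hilbert-transform envelopes for -1 and 1, and low-passed envelopes for 300 and 500) in place of the three mean-pooled DiscriminatorEnvelope copies. A hedged sketch of consuming its 4-tuple output in a HiFi-GAN-style discriminator step; the LSGAN objective below is illustrative, not a helper exported by this package:

    import torch
    import torch.nn.functional as F
    from lt_tensor.model_zoo.losses.discriminators import MultiEnvelopeDiscriminator

    disc = MultiEnvelopeDiscriminator(discriminator_channel_multi=1)
    real = torch.randn(2, 1, 8192)  # real waveform batch (B, 1, T)
    fake = torch.randn(2, 1, 8192)  # stand-in for generator output (detach before the D step)

    y_d_rs, y_d_gs, fmap_rs, fmap_gs = disc(real, fake.detach())

    # illustrative LSGAN objective: push real scores toward 1, fake toward 0
    d_loss = sum(
        F.mse_loss(dr, torch.ones_like(dr)) + F.mse_loss(dg, torch.zeros_like(dg))
        for dr, dg in zip(y_d_rs, y_d_gs)
    )
    d_loss.backward()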
lt_tensor/processors/audio.py CHANGED
@@ -77,7 +77,7 @@ class AudioProcessorConfig(ModelConfig):
      def post_process(self):
          self.n_stft = self.n_fft // 2 + 1
          # some functions needs this to be a non-zero or not None value.
-         self.f_min = max(self.f_min, (self.sample_rate / (self.n_fft - 1)) * 2)
+         self.default_f_min = max(self.f_min, (self.sample_rate / (self.n_fft - 1)) * 2)
          self.default_f_max = min(
              default(self.f_max, self.sample_rate // 2), self.sample_rate // 2
          )
@@ -354,7 +354,7 @@ class AudioProcessor(Model):
          sr = default(sr, self.cfg.sample_rate)
          frame_length = default(frame_length, self.cfg.n_fft)
          fmin = max(
-             default(fmin, self.cfg.f_min), self.calc_pitch_fmin(sr, frame_length)
+             default(fmin, self.cfg.default_f_min), self.calc_pitch_fmin(sr, frame_length)
          )
          fmax = min(max(default(fmax, self.cfg.default_f_max), fmin + 1), sr // 2)
          hop_length = default(hop_length, self.cfg.hop_length)
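Together these two changes store the computed floor in default_f_min instead of overwriting the user-supplied f_min, mirroring the existing default_f_max, and pitch extraction now reads the floored value through cfg.default_f_min. A worked example of the floor, with illustrative values:

    # default_f_min = max(f_min, 2 * sample_rate / (n_fft - 1))
    sample_rate, n_fft, f_min = 24000, 1024, 0.0
    default_f_min = max(f_min, (sample_rate / (n_fft - 1)) * 2)
    print(round(default_f_min, 2))  # 46.92 Hz, while f_min itself stays 0.0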
{lt_tensor-0.0.1a38.dist-info → lt_tensor-0.0.1a39.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lt-tensor
- Version: 0.0.1a38
+ Version: 0.0.1a39
  Summary: General utilities for PyTorch and others. Built for general use.
  Home-page: https://github.com/gr1336/lt-tensor/
  Author: gr1336
{lt_tensor-0.0.1a38.dist-info → lt_tensor-0.0.1a39.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
- lt_tensor/__init__.py,sha256=2C0DCdesX13deU-nV8EbiF0poSsHWU0VuZvFcTZhJQk,441
+ lt_tensor/__init__.py,sha256=ZSFjEvr0KUkc4jdGzIZbW2b33sET0-bm2W4TrMQuq2c,441
  lt_tensor/config_templates.py,sha256=F9UvL8paAjkSvio890kp8WznpYeI50pYnm9iqQroBxk,2797
  lt_tensor/losses.py,sha256=e-YyKMmI0FwWQ3VLfJLDGSH4_rNpnYj0-htuk4eYboE,9283
  lt_tensor/lr_schedulers.py,sha256=6_vcfaPHrozfH3wvmNEdKSFYl6iTIijYoHL8vuG-45U,7651
@@ -30,14 +30,15 @@ lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=g9tSLjRgl7whafA9aun
  lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=ITSXHg3c0Um1P2HaPaXkQKI7meG5Ne60wTbyyYju3hY,6360
  lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=blICjLX_z_IFmR3_TCz_dJiSayLYGza9eG6fd9aKyvE,7448
  lt_tensor/model_zoo/losses/__init__.py,sha256=B9RAUxBiOZwooztnij1oLeRwZ7_MjnN3mPoum7saD6s,59
- lt_tensor/model_zoo/losses/discriminators.py,sha256=o4cicNdOv0jH3ink7jTNeDqOnwmkmRtEj9E7IUIGnEI,31866
+ lt_tensor/model_zoo/losses/discriminators.py,sha256=bHzpFh8OCrnpmLgBHDqn615B26NWr_FgQzbqTHg52pI,30953
  lt_tensor/model_zoo/losses/CQT/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lt_tensor/model_zoo/losses/CQT/transforms.py,sha256=Vkid0J9dqLnlINfyyUlQf-qB3gOQAgU7W9j7xLOjDFw,13218
  lt_tensor/model_zoo/losses/CQT/utils.py,sha256=twGw6FVD7V5Ksfx_1BUEN3EP1tAS6wo-9LL3VnuHB8c,16751
+ lt_tensor/model_zoo/losses/_envelope_disc/__init__.py,sha256=EIPat8Q1sjxYBKxL3qdLENYtPkVs0RIuIblx2KrtkB0,4503
  lt_tensor/processors/__init__.py,sha256=Pvxhh0KR65zLCgUd53_k5Z0y5JWWcO0ZBXFK9rv0o5w,109
- lt_tensor/processors/audio.py,sha256=QaEbzoCxl7zJNv6ELFwX6AO--8NuOGscgqxwNpV8Czw,23599
- lt_tensor-0.0.1a38.dist-info/licenses/LICENSE,sha256=TbiyJWLgNqqgqhfCnrGwFIxy7EqGNrIZZcKhHrefcuU,11354
- lt_tensor-0.0.1a38.dist-info/METADATA,sha256=g87aQm1aw-2dlCEvss9CcQ4iNl1Bi_mlqafGIeR1AdU,1071
- lt_tensor-0.0.1a38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lt_tensor-0.0.1a38.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
- lt_tensor-0.0.1a38.dist-info/RECORD,,
+ lt_tensor/processors/audio.py,sha256=OwICaDw6_IDsI3zVMUguOInglLIJiracFAnJB_j3nJY,23615
+ lt_tensor-0.0.1a39.dist-info/licenses/LICENSE,sha256=TbiyJWLgNqqgqhfCnrGwFIxy7EqGNrIZZcKhHrefcuU,11354
+ lt_tensor-0.0.1a39.dist-info/METADATA,sha256=-1ClE2z59FlawBpJkW5J_0EcF93rr9OBk2AiJKDyr5w,1071
+ lt_tensor-0.0.1a39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lt_tensor-0.0.1a39.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+ lt_tensor-0.0.1a39.dist-info/RECORD,,