lt-tensor 0.0.1a19__py3-none-any.whl → 0.0.1a21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lt_tensor/__init__.py CHANGED
@@ -11,7 +11,6 @@ from . import (
     noise_tools,
     losses,
     processors,
-    datasets,
    torch_commons,
)
 
@@ -26,6 +25,5 @@ __all__ = [
    "noise_tools",
    "losses",
    "processors",
-    "datasets",
    "torch_commons",
]
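The only change here is dropping the `datasets` subpackage from the root imports and from `__all__`. A quick check of the user-facing effect (hedged; the removed module itself appears at the end of this diff):

```python
# Works on lt-tensor 0.0.1a19; raises ImportError on 0.0.1a21,
# because the datasets subpackage was removed from the wheel.
from lt_tensor import datasets
```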
lt_tensor/config_templates.py CHANGED
@@ -10,49 +10,29 @@ class ModelConfig(ABC, OrderedDict):
    _forbidden_list: List[str] = [
        "_default_settings",
        "_forbidden_list",
-        "path_name",
    ]
-    path: Optional[str] = None
 
    def __init__(
        self,
-        path: Optional[Union[str, PathLike]] = None,
        **settings,
    ):
-        self._setup_path_name(path)
-        if self.path is not None:
-            self._default_settings = load_json(self.path, default=settings)
-        else:
-            self._default_settings = settings
-
+        self._default_settings = settings
        self.set_state_dict(self._default_settings)
 
-    def _setup_path_name(self, path_name: Union[str, PathLike]):
-        if is_file(path_name):
-            self.from_path(path_name)
-            self.path = str(path_name).replace("\\", "/")
-        elif is_str(path_name):
-            self.path = str(path_name).replace("\\", "/")
-            if not self.path.endswith((".json")):
-                self.path += ".json"
-
    def reset_settings(self):
        raise NotImplementedError("Not implemented")
 
    def save_config(
        self,
-        path: Optional[Union[PathLike, str]] = None,
+        path: str,
    ):
-        if not is_pathlike(path, True):
-            assert (
-                path is None
-            ), f"path_name should be a non-empty string or pathlike object! received instead: {path}."
-            path = self.path
-        else:
-            self._setup_path_name(path)
+        base = {
+            k: v
+            for k, v in self.state_dict().items()
+            if isinstance(v, (str, int, float, list, tuple, dict, set, bytes))
+        }
 
-        base = self.state_dict()
-        save_json(self.path, base, indent=2)
+        save_json(path, base, indent=4)
 
    def set_value(self, var_name: str, value: str) -> None:
        assert var_name not in self._forbidden_list, "Not allowed!"
@@ -76,10 +56,9 @@ class ModelConfig(ABC, OrderedDict):
    def from_dict(
        cls,
        dictionary: Dict[str, Any],
-        path: Optional[Union[str, PathLike]] = None,
    ) -> "ModelConfig":
        assert is_dict(dictionary)
-        return ModelConfig(path, **dictionary)
+        return ModelConfig(**dictionary)
 
    @classmethod
    def from_path(cls, path_name: PathLike) -> "ModelConfig":
@@ -106,4 +85,7 @@ class ModelConfig(ABC, OrderedDict):
        )
        assert files, "No config file found in the provided directory!"
        settings.update(load_json(files[-1], {}, errors="ignore"))
-        return ModelConfig(path_name, **settings)
+        settings.pop("path", None)
+        settings.pop("path_name", None)
+
+        return ModelConfig(**settings)
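Taken together, these hunks remove path tracking from `ModelConfig`: configs are now built purely from keyword settings, `save_config` takes a mandatory path and writes only JSON-friendly values, and `from_path` strips stale `path`/`path_name` keys from loaded files. A minimal sketch of the new flow, assuming `ModelConfig` stays directly instantiable as the `from_dict` classmethod above implies:

```python
# Hedged sketch of the 0.0.1a21 ModelConfig flow shown in this diff.
from lt_tensor.config_templates import ModelConfig

cfg = ModelConfig.from_dict({"n_mels": 80, "hop_samples": 256})
cfg.save_config("config.json")  # path is now a required argument
restored = ModelConfig.from_path("config.json")  # stale path keys are popped
```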
lt_tensor/model_base.py CHANGED
@@ -207,12 +207,12 @@ class Model(_Devices_Base, ABC):
    def autocast(self, value: bool):
        self._autocast = value
 
-    def freeze_all(self, exclude: Optional[List[str]] = None):
+    def freeze_all(self, exclude: Optional[List[str]] = None, force: bool = False):
        no_exclusions = not exclude
        no_exclusions = not exclude
        results = []
        for name, module in self.named_modules():
-            if name not in self.registered_freezable_modules:
+            if not force and name not in self.registered_freezable_modules:
                results.append(
                    (
                        name,
@@ -228,12 +228,13 @@ class Model(_Devices_Base, ABC):
            results.append((name, "excluded"))
        return results
 
-    def unfreeze_all(self, exclude: Optional[list[str]] = None):
+    def unfreeze_all(self, exclude: Optional[list[str]] = None, force: bool = False):
        """Unfreezes all model parameters except specified layers."""
        no_exclusions = not exclude
        results = []
        for name, module in self.named_modules():
-            if name not in self.registered_freezable_modules:
+            if not force and name not in self.registered_freezable_modules:
+
                results.append(
                    (
                        name,
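Both helpers gain a `force` flag: by default only modules found in `registered_freezable_modules` are frozen or unfrozen, and `force=True` bypasses that registry check. A usage sketch with a hypothetical subclass (`TinyModel` and its layer names are illustrative, not part of the package):

```python
import torch.nn as nn
from lt_tensor.model_base import Model

class TinyModel(Model):  # hypothetical subclass for illustration only
    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(8, 8)
        self.decoder = nn.Linear(8, 8)

    def forward(self, x):
        return self.decoder(self.encoder(x))

m = TinyModel()
m.freeze_all(exclude=["decoder"])  # honors the freezable-module registry
m.freeze_all(force=True)           # new in a21: touch every named module
m.unfreeze_all(force=True)         # symmetric override when unfreezing
```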
lt_tensor/model_zoo/audio_models/diffwave/__init__.py CHANGED
@@ -27,16 +27,22 @@ class DiffWaveConfig(ModelConfig):
 
    def __init__(
        self,
-        n_mels = 80,
-        hop_samples = 256,
-        residual_layers = 30,
-        residual_channels = 64,
-        dilation_cycle_length = 10,
-        unconditional = False,
+        n_mels=80,
+        hop_samples=256,
+        residual_layers=30,
+        residual_channels=64,
+        dilation_cycle_length=10,
+        unconditional=False,
        noise_schedule: list[int] = np.linspace(1e-4, 0.05, 50).tolist(),
-        interpolate_cond = False,
+        interpolate_cond=False,
        interpolation_mode: Literal[
-            "nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"
+            "nearest",
+            "linear",
+            "bilinear",
+            "bicubic",
+            "trilinear",
+            "area",
+            "nearest-exact",
        ] = "nearest",
    ):
        settings = {
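This hunk is purely cosmetic (PEP 8 spacing around `=` and one interpolation mode per line); no defaults change. For reference, a hedged instantiation sketch using the signature above:

```python
from lt_tensor.model_zoo.audio_models.diffwave import DiffWaveConfig

cfg = DiffWaveConfig(
    n_mels=80,
    residual_layers=30,
    interpolate_cond=True,        # enable conditioner interpolation
    interpolation_mode="linear",  # any of the Literal options above
)
```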
lt_tensor-0.0.1a19.dist-info/METADATA → lt_tensor-0.0.1a21.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lt-tensor
-Version: 0.0.1a19
+Version: 0.0.1a21
 Summary: General utilities for PyTorch and others. Built for general use.
 Home-page: https://github.com/gr1336/lt-tensor/
 Author: gr1336
@@ -17,7 +17,7 @@ Requires-Dist: numpy>=1.26.4
 Requires-Dist: tokenizers
 Requires-Dist: pyyaml>=6.0.0
 Requires-Dist: numba>0.60.0
-Requires-Dist: lt-utils==0.0.2a2
+Requires-Dist: lt-utils==0.0.2
 Requires-Dist: librosa==0.11.*
 Requires-Dist: einops
 Requires-Dist: plotly
lt_tensor-0.0.1a19.dist-info/RECORD → lt_tensor-0.0.1a21.dist-info/RECORD RENAMED
@@ -1,16 +1,14 @@
-lt_tensor/__init__.py,sha256=XxNCGcVL-haJyMpifr-GRaamo32R6jmqe3iOuS4ecfs,469
-lt_tensor/config_templates.py,sha256=9hLt7OLq3z1y8FKNoGY_sIJHHnVoXsLcuI4x2zoE0Q4,3634
+lt_tensor/__init__.py,sha256=8FTxpJ6td2bMr_GqzW2tCV6Tr5CelbQle8N5JRWtx8M,439
+lt_tensor/config_templates.py,sha256=RP7EFVRj6mRUj6xDLe7FMXgN5TIo8_o9h1Kb8epdmfo,2825
 lt_tensor/losses.py,sha256=zvkCOnE5XpF3v6ymivRIdqPTsMM5zc94ZMom7YDi3zM,4946
 lt_tensor/lr_schedulers.py,sha256=LSZzqrOOLzSthD8k-W4cYPJt0vCjmHkiJkLr5e3yRTE,3659
 lt_tensor/math_ops.py,sha256=TkD4WQG42KsQ9Fg7FXOjf8f-ixtW0apf2XjaooecVx4,2257
 lt_tensor/misc_utils.py,sha256=S57M5XuGsIuaOKnEGZJsY3B2dTmggpdhsqQr51CQsYo,28754
-lt_tensor/model_base.py,sha256=J-f-iQ9qGyYD4NkLljyAEkwtHKKbUKIrIpunMiSmh90,19155
+lt_tensor/model_base.py,sha256=TynnD5ctl2kyua3bd_DRa_8pZrAe2QFgtZAYu0QAQgE,19226
 lt_tensor/monotonic_align.py,sha256=LhBd8p1xdBzg6jQrQX1j7b4PNeYGwIqM24zcU-pHOLE,2239
 lt_tensor/noise_tools.py,sha256=wFeAsHhLhSlEc5XU5LbFKaXoHeVxrWjiMeljjGdIKyM,11363
 lt_tensor/torch_commons.py,sha256=fntsEU8lhBQo0ebonI1iXBkMbWMN3HpBsG13EWlP5s8,718
 lt_tensor/transform.py,sha256=dZm8T_ov0blHMQu6nGiehsdG1VSB7bZBUVmTkT-PBdc,13257
-lt_tensor/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lt_tensor/datasets/audio.py,sha256=5Wvz1BJ7xXkLYpVLLw9RY3X3RgMdPPeGiN0-MmJDQy0,8045
 lt_tensor/model_zoo/__init__.py,sha256=ltVTvmOlbOCfDc5Trvg0-Ta_Ujgkw0UVF9V5rqHx-RI,378
 lt_tensor/model_zoo/basic.py,sha256=pI8HyiHK-cmWcEEaVY_EduUJOjZW6HOtXvJd8Rbhq30,15452
 lt_tensor/model_zoo/features.py,sha256=DO8dlE0kmPKTNC1Xkv9wKegOOYkQa_rkxM4hhcNwJWA,15655
@@ -19,13 +17,13 @@ lt_tensor/model_zoo/pos_encoder.py,sha256=3d1EYLinCU9UAy-WuEWeYMGhMqaGknCiQ5qEmh
 lt_tensor/model_zoo/residual.py,sha256=i5V4ju7DB3WesKBVm6KH_LyPoKGDUOyo2Usfs-PyP58,9394
 lt_tensor/model_zoo/transformer.py,sha256=HUFoFFh7EQJErxdd9XIxhssdjvNVx2tNGDJOTUfwG2A,4301
 lt_tensor/model_zoo/audio_models/__init__.py,sha256=MoG9YjxLyvscq_6njK1ljGBletK9iedBXt66bplzW-s,83
-lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=vSrQJ0NXYvTbjOyjLjiMNy95Ib7VO1BJ5UqhoQ7dzYo,8032
+lt_tensor/model_zoo/audio_models/diffwave/__init__.py,sha256=OUyh421xRCcxOMi_Ek6Ak3-FPe1k6WTDQ-6gd6OjaCU,8091
 lt_tensor/model_zoo/audio_models/hifigan/__init__.py,sha256=JNebaYO3nsyyqpYCCOyL13zY2uxLY3NOCeNynF6-96k,13940
 lt_tensor/model_zoo/audio_models/istft/__init__.py,sha256=JdFChpPhURaI2qb9mDV6vzDcZN757FBGGtgzN3vxtJ0,14821
 lt_tensor/processors/__init__.py,sha256=4b9MxAJolXiJfSm20ZEspQTDm1tgLazwlPWA_jB1yLM,63
 lt_tensor/processors/audio.py,sha256=SMqNSl4Den-x1awTCQ8-TcR-0jPiv5lDaUpU93SRRaw,14749
-lt_tensor-0.0.1a19.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
-lt_tensor-0.0.1a19.dist-info/METADATA,sha256=lkXND2y0Ue6-y_1LDUcpbPWEJ9jnUG71zJMfcSwKdJs,1033
-lt_tensor-0.0.1a19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lt_tensor-0.0.1a19.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
-lt_tensor-0.0.1a19.dist-info/RECORD,,
+lt_tensor-0.0.1a21.dist-info/licenses/LICENSE,sha256=HUnu_iSPpnDfZS_PINhO3AoVizJD1A2vee8WX7D7uXo,11358
+lt_tensor-0.0.1a21.dist-info/METADATA,sha256=RTOfXJV3E97PYC5Aji5FhNDPxB9ceZjnUU_3QOAx6nQ,1031
+lt_tensor-0.0.1a21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lt_tensor-0.0.1a21.dist-info/top_level.txt,sha256=35FuhFeXnUyvHWdbVHGPh0hS8euofafnJ_GJAVSF4Kk,10
+lt_tensor-0.0.1a21.dist-info/RECORD,,
lt_tensor/datasets/audio.py DELETED
@@ -1,235 +0,0 @@
-__all__ = ["WaveMelDataset"]
-from lt_tensor.torch_commons import *
-from lt_utils.common import *
-from lt_utils.misc_utils import default
-import random
-from torch.utils.data import Dataset, DataLoader, Sampler
-from lt_tensor.processors import AudioProcessor
-import torch.nn.functional as FT
-from lt_tensor.misc_utils import log_tensor
-from tqdm import tqdm
-
-
-DEFAULT_DEVICE = torch.tensor([0]).device
-
-
-class WaveMelDataset(Dataset):
-    cached_data: Union[list[dict[str, Tensor]], Tuple[Tensor, Tensor]] = []
-    loaded_files: Dict[str, List[Dict[str, Tensor]]] = {}
-    normalize_waves: bool = False
-    randomize_ranges: bool = False
-    alpha_wv: float = 1.0
-    limit_files: Optional[int] = None
-    min_frame_length: Optional[int] = None
-    max_frame_length: Optional[int] = None
-
-    def __init__(
-        self,
-        audio_processor: AudioProcessor,
-        dataset_path: PathLike,
-        limit_files: Optional[int] = None,
-        min_frame_length: Optional[int] = None,
-        max_frame_length: Optional[int] = None,
-        randomize_ranges: Optional[bool] = None,
-        pre_load: bool = False,
-        normalize_waves: Optional[bool] = None,
-        alpha_wv: Optional[float] = None,
-        lib_norm: bool = True,
-    ):
-        super().__init__()
-        assert max_frame_length is None or max_frame_length >= (
-            (audio_processor.n_fft // 2) + 1
-        )
-        self.ap = audio_processor
-        self.dataset_path = dataset_path
-        if limit_files:
-            self.limit_files = limit_files
-        if normalize_waves is not None:
-            self.normalize_waves = normalize_waves
-        if alpha_wv is not None:
-            self.alpha_wv = alpha_wv
-        if pre_load is not None:
-            self.pre_loaded = pre_load
-        if randomize_ranges is not None:
-            self.randomize_ranges = randomize_ranges
-
-        self.post_n_fft = (audio_processor.n_fft // 2) + 1
-        self.lib_norm = lib_norm
-        if max_frame_length is not None:
-            max_frame_length = max(self.post_n_fft + 1, max_frame_length)
-            self.r_range = max(self.post_n_fft + 1, max_frame_length // 3)
-            self.max_frame_length = max_frame_length
-        if min_frame_length is not None:
-            self.min_frame_length = max(
-                self.post_n_fft + 1, min(min_frame_length, max_frame_length)
-            )
-
-        self.files = self.ap.find_audios(dataset_path, maximum=None)
-        if limit_files:
-            random.shuffle(self.files)
-            self.files = self.files[-self.limit_files :]
-        if pre_load:
-            for file in tqdm(self.files, "Loading files"):
-                results = self.load_data(file)
-                if not results:
-                    continue
-                self.cached_data.extend(results)
-
-    def renew_dataset(self, new_path: Optional[PathLike] = None):
-        new_path = default(new_path, self.dataset_path)
-        self.files = self.ap.find_audios(new_path, maximum=None)
-        random.shuffle(self.files)
-        for file in tqdm(self.files, "Loading files"):
-            results = self.load_data(file)
-            if not results:
-                continue
-            self.cached_data.extend(results)
-
-    def _add_dict(
-        self,
-        audio_wave: Tensor,
-        audio_mel: Tensor,
-        pitch: Tensor,
-        rms: Tensor,
-        file: PathLike,
-    ):
-        return {
-            "wave": audio_wave,
-            "pitch": pitch,
-            "rms": rms,
-            "mel": audio_mel,
-            "file": file,
-        }
-
-    def load_data(self, file: PathLike):
-        initial_audio = self.ap.load_audio(
-            file, normalize=self.lib_norm, alpha=self.alpha_wv
-        )
-        if self.normalize_waves:
-            initial_audio = self.ap.normalize_audio(initial_audio)
-        if initial_audio.shape[-1] < self.post_n_fft:
-            return None
-
-        if self.min_frame_length is not None:
-            if self.min_frame_length > initial_audio.shape[-1]:
-                return None
-        if (
-            not self.max_frame_length
-            or initial_audio.shape[-1] <= self.max_frame_length
-        ):
-
-            audio_rms = self.ap.compute_rms(initial_audio)
-            audio_pitch = self.ap.compute_pitch(initial_audio)
-            audio_mel = self.ap.compute_mel(initial_audio, add_base=True)
-
-            return [
-                self._add_dict(initial_audio, audio_mel, audio_pitch, audio_rms, file)
-            ]
-        results = []
-
-        if self.randomize_ranges:
-            frame_limit = random.randint(self.r_range, self.max_frame_length)
-        else:
-            frame_limit = self.max_frame_length
-
-        fragments = list(
-            torch.split(initial_audio, split_size_or_sections=frame_limit, dim=-1)
-        )
-        random.shuffle(fragments)
-        for fragment in fragments:
-            if fragment.shape[-1] < self.post_n_fft:
-                # Too small
-                continue
-            if (
-                self.min_frame_length is not None
-                and self.min_frame_length > fragment.shape[-1]
-            ):
-                continue
-
-            audio_rms = self.ap.compute_rms(fragment)
-            audio_pitch = self.ap.compute_pitch(fragment)
-            audio_mel = self.ap.compute_mel(fragment, add_base=True)
-            results.append(
-                self._add_dict(fragment, audio_mel, audio_pitch, audio_rms, file)
-            )
-        return results
-
-    def get_data_loader(
-        self,
-        batch_size: int = 1,
-        shuffle: Optional[bool] = None,
-        sampler: Optional[Union[Sampler, Iterable]] = None,
-        batch_sampler: Optional[Union[Sampler[list], Iterable[list]]] = None,
-        num_workers: int = 0,
-        pin_memory: bool = False,
-        drop_last: bool = False,
-        timeout: float = 0,
-    ):
-        return DataLoader(
-            self,
-            batch_size=batch_size,
-            shuffle=shuffle,
-            sampler=sampler,
-            batch_sampler=batch_sampler,
-            num_workers=num_workers,
-            pin_memory=pin_memory,
-            drop_last=drop_last,
-            timeout=timeout,
-            collate_fn=self.collate_fn,
-        )
-
-    def collate_fn(self, batch: Sequence[Dict[str, Tensor]]):
-        mel = []
-        wave = []
-        file = []
-        rms = []
-        pitch = []
-        for x in batch:
-            mel.append(x["mel"])
-            wave.append(x["wave"])
-            file.append(x["file"])
-            rms.append(x["rms"])
-            pitch.append(x["pitch"])
-        # Find max time in mel (dim -1), and max audio length
-        max_mel_len = max([m.shape[-1] for m in mel])
-        max_audio_len = max([a.shape[-1] for a in wave])
-        max_pitch_len = max([a.shape[-1] for a in pitch])
-        max_rms_len = max([a.shape[-1] for a in rms])
-
-        padded_mel = torch.stack(
-            [FT.pad(m, (0, max_mel_len - m.shape[-1])) for m in mel]
-        )  # shape: [B, 80, T_max]
-
-        padded_wave = torch.stack(
-            [FT.pad(a, (0, max_audio_len - a.shape[-1])) for a in wave]
-        )  # shape: [B, L_max]
-
-        padded_pitch = torch.stack(
-            [FT.pad(a, (0, max_pitch_len - a.shape[-1])) for a in pitch]
-        )  # shape: [B, L_max]
-        padded_rms = torch.stack(
-            [FT.pad(a, (0, max_rms_len - a.shape[-1])) for a in rms]
-        )  # shape: [B, L_max]
-        return dict(
-            mel=padded_mel,
-            wave=padded_wave,
-            pitch=padded_pitch,
-            rms=padded_rms,
-            file=file,
-        )
-
-    def get_item(self, idx: int):
-        if self.pre_loaded:
-            return self.cached_data[idx]
-        file = self.files[idx]
-        if file not in self.loaded_files:
-            self.loaded_files[file] = self.load_data(file)
-        return random.choice(self.loaded_files[file])
-
-    def __len__(self):
-        if self.pre_loaded:
-            return len(self.cached_data)
-        return len(self.files)
-
-    def __getitem__(self, index: int):
-        return self.get_item(index)
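With this file gone, `WaveMelDataset` is no longer shipped; projects that depend on it should pin `lt-tensor==0.0.1a19` or vendor the class. For reference, a minimal stand-in for its padding collate (assuming batch items are dicts like those built by `_add_dict` above):

```python
import torch
import torch.nn.functional as F
from typing import Dict, Sequence

def pad_collate(batch: Sequence[Dict], keys=("wave", "mel", "pitch", "rms")) -> Dict:
    """Right-pad each tensor field to the batch maximum along the last dim."""
    out = {"file": [item["file"] for item in batch]}
    for key in keys:
        tensors = [item[key] for item in batch]
        max_len = max(t.shape[-1] for t in tensors)
        out[key] = torch.stack(
            [F.pad(t, (0, max_len - t.shape[-1])) for t in tensors]
        )
    return out
```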