dragon-ml-toolbox 12.11.0__py3-none-any.whl → 12.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dragon_ml_toolbox-12.12.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.11.0
+Version: 12.12.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox-12.12.0.dist-info/RECORD CHANGED
@@ -1,15 +1,15 @@
-dragon_ml_toolbox-12.11.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-12.11.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-12.12.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-12.12.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
 ml_tools/MICE_imputation.py,sha256=X273Qlgoqqg7KTmoKd75YDyAPB0UIbTzGP3xsCmRh3E,11717
-ml_tools/ML_callbacks.py,sha256=-XRIZEy3CPJWTHcoReyIw53FZlTs3pWcTVVnncTQQSc,13909
-ml_tools/ML_datasetmaster.py,sha256=t6q6mU9lz2rYKTVPKjA7yZ5ImV7_NykiciHaYnqIEpA,30822
+ml_tools/ML_callbacks.py,sha256=2ZazJjlbClP-ALc8q0ru2oalkugbhO3TFwPg4RFZpck,14056
+ml_tools/ML_datasetmaster.py,sha256=kedCGneR3S2zui0_JFZN6TBL5e69XWkdpkE_QohyqSM,31433
 ml_tools/ML_evaluation.py,sha256=tLswOPgH4G1KExSMn0876YtNkbxPh-W3J4MYOjomMWA,16208
 ml_tools/ML_evaluation_multi.py,sha256=6OZyQ4SM9ALh38mOABmiHgIQDWcovsD_iOo7Bg9YZCE,12516
 ml_tools/ML_inference.py,sha256=ymFvncFsU10PExq87xnEj541DKV5ck0nMuK8ToJHzVQ,23067
-ml_tools/ML_models.py,sha256=pSCV6KbmVnPZr49Kbyg7g25CYaWBWJr6IinBHKgVKGw,28042
+ml_tools/ML_models.py,sha256=G64NPhYZfYvHTIUwkIrMrNLgfDTKJwqdc8jwesPqB9E,28090
 ml_tools/ML_optimization.py,sha256=es3TlQbY7RYgJMZnznkjYGbUxFnAqzZxE_g3_qLK9Q8,22960
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
 ml_tools/ML_simple_optimization.py,sha256=W2mce1XFCuiOHTOjOsCNbETISHn5MwYlYsTIXH5hMMo,18177
@@ -35,7 +35,7 @@ ml_tools/optimization_tools.py,sha256=P074YCuZzkqkONnAsM-Zb9DTX_i8cRkkJLpwAWz6CR
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=ll2mVC0sO2jIEdG3K6xMcgEN13N4YSb8VjviGvw_ers,4949
 ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
-dragon_ml_toolbox-12.11.0.dist-info/METADATA,sha256=VOs19HzZ0j8xvEuKO9sIMDCGIPPQA22x3Lnh2H9Mw9c,6167
-dragon_ml_toolbox-12.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-12.11.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-12.11.0.dist-info/RECORD,,
+dragon_ml_toolbox-12.12.0.dist-info/METADATA,sha256=PKf7t2ojMJs9-6STvqebRBxS_1rWPv58ff0BqPk2d_A,6167
+dragon_ml_toolbox-12.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-12.12.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-12.12.0.dist-info/RECORD,,
ml_tools/ML_callbacks.py CHANGED
@@ -113,18 +113,19 @@ class TqdmProgressBar(Callback):
 class EarlyStopping(Callback):
     """
     Stop training when a monitored metric has stopped improving.
-
-    Args:
-        monitor (str): Quantity to be monitored. Defaults to 'val_loss'.
-        min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
-        patience (int): Number of epochs with no improvement after which training will be stopped.
-        mode (str): One of {'auto', 'min', 'max'}. In 'min' mode, training will stop when the quantity
-            monitored has stopped decreasing; in 'max' mode it will stop when the quantity
-            monitored has stopped increasing; in 'auto' mode, the direction is automatically
-            inferred from the name of the monitored quantity.
-        verbose (int): Verbosity mode.
     """
     def __init__(self, monitor: str=PyTorchLogKeys.VAL_LOSS, min_delta: float=0.0, patience: int=5, mode: Literal['auto', 'min', 'max']='auto', verbose: int=1):
+        """
+        Args:
+            monitor (str): Quantity to be monitored. Defaults to 'val_loss'.
+            min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
+            patience (int): Number of epochs with no improvement after which training will be stopped.
+            mode (str): One of {'auto', 'min', 'max'}. In 'min' mode, training will stop when the quantity
+                monitored has stopped decreasing; in 'max' mode it will stop when the quantity
+                monitored has stopped increasing; in 'auto' mode, the direction is automatically
+                inferred from the name of the monitored quantity.
+            verbose (int): Verbosity mode.
+        """
         super().__init__()
         self.monitor = monitor
         self.patience = patience
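
Behavior is unchanged here; the Args block simply moved from the class docstring into `__init__`. For orientation, a minimal usage sketch — only the `EarlyStopping` signature comes from this diff, and the trainer wiring is hypothetical:

```python
from ml_tools.ML_callbacks import EarlyStopping

# mode='auto' infers the direction from the metric name (here: minimize).
early_stop = EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,  # improvements smaller than this do not reset patience
    patience=5,       # stop after 5 epochs without improvement
    verbose=1,
)
# trainer.fit(..., callbacks=[early_stop])  # hypothetical trainer wiring
```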
@@ -188,22 +189,23 @@ class EarlyStopping(Callback):
 
 class ModelCheckpoint(Callback):
     """
-    Saves the model to a directory with automated filename generation and rotation. The filename includes the epoch and score.
-
-    - If `save_best_only` is True, it saves the single best model, deleting the
-      previous best.
-    - If `save_best_only` is False, it keeps the 3 most recent checkpoints,
-      deleting the oldest ones automatically.
-
-    Args:
-        save_dir (str): Directory where checkpoint files will be saved.
-        monitor (str): Metric to monitor for `save_best_only=True`.
-        save_best_only (bool): If true, save only the best model.
-        mode (str): One of {'auto', 'min', 'max'}.
-        verbose (int): Verbosity mode.
+    Saves the model weights to a directory with automated filename generation and rotation.
     """
     def __init__(self, save_dir: Union[str,Path], checkpoint_name: Optional[str]=None, monitor: str = PyTorchLogKeys.VAL_LOSS,
                  save_best_only: bool = True, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 0):
+        """
+        - If `save_best_only` is True, it saves the single best model, deleting the previous best.
+        - If `save_best_only` is False, it keeps the 3 most recent checkpoints, deleting the oldest ones automatically.
+
+        Args:
+            save_dir (str): Directory where checkpoint files will be saved.
+            checkpoint_name (str | None): If None, the filename will include the epoch and score.
+            monitor (str): Metric to monitor for `save_best_only=True`.
+            save_best_only (bool): If true, save only the best model.
+            mode (str): One of {'auto', 'min', 'max'}.
+            verbose (int): Verbosity mode.
+        """
+
         super().__init__()
         self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
         if not self.save_dir.is_dir():
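
Besides the docstring move, the new `checkpoint_name` parameter is now documented where it lives. A sketch of the two rotation modes described above (paths are illustrative):

```python
from ml_tools.ML_callbacks import ModelCheckpoint

# Keep only the single best model by validation loss; the previous best file
# is deleted on each improvement.
best_only = ModelCheckpoint(
    save_dir="checkpoints/best",  # created if missing (make_fullpath(make=True))
    checkpoint_name=None,         # None -> epoch and score go into the filename
    monitor="val_loss",
    save_best_only=True,
)

# Or keep the 3 most recent checkpoints, deleting the oldest automatically.
rolling = ModelCheckpoint(save_dir="checkpoints/recent", save_best_only=False)
```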
@@ -306,17 +308,16 @@ class ModelCheckpoint(Callback):
 class LRScheduler(Callback):
     """
     Callback to manage a PyTorch learning rate scheduler.
-
-    This callback automatically calls the scheduler's `step()` method at the
-    end of each epoch. It also logs a message when the learning rate changes.
-
-    Args:
-        scheduler: An initialized PyTorch learning rate scheduler.
-        monitor (str, optional): The metric to monitor for schedulers that
-            require it, like `ReduceLROnPlateau`.
-            Should match a key in the logs (e.g., 'val_loss').
     """
     def __init__(self, scheduler, monitor: Optional[str] = None):
+        """
+        This callback automatically calls the scheduler's `step()` method at the
+        end of each epoch. It also logs a message when the learning rate changes.
+
+        Args:
+            scheduler: An initialized PyTorch learning rate scheduler.
+            monitor (str, optional): The metric to monitor for schedulers that require it, like `ReduceLROnPlateau`. Should match a key in the logs (e.g., 'val_loss').
+        """
        super().__init__()
         self.scheduler = scheduler
         self.monitor = monitor
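
A sketch pairing the callback with `ReduceLROnPlateau`, the one case that needs `monitor`; the optimizer/model setup is illustrative and not part of this diff:

```python
import torch
from ml_tools.ML_callbacks import LRScheduler

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # `model` assumed defined

# ReduceLROnPlateau steps on a metric value, so the callback needs to know
# which log key to read; epoch-based schedulers (e.g. StepLR) omit `monitor`.
plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=3)
lr_callback = LRScheduler(plateau, monitor="val_loss")
```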
ml_tools/ML_datasetmaster.py CHANGED
@@ -81,8 +81,7 @@ class _PytorchDataset(Dataset):
             _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
 
 
-# --- Abstract Base Class (New) ---
-# --- Abstract Base Class (Corrected) ---
+# --- Abstract Base Class ---
 class _BaseDatasetMaker(ABC):
     """
     Abstract base class for dataset makers. Contains shared logic for
@@ -150,6 +149,14 @@ class _BaseDatasetMaker(ABC):
     @property
     def target_names(self) -> list[str]:
         return self._target_names
+
+    @property
+    def number_of_features(self) -> int:
+        return len(self._feature_names)
+
+    @property
+    def number_of_targets(self) -> int:
+        return len(self._target_names)
 
     @property
     def id(self) -> Optional[str]:
@@ -180,14 +187,14 @@ class _BaseDatasetMaker(ABC):
                              filename=DatasetKeys.TARGET_NAMES,
                              verbose=verbose)
 
-    def save_scaler(self, save_dir: Union[str, Path], verbose: bool=True) -> None:
+    def save_scaler(self, directory: Union[str, Path], verbose: bool=True) -> None:
         """
         Saves the fitted PytorchScaler's state to a .pth file.
 
         The filename is automatically generated based on the dataset id.
 
         Args:
-            save_dir (str | Path): The directory where the scaler will be saved.
+            directory (str | Path): The directory where the scaler will be saved.
         """
         if not self.scaler:
             _LOGGER.error("No scaler was fitted or provided.")
@@ -195,7 +202,7 @@ class _BaseDatasetMaker(ABC):
         if not self.id:
             _LOGGER.error("Must set the dataset `id` before saving scaler.")
             raise ValueError()
-        save_path = make_fullpath(save_dir, make=True, enforce="directory")
+        save_path = make_fullpath(directory, make=True, enforce="directory")
         sanitized_id = sanitize_filename(self.id)
         filename = f"{DatasetKeys.SCALER_PREFIX}{sanitized_id}.pth"
         filepath = save_path / filename
@@ -203,6 +210,15 @@ class _BaseDatasetMaker(ABC):
         if verbose:
             _LOGGER.info(f"Scaler for dataset '{self.id}' saved as '{filepath.name}'.")
 
+    def save_artifacts(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """
+        Convenience method to save feature names, target names, and the scaler (if a scaler was fitted)
+        """
+        self.save_feature_names(directory=directory, verbose=verbose)
+        self.save_target_names(directory=directory, verbose=verbose)
+        if self.scaler is not None:
+            self.save_scaler(directory=directory, verbose=verbose)
+
 
 # Single target dataset
 class DatasetMaker(_BaseDatasetMaker):
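
Taken together with the new `number_of_features` / `number_of_targets` properties above, `save_artifacts` collapses three save calls into one. A sketch, with constructor arguments elided since they are outside this diff:

```python
from ml_tools.ML_datasetmaster import DatasetMaker

maker = DatasetMaker(...)  # placeholder: constructor args are not shown in this diff

maker.number_of_features   # new property: len(feature_names)
maker.number_of_targets    # new property: len(target_names)

# One call instead of save_feature_names + save_target_names + save_scaler;
# the scaler is only written if one was fitted (and saving it requires `id`).
maker.save_artifacts(directory="artifacts")
```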
ml_tools/ML_models.py CHANGED
@@ -304,7 +304,7 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     def __init__(self, *,
                  in_features: int,
                  out_targets: int,
-                 categorical_map: Dict[int, int],
+                 categorical_index_map: Dict[int, int],
                  embedding_dim: int = 32,
                  num_heads: int = 8,
                  num_layers: int = 6,
@@ -313,7 +313,7 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
         Args:
             in_features (int): The total number of columns in the input data (features).
             out_targets (int): Number of output targets (1 for regression).
-            categorical_map (Dict[int, int]): Maps categorical column index to its cardinality (number of unique categories).
+            categorical_index_map (Dict[int, int]): Maps categorical column index to its cardinality (number of unique categories).
             embedding_dim (int): The dimension for all feature embeddings. Must be divisible by num_heads.
             num_heads (int): The number of heads in the multi-head attention mechanism.
             num_layers (int): The number of sub-encoder-layers in the transformer encoder.
@@ -340,20 +340,20 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
         super().__init__()
 
         # --- Validation ---
-        if categorical_map and max(categorical_map.keys()) >= in_features:
-            _LOGGER.error(f"A categorical index ({max(categorical_map.keys())}) is out of bounds for the provided input features ({in_features}).")
+        if categorical_index_map and max(categorical_index_map.keys()) >= in_features:
+            _LOGGER.error(f"A categorical index ({max(categorical_index_map.keys())}) is out of bounds for the provided input features ({in_features}).")
             raise ValueError()
 
         # --- Derive numerical indices ---
         all_indices = set(range(in_features))
-        categorical_indices_set = set(categorical_map.keys())
+        categorical_indices_set = set(categorical_index_map.keys())
         numerical_indices = sorted(list(all_indices - categorical_indices_set))
 
         # --- Save configuration ---
         self.in_features = in_features
         self.out_targets = out_targets
         self.numerical_indices = numerical_indices
-        self.categorical_map = categorical_map
+        self.categorical_map = categorical_index_map
         self.embedding_dim = embedding_dim
         self.num_heads = num_heads
         self.num_layers = num_layers
@@ -362,7 +362,7 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
 
         # --- 1. Feature Tokenizer ---
         self.tokenizer = _FeatureTokenizer(
             numerical_indices=numerical_indices,
-            categorical_map=categorical_map,
+            categorical_map=categorical_index_map,
             embedding_dim=embedding_dim
         )
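
This is a breaking rename for keyword callers (the constructor is keyword-only); the stored attribute `self.categorical_map` and the internal `_FeatureTokenizer` argument keep the old name. A minimal construction sketch under the new signature, with illustrative shapes:

```python
from ml_tools.ML_models import TabularTransformer

# Columns 0 and 3 are categorical, with 10 and 4 unique categories respectively;
# all remaining column indices are derived as numerical features.
model = TabularTransformer(
    in_features=8,
    out_targets=1,
    categorical_index_map={0: 10, 3: 4},  # keyword renamed from `categorical_map`
    embedding_dim=32,                     # must be divisible by num_heads
    num_heads=8,
    num_layers=6,
)
```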