dragon-ml-toolbox 10.5.0__tar.gz → 10.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {dragon_ml_toolbox-10.5.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.7.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_datasetmaster.py +25 -5
  4. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_models.py +63 -81
  5. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_scaler.py +1 -1
  6. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_trainer.py +3 -7
  7. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/keys.py +7 -0
  8. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/pyproject.toml +1 -1
  9. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/LICENSE +0 -0
  10. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/LICENSE-THIRD-PARTY.md +0 -0
  11. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/README.md +0 -0
  12. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  13. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  14. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  15. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  16. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ETL_cleaning.py +0 -0
  17. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ETL_engineering.py +0 -0
  18. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/GUI_tools.py +0 -0
  19. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/MICE_imputation.py +0 -0
  20. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_callbacks.py +0 -0
  21. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_evaluation.py +0 -0
  22. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_evaluation_multi.py +0 -0
  23. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_inference.py +0 -0
  24. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_optimization.py +0 -0
  25. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/PSO_optimization.py +0 -0
  26. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/RNN_forecast.py +0 -0
  27. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/SQL.py +0 -0
  28. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/VIF_factor.py +0 -0
  29. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/__init__.py +0 -0
  30. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/_logger.py +0 -0
  31. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/_script_info.py +0 -0
  32. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/custom_logger.py +0 -0
  33. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/data_exploration.py +0 -0
  34. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ensemble_evaluation.py +0 -0
  35. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ensemble_inference.py +0 -0
  36. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ensemble_learning.py +0 -0
  37. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/handle_excel.py +0 -0
  38. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.5.0
+Version: 10.7.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.5.0
+Version: 10.7.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
ml_tools/ML_datasetmaster.py
@@ -34,7 +34,9 @@ class _PytorchDataset(Dataset):
     def __init__(self, features: Union[numpy.ndarray, pandas.DataFrame],
                  labels: Union[numpy.ndarray, pandas.Series],
                  labels_dtype: torch.dtype,
-                 features_dtype: torch.dtype = torch.float32):
+                 features_dtype: torch.dtype = torch.float32,
+                 feature_names: Optional[List[str]] = None,
+                 target_names: Optional[List[str]] = None):
         """
         integer labels for classification.
 
@@ -50,12 +52,30 @@ class _PytorchDataset(Dataset):
             self.labels = torch.tensor(labels, dtype=labels_dtype)
         else:
             self.labels = torch.tensor(labels.values, dtype=labels_dtype)
+
+        self._feature_names = feature_names
+        self._target_names = target_names
 
     def __len__(self):
         return len(self.features)
 
     def __getitem__(self, index):
         return self.features[index], self.labels[index]
+
+    @property
+    def feature_names(self):
+        if self._feature_names is not None:
+            return self._feature_names
+        else:
+            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any feature names.")
+            raise ValueError()
+
+    @property
+    def target_names(self):
+        if self._target_names is not None:
+            return self._target_names
+        else:
+            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
 
 
 # --- Abstract Base Class (New) ---
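
Note: datasets now carry their own column metadata. A minimal sketch of the new surface (`_PytorchDataset` is a private class, so the direct construction here is purely illustrative):

    import numpy as np
    import torch

    # hypothetical data; names travel with the dataset from construction on
    ds = _PytorchDataset(
        features=np.random.rand(8, 3),
        labels=np.zeros(8),
        labels_dtype=torch.float32,
        feature_names=["f1", "f2", "f3"],
        target_names=["y"],
    )
    ds.feature_names   # ['f1', 'f2', 'f3']
    ds.target_names    # ['y']

If a dataset was built without names, `feature_names` logs an error and raises `ValueError`; as added above, `target_names` logs the error but falls through without an explicit raise.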
@@ -229,8 +249,8 @@ class DatasetMaker(_BaseDatasetMaker):
         )
 
         # --- 4. Create Datasets ---
-        self._train_ds = _PytorchDataset(X_train_final, y_train.values, label_dtype)
-        self._test_ds = _PytorchDataset(X_test_final, y_test.values, label_dtype)
+        self._train_ds = _PytorchDataset(X_train_final, y_train.values, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=[self._target_name])
+        self._test_ds = _PytorchDataset(X_test_final, y_test.values, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=[self._target_name])
 
     @property
     def target_name(self) -> str:
@@ -280,8 +300,8 @@ class DatasetMakerMulti(_BaseDatasetMaker):
             X_train, y_train, X_test, label_dtype, continuous_feature_columns
         )
 
-        self._train_ds = _PytorchDataset(X_train_final, y_train, label_dtype)
-        self._test_ds = _PytorchDataset(X_test_final, y_test, label_dtype)
+        self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
+        self._test_ds = _PytorchDataset(X_test_final, y_test, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
 
     @property
     def target_names(self) -> list[str]:
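
Note: together with the `DatasetMaker` hunk above, `target_names` is always list-valued on the resulting datasets: the single-target maker wraps its one name in a list, while the multi-target maker passes its list through. An illustrative sketch of the invariant downstream code can rely on (values are made up):

    single_ds.target_names   # e.g. ['price'] -- one name, still a list
    multi_ds.target_names    # e.g. ['t1', 't2'] -- passed through unchanged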
ml_tools/ML_models.py
@@ -6,6 +6,8 @@ import json
 from ._logger import _LOGGER
 from .path_manager import make_fullpath
 from ._script_info import _script_info
+from .keys import PytorchModelKeys
+
 
 __all__ = [
     "MultilayerPerceptron",
@@ -13,12 +15,63 @@ __all__ = [
     "MultiHeadAttentionMLP",
     "TabularTransformer",
     "SequencePredictorLSTM",
-    "save_architecture",
-    "load_architecture"
 ]
 
 
-class _BaseMLP(nn.Module):
+class _ArchitectureHandlerMixin:
+    """
+    A mixin class to provide save and load functionality for model architectures.
+    """
+    def save(self: nn.Module, directory: Union[str, Path], verbose: bool = True): # type: ignore
+        """Saves the model's architecture to a JSON file."""
+        if not hasattr(self, 'get_architecture_config'):
+            _LOGGER.error(f"Model '{self.__class__.__name__}' must have a 'get_architecture_config()' method to use this functionality.")
+            raise AttributeError()
+
+        path_dir = make_fullpath(directory, make=True, enforce="directory")
+        full_path = path_dir / PytorchModelKeys.SAVENAME
+
+        config = {
+            PytorchModelKeys.MODEL: self.__class__.__name__,
+            PytorchModelKeys.CONFIG: self.get_architecture_config() # type: ignore
+        }
+
+        with open(full_path, 'w') as f:
+            json.dump(config, f, indent=4)
+
+        if verbose:
+            _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved to '{path_dir.name}'")
+
+    @classmethod
+    def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
+        """Loads a model architecture from a JSON file. If a directory is provided, the function will attempt to load a JSON file inside."""
+        user_path = make_fullpath(file_or_dir)
+
+        if user_path.is_dir():
+            target_path = make_fullpath(user_path / PytorchModelKeys.SAVENAME, enforce="file")
+        elif user_path.is_file():
+            target_path = user_path
+        else:
+            _LOGGER.error(f"Invalid path: '{file_or_dir}'")
+            raise IOError()
+
+        with open(target_path, 'r') as f:
+            saved_data = json.load(f)
+
+        saved_class_name = saved_data[PytorchModelKeys.MODEL]
+        config = saved_data[PytorchModelKeys.CONFIG]
+
+        if saved_class_name != cls.__name__:
+            _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected.")
+            raise ValueError()
+
+        model = cls(**config)
+        if verbose:
+            _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
+        return model
+
+
+class _BaseMLP(nn.Module, _ArchitectureHandlerMixin):
     """
     A base class for Multilayer Perceptrons.
 
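
Note: persistence moves from module-level functions onto the models themselves via this mixin. A hedged usage sketch (the constructor arguments are invented for illustration; the real signatures are not part of this diff):

    # hypothetical hyperparameters
    model = MultilayerPerceptron(in_features=10, out_targets=2)

    # writes <directory>/architecture.json (PytorchModelKeys.SAVENAME)
    model.save("saved_models")

    # classmethod: accepts the JSON file itself or its containing directory,
    # checks the stored class name against cls, then rebuilds via cls(**config)
    restored = MultilayerPerceptron.load("saved_models")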
@@ -68,7 +121,7 @@ class _BaseMLP(nn.Module):
         # Set a customizable Prediction Head for flexibility, specially in transfer learning and fine-tuning
         self.output_layer = nn.Linear(current_features, out_targets)
 
-    def get_config(self) -> Dict[str, Any]:
+    def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the base configuration of the model."""
         return {
             'in_features': self.in_features,
@@ -228,9 +281,9 @@ class MultiHeadAttentionMLP(_BaseMLP):
 
         return logits, attention_weights
 
-    def get_config(self) -> Dict[str, Any]:
+    def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the full configuration of the model."""
-        config = super().get_config()
+        config = super().get_architecture_config()
         config['num_heads'] = self.num_heads
         config['attention_dropout'] = self.attention_dropout
         return config
@@ -247,7 +300,7 @@ class MultiHeadAttentionMLP(_BaseMLP):
         return f"MultiHeadAttentionMLP(arch: {arch_str})"
 
 
-class TabularTransformer(nn.Module):
+class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     """
     A Transformer-based model for tabular data tasks.
 
@@ -357,7 +410,7 @@ class TabularTransformer(nn.Module):
 
         return logits
 
-    def get_config(self) -> Dict[str, Any]:
+    def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the full configuration of the model."""
         return {
             'out_targets': self.out_targets,
@@ -529,7 +582,7 @@ class _MultiHeadAttentionLayer(nn.Module):
         return out, attn_weights.squeeze()
 
 
-class SequencePredictorLSTM(nn.Module):
+class SequencePredictorLSTM(nn.Module, _ArchitectureHandlerMixin):
     """
     A simple LSTM-based network for sequence-to-sequence prediction tasks.
 
@@ -597,7 +650,7 @@ class SequencePredictorLSTM(nn.Module):
 
         return predictions
 
-    def get_config(self) -> dict:
+    def get_architecture_config(self) -> dict:
         """Returns the configuration of the model."""
         return {
             'features': self.features,
@@ -615,76 +668,5 @@ class SequencePredictorLSTM(nn.Module):
         )
 
 
-def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bool=True):
-    """
-    Saves a model's architecture to a 'architecture.json' file.
-
-    This function relies on the model having a `get_config()` method that
-    returns a dictionary of the arguments needed to initialize it.
-
-    Args:
-        model (nn.Module): The PyTorch model instance to save.
-        directory (str | Path): The directory to save the JSON file.
-
-    Raises:
-        AttributeError: If the model does not have a `get_config()` method.
-    """
-    if not hasattr(model, 'get_config'):
-        _LOGGER.error(f"Model '{model.__class__.__name__}' does not have a 'get_config()' method.")
-        raise AttributeError()
-
-    # Ensure the target directory exists
-    path_dir = make_fullpath(directory, make=True, enforce="directory")
-    full_path = path_dir / "architecture.json"
-
-    config = {
-        'model_class': model.__class__.__name__,
-        'config': model.get_config() # type: ignore
-    }
-
-    with open(full_path, 'w') as f:
-        json.dump(config, f, indent=4)
-
-    if verbose:
-        _LOGGER.info(f"Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")
-
-
-def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
-    """
-    Loads a model architecture from a JSON file.
-
-    This function instantiates a model by providing an explicit class to use
-    and checking that it matches the class name specified in the file.
-
-    Args:
-        filepath (Union[str, Path]): The path of the JSON architecture file.
-        expected_model_class (type): The model class expected to load (e.g., MultilayerPerceptron).
-
-    Returns:
-        nn.Module: An instance of the model with a freshly initialized state.
-
-    Raises:
-        FileNotFoundError: If the filepath does not exist.
-        ValueError: If the class name in the file does not match the `expected_model_class`.
-    """
-    path_obj = make_fullpath(filepath, enforce="file")
-
-    with open(path_obj, 'r') as f:
-        saved_data = json.load(f)
-
-    saved_class_name = saved_data['model_class']
-    config = saved_data['config']
-
-    if saved_class_name != expected_model_class.__name__:
-        _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{expected_model_class.__name__}' was expected.")
-        raise ValueError()
-
-    # Create an instance of the model using the provided class and config
-    model = expected_model_class(**config)
-    if verbose:
-        _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
-    return model
-
-
 def info():
     _script_info(__all__)
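
Note: the on-disk keys ('model_class', 'config') match the new `PytorchModelKeys` values, so files written by the removed functions should remain loadable by the mixin. A hedged migration sketch (paths are placeholders):

    # up to 10.5.0
    save_architecture(model, "saved_models")
    model = load_architecture("saved_models/architecture.json", MultilayerPerceptron)

    # from 10.7.0
    model.save("saved_models")
    model = MultilayerPerceptron.load("saved_models/architecture.json")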
ml_tools/ML_scaler.py
@@ -156,7 +156,7 @@ class PytorchScaler:
         Args:
             filepath (str | Path): The path to save the file.
         """
-        path_obj = make_fullpath(filepath)
+        path_obj = make_fullpath(filepath, make=True, enforce="file")
         state = {
             'mean': self.mean_,
             'std': self.std_,
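
Note: the added `make=True, enforce="file"` asks `make_fullpath` to create missing parent directories before the state is written. A hedged one-liner (assuming this sits in the scaler's save method, whose name is not visible in the hunk):

    scaler.save("artifacts/scaler.pth")  # 'artifacts/' no longer needs to pre-exist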
ml_tools/ML_trainer.py
@@ -357,7 +357,7 @@ class MLTrainer:
                 If None, the trainer's test dataset is used.
             n_samples (int): The number of samples to use for both background and explanation.
             feature_names (list[str] | None): Feature names.
-            target_names (list[str] | None): Target names
+            target_names (list[str] | None): Target names for multi-target tasks.
             save_dir (str | Path): Directory to save all SHAP artifacts.
         """
         # Internal helper to create a dataloader and get a random sample
@@ -408,12 +408,8 @@ class MLTrainer:
         if hasattr(target_dataset, "feature_names"):
             feature_names = target_dataset.feature_names # type: ignore
         else:
-            try:
-                # Handle PyTorch Subset
-                feature_names = target_dataset.dataset.feature_names # type: ignore
-            except AttributeError:
-                _LOGGER.error("Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a `feature_names` attribute.")
-                raise ValueError()
+            _LOGGER.error("Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a `feature_names` attribute.")
+            raise ValueError()
 
         # 3. Call the plotting function
         if self.kind in ["regression", "classification"]:
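
Note: this is a behavioral tightening. The old `target_dataset.dataset.feature_names` fallback unwrapped a `torch.utils.data.Subset`; that no longer happens, so callers using a Subset must pass `feature_names` explicitly. A hedged sketch; the SHAP method's name (`explain`) and its dataset keyword (`explain_dataset`) are placeholders, since neither appears in this diff:

    from torch.utils.data import Subset

    subset = Subset(full_dataset, indices=list(range(100)))

    # before 10.7.0 the trainer fell back to subset.dataset.feature_names;
    # now the names must be supplied or a ValueError is raised
    trainer.explain(
        explain_dataset=subset,
        feature_names=full_dataset.feature_names,
        save_dir="shap_output",
    )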
ml_tools/keys.py
@@ -38,6 +38,13 @@ class PyTorchInferenceKeys:
     PROBABILITIES = "probabilities"
 
 
+class PytorchModelKeys:
+    """Keys for saving and loading models"""
+    MODEL = 'model_class'
+    CONFIG = "config"
+    SAVENAME = "architecture.json"
+
+
 class _OneHotOtherPlaceholder:
     """Used internally by GUI_tools."""
     OTHER_GUI = "OTHER"
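
Note: these constants centralize the strings used by `_ArchitectureHandlerMixin` in ml_tools/ML_models.py. For illustration, a saved file can be inspected with the same keys (the path and the config contents shown are hypothetical):

    import json
    from ml_tools.keys import PytorchModelKeys

    with open("saved_models/architecture.json") as f:
        data = json.load(f)

    data[PytorchModelKeys.MODEL]    # e.g. "MultilayerPerceptron"
    data[PytorchModelKeys.CONFIG]   # e.g. {"in_features": 10, "out_targets": 2, ...}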
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "10.5.0"
+version = "10.7.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }