dragon-ml-toolbox 10.5.0__tar.gz → 10.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {dragon_ml_toolbox-10.5.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.7.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_datasetmaster.py +25 -5
  4. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_models.py +63 -81
  5. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_scaler.py +1 -1
  6. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_trainer.py +3 -7
  7. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/keys.py +7 -0
  8. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/pyproject.toml +1 -1
  9. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/LICENSE +0 -0
  10. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/LICENSE-THIRD-PARTY.md +0 -0
  11. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/README.md +0 -0
  12. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  13. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  14. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  15. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  16. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ETL_cleaning.py +0 -0
  17. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ETL_engineering.py +0 -0
  18. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/GUI_tools.py +0 -0
  19. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/MICE_imputation.py +0 -0
  20. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_callbacks.py +0 -0
  21. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_evaluation.py +0 -0
  22. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_evaluation_multi.py +0 -0
  23. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_inference.py +0 -0
  24. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ML_optimization.py +0 -0
  25. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/PSO_optimization.py +0 -0
  26. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/RNN_forecast.py +0 -0
  27. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/SQL.py +0 -0
  28. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/VIF_factor.py +0 -0
  29. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/__init__.py +0 -0
  30. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/_logger.py +0 -0
  31. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/_script_info.py +0 -0
  32. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/custom_logger.py +0 -0
  33. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/data_exploration.py +0 -0
  34. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ensemble_evaluation.py +0 -0
  35. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ensemble_inference.py +0 -0
  36. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/ensemble_learning.py +0 -0
  37. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/handle_excel.py +0 -0
  38. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-10.5.0 → dragon_ml_toolbox-10.7.0}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.5.0
+Version: 10.7.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.5.0
+Version: 10.7.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
ml_tools/ML_datasetmaster.py
@@ -34,7 +34,9 @@ class _PytorchDataset(Dataset):
     def __init__(self, features: Union[numpy.ndarray, pandas.DataFrame],
                  labels: Union[numpy.ndarray, pandas.Series],
                  labels_dtype: torch.dtype,
-                 features_dtype: torch.dtype = torch.float32):
+                 features_dtype: torch.dtype = torch.float32,
+                 feature_names: Optional[List[str]] = None,
+                 target_names: Optional[List[str]] = None):
         """
         integer labels for classification.
 
@@ -50,12 +52,30 @@ class _PytorchDataset(Dataset):
             self.labels = torch.tensor(labels, dtype=labels_dtype)
         else:
             self.labels = torch.tensor(labels.values, dtype=labels_dtype)
+
+        self._feature_names = feature_names
+        self._target_names = target_names
 
     def __len__(self):
         return len(self.features)
 
     def __getitem__(self, index):
         return self.features[index], self.labels[index]
+
+    @property
+    def feature_names(self):
+        if self._feature_names is not None:
+            return self._feature_names
+        else:
+            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any feature names.")
+            raise ValueError()
+
+    @property
+    def target_names(self):
+        if self._target_names is not None:
+            return self._target_names
+        else:
+            _LOGGER.error(f"Dataset {self.__class__} has not been initialized with any target names.")
 
 
 # --- Abstract Base Class (New) ---
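
Note: datasets now carry their own column metadata. A minimal sketch of the new surface (`_PytorchDataset` is a private class, so the direct construction here is purely illustrative):

    import numpy as np
    import torch

    # hypothetical data; names travel with the dataset from construction on
    ds = _PytorchDataset(
        features=np.random.rand(8, 3),
        labels=np.zeros(8),
        labels_dtype=torch.float32,
        feature_names=["f1", "f2", "f3"],
        target_names=["y"],
    )
    ds.feature_names   # ['f1', 'f2', 'f3']
    ds.target_names    # ['y']

If a dataset was built without names, `feature_names` logs an error and raises `ValueError`; as added above, `target_names` logs the error but falls through without an explicit raise.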
@@ -229,8 +249,8 @@ class DatasetMaker(_BaseDatasetMaker):
         )
 
         # --- 4. Create Datasets ---
-        self._train_ds = _PytorchDataset(X_train_final, y_train.values, label_dtype)
-        self._test_ds = _PytorchDataset(X_test_final, y_test.values, label_dtype)
+        self._train_ds = _PytorchDataset(X_train_final, y_train.values, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=[self._target_name])
+        self._test_ds = _PytorchDataset(X_test_final, y_test.values, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=[self._target_name])
 
     @property
     def target_name(self) -> str:
@@ -280,8 +300,8 @@ class DatasetMakerMulti(_BaseDatasetMaker):
             X_train, y_train, X_test, label_dtype, continuous_feature_columns
         )
 
-        self._train_ds = _PytorchDataset(X_train_final, y_train, label_dtype)
-        self._test_ds = _PytorchDataset(X_test_final, y_test, label_dtype)
+        self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
+        self._test_ds = _PytorchDataset(X_test_final, y_test, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
 
     @property
     def target_names(self) -> list[str]:
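
Note: together with the `DatasetMaker` hunk above, `target_names` is always list-valued on the resulting datasets: the single-target maker wraps its one name in a list, while the multi-target maker passes its list through. An illustrative sketch of the invariant downstream code can rely on (values are made up):

    single_ds.target_names   # e.g. ['price'] -- one name, still a list
    multi_ds.target_names    # e.g. ['t1', 't2'] -- passed through unchanged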
ml_tools/ML_models.py
@@ -6,6 +6,8 @@ import json
 from ._logger import _LOGGER
 from .path_manager import make_fullpath
 from ._script_info import _script_info
+from .keys import PytorchModelKeys
+
 
 __all__ = [
     "MultilayerPerceptron",
@@ -13,12 +15,63 @@ __all__ = [
     "MultiHeadAttentionMLP",
     "TabularTransformer",
     "SequencePredictorLSTM",
-    "save_architecture",
-    "load_architecture"
 ]
 
 
-class _BaseMLP(nn.Module):
+class _ArchitectureHandlerMixin:
+    """
+    A mixin class to provide save and load functionality for model architectures.
+    """
+    def save(self: nn.Module, directory: Union[str, Path], verbose: bool = True): # type: ignore
+        """Saves the model's architecture to a JSON file."""
+        if not hasattr(self, 'get_architecture_config'):
+            _LOGGER.error(f"Model '{self.__class__.__name__}' must have a 'get_architecture_config()' method to use this functionality.")
+            raise AttributeError()
+
+        path_dir = make_fullpath(directory, make=True, enforce="directory")
+        full_path = path_dir / PytorchModelKeys.SAVENAME
+
+        config = {
+            PytorchModelKeys.MODEL: self.__class__.__name__,
+            PytorchModelKeys.CONFIG: self.get_architecture_config() # type: ignore
+        }
+
+        with open(full_path, 'w') as f:
+            json.dump(config, f, indent=4)
+
+        if verbose:
+            _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved to '{path_dir.name}'")
+
+    @classmethod
+    def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
+        """Loads a model architecture from a JSON file. If a directory is provided, the function will attempt to load a JSON file inside."""
+        user_path = make_fullpath(file_or_dir)
+
+        if user_path.is_dir():
+            target_path = make_fullpath(user_path / PytorchModelKeys.SAVENAME, enforce="file")
+        elif user_path.is_file():
+            target_path = user_path
+        else:
+            _LOGGER.error(f"Invalid path: '{file_or_dir}'")
+            raise IOError()
+
+        with open(target_path, 'r') as f:
+            saved_data = json.load(f)
+
+        saved_class_name = saved_data[PytorchModelKeys.MODEL]
+        config = saved_data[PytorchModelKeys.CONFIG]
+
+        if saved_class_name != cls.__name__:
+            _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected.")
+            raise ValueError()
+
+        model = cls(**config)
+        if verbose:
+            _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
+        return model
+
+
+class _BaseMLP(nn.Module, _ArchitectureHandlerMixin):
     """
     A base class for Multilayer Perceptrons.
 
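
Note: persistence moves from module-level functions onto the models themselves via this mixin. A hedged usage sketch (the constructor arguments are invented for illustration; the real signatures are not part of this diff):

    # hypothetical hyperparameters
    model = MultilayerPerceptron(in_features=10, out_targets=2)

    # writes <directory>/architecture.json (PytorchModelKeys.SAVENAME)
    model.save("saved_models")

    # classmethod: accepts the JSON file itself or its containing directory,
    # checks the stored class name against cls, then rebuilds via cls(**config)
    restored = MultilayerPerceptron.load("saved_models")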
@@ -68,7 +121,7 @@ class _BaseMLP(nn.Module):
         # Set a customizable Prediction Head for flexibility, specially in transfer learning and fine-tuning
         self.output_layer = nn.Linear(current_features, out_targets)
 
-    def get_config(self) -> Dict[str, Any]:
+    def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the base configuration of the model."""
         return {
             'in_features': self.in_features,
@@ -228,9 +281,9 @@ class MultiHeadAttentionMLP(_BaseMLP):
 
         return logits, attention_weights
 
-    def get_config(self) -> Dict[str, Any]:
+    def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the full configuration of the model."""
-        config = super().get_config()
+        config = super().get_architecture_config()
         config['num_heads'] = self.num_heads
         config['attention_dropout'] = self.attention_dropout
         return config
@@ -247,7 +300,7 @@ class MultiHeadAttentionMLP(_BaseMLP):
         return f"MultiHeadAttentionMLP(arch: {arch_str})"
 
 
-class TabularTransformer(nn.Module):
+class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     """
     A Transformer-based model for tabular data tasks.
 
@@ -357,7 +410,7 @@ class TabularTransformer(nn.Module):
 
         return logits
 
-    def get_config(self) -> Dict[str, Any]:
+    def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the full configuration of the model."""
         return {
             'out_targets': self.out_targets,
@@ -529,7 +582,7 @@ class _MultiHeadAttentionLayer(nn.Module):
         return out, attn_weights.squeeze()
 
 
-class SequencePredictorLSTM(nn.Module):
+class SequencePredictorLSTM(nn.Module, _ArchitectureHandlerMixin):
     """
     A simple LSTM-based network for sequence-to-sequence prediction tasks.
 
@@ -597,7 +650,7 @@ class SequencePredictorLSTM(nn.Module):
 
         return predictions
 
-    def get_config(self) -> dict:
+    def get_architecture_config(self) -> dict:
         """Returns the configuration of the model."""
         return {
             'features': self.features,
@@ -615,76 +668,5 @@ class SequencePredictorLSTM(nn.Module):
         )
 
 
-def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bool=True):
-    """
-    Saves a model's architecture to a 'architecture.json' file.
-
-    This function relies on the model having a `get_config()` method that
-    returns a dictionary of the arguments needed to initialize it.
-
-    Args:
-        model (nn.Module): The PyTorch model instance to save.
-        directory (str | Path): The directory to save the JSON file.
-
-    Raises:
-        AttributeError: If the model does not have a `get_config()` method.
-    """
-    if not hasattr(model, 'get_config'):
-        _LOGGER.error(f"Model '{model.__class__.__name__}' does not have a 'get_config()' method.")
-        raise AttributeError()
-
-    # Ensure the target directory exists
-    path_dir = make_fullpath(directory, make=True, enforce="directory")
-    full_path = path_dir / "architecture.json"
-
-    config = {
-        'model_class': model.__class__.__name__,
-        'config': model.get_config() # type: ignore
-    }
-
-    with open(full_path, 'w') as f:
-        json.dump(config, f, indent=4)
-
-    if verbose:
-        _LOGGER.info(f"Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")
-
-
-def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
-    """
-    Loads a model architecture from a JSON file.
-
-    This function instantiates a model by providing an explicit class to use
-    and checking that it matches the class name specified in the file.
-
-    Args:
-        filepath (Union[str, Path]): The path of the JSON architecture file.
-        expected_model_class (type): The model class expected to load (e.g., MultilayerPerceptron).
-
-    Returns:
-        nn.Module: An instance of the model with a freshly initialized state.
-
-    Raises:
-        FileNotFoundError: If the filepath does not exist.
-        ValueError: If the class name in the file does not match the `expected_model_class`.
-    """
-    path_obj = make_fullpath(filepath, enforce="file")
-
-    with open(path_obj, 'r') as f:
-        saved_data = json.load(f)
-
-    saved_class_name = saved_data['model_class']
-    config = saved_data['config']
-
-    if saved_class_name != expected_model_class.__name__:
-        _LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{expected_model_class.__name__}' was expected.")
-        raise ValueError()
-
-    # Create an instance of the model using the provided class and config
-    model = expected_model_class(**config)
-    if verbose:
-        _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
-    return model
-
-
 def info():
     _script_info(__all__)
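
Note: the on-disk keys ('model_class', 'config') match the new `PytorchModelKeys` values, so files written by the removed functions should remain loadable by the mixin. A hedged migration sketch (paths are placeholders):

    # up to 10.5.0
    save_architecture(model, "saved_models")
    model = load_architecture("saved_models/architecture.json", MultilayerPerceptron)

    # from 10.7.0
    model.save("saved_models")
    model = MultilayerPerceptron.load("saved_models/architecture.json")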
ml_tools/ML_scaler.py
@@ -156,7 +156,7 @@ class PytorchScaler:
         Args:
             filepath (str | Path): The path to save the file.
         """
-        path_obj = make_fullpath(filepath)
+        path_obj = make_fullpath(filepath, make=True, enforce="file")
         state = {
             'mean': self.mean_,
             'std': self.std_,
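
Note: the added `make=True, enforce="file"` asks `make_fullpath` to create missing parent directories before the state is written. A hedged one-liner (assuming this sits in the scaler's save method, whose name is not visible in the hunk):

    scaler.save("artifacts/scaler.pth")  # 'artifacts/' no longer needs to pre-exist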
ml_tools/ML_trainer.py
@@ -357,7 +357,7 @@ class MLTrainer:
                 If None, the trainer's test dataset is used.
             n_samples (int): The number of samples to use for both background and explanation.
             feature_names (list[str] | None): Feature names.
-            target_names (list[str] | None): Target names
+            target_names (list[str] | None): Target names for multi-target tasks.
             save_dir (str | Path): Directory to save all SHAP artifacts.
         """
         # Internal helper to create a dataloader and get a random sample
@@ -408,12 +408,8 @@ class MLTrainer:
         if hasattr(target_dataset, "feature_names"):
             feature_names = target_dataset.feature_names # type: ignore
         else:
-            try:
-                # Handle PyTorch Subset
-                feature_names = target_dataset.dataset.feature_names # type: ignore
-            except AttributeError:
-                _LOGGER.error("Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a `feature_names` attribute.")
-                raise ValueError()
+            _LOGGER.error("Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a `feature_names` attribute.")
+            raise ValueError()
 
         # 3. Call the plotting function
         if self.kind in ["regression", "classification"]:
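
Note: this is a behavioral tightening. The old `target_dataset.dataset.feature_names` fallback unwrapped a `torch.utils.data.Subset`; that no longer happens, so callers using a Subset must pass `feature_names` explicitly. A hedged sketch; the SHAP method's name (`explain`) and its dataset keyword (`explain_dataset`) are placeholders, since neither appears in this diff:

    from torch.utils.data import Subset

    subset = Subset(full_dataset, indices=list(range(100)))

    # before 10.7.0 the trainer fell back to subset.dataset.feature_names;
    # now the names must be supplied or a ValueError is raised
    trainer.explain(
        explain_dataset=subset,
        feature_names=full_dataset.feature_names,
        save_dir="shap_output",
    )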
ml_tools/keys.py
@@ -38,6 +38,13 @@ class PyTorchInferenceKeys:
     PROBABILITIES = "probabilities"
 
 
+class PytorchModelKeys:
+    """Keys for saving and loading models"""
+    MODEL = 'model_class'
+    CONFIG = "config"
+    SAVENAME = "architecture.json"
+
+
 class _OneHotOtherPlaceholder:
     """Used internally by GUI_tools."""
     OTHER_GUI = "OTHER"
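
Note: these constants centralize the strings used by `_ArchitectureHandlerMixin` in ml_tools/ML_models.py. For illustration, a saved file can be inspected with the same keys (the path and the config contents shown are hypothetical):

    import json
    from ml_tools.keys import PytorchModelKeys

    with open("saved_models/architecture.json") as f:
        data = json.load(f)

    data[PytorchModelKeys.MODEL]    # e.g. "MultilayerPerceptron"
    data[PytorchModelKeys.CONFIG]   # e.g. {"in_features": 10, "out_targets": 2, ...}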
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "10.5.0"
+version = "10.7.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }