dragon-ml-toolbox 6.2.0__tar.gz → 6.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-6.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-6.3.0}/PKG-INFO +13 -1
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/README.md +12 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0/dragon_ml_toolbox.egg-info}/PKG-INFO +13 -1
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_callbacks.py +3 -1
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_datasetmaster.py +30 -2
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_inference.py +5 -2
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_models.py +115 -3
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/custom_logger.py +37 -1
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/LICENSE +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/setup.cfg +0 -0

{dragon_ml_toolbox-6.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-6.3.0}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 6.2.0
+Version: 6.3.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -160,6 +160,8 @@ SQL
 utilities
 ```
 
+---
+
 ### 🔬 MICE Imputation and Variance Inflation Factor [mice]
 
 ⚠️ Important: This group has strict version requirements. It is highly recommended to install this group in a separate virtual environment.
@@ -178,6 +180,8 @@ path_manager
 utilities
 ```
 
+---
+
 ### 📋 Excel File Handling [excel]
 
 Installs dependencies required to process and handle .xlsx or .xls files.
@@ -194,6 +198,8 @@ handle_excel
 path_manager
 ```
 
+---
+
 ### 🎰 GUI for Boosting Algorithms (XGBoost, LightGBM) [gui-boost]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -215,6 +221,8 @@ ensemble_inference
 path_manager
 ```
 
+---
+
 ### 🤖 GUI for PyTorch Models [gui-torch]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -236,6 +244,8 @@ ML_inference
 path_manager
 ```
 
+---
+
 ### 🎫 Base Tools [base]
 
 General purpose functions and classes.
@@ -254,6 +264,8 @@ utilities
 path_manager
 ```
 
+---
+
 ### ⚒️ APP bundlers
 
 Choose one if needed.
````

{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/README.md

````diff
@@ -79,6 +79,8 @@ SQL
 utilities
 ```
 
+---
+
 ### 🔬 MICE Imputation and Variance Inflation Factor [mice]
 
 ⚠️ Important: This group has strict version requirements. It is highly recommended to install this group in a separate virtual environment.
@@ -97,6 +99,8 @@ path_manager
 utilities
 ```
 
+---
+
 ### 📋 Excel File Handling [excel]
 
 Installs dependencies required to process and handle .xlsx or .xls files.
@@ -113,6 +117,8 @@ handle_excel
 path_manager
 ```
 
+---
+
 ### 🎰 GUI for Boosting Algorithms (XGBoost, LightGBM) [gui-boost]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -134,6 +140,8 @@ ensemble_inference
 path_manager
 ```
 
+---
+
 ### 🤖 GUI for PyTorch Models [gui-torch]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -155,6 +163,8 @@ ML_inference
 path_manager
 ```
 
+---
+
 ### 🎫 Base Tools [base]
 
 General purpose functions and classes.
@@ -173,6 +183,8 @@ utilities
 path_manager
 ```
 
+---
+
 ### ⚒️ APP bundlers
 
 Choose one if needed.
````

{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0/dragon_ml_toolbox.egg-info}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 6.2.0
+Version: 6.3.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -160,6 +160,8 @@ SQL
 utilities
 ```
 
+---
+
 ### 🔬 MICE Imputation and Variance Inflation Factor [mice]
 
 ⚠️ Important: This group has strict version requirements. It is highly recommended to install this group in a separate virtual environment.
@@ -178,6 +180,8 @@ path_manager
 utilities
 ```
 
+---
+
 ### 📋 Excel File Handling [excel]
 
 Installs dependencies required to process and handle .xlsx or .xls files.
@@ -194,6 +198,8 @@ handle_excel
 path_manager
 ```
 
+---
+
 ### 🎰 GUI for Boosting Algorithms (XGBoost, LightGBM) [gui-boost]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -215,6 +221,8 @@ ensemble_inference
 path_manager
 ```
 
+---
+
 ### 🤖 GUI for PyTorch Models [gui-torch]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -236,6 +244,8 @@ ML_inference
 path_manager
 ```
 
+---
+
 ### 🎫 Base Tools [base]
 
 General purpose functions and classes.
@@ -254,6 +264,8 @@ utilities
 path_manager
 ```
 
+---
+
 ### ⚒️ APP bundlers
 
 Choose one if needed.
````

{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_callbacks.py

```diff
@@ -1,7 +1,7 @@
 import numpy as np
 import torch
 from tqdm.auto import tqdm
-from .path_manager import make_fullpath
+from .path_manager import make_fullpath, sanitize_filename
 from .keys import PyTorchLogKeys
 from ._logger import _LOGGER
 from typing import Optional
@@ -212,6 +212,8 @@ class ModelCheckpoint(Callback):
         self.monitor = monitor
         self.save_best_only = save_best_only
         self.verbose = verbose
+        if checkpoint_name:
+            checkpoint_name = sanitize_filename(checkpoint_name)
         self.checkpoint_name = checkpoint_name
 
         # State variables to be managed during training
```

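The only behavioural change here is that a user-supplied `checkpoint_name` is now passed through `sanitize_filename` before being stored on the callback. A minimal sketch of that effect, assuming the import path shown in the diff (`ml_tools.path_manager`); the input string is invented, and the exact characters that get replaced depend on `sanitize_filename`'s implementation, which is not part of this diff:

```python
# Hypothetical illustration of the new sanitisation step in ModelCheckpoint.
# Only the import visible in the diff is used; the example name is made up.
from ml_tools.path_manager import sanitize_filename

raw_name = "best model: run 3?"            # awkward as a filename
safe_name = sanitize_filename(raw_name)    # returns a filesystem-safe variant
print(safe_name)                           # exact output depends on the implementation
```
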
{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_datasetmaster.py

```diff
@@ -16,6 +16,7 @@ from pathlib import Path
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
+from .custom_logger import save_list_strings
 
 
 # --- public-facing API ---
@@ -144,6 +145,9 @@ class DatasetMaker(_BaseMaker):
         self.features = pandas_df.drop(columns=label_col)
         self.labels_map = None
         self.scaler = None
+
+        self._feature_names = self.features.columns.tolist()
+        self._target_name = str(self.labels.name)
 
         self._is_split = False
         self._is_balanced = False
@@ -347,6 +351,23 @@ class DatasetMaker(_BaseMaker):
         if not self._is_split:
             raise RuntimeError("Data has not been split yet. Call .split_data() or .process() first.")
         return self.features_train, self.features_test, self.labels_train, self.labels_test # type: ignore
+
+    @property
+    def feature_names(self) -> list[str]:
+        """Returns the list of feature column names."""
+        return self._feature_names
+
+    @property
+    def target_name(self) -> str:
+        """Returns the name of the target column."""
+        return self._target_name
+
+    def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """Saves a list of feature names as a text file"""
+        save_list_strings(list_strings=self._feature_names,
+                          directory=directory,
+                          filename="feature_names",
+                          verbose=verbose)
 
     @staticmethod
     def _embed_categorical(cat_df: pandas.DataFrame, random_state: Optional[int] = None, **kwargs) -> pandas.DataFrame:
@@ -413,7 +434,7 @@ class SimpleDatasetMaker:
         target = pandas_df.iloc[:, -1]
 
         self._feature_names = features.columns.tolist()
-        self._target_name = target.name
+        self._target_name = str(target.name)
 
         #set id
         self._id: Optional[str] = None
@@ -452,7 +473,7 @@ class SimpleDatasetMaker:
     @property
     def target_name(self) -> str:
         """Returns the name of the target column."""
-        return
+        return self._target_name
 
     @property
     def id(self) -> Optional[str]:
@@ -474,6 +495,13 @@ class SimpleDatasetMaker:
         print(f" X_test shape: {self._X_test_shape}")
         print(f" y_test shape: {self._y_test_shape}")
         print("-------------------------------------------")
+
+    def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """Saves a list of feature names as a text file"""
+        save_list_strings(list_strings=self._feature_names,
+                          directory=directory,
+                          filename="feature_names",
+                          verbose=verbose)
 
 
 # --- VisionDatasetMaker ---
```

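With these additions, both `DatasetMaker` and `SimpleDatasetMaker` expose the column names they were built from and can persist them via the new `save_list_strings` helper. A usage sketch, assuming `SimpleDatasetMaker` is constructed from a pandas DataFrame whose last column is the target (as the diffed code implies); the DataFrame contents and output directory are illustrative, and any constructor options beyond the DataFrame are omitted here:

```python
import pandas as pd
from ml_tools.ML_datasetmaster import SimpleDatasetMaker

# Toy frame: 'a' and 'b' are features, the last column 'y' is the target.
df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "y": [0, 1, 0, 1]})
maker = SimpleDatasetMaker(df)            # further constructor options not shown here

print(maker.feature_names)                # ['a', 'b']
print(maker.target_name)                  # 'y' (now actually returned, and always a str)
maker.save_feature_names("artifacts")     # writes artifacts/feature_names.txt
```
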
{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_inference.py

```diff
@@ -2,7 +2,7 @@ import torch
 from torch import nn
 import numpy as np
 from pathlib import Path
-from typing import Union, Literal, Dict, Any
+from typing import Union, Literal, Dict, Any, Optional
 
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -22,7 +22,8 @@ class PyTorchInferenceHandler:
                  model: nn.Module,
                  state_dict: Union[str, Path],
                  task: Literal["classification", "regression"],
-                 device: str = 'cpu'
+                 device: str = 'cpu',
+                 target_id: Optional[str]=None):
         """
         Initializes the handler by loading a model's state_dict.
 
@@ -31,10 +32,12 @@ class PyTorchInferenceHandler:
             state_dict (str | Path): The path to the saved .pth model state_dict file.
             task (str): The type of task, 'regression' or 'classification'.
             device (str): The device to run inference on ('cpu', 'cuda', 'mps').
+            target_id (str | None): Target name as used in the training set.
         """
         self.model = model
         self.task = task
         self.device = self._validate_device(device)
+        self.target_id = target_id
 
         model_p = make_fullpath(state_dict, enforce="file")
 
```

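The handler now carries an optional `target_id` so downstream code can recover the name of the target column used during training. A construction sketch using only the parameters visible in the diff; the model configuration, state_dict path, and target name are illustrative:

```python
from ml_tools.ML_models import MultilayerPerceptron
from ml_tools.ML_inference import PyTorchInferenceHandler

model = MultilayerPerceptron(in_features=10, out_targets=1,
                             hidden_layers=[64, 32], drop_out=0.2)

handler = PyTorchInferenceHandler(
    model=model,
    state_dict="outputs/best_model.pth",   # hypothetical path to a saved state_dict
    task="regression",
    device="cpu",
    target_id="y",                         # new in 6.3.0: target name from the training set
)
print(handler.target_id)                   # 'y'
```
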
{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/ML_models.py

```diff
@@ -1,12 +1,18 @@
 import torch
 from torch import nn
 from ._script_info import _script_info
-from typing import List
+from typing import List, Union
+from pathlib import Path
+import json
+from ._logger import _LOGGER
+from .path_manager import make_fullpath
 
 
 __all__ = [
     "MultilayerPerceptron",
-    "SequencePredictorLSTM"
+    "SequencePredictorLSTM",
+    "save_architecture",
+    "load_architecture"
 ]
 
 
@@ -45,6 +51,12 @@ class MultilayerPerceptron(nn.Module):
             raise TypeError("hidden_layers must be a list of integers.")
         if not (0.0 <= drop_out < 1.0):
             raise ValueError("drop_out must be a float between 0.0 and 1.0.")
+
+        # --- Save configuration ---
+        self.in_features = in_features
+        self.out_targets = out_targets
+        self.hidden_layers = hidden_layers
+        self.drop_out = drop_out
 
         # --- Build network layers ---
         layers = []
@@ -67,6 +79,15 @@ class MultilayerPerceptron(nn.Module):
         """Defines the forward pass of the model."""
         return self._layers(x)
 
+    def get_config(self) -> dict:
+        """Returns the configuration of the model."""
+        return {
+            'in_features': self.in_features,
+            'out_targets': self.out_targets,
+            'hidden_layers': self.hidden_layers,
+            'drop_out': self.drop_out
+        }
+
     def __repr__(self) -> str:
         """Returns the developer-friendly string representation of the model."""
         # Extracts the number of neurons from each nn.Linear layer
@@ -114,7 +135,14 @@ class SequencePredictorLSTM(nn.Module):
             raise ValueError("recurrent_layers must be a positive integer.")
         if not (0.0 <= dropout < 1.0):
             raise ValueError("dropout must be a float between 0.0 and 1.0.")
-
+
+        # --- Save configuration ---
+        self.features = features
+        self.hidden_size = hidden_size
+        self.recurrent_layers = recurrent_layers
+        self.dropout = dropout
+
+        # Build model
         self.lstm = nn.LSTM(
             input_size=features,
             hidden_size=hidden_size,
@@ -144,6 +172,15 @@ class SequencePredictorLSTM(nn.Module):
 
         return predictions
 
+    def get_config(self) -> dict:
+        """Returns the configuration of the model."""
+        return {
+            'features': self.features,
+            'hidden_size': self.hidden_size,
+            'recurrent_layers': self.recurrent_layers,
+            'dropout': self.dropout
+        }
+
     def __repr__(self) -> str:
         """Returns the developer-friendly string representation of the model."""
         return (
@@ -153,5 +190,80 @@ class SequencePredictorLSTM(nn.Module):
         )
 
 
+def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bool=True):
+    """
+    Saves a model's architecture to a 'architecture.json' file.
+
+    This function relies on the model having a `get_config()` method that
+    returns a dictionary of the arguments needed to initialize it.
+
+    Args:
+        model (nn.Module): The PyTorch model instance to save.
+        directory (str | Path): The directory to save the JSON file.
+
+    Raises:
+        AttributeError: If the model does not have a `get_config()` method.
+    """
+    if not hasattr(model, 'get_config'):
+        raise AttributeError(
+            f"Model '{model.__class__.__name__}' does not have a 'get_config()' method. "
+            "Please implement it to return the model's constructor arguments."
+        )
+
+    # Ensure the target directory exists
+    path_dir = make_fullpath(directory, make=True, enforce="directory")
+    full_path = path_dir / "architecture.json"
+
+    config = {
+        'model_class': model.__class__.__name__,
+        'config': model.get_config() # type: ignore
+    }
+
+    with open(full_path, 'w') as f:
+        json.dump(config, f, indent=4)
+
+    if verbose:
+        _LOGGER.info(f"✅ Architecture for '{model.__class__.__name__}' saved to '{path_dir}'")
+
+
+def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
+    """
+    Loads a model architecture from a JSON file.
+
+    This function instantiates a model by providing an explicit class to use
+    and checking that it matches the class name specified in the file.
+
+    Args:
+        filepath (Union[str, Path]): The path of the JSON architecture file.
+        expected_model_class (type): The model class expected to load (e.g., MultilayerPerceptron).
+
+    Returns:
+        nn.Module: An instance of the model with a freshly initialized state.
+
+    Raises:
+        FileNotFoundError: If the filepath does not exist.
+        ValueError: If the class name in the file does not match the `expected_model_class`.
+    """
+    path_obj = make_fullpath(filepath, enforce="file")
+
+    with open(path_obj, 'r') as f:
+        saved_data = json.load(f)
+
+    saved_class_name = saved_data['model_class']
+    config = saved_data['config']
+
+    if saved_class_name != expected_model_class.__name__:
+        raise ValueError(
+            f"Model class mismatch. File specifies '{saved_class_name}', "
+            f"but you expected '{expected_model_class.__name__}'."
+        )
+
+    # Create an instance of the model using the provided class and config
+    model = expected_model_class(**config)
+    if verbose:
+        _LOGGER.info(f"✅ Successfully loaded architecture for '{saved_class_name}'")
+    return model
+
+
 def info():
     _script_info(__all__)
```

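Together, `get_config()`, `save_architecture()`, and `load_architecture()` give a JSON round-trip for a model's constructor arguments (weights are not included and still go through the usual state_dict path). A round-trip sketch using only the names added in this diff; the layer sizes and output directory are illustrative:

```python
from ml_tools.ML_models import MultilayerPerceptron, save_architecture, load_architecture

model = MultilayerPerceptron(in_features=12, out_targets=3,
                             hidden_layers=[64, 32], drop_out=0.1)
print(model.get_config())               # {'in_features': 12, 'out_targets': 3, ...}

save_architecture(model, "artifacts")   # writes artifacts/architecture.json

# Rebuild a fresh, untrained instance; the class name is checked against the file.
rebuilt = load_architecture("artifacts/architecture.json",
                            expected_model_class=MultilayerPerceptron)
```
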
{dragon_ml_toolbox-6.2.0 → dragon_ml_toolbox-6.3.0}/ml_tools/custom_logger.py

```diff
@@ -10,7 +10,9 @@ from ._logger import _LOGGER
 
 
 __all__ = [
-    "custom_logger"
+    "custom_logger",
+    "save_list_strings",
+    "load_list_strings"
 ]
 
 
@@ -136,5 +138,39 @@ def _log_dict_to_json(data: Dict[Any, Any], path: Path) -> None:
         json.dump(data, f, indent=4, ensure_ascii=False)
 
 
+def save_list_strings(list_strings: list[str], directory: Union[str,Path], filename: str, verbose: bool=True):
+    """Saves a list of strings as a text file."""
+    target_dir = make_fullpath(directory, make=True, enforce="directory")
+    sanitized_name = sanitize_filename(filename)
+
+    if not sanitized_name.endswith(".txt"):
+        sanitized_name = sanitized_name + ".txt"
+
+    full_path = target_dir / sanitized_name
+    with open(full_path, 'w') as f:
+        for string_data in list_strings:
+            f.write(f"{string_data}\n")
+
+    if verbose:
+        _LOGGER.info(f"✅ Text file saved as '{full_path.name}'.")
+
+
+def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[str]:
+    """Loads a text file as a list of strings."""
+    target_path = make_fullpath(text_file, enforce="file")
+    loaded_strings = []
+
+    with open(target_path, 'r') as f:
+        loaded_strings = [line.strip() for line in f]
+
+    if len(loaded_strings) == 0:
+        raise ValueError("❌ The text file is empty.")
+
+    if verbose:
+        _LOGGER.info(f"✅ Text file loaded as list of strings.")
+
+    return loaded_strings
+
+
 def info():
     _script_info(__all__)
```

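The two new helpers are a thin text-file round-trip: one string per line on save (with a `.txt` suffix appended when missing), stripped lines on load. A short sketch; the directory and filename are illustrative:

```python
from ml_tools.custom_logger import save_list_strings, load_list_strings

names = ["age", "height", "weight"]
save_list_strings(list_strings=names, directory="artifacts", filename="feature_names")

restored = load_list_strings("artifacts/feature_names.txt")
assert restored == names
```
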