dragon-ml-toolbox 6.2.1__tar.gz → 6.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dragon-ml-toolbox might be problematic.

Files changed (39)
  1. {dragon_ml_toolbox-6.2.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-6.4.0}/PKG-INFO +14 -1
  2. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/README.md +13 -0
  3. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0/dragon_ml_toolbox.egg-info}/PKG-INFO +14 -1
  4. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ML_datasetmaster.py +30 -2
  5. dragon_ml_toolbox-6.4.0/ml_tools/ML_inference.py +287 -0
  6. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ML_models.py +115 -3
  7. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/custom_logger.py +37 -1
  8. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/pyproject.toml +1 -1
  9. dragon_ml_toolbox-6.2.1/ml_tools/ML_inference.py +0 -137
  10. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/LICENSE +0 -0
  11. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/LICENSE-THIRD-PARTY.md +0 -0
  12. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  13. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  14. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  15. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  16. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ETL_engineering.py +0 -0
  17. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/GUI_tools.py +0 -0
  18. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/MICE_imputation.py +0 -0
  19. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ML_callbacks.py +0 -0
  20. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ML_evaluation.py +0 -0
  21. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ML_optimization.py +0 -0
  22. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ML_trainer.py +0 -0
  23. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/PSO_optimization.py +0 -0
  24. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/RNN_forecast.py +0 -0
  25. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/SQL.py +0 -0
  26. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/VIF_factor.py +0 -0
  27. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/__init__.py +0 -0
  28. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/_logger.py +0 -0
  29. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/_script_info.py +0 -0
  30. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/data_exploration.py +0 -0
  31. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ensemble_evaluation.py +0 -0
  32. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ensemble_inference.py +0 -0
  33. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/ensemble_learning.py +0 -0
  34. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/handle_excel.py +0 -0
  35. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/keys.py +0 -0
  36. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/optimization_tools.py +0 -0
  37. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/path_manager.py +0 -0
  38. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/ml_tools/utilities.py +0 -0
  39. {dragon_ml_toolbox-6.2.1 → dragon_ml_toolbox-6.4.0}/setup.cfg +0 -0
--- dragon_ml_toolbox-6.2.1/dragon_ml_toolbox.egg-info/PKG-INFO
+++ dragon_ml_toolbox-6.4.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 6.2.1
+Version: 6.4.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -160,6 +160,8 @@ SQL
 utilities
 ```
 
+---
+
 ### 🔬 MICE Imputation and Variance Inflation Factor [mice]
 
 ⚠️ Important: This group has strict version requirements. It is highly recommended to install this group in a separate virtual environment.
@@ -178,6 +180,8 @@ path_manager
 utilities
 ```
 
+---
+
 ### 📋 Excel File Handling [excel]
 
 Installs dependencies required to process and handle .xlsx or .xls files.
@@ -194,6 +198,8 @@ handle_excel
 path_manager
 ```
 
+---
+
 ### 🎰 GUI for Boosting Algorithms (XGBoost, LightGBM) [gui-boost]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -215,6 +221,8 @@ ensemble_inference
 path_manager
 ```
 
+---
+
 ### 🤖 GUI for PyTorch Models [gui-torch]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -232,10 +240,13 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 ```Bash
 custom_logger
 GUI_tools
+ML_models
 ML_inference
 path_manager
 ```
 
+---
+
 ### 🎫 Base Tools [base]
 
 General purpose functions and classes.
@@ -254,6 +265,8 @@ utilities
 path_manager
 ```
 
+---
+
 ### ⚒️ APP bundlers
 
 Choose one if needed.
--- dragon_ml_toolbox-6.2.1/README.md
+++ dragon_ml_toolbox-6.4.0/README.md
@@ -79,6 +79,8 @@ SQL
 utilities
 ```
 
+---
+
 ### 🔬 MICE Imputation and Variance Inflation Factor [mice]
 
 ⚠️ Important: This group has strict version requirements. It is highly recommended to install this group in a separate virtual environment.
@@ -97,6 +99,8 @@ path_manager
 utilities
 ```
 
+---
+
 ### 📋 Excel File Handling [excel]
 
 Installs dependencies required to process and handle .xlsx or .xls files.
@@ -113,6 +117,8 @@ handle_excel
 path_manager
 ```
 
+---
+
 ### 🎰 GUI for Boosting Algorithms (XGBoost, LightGBM) [gui-boost]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -134,6 +140,8 @@ ensemble_inference
 path_manager
 ```
 
+---
+
 ### 🤖 GUI for PyTorch Models [gui-torch]
 
 For GUIs that include plotting functionality, you must also install the [plot] extra.
@@ -151,10 +159,13 @@ pip install "dragon-ml-toolbox[gui-torch,plot]"
 ```Bash
 custom_logger
 GUI_tools
+ML_models
 ML_inference
 path_manager
 ```
 
+---
+
 ### 🎫 Base Tools [base]
 
 General purpose functions and classes.
@@ -173,6 +184,8 @@ utilities
 path_manager
 ```
 
+---
+
 ### ⚒️ APP bundlers
 
 Choose one if needed.
--- dragon_ml_toolbox-6.2.1/PKG-INFO
+++ dragon_ml_toolbox-6.4.0/dragon_ml_toolbox.egg-info/PKG-INFO
(Identical to the PKG-INFO diff above: version bump to 6.4.0, new `---` section separators, and ML_models added to the [gui-torch] module list.)
--- dragon_ml_toolbox-6.2.1/ml_tools/ML_datasetmaster.py
+++ dragon_ml_toolbox-6.4.0/ml_tools/ML_datasetmaster.py
@@ -16,6 +16,7 @@ from pathlib import Path
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
+from .custom_logger import save_list_strings
 
 
 # --- public-facing API ---
@@ -144,6 +145,9 @@ class DatasetMaker(_BaseMaker):
         self.features = pandas_df.drop(columns=label_col)
         self.labels_map = None
         self.scaler = None
+
+        self._feature_names = self.features.columns.tolist()
+        self._target_name = str(self.labels.name)
 
         self._is_split = False
         self._is_balanced = False
@@ -347,6 +351,23 @@ class DatasetMaker(_BaseMaker):
         if not self._is_split:
             raise RuntimeError("Data has not been split yet. Call .split_data() or .process() first.")
         return self.features_train, self.features_test, self.labels_train, self.labels_test # type: ignore
+
+    @property
+    def feature_names(self) -> list[str]:
+        """Returns the list of feature column names."""
+        return self._feature_names
+
+    @property
+    def target_name(self) -> str:
+        """Returns the name of the target column."""
+        return self._target_name
+
+    def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """Saves a list of feature names as a text file"""
+        save_list_strings(list_strings=self._feature_names,
+                          directory=directory,
+                          filename="feature_names",
+                          verbose=verbose)
 
     @staticmethod
     def _embed_categorical(cat_df: pandas.DataFrame, random_state: Optional[int] = None, **kwargs) -> pandas.DataFrame:
@@ -413,7 +434,7 @@ class SimpleDatasetMaker:
         target = pandas_df.iloc[:, -1]
 
         self._feature_names = features.columns.tolist()
-        self._target_name = target.name
+        self._target_name = str(target.name)
 
         #set id
         self._id: Optional[str] = None
@@ -452,7 +473,7 @@ class SimpleDatasetMaker:
     @property
     def target_name(self) -> str:
         """Returns the name of the target column."""
-        return str(self._target_name)
+        return self._target_name
 
     @property
     def id(self) -> Optional[str]:
@@ -474,6 +495,13 @@ class SimpleDatasetMaker:
         print(f"  X_test shape: {self._X_test_shape}")
         print(f"  y_test shape: {self._y_test_shape}")
         print("-------------------------------------------")
+
+    def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """Saves a list of feature names as a text file"""
+        save_list_strings(list_strings=self._feature_names,
+                          directory=directory,
+                          filename="feature_names",
+                          verbose=verbose)
 
 
 # --- VisionDatasetMaker ---
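
The `save_feature_names` helper added to both dataset makers delegates to the new `save_list_strings` (see the custom_logger.py diff further below). A minimal usage sketch, assuming `SimpleDatasetMaker` is constructed from a DataFrame whose last column is the target (as the `iloc[:, -1]` context line suggests); the constructor signature and the `artifacts` path are illustrative, not confirmed by this diff:

```python
import pandas as pd
from ml_tools.ML_datasetmaster import SimpleDatasetMaker

# Last column is treated as the target, per the diff's `iloc[:, -1]`.
df = pd.DataFrame({
    "temp": [20.1, 22.4, 19.8],
    "pressure": [1.01, 0.99, 1.02],
    "yield_pct": [0.62, 0.71, 0.58],
})

maker = SimpleDatasetMaker(df)  # assumed constructor signature
print(maker.feature_names)      # ['temp', 'pressure']
print(maker.target_name)        # 'yield_pct'

# New in 6.4.0: writes 'feature_names.txt' (one name per line)
# via custom_logger.save_list_strings.
maker.save_feature_names(directory="artifacts")
```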
--- /dev/null
+++ dragon_ml_toolbox-6.4.0/ml_tools/ML_inference.py
@@ -0,0 +1,287 @@
+import torch
+from torch import nn
+import numpy as np
+from pathlib import Path
+from typing import Union, Literal, Dict, Any, Optional
+
+from ._script_info import _script_info
+from ._logger import _LOGGER
+from .path_manager import make_fullpath
+from .keys import PyTorchInferenceKeys
+
+__all__ = [
+    "PyTorchInferenceHandler",
+    "multi_inference_regression",
+    "multi_inference_classification"
+]
+
+class PyTorchInferenceHandler:
+    """
+    Handles loading a PyTorch model's state dictionary and performing inference
+    for either regression or classification tasks.
+    """
+    def __init__(self,
+                 model: nn.Module,
+                 state_dict: Union[str, Path],
+                 task: Literal["classification", "regression"],
+                 device: str = 'cpu',
+                 target_id: Optional[str] = None):
+        """
+        Initializes the handler by loading a model's state_dict.
+
+        Args:
+            model (nn.Module): An instantiated PyTorch model with the correct architecture.
+            state_dict (str | Path): The path to the saved .pth model state_dict file.
+            task (str): The type of task, 'regression' or 'classification'.
+            device (str): The device to run inference on ('cpu', 'cuda', 'mps').
+            target_id (str | None): Target name as used in the training set.
+        """
+        self.model = model
+        self.task = task
+        self.device = self._validate_device(device)
+        self.target_id = target_id
+
+        model_p = make_fullpath(state_dict, enforce="file")
+
+        try:
+            # Load the state dictionary and apply it to the model structure
+            self.model.load_state_dict(torch.load(model_p, map_location=self.device))
+            self.model.to(self.device)
+            self.model.eval()  # Set the model to evaluation mode
+            _LOGGER.info(f"✅ Model state loaded from '{model_p.name}' and set to evaluation mode.")
+        except Exception as e:
+            _LOGGER.error(f"❌ Failed to load model state from '{model_p}': {e}")
+            raise
+
+    def _validate_device(self, device: str) -> torch.device:
+        """Validates the selected device and returns a torch.device object."""
+        device_lower = device.lower()
+        if "cuda" in device_lower and not torch.cuda.is_available():
+            _LOGGER.warning("⚠️ CUDA not available, switching to CPU.")
+            device_lower = "cpu"
+        elif device_lower == "mps" and not torch.backends.mps.is_available():
+            _LOGGER.warning("⚠️ Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
+            device_lower = "cpu"
+        return torch.device(device_lower)
+
+    def _preprocess_input(self, features: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
+        """Converts input to a torch.Tensor and moves it to the correct device."""
+        if isinstance(features, np.ndarray):
+            features = torch.from_numpy(features).float()
+
+        # Ensure the tensor is on the correct device
+        return features.to(self.device)
+
+    def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """
+        Core batch prediction method. Returns results as PyTorch tensors on the model's device.
+        """
+        if features.ndim != 2:
+            raise ValueError("Input for batch prediction must be a 2D array or tensor.")
+
+        input_tensor = self._preprocess_input(features)
+
+        with torch.no_grad():
+            # Output tensor remains on the model's device (e.g., 'mps' or 'cuda')
+            output = self.model(input_tensor)
+
+        if self.task == "classification":
+            probs = nn.functional.softmax(output, dim=1)
+            labels = torch.argmax(probs, dim=1)
+            return {
+                PyTorchInferenceKeys.LABELS: labels,
+                PyTorchInferenceKeys.PROBABILITIES: probs
+            }
+        else:  # regression
+            return {PyTorchInferenceKeys.PREDICTIONS: output}
+
+    def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """
+        Core single-sample prediction. Returns results as PyTorch tensors on the model's device.
+        """
+        if features.ndim == 1:
+            features = features.reshape(1, -1)
+
+        if features.shape[0] != 1:
+            raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
+
+        batch_results = self.predict_batch(features)
+
+        single_results = {key: value[0] for key, value in batch_results.items()}
+        return single_results
+
+    # --- NumPy Convenience Wrappers (on CPU) ---
+
+    def predict_batch_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, np.ndarray]:
+        """
+        Convenience wrapper for predict_batch that returns NumPy arrays.
+        """
+        tensor_results = self.predict_batch(features)
+        # Move tensors to CPU before converting to NumPy
+        numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
+        return numpy_results
+
+    def predict_numpy(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
+        """
+        Convenience wrapper for predict that returns NumPy arrays or scalars.
+        """
+        tensor_results = self.predict(features)
+
+        if self.task == "regression":
+            # .item() implicitly moves to CPU
+            return {PyTorchInferenceKeys.PREDICTIONS: tensor_results[PyTorchInferenceKeys.PREDICTIONS].item()}
+        else:  # classification
+            return {
+                PyTorchInferenceKeys.LABELS: tensor_results[PyTorchInferenceKeys.LABELS].item(),
+                # Move the tensor to CPU before converting to NumPy
+                PyTorchInferenceKeys.PROBABILITIES: tensor_results[PyTorchInferenceKeys.PROBABILITIES].cpu().numpy()
+            }
+
+
+def multi_inference_regression(handlers: list[PyTorchInferenceHandler],
+                               feature_vector: Union[np.ndarray, torch.Tensor],
+                               output: Literal["numpy", "torch"] = "numpy") -> dict[str, Any]:
+    """
+    Performs regression inference using multiple models on the same input.
+
+    This function iterates through a list of PyTorchInferenceHandler objects,
+    each configured for a different regression target. It runs a prediction for
+    each handler using the same input feature vector and returns the results
+    in a dictionary.
+
+    The function adapts its behavior based on the input dimensions:
+    - 1D input: Returns a dictionary mapping target ID to a single value.
+    - 2D input: Returns a dictionary mapping target ID to an array of values.
+
+    Args:
+        handlers (list[PyTorchInferenceHandler]): A list of initialized inference
+            handlers. Each handler must have a unique `target_id` and be
+            configured with `task="regression"`.
+        feature_vector (np.ndarray | torch.Tensor): An input sample (1D) or a
+            batch of samples (2D) to be fed into each regression model.
+        output (Literal["numpy", "torch"], optional): The desired format for the
+            output predictions.
+            - "numpy": Returns predictions as Python scalars or NumPy arrays.
+            - "torch": Returns predictions as PyTorch tensors.
+
+    Returns:
+        (dict[str, Any]): A dictionary mapping each handler's `target_id` to its
+            predicted regression values.
+
+    Raises:
+        AttributeError: If any handler in the list is missing a `target_id`.
+        ValueError: If any handler's `task` is not 'regression' or if the input
+            `feature_vector` is not 1D or 2D.
+    """
+    # Check the batch dimension
+    is_single_sample = feature_vector.ndim == 1
+
+    # Reshape a 1D vector to a 2D batch of one for uniform processing
+    if is_single_sample:
+        feature_vector = feature_vector.reshape(1, -1)
+
+    # Validate that the input is a 2D tensor
+    if feature_vector.ndim != 2:
+        raise ValueError("Input feature_vector must be a 1D or 2D array/tensor.")
+
+    results: dict[str, Any] = dict()
+    for handler in handlers:
+        # Validation
+        if handler.target_id is None:
+            raise AttributeError("All inference handlers must have a 'target_id' attribute.")
+        if handler.task != "regression":
+            raise ValueError(
+                f"Invalid task type: The handler for target_id '{handler.target_id}' "
+                f"is for '{handler.task}', but only 'regression' tasks are supported."
+            )
+        # Inference
+        if output == "numpy":
+            result = handler.predict_batch_numpy(feature_vector)[PyTorchInferenceKeys.PREDICTIONS]
+        else:  # torch
+            result = handler.predict_batch(feature_vector)[PyTorchInferenceKeys.PREDICTIONS]
+
+        # If the original input was 1D, extract the single prediction from the array
+        if is_single_sample:
+            results[handler.target_id] = result[0]
+        else:
+            results[handler.target_id] = result
+
+    return results
+
+
+def multi_inference_classification(
+        handlers: list[PyTorchInferenceHandler],
+        feature_vector: Union[np.ndarray, torch.Tensor],
+        output: Literal["numpy", "torch"] = "numpy"
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+    """
+    Performs classification inference on a single sample or a batch.
+
+    This function iterates through a list of PyTorchInferenceHandler objects,
+    each configured for a different classification target. It returns two
+    dictionaries: one for the predicted labels and one for the probabilities.
+
+    The function adapts its behavior based on the input dimensions:
+    - 1D input: The dictionaries map target ID to a single label and a single probability array.
+    - 2D input: The dictionaries map target ID to an array of labels and an array of probability arrays.
+
+    Args:
+        handlers (list[PyTorchInferenceHandler]): A list of initialized inference
+            handlers. Each must have a unique `target_id` and be configured
+            with `task="classification"`.
+        feature_vector (np.ndarray | torch.Tensor): An input sample (1D)
+            or a batch of samples (2D) for prediction.
+        output (Literal["numpy", "torch"], optional): The desired format for the
+            output predictions.
+
+    Returns:
+        (tuple[dict[str, Any], dict[str, Any]]): A tuple containing two dictionaries:
+            1. A dictionary mapping `target_id` to the predicted label(s).
+            2. A dictionary mapping `target_id` to the prediction probabilities.
+
+    Raises:
+        AttributeError: If any handler in the list is missing a `target_id`.
+        ValueError: If any handler's `task` is not 'classification' or if the
+            input `feature_vector` is not 1D or 2D.
+    """
+    # Store whether the original input was a single sample
+    is_single_sample = feature_vector.ndim == 1
+
+    # Reshape a 1D vector to a 2D batch of one for uniform processing
+    if is_single_sample:
+        feature_vector = feature_vector.reshape(1, -1)
+
+    if feature_vector.ndim != 2:
+        raise ValueError("Input feature_vector must be a 1D or 2D array/tensor.")
+
+    # Initialize two dictionaries for results
+    labels_results: dict[str, Any] = dict()
+    probs_results: dict[str, Any] = dict()
+
+    for handler in handlers:
+        # Validation
+        if handler.target_id is None:
+            raise AttributeError("All inference handlers must have a 'target_id' attribute.")
+        if handler.task != "classification":
+            raise ValueError(
+                f"Invalid task type: The handler for target_id '{handler.target_id}' "
+                f"is for '{handler.task}', but this function only supports 'classification'."
+            )
+
+        # Always use the batch method to get both labels and probabilities
+        if output == "numpy":
+            result = handler.predict_batch_numpy(feature_vector)
+        else:  # torch
+            result = handler.predict_batch(feature_vector)
+
+        labels = result[PyTorchInferenceKeys.LABELS]
+        probabilities = result[PyTorchInferenceKeys.PROBABILITIES]
+
+        # If the original input was 1D, unpack the single result from the batch array
+        if is_single_sample:
+            labels_results[handler.target_id] = labels[0]
+            probs_results[handler.target_id] = probabilities[0]
+        else:
+            labels_results[handler.target_id] = labels
+            probs_results[handler.target_id] = probabilities
+
+    return labels_results, probs_results
+
+
+def info():
+    _script_info(__all__)
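
New here relative to 6.2.1 are the `target_id` field and the two `multi_inference_*` helpers, which fan one input out across several single-target models. A hedged sketch of the intended call pattern; the model constructor arguments, checkpoint paths, and target names are invented for illustration:

```python
import numpy as np
from ml_tools.ML_models import MultilayerPerceptron
from ml_tools.ML_inference import PyTorchInferenceHandler, multi_inference_regression

# One handler per regression target; target_id (new in 6.4.0) keys the results.
handlers = [
    PyTorchInferenceHandler(
        model=MultilayerPerceptron(in_features=8, out_targets=1),  # assumes defaults for hidden_layers/drop_out
        state_dict=f"models/{name}.pth",  # hypothetical checkpoint paths
        task="regression",
        device="cpu",
        target_id=name,
    )
    for name in ("viscosity", "density")
]

x = np.random.rand(8).astype(np.float32)  # a single 8-feature sample (1D)
preds = multi_inference_regression(handlers, x, output="numpy")
# 1D input -> one scalar per target, e.g. {'viscosity': ..., 'density': ...}
print(preds)
```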
--- dragon_ml_toolbox-6.2.1/ml_tools/ML_models.py
+++ dragon_ml_toolbox-6.4.0/ml_tools/ML_models.py
@@ -1,12 +1,18 @@
 import torch
 from torch import nn
 from ._script_info import _script_info
-from typing import List
+from typing import List, Union
+from pathlib import Path
+import json
+from ._logger import _LOGGER
+from .path_manager import make_fullpath
 
 
 __all__ = [
     "MultilayerPerceptron",
-    "SequencePredictorLSTM"
+    "SequencePredictorLSTM",
+    "save_architecture",
+    "load_architecture"
 ]
 
 
@@ -45,6 +51,12 @@ class MultilayerPerceptron(nn.Module):
             raise TypeError("hidden_layers must be a list of integers.")
         if not (0.0 <= drop_out < 1.0):
             raise ValueError("drop_out must be a float between 0.0 and 1.0.")
+
+        # --- Save configuration ---
+        self.in_features = in_features
+        self.out_targets = out_targets
+        self.hidden_layers = hidden_layers
+        self.drop_out = drop_out
 
         # --- Build network layers ---
         layers = []
@@ -67,6 +79,15 @@ class MultilayerPerceptron(nn.Module):
         """Defines the forward pass of the model."""
         return self._layers(x)
 
+    def get_config(self) -> dict:
+        """Returns the configuration of the model."""
+        return {
+            'in_features': self.in_features,
+            'out_targets': self.out_targets,
+            'hidden_layers': self.hidden_layers,
+            'drop_out': self.drop_out
+        }
+
     def __repr__(self) -> str:
         """Returns the developer-friendly string representation of the model."""
         # Extracts the number of neurons from each nn.Linear layer
@@ -114,7 +135,14 @@ class SequencePredictorLSTM(nn.Module):
             raise ValueError("recurrent_layers must be a positive integer.")
         if not (0.0 <= dropout < 1.0):
             raise ValueError("dropout must be a float between 0.0 and 1.0.")
-
+
+        # --- Save configuration ---
+        self.features = features
+        self.hidden_size = hidden_size
+        self.recurrent_layers = recurrent_layers
+        self.dropout = dropout
+
+        # Build model
         self.lstm = nn.LSTM(
             input_size=features,
             hidden_size=hidden_size,
@@ -144,6 +172,15 @@ class SequencePredictorLSTM(nn.Module):
 
         return predictions
 
+    def get_config(self) -> dict:
+        """Returns the configuration of the model."""
+        return {
+            'features': self.features,
+            'hidden_size': self.hidden_size,
+            'recurrent_layers': self.recurrent_layers,
+            'dropout': self.dropout
+        }
+
     def __repr__(self) -> str:
         """Returns the developer-friendly string representation of the model."""
         return (
@@ -153,5 +190,80 @@ class SequencePredictorLSTM(nn.Module):
         )
 
 
+def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bool=True):
+    """
+    Saves a model's architecture to an 'architecture.json' file.
+
+    This function relies on the model having a `get_config()` method that
+    returns a dictionary of the arguments needed to initialize it.
+
+    Args:
+        model (nn.Module): The PyTorch model instance to save.
+        directory (str | Path): The directory in which to save the JSON file.
+
+    Raises:
+        AttributeError: If the model does not have a `get_config()` method.
+    """
+    if not hasattr(model, 'get_config'):
+        raise AttributeError(
+            f"Model '{model.__class__.__name__}' does not have a 'get_config()' method. "
+            "Please implement it to return the model's constructor arguments."
+        )
+
+    # Ensure the target directory exists
+    path_dir = make_fullpath(directory, make=True, enforce="directory")
+    full_path = path_dir / "architecture.json"
+
+    config = {
+        'model_class': model.__class__.__name__,
+        'config': model.get_config()  # type: ignore
+    }
+
+    with open(full_path, 'w') as f:
+        json.dump(config, f, indent=4)
+
+    if verbose:
+        _LOGGER.info(f"✅ Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")
+
+
+def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
+    """
+    Loads a model architecture from a JSON file.
+
+    This function instantiates a model from an explicitly provided class,
+    checking that it matches the class name specified in the file.
+
+    Args:
+        filepath (str | Path): The path of the JSON architecture file.
+        expected_model_class (type): The model class expected to load (e.g., MultilayerPerceptron).
+
+    Returns:
+        nn.Module: An instance of the model with a freshly initialized state.
+
+    Raises:
+        FileNotFoundError: If the filepath does not exist.
+        ValueError: If the class name in the file does not match the `expected_model_class`.
+    """
+    path_obj = make_fullpath(filepath, enforce="file")
+
+    with open(path_obj, 'r') as f:
+        saved_data = json.load(f)
+
+    saved_class_name = saved_data['model_class']
+    config = saved_data['config']
+
+    if saved_class_name != expected_model_class.__name__:
+        raise ValueError(
+            f"Model class mismatch. File specifies '{saved_class_name}', "
+            f"but you expected '{expected_model_class.__name__}'."
+        )
+
+    # Create an instance of the model using the provided class and config
+    model = expected_model_class(**config)
+    if verbose:
+        _LOGGER.info(f"✅ Successfully loaded architecture for '{saved_class_name}'")
+    return model
+
+
 def info():
     _script_info(__all__)
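
The `save_architecture`/`load_architecture` pair round-trips a model's constructor arguments through `architecture.json`. A minimal sketch; the constructor argument names come straight from `get_config()`, while the `artifacts/mlp` path is illustrative:

```python
from ml_tools.ML_models import (
    MultilayerPerceptron,
    save_architecture,
    load_architecture,
)

model = MultilayerPerceptron(in_features=8, out_targets=1,
                             hidden_layers=[32, 16], drop_out=0.1)

# Writes 'artifacts/mlp/architecture.json' holding the class name and get_config().
save_architecture(model, directory="artifacts/mlp")

# Rebuilds a fresh, untrained instance; raises ValueError if the file's
# class name does not match the expected class.
rebuilt = load_architecture("artifacts/mlp/architecture.json",
                            expected_model_class=MultilayerPerceptron)
```

Together with PyTorchInferenceHandler this gives a full restore path: architecture from JSON, weights from the saved .pth state_dict.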
--- dragon_ml_toolbox-6.2.1/ml_tools/custom_logger.py
+++ dragon_ml_toolbox-6.4.0/ml_tools/custom_logger.py
@@ -10,7 +10,9 @@ from ._logger import _LOGGER
 
 
 __all__ = [
-    "custom_logger"
+    "custom_logger",
+    "save_list_strings",
+    "load_list_strings"
 ]
 
 
@@ -136,5 +138,39 @@ def _log_dict_to_json(data: Dict[Any, Any], path: Path) -> None:
         json.dump(data, f, indent=4, ensure_ascii=False)
 
 
+def save_list_strings(list_strings: list[str], directory: Union[str, Path], filename: str, verbose: bool=True):
+    """Saves a list of strings as a text file."""
+    target_dir = make_fullpath(directory, make=True, enforce="directory")
+    sanitized_name = sanitize_filename(filename)
+
+    if not sanitized_name.endswith(".txt"):
+        sanitized_name = sanitized_name + ".txt"
+
+    full_path = target_dir / sanitized_name
+    with open(full_path, 'w') as f:
+        for string_data in list_strings:
+            f.write(f"{string_data}\n")
+
+    if verbose:
+        _LOGGER.info(f"✅ Text file saved as '{full_path.name}'.")
+
+
+def load_list_strings(text_file: Union[str, Path], verbose: bool=True) -> list[str]:
+    """Loads a text file as a list of strings."""
+    target_path = make_fullpath(text_file, enforce="file")
+    loaded_strings = []
+
+    with open(target_path, 'r') as f:
+        loaded_strings = [line.strip() for line in f]
+
+    if len(loaded_strings) == 0:
+        raise ValueError("❌ The text file is empty.")
+
+    if verbose:
+        _LOGGER.info("✅ Text file loaded as a list of strings.")
+
+    return loaded_strings
+
+
 def info():
     _script_info(__all__)
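
A short sketch of the new text-file helpers (only the `artifacts` directory name is invented); `save_feature_names` in ML_datasetmaster.py is a thin wrapper over the first of these:

```python
from ml_tools.custom_logger import save_list_strings, load_list_strings

features = ["temp", "pressure", "flow_rate"]

# Sanitizes the filename, appends '.txt' if missing, writes one string per line.
save_list_strings(features, directory="artifacts", filename="feature_names")

# Reads the file back, stripping whitespace; raises ValueError on an empty file.
names = load_list_strings("artifacts/feature_names.txt")
assert names == features
```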
--- dragon_ml_toolbox-6.2.1/pyproject.toml
+++ dragon_ml_toolbox-6.4.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "6.2.1"
+version = "6.4.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }
--- dragon_ml_toolbox-6.2.1/ml_tools/ML_inference.py
+++ /dev/null
(Entire file removed; 6.4.0 ships a superset of this module as dragon_ml_toolbox-6.4.0/ml_tools/ML_inference.py, shown above. The deleted file contained the same PyTorchInferenceHandler without the `target_id` parameter and without the two `multi_inference_*` functions.)