PyPI - dragon-ml-toolbox - Versions diffs - 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl - Mend

dragon-ml-toolbox 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (32) hide show

dragon_ml_toolbox-4.1.0.dist-info/METADATA +253 -0
dragon_ml_toolbox-4.1.0.dist-info/RECORD +30 -0
ml_tools/ETL_engineering.py +2 -2
ml_tools/GUI_tools.py +2 -2
ml_tools/MICE_imputation.py +4 -3
ml_tools/ML_callbacks.py +8 -4
ml_tools/ML_evaluation.py +11 -6
ml_tools/ML_inference.py +131 -0
ml_tools/ML_trainer.py +17 -8
ml_tools/PSO_optimization.py +116 -62
ml_tools/RNN_forecast.py +5 -0
ml_tools/SQL.py +272 -0
ml_tools/VIF_factor.py +4 -3
ml_tools/_logger.py +36 -0
ml_tools/_pytorch_models.py +1 -1
ml_tools/_script_info.py +8 -0
ml_tools/{logger.py → custom_logger.py} +4 -66
ml_tools/data_exploration.py +2 -66
ml_tools/datasetmaster.py +3 -2
ml_tools/ensemble_inference.py +249 -0
ml_tools/ensemble_learning.py +40 -294
ml_tools/handle_excel.py +3 -2
ml_tools/keys.py +13 -2
ml_tools/path_manager.py +194 -31
ml_tools/utilities.py +2 -180
dragon_ml_toolbox-3.12.6.dist-info/METADATA +0 -137
dragon_ml_toolbox-3.12.6.dist-info/RECORD +0 -26
ml_tools/ML_tutorial.py +0 -300
{dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/WHEEL +0 -0
{dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE +0 -0
{dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
{dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/top_level.txt +0 -0

dragon_ml_toolbox-4.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,253 @@
+Metadata-Version: 2.4
+Name: dragon-ml-toolbox
+Version: 4.1.0
+Summary: A collection of tools for data science and machine learning projects.
+Author-email: Karl Loza <luigiloza@gmail.com>
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
+Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+License-File: LICENSE-THIRD-PARTY.md
+Provides-Extra: base
+Requires-Dist: pandas; extra == "base"
+Requires-Dist: numpy; extra == "base"
+Requires-Dist: polars; extra == "base"
+Requires-Dist: joblib; extra == "base"
+Provides-Extra: ml
+Requires-Dist: numpy; extra == "ml"
+Requires-Dist: pandas; extra == "ml"
+Requires-Dist: polars; extra == "ml"
+Requires-Dist: joblib; extra == "ml"
+Requires-Dist: scikit-learn; extra == "ml"
+Requires-Dist: matplotlib; extra == "ml"
+Requires-Dist: seaborn; extra == "ml"
+Requires-Dist: imbalanced-learn; extra == "ml"
+Requires-Dist: ipython; extra == "ml"
+Requires-Dist: ipykernel; extra == "ml"
+Requires-Dist: notebook; extra == "ml"
+Requires-Dist: jupyterlab; extra == "ml"
+Requires-Dist: ipywidgets; extra == "ml"
+Requires-Dist: xgboost; extra == "ml"
+Requires-Dist: lightgbm; extra == "ml"
+Requires-Dist: shap; extra == "ml"
+Requires-Dist: tqdm; extra == "ml"
+Requires-Dist: Pillow; extra == "ml"
+Provides-Extra: mice
+Requires-Dist: numpy<2.0; extra == "mice"
+Requires-Dist: pandas; extra == "mice"
+Requires-Dist: polars; extra == "mice"
+Requires-Dist: joblib; extra == "mice"
+Requires-Dist: miceforest>=6.0.0; extra == "mice"
+Requires-Dist: plotnine>=0.12; extra == "mice"
+Requires-Dist: matplotlib; extra == "mice"
+Requires-Dist: statsmodels; extra == "mice"
+Requires-Dist: lightgbm<=4.5.0; extra == "mice"
+Requires-Dist: shap; extra == "mice"
+Provides-Extra: pytorch
+Requires-Dist: torch; extra == "pytorch"
+Requires-Dist: torchvision; extra == "pytorch"
+Provides-Extra: excel
+Requires-Dist: pandas; extra == "excel"
+Requires-Dist: openpyxl; extra == "excel"
+Requires-Dist: ipython; extra == "excel"
+Requires-Dist: ipykernel; extra == "excel"
+Requires-Dist: notebook; extra == "excel"
+Requires-Dist: jupyterlab; extra == "excel"
+Requires-Dist: ipywidgets; extra == "excel"
+Provides-Extra: gui-boost
+Requires-Dist: numpy; extra == "gui-boost"
+Requires-Dist: joblib; extra == "gui-boost"
+Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-boost"
+Requires-Dist: pyinstaller; extra == "gui-boost"
+Requires-Dist: xgboost; extra == "gui-boost"
+Requires-Dist: lightgbm; extra == "gui-boost"
+Provides-Extra: gui-torch
+Requires-Dist: numpy; extra == "gui-torch"
+Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui-torch"
+Requires-Dist: pyinstaller; extra == "gui-torch"
+Provides-Extra: plot
+Requires-Dist: matplotlib; extra == "plot"
+Requires-Dist: seaborn; extra == "plot"
+Dynamic: license-file
+# dragon-ml-toolbox
+A collection of Python utilities for data science and machine learning, structured as a modular package for easy reuse and installation. This package has no base dependencies, allowing for lightweight and customized virtual environments.
+### Features:
+- Modular scripts for data exploration, logging, machine learning, and more.
+- Designed for seamless integration as a Git submodule or installable Python package.
+## Installation
+**Python 3.10+**
+### Via PyPI
+Install the latest stable release from PyPI:
+```bash
+pip install dragon-ml-toolbox
+```
+### Via GitHub (Editable)
+Clone the repository and install in editable mode (to include optional dependencies, append an extra to the path, e.g. `pip install -e ".[ML]"`):
+```bash
+git clone https://github.com/DrAg0n-BoRn/ML_tools.git
+cd ML_tools
+pip install -e .
+```
+### Via conda-forge
+Install from the conda-forge channel:
+```bash
+conda install -c conda-forge dragon-ml-toolbox
+```
+## Modular Installation
+### 📦 Core Machine Learning Toolbox [ML]
+Installs a comprehensive set of tools for typical data science workflows, including data manipulation, modeling, and evaluation. PyTorch is required.
+```Bash
+pip install "dragon-ml-toolbox[ML]"
+```
+To install the standard CPU-only versions of Torch and Torchvision:
+```Bash
+pip install "dragon-ml-toolbox[pytorch]"
+```
+⚠️ To make use of GPU acceleration (highly recommended), follow the official instructions: [PyTorch website](https://pytorch.org/get-started/locally/)
+#### Modules:
+```bash
+custom_logger
+data_exploration
+datasetmaster
+ensemble_learning
+ensemble_inference
+ETL_engineering
+ML_callbacks
+ML_evaluation
+ML_trainer
+ML_inference
+path_manager
+PSO_optimization
+SQL
+RNN_forecast
+utilities
+```
+### 🔬 MICE Imputation and Variance Inflation Factor [mice]
+⚠️ Important: This group has strict version requirements. It is highly recommended to install this group in a separate virtual environment.
+```Bash
+pip install "dragon-ml-toolbox[mice]"
+```
+#### Modules:
+```bash
+custom_logger
+MICE_imputation
+VIF_factor
+path_manager
+utilities
+```
+### 📋 Excel File Handling [excel]
+Installs dependencies required to process and handle .xlsx or .xls files.
+```Bash
+pip install "dragon-ml-toolbox[excel]"
+```
+#### Modules:
+```bash
+custom_logger
+handle_excel
+path_manager
+```
+### 🎰 GUI for Boosting Algorithms (XGBoost, LightGBM) [gui-boost]
+For GUIs that include plotting functionality, you must also install the [plot] extra.
+```Bash
+pip install "dragon-ml-toolbox[gui-boost]"
+```
+```Bash
+pip install "dragon-ml-toolbox[gui-boost,plot]"
+```
+#### Modules:
+```bash
+GUI_tools
+ensemble_inference
+path_manager
+```
+### 🤖 GUI for PyTorch Models [gui-torch]
+For GUIs that include plotting functionality, you must also install the [plot] extra.
+```Bash
+pip install "dragon-ml-toolbox[gui-torch]"
+```
+```Bash
+pip install "dragon-ml-toolbox[gui-torch,plot]"
+```
+#### Modules:
+```bash
+GUI_tools
+ML_inference
+path_manager
+```
+### 🎫 Base Tools [base]
+General purpose functions and classes.
+```Bash
+pip install "dragon-ml-toolbox[base]"
+```
+#### Modules:
+```bash
+ETL_engineering
+custom_logger
+SQL
+utilities
+path_manager
+```
+## Usage
+After installation, import modules like this:
+```python
+from ml_tools.utilities import serialize_object, deserialize_object
+from ml_tools.custom_logger import custom_logger
+```

dragon_ml_toolbox-4.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,30 @@
+dragon_ml_toolbox-4.1.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-4.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
+ml_tools/ETL_engineering.py,sha256=m_IY-4hSp5X5TfJbWQ-MJNRxkxl4fcsxOnsivMs8tiM,39506
+ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
+ml_tools/MICE_imputation.py,sha256=b6ZTs8RedXFifOpuMCzr68xM16mCBVh1Ua6kcGfiVtg,11462
+ml_tools/ML_callbacks.py,sha256=0a-Rbr0Xp_B1FNopOKBBmuJ4MqazS5JgDiT7wx1dHvE,13161
+ml_tools/ML_evaluation.py,sha256=4dVqe6JF1Ukmk1sAcY8E5EG1oB1_oy2HXE5OT-pZwCs,10273
+ml_tools/ML_inference.py,sha256=Fh-X2UQn3AznWBjf-7iPSxwE-EzkGQm1VEIRUAkURmE,5336
+ml_tools/ML_trainer.py,sha256=dJjMfCEEM07Txy9KEH-2srZ3CZUa4lFWTJhpNWQ4Ndk,14974
+ml_tools/PSO_optimization.py,sha256=xtnPute5pkS_w-VvqOBgRLgke09mjfacGC2m9DiipHE,27626
+ml_tools/RNN_forecast.py,sha256=2CyjBLSYYc3xLHxwLXUmP5Qv8AmV1OB_EndETNX1IBk,1956
+ml_tools/SQL.py,sha256=9zzS6AFEJM9aj6nE31hDe8S9TqLonk-J1amwZoiHNbk,10468
+ml_tools/VIF_factor.py,sha256=2nUMupfUoogf8o6ghoFZk_OwWhFXU0R3C9Gj0HOlI14,10415
+ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ml_tools/_logger.py,sha256=TpgYguxO-CWYqqgLW0tqFjtwZ58PE_W2OCfWNGZr0n0,1175
+ml_tools/_pytorch_models.py,sha256=ewPPsTHgmRPzMMWwObZOdH1vxm2Ij2VWZP38NC6zSH4,10135
+ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
+ml_tools/custom_logger.py,sha256=a3ywSCQT7j5ypR-usnKh2l861d_aVJ93ZRVqxrHsBBw,4112
+ml_tools/data_exploration.py,sha256=rJhvxUqVbEuB_7HG-PfLH3vaA7hrZEtbVHg9QO9VS4A,22837
+ml_tools/datasetmaster.py,sha256=_tNC2v98eCQGr3nMW_EFs83TRgRme8Uc7ttg1vosmQU,30106
+ml_tools/ensemble_inference.py,sha256=0SNX3YAz5bpvtwYmqEwqyWeIJP2Pb-v-bemENRSO7qg,9426
+ml_tools/ensemble_learning.py,sha256=Zi1oy6G2FWnTI5hBwjlexwF3JKALFS2FN6F8HAlVi_s,35391
+ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,13007
+ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
+ml_tools/path_manager.py,sha256=ElDa25bntANujTjY7xN4ZfCDiZp-9Ud3x0aJSJptZBY,13419
+ml_tools/utilities.py,sha256=mz-M351DzxWxnYVcLX-7ZQ6c-RGoCV9g4VTS9Qif2Es,18348
+dragon_ml_toolbox-4.1.0.dist-info/METADATA,sha256=eJQwYS8B7RMy4H8DveKsDVmj4ikBSJb_hkuTSzmObz4,6278
+dragon_ml_toolbox-4.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-4.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-4.1.0.dist-info/RECORD,,

ml_tools/ETL_engineering.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import polars as pl
 import re
 from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
-from .utilities import _script_info
-from .logger import _LOGGER
+from ._script_info import _script_info
+from ._logger import _LOGGER
 __all__ = [

ml_tools/GUI_tools.py CHANGED Viewed

@@ -4,9 +4,9 @@ import traceback
 import FreeSimpleGUI as sg
 from functools import wraps
 from typing import Any, Dict, Tuple, List, Literal, Union, Optional, Callable
-from .utilities import _script_info
+from ._script_info import _script_info
 import numpy as np
-from .logger import _LOGGER
+from ._logger import _LOGGER
 from .keys import _OneHotOtherPlaceholder

ml_tools/MICE_imputation.py CHANGED Viewed

@@ -3,11 +3,12 @@ import miceforest as mf
 from pathlib import Path
 import matplotlib.pyplot as plt
 import numpy as np
-from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
+from .utilities import load_dataframe, merge_dataframes, save_dataframe, threshold_binary_values
+from .path_manager import sanitize_filename, make_fullpath, list_csv_paths
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
 from typing import Optional, Union
-from .logger import _LOGGER
+from ._logger import _LOGGER
+from ._script_info import _script_info
 __all__ = [
     "apply_mice",

ml_tools/ML_callbacks.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import numpy as np
 import torch
 from tqdm.auto import tqdm
-from .utilities import make_fullpath
+from .path_manager import make_fullpath
 from .keys import LogKeys
-from .logger import _LOGGER
+from ._logger import _LOGGER
 from typing import Optional
+from ._script_info import _script_info
 __all__ = [
@@ -270,7 +271,7 @@ class ModelCheckpoint(Callback):
             self.last_best_filepath = new_filepath
     def _save_rolling_checkpoints(self, epoch, logs):
-        """Saves the latest model and keeps only the last 5."""
+        """Saves the latest model and keeps only the most recent ones."""
         filename = f"epoch_{epoch}.pth"
         filepath = self.save_dir / filename
@@ -334,4 +335,7 @@ class LRScheduler(Callback):
         if current_lr != self.previous_lr:
             _LOGGER.info(f"Epoch {epoch}: Learning rate changed to {current_lr:.6f}")
             self.previous_lr = current_lr
+def info():
+    _script_info(__all__)

ml_tools/ML_evaluation.py CHANGED Viewed

@@ -14,9 +14,10 @@ from sklearn.metrics import (
 import torch
 import shap
 from pathlib import Path
-from .utilities import make_fullpath
-from .logger import _LOGGER
+from .path_manager import make_fullpath
+from ._logger import _LOGGER
 from typing import Union, Optional
+from ._script_info import _script_info
 __all__ = [
@@ -62,7 +63,7 @@ def plot_losses(history: dict, save_dir: Optional[Union[str, Path]] = None):
     plt.tight_layout()
     if save_dir:
-        save_dir_path = make_fullpath(save_dir, make=True)
+        save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
         save_path = save_dir_path / "loss_plot.svg"
         plt.savefig(save_path)
         _LOGGER.info(f"📉 Loss plot saved as '{save_path.name}'")
@@ -88,7 +89,7 @@ def classification_metrics(y_true: np.ndarray, y_pred: np.ndarray, y_prob: Optio
     print(report)
     if save_dir:
-        save_dir_path = make_fullpath(save_dir, make=True)
+        save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
         # Save text report
         report_path = save_dir_path / "classification_report.txt"
         report_path.write_text(report, encoding="utf-8")
@@ -158,7 +159,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Optiona
     print(report_string)
     if save_dir:
-        save_dir_path = make_fullpath(save_dir, make=True)
+        save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
         # Save text report
         report_path = save_dir_path / "regression_report.txt"
         report_path.write_text(report_string)
@@ -220,7 +221,7 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
         _LOGGER.info("Using SHAP values for the positive class (class 1) for plots.")
     if save_dir:
-        save_dir_path = make_fullpath(save_dir, make=True)
+        save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
         # Save Bar Plot
         bar_path = save_dir_path / "shap_bar_plot.svg"
         shap.summary_plot(shap_values_for_plot, instances_to_explain, feature_names=feature_names, plot_type="bar", show=False)
@@ -253,3 +254,7 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
     else:
         _LOGGER.info("No save directory provided. Displaying SHAP dot plot.")
         shap.summary_plot(shap_values_for_plot, instances_to_explain, feature_names=feature_names, plot_type="dot")
+def info():
+    _script_info(__all__)

ml_tools/ML_inference.py ADDED Viewed

@@ -0,0 +1,131 @@
+import torch
+from torch import nn
+import numpy as np
+from pathlib import Path
+from typing import Union, Literal, Dict, Any
+from ._script_info import _script_info
+from ._logger import _LOGGER
+from .path_manager import make_fullpath
+from .keys import PyTorchInferenceKeys
+__all__ = [
+    "PyTorchInferenceHandler"
+]
+class PyTorchInferenceHandler:
+    """
+    Handles loading a PyTorch model's state dictionary and performing inference
+    for either regression or classification tasks.
+    """
+    def __init__(self,
+                 model: nn.Module,
+                 state_dict: Union[str, Path],
+                 task: Literal["classification", "regression"],
+                 device: str = 'cpu'):
+        """
+        Initializes the handler by loading a model's state_dict.
+        Args:
+            model (nn.Module): An instantiated PyTorch model with the correct architecture.
+            state_dict (str | Path): The path to the saved .pth model state_dict file.
+            task (str): The type of task, 'regression' or 'classification'.
+            device (str): The device to run inference on ('cpu', 'cuda', 'mps').
+        """
+        self.model = model
+        self.task = task
+        self.device = self._validate_device(device)
+        model_p = make_fullpath(state_dict, enforce="file")
+        try:
+            # Load the state dictionary and apply it to the model structure
+            self.model.load_state_dict(torch.load(model_p, map_location=self.device))
+            self.model.to(self.device)
+            self.model.eval()  # Set the model to evaluation mode
+            _LOGGER.info(f"✅ Model state loaded from '{model_p.name}' and set to evaluation mode.")
+        except Exception as e:
+            _LOGGER.error(f"❌ Failed to load model state from '{model_p}': {e}")
+            raise
+    def _validate_device(self, device: str) -> torch.device:
+        """Validates the selected device and returns a torch.device object."""
+        device_lower = device.lower()
+        if "cuda" in device_lower and not torch.cuda.is_available():
+            _LOGGER.warning("⚠️ CUDA not available, switching to CPU.")
+            device_lower = "cpu"
+        elif device_lower == "mps" and not torch.backends.mps.is_available():
+            _LOGGER.warning("⚠️ Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
+            device_lower = "cpu"
+        return torch.device(device_lower)
+    def _preprocess_input(self, features: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
+        """Converts input to a torch.Tensor and moves it to the correct device."""
+        if isinstance(features, np.ndarray):
+            features = torch.from_numpy(features).float()
+        # Ensure tensor is on the correct device
+        return features.to(self.device)
+    def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
+        """
+        Predicts on a single feature vector.
+        Args:
+            features (np.ndarray | torch.Tensor): A 1D or 2D array/tensor for a single sample.
+        Returns:
+            Dict[str, Any]: A dictionary containing the prediction.
+                - For regression: {'predictions': float}
+                - For classification: {'labels': int, 'probabilities': np.ndarray}
+        """
+        if features.ndim == 1:
+            features = features.reshape(1, -1)
+        if features.shape[0] != 1:
+            raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
+        results_batch = self.predict_batch(features)
+        # Extract the single result from the batch
+        if self.task == "regression":
+            return {PyTorchInferenceKeys.PREDICTIONS: results_batch[PyTorchInferenceKeys.PREDICTIONS].item()}
+        else: # classification
+            return {
+                PyTorchInferenceKeys.LABELS: results_batch[PyTorchInferenceKeys.LABELS].item(),
+                PyTorchInferenceKeys.PROBABILITIES: results_batch[PyTorchInferenceKeys.PROBABILITIES][0]
+            }
+    def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
+        """
+        Predicts on a batch of feature vectors.
+        Args:
+            features (np.ndarray | torch.Tensor): A 2D array/tensor where each row is a sample.
+        Returns:
+            Dict[str, Any]: A dictionary containing the predictions.
+                - For regression: {'predictions': np.ndarray}
+                - For classification: {'labels': np.ndarray, 'probabilities': np.ndarray}
+        """
+        if features.ndim != 2:
+            raise ValueError("Input for batch prediction must be a 2D array or tensor.")
+        input_tensor = self._preprocess_input(features)
+        with torch.no_grad():
+            output = self.model(input_tensor).cpu()
+            if self.task == "classification":
+                probs = nn.functional.softmax(output, dim=1)
+                labels = torch.argmax(probs, dim=1)
+                return {
+                    PyTorchInferenceKeys.LABELS: labels.numpy(),
+                    PyTorchInferenceKeys.PROBABILITIES: probs.numpy()
+                }
+            else:  # regression
+                return {PyTorchInferenceKeys.PREDICTIONS: output.numpy()}
+def info():
+    _script_info(__all__)

ml_tools/ML_trainer.py CHANGED Viewed

@@ -7,9 +7,9 @@ import numpy as np
 from .ML_callbacks import Callback, History, TqdmProgressBar
 from .ML_evaluation import classification_metrics, regression_metrics, plot_losses, shap_summary_plot
-from .utilities import _script_info
+from ._script_info import _script_info
 from .keys import LogKeys
-from .logger import _LOGGER
+from ._logger import _LOGGER
 __all__ = [
@@ -105,7 +105,7 @@ class MyTrainer:
             pin_memory=(self.device.type == "cuda")
         )
-    def fit(self, epochs: int = 10, batch_size: int = 32, shuffle: bool = True):
+    def fit(self, epochs: int = 10, batch_size: int = 10, shuffle: bool = True):
         """
         Starts the training-validation process of the model.
@@ -113,6 +113,13 @@ class MyTrainer:
             epochs (int): The total number of epochs to train for.
             batch_size (int): The number of samples per batch.
             shuffle (bool): Whether to shuffle the training data at each epoch.
+        Note:
+            For regression tasks using `nn.MSELoss` or `nn.L1Loss`, the trainer
+            automatically aligns the model's output tensor with the target tensor's
+            shape using `output.view_as(target)`. This handles the common case
+            where a model outputs a shape of `[batch_size, 1]` and the target has a
+            shape of `[batch_size]`.
         """
         self.epochs = epochs
         self._create_dataloaders(batch_size, shuffle)
@@ -189,9 +196,10 @@ class MyTrainer:
         logs = {LogKeys.VAL_LOSS: running_loss / len(self.test_loader.dataset)} # type: ignore
         return logs
-    def predict(self, dataloader: DataLoader):
+    def _predict_for_eval(self, dataloader: DataLoader):
         """
-        Yields model predictions batch by batch, avoids loading all predictions into memory at once.
+        Private method to yield model predictions batch by batch for evaluation.
+        This is used internally by the `evaluate` method.
         Args:
             dataloader (DataLoader): The dataloader to predict on.
@@ -213,13 +221,14 @@ class MyTrainer:
                     preds = torch.argmax(probs, dim=1)
                     y_pred_batch = preds.numpy()
                     y_prob_batch = probs.numpy()
+                # regression
                 else:
                     y_pred_batch = output.numpy()
                     y_prob_batch = None
                 yield y_pred_batch, y_prob_batch, y_true_batch
-    def evaluate(self, data: Optional[Union[DataLoader, Dataset]] = None, save_dir: Optional[Union[str,Path]] = None):
+    def evaluate(self, save_dir: Optional[Union[str,Path]], data: Optional[Union[DataLoader, Dataset]] = None):
         """
         Evaluates the model on the given data.
@@ -251,7 +260,7 @@ class MyTrainer:
         # Collect results from the predict generator
         all_preds, all_probs, all_true = [], [], []
-        for y_pred_b, y_prob_b, y_true_b in self.predict(eval_loader):
+        for y_pred_b, y_prob_b, y_true_b in self._predict_for_eval(eval_loader):
             all_preds.append(y_pred_b)
             if y_prob_b is not None:
                 all_probs.append(y_prob_b)
@@ -270,7 +279,7 @@ class MyTrainer:
         plot_losses(self.history, save_dir=save_dir)
     def explain(self, explain_dataset: Optional[Dataset] = None, n_samples: int = 100,
-                feature_names: Optional[List[str]] = None, save_dir: Optional[str] = None):
+                feature_names: Optional[List[str]] = None, save_dir: Optional[Union[str,Path]] = None):
         """
         Explains model predictions using SHAP and saves all artifacts.

dragon-ml-toolbox 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl