PyPI - dragon-ml-toolbox - Versions diffs - 5.3.0__tar.gz → 6.0.0__tar.gz - Mend

dragon-ml-toolbox 5.3.0__tar.gz → 6.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (39)

{dragon_ml_toolbox-5.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-6.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 5.3.0
+Version: 6.0.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -141,19 +141,22 @@ pip install "dragon-ml-toolbox[pytorch]"
 ```bash
 custom_logger
 data_exploration
-ensemble_learning
+ensemble_evaluation
 ensemble_inference
+ensemble_learning
 ETL_engineering
-ML_datasetmaster
-ML_models
 ML_callbacks
+ML_datasetmaster
 ML_evaluation
-ML_trainer
 ML_inference
+ML_models
+ML_optimization
+ML_trainer
+optimization_tools
 path_manager
 PSO_optimization
-SQL
 RNN_forecast
+SQL
 utilities
 ```

{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-6.0.0}/README.md RENAMED Viewed

@@ -60,19 +60,22 @@ pip install "dragon-ml-toolbox[pytorch]"
 ```bash
 custom_logger
 data_exploration
-ensemble_learning
+ensemble_evaluation
 ensemble_inference
+ensemble_learning
 ETL_engineering
-ML_datasetmaster
-ML_models
 ML_callbacks
+ML_datasetmaster
 ML_evaluation
-ML_trainer
 ML_inference
+ML_models
+ML_optimization
+ML_trainer
+optimization_tools
 path_manager
 PSO_optimization
-SQL
 RNN_forecast
+SQL
 utilities
 ```

{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-6.0.0/dragon_ml_toolbox.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 5.3.0
+Version: 6.0.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -141,19 +141,22 @@ pip install "dragon-ml-toolbox[pytorch]"
 ```bash
 custom_logger
 data_exploration
-ensemble_learning
+ensemble_evaluation
 ensemble_inference
+ensemble_learning
 ETL_engineering
-ML_datasetmaster
-ML_models
 ML_callbacks
+ML_datasetmaster
 ML_evaluation
-ML_trainer
 ML_inference
+ML_models
+ML_optimization
+ML_trainer
+optimization_tools
 path_manager
 PSO_optimization
-SQL
 RNN_forecast
+SQL
 utilities
 ```

{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-6.0.0}/dragon_ml_toolbox.egg-info/SOURCES.txt RENAMED Viewed

@@ -26,6 +26,7 @@ ml_tools/_logger.py
 ml_tools/_script_info.py
 ml_tools/custom_logger.py
 ml_tools/data_exploration.py
+ml_tools/ensemble_evaluation.py
 ml_tools/ensemble_inference.py
 ml_tools/ensemble_learning.py
 ml_tools/handle_excel.py

{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-6.0.0}/ml_tools/ML_callbacks.py RENAMED Viewed

@@ -2,7 +2,7 @@ import numpy as np
 import torch
 from tqdm.auto import tqdm
 from .path_manager import make_fullpath
-from .keys import LogKeys
+from .keys import PyTorchLogKeys
 from ._logger import _LOGGER
 from typing import Optional
 from ._script_info import _script_info
@@ -96,14 +96,14 @@ class TqdmProgressBar(Callback):
     def on_batch_end(self, batch, logs=None):
         self.batch_bar.update(1) # type: ignore
         if logs:
-            self.batch_bar.set_postfix(loss=f"{logs.get(LogKeys.BATCH_LOSS, 0):.4f}") # type: ignore
+            self.batch_bar.set_postfix(loss=f"{logs.get(PyTorchLogKeys.BATCH_LOSS, 0):.4f}") # type: ignore
     def on_epoch_end(self, epoch, logs=None):
         self.batch_bar.close() # type: ignore
         self.epoch_bar.update(1) # type: ignore
         if logs:
-            train_loss_str = f"{logs.get(LogKeys.TRAIN_LOSS, 0):.4f}"
-            val_loss_str = f"{logs.get(LogKeys.VAL_LOSS, 0):.4f}"
+            train_loss_str = f"{logs.get(PyTorchLogKeys.TRAIN_LOSS, 0):.4f}"
+            val_loss_str = f"{logs.get(PyTorchLogKeys.VAL_LOSS, 0):.4f}"
             self.epoch_bar.set_postfix_str(f"Train Loss: {train_loss_str}, Val Loss: {val_loss_str}") # type: ignore
     def on_train_end(self, logs=None):
@@ -124,7 +124,7 @@ class EarlyStopping(Callback):
                     inferred from the name of the monitored quantity.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=3, mode: Literal['auto', 'min', 'max']='auto', verbose: int=1):
+    def __init__(self, monitor: str=PyTorchLogKeys.VAL_LOSS, min_delta: float=0.0, patience: int=5, mode: Literal['auto', 'min', 'max']='auto', verbose: int=1):
         super().__init__()
         self.monitor = monitor
         self.patience = patience
@@ -201,8 +201,8 @@ class ModelCheckpoint(Callback):
         mode (str): One of {'auto', 'min', 'max'}.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, save_dir: Union[str,Path], monitor: str = LogKeys.VAL_LOSS,
-                 save_best_only: bool = False, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 1):
+    def __init__(self, save_dir: Union[str,Path], monitor: str = PyTorchLogKeys.VAL_LOSS,
+                 save_best_only: bool = True, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 0):
         super().__init__()
         self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
         if not self.save_dir.is_dir():

dragon_ml_toolbox-6.0.0/ml_tools/ML_evaluation.py ADDED Viewed

@@ -0,0 +1,350 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.calibration import CalibrationDisplay
+from sklearn.metrics import (
+    classification_report,
+    ConfusionMatrixDisplay,
+    roc_curve,
+    roc_auc_score,
+    mean_squared_error,
+    mean_absolute_error,
+    r2_score,
+    median_absolute_error,
+    precision_recall_curve,
+    average_precision_score
+)
+import torch
+import shap
+from pathlib import Path
+from .path_manager import make_fullpath
+from ._logger import _LOGGER
+from typing import Union, Optional
+from ._script_info import _script_info
+__all__ = [
+    "plot_losses",
+    "classification_metrics",
+    "regression_metrics",
+    "shap_summary_plot"
+]
+def plot_losses(history: dict, save_dir: Union[str, Path]):
+    """
+    Plots training & validation loss curves from a history object.
+    Args:
+        history (dict): A dictionary containing 'train_loss' and 'val_loss'.
+        save_dir (str | Path): Directory to save the plot image.
+    """
+    train_loss = history.get('train_loss', [])
+    val_loss = history.get('val_loss', [])
+    if not train_loss and not val_loss:
+        print("Warning: Loss history is empty or incomplete. Cannot plot.")
+        return
+    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
+    # Plot training loss only if data for it exists
+    if train_loss:
+        epochs = range(1, len(train_loss) + 1)
+        ax.plot(epochs, train_loss, 'o-', label='Training Loss')
+    # Plot validation loss only if data for it exists
+    if val_loss:
+        epochs = range(1, len(val_loss) + 1)
+        ax.plot(epochs, val_loss, 'o-', label='Validation Loss')
+    ax.set_title('Training and Validation Loss')
+    ax.set_xlabel('Epochs')
+    ax.set_ylabel('Loss')
+    ax.legend()
+    ax.grid(True)
+    plt.tight_layout()
+    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+    save_path = save_dir_path / "loss_plot.svg"
+    plt.savefig(save_path)
+    _LOGGER.info(f"📉 Loss plot saved as '{save_path.name}'")
+    plt.close(fig)
+def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pred: np.ndarray, y_prob: Optional[np.ndarray] = None,
+                           cmap: str = "Blues"):
+    """
+    Saves classification metrics and plots.
+    Args:
+        y_true (np.ndarray): Ground truth labels.
+        y_pred (np.ndarray): Predicted labels.
+        y_prob (np.ndarray, optional): Predicted probabilities for ROC curve.
+        cmap (str): Colormap for the confusion matrix.
+        save_dir (str | Path): Directory to save plots.
+    """
+    print("--- Classification Report ---")
+    # Generate report as both text and dictionary
+    report_text: str = classification_report(y_true, y_pred) # type: ignore
+    report_dict: dict = classification_report(y_true, y_pred, output_dict=True) # type: ignore
+    print(report_text)
+    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+    # Save text report
+    report_path = save_dir_path / "classification_report.txt"
+    report_path.write_text(report_text, encoding="utf-8")
+    _LOGGER.info(f"📝 Classification report saved as '{report_path.name}'")
+    # --- Save Classification Report Heatmap ---
+    try:
+        plt.figure(figsize=(8, 6), dpi=100)
+        sns.heatmap(pd.DataFrame(report_dict).iloc[:-1, :].T, annot=True, cmap='viridis', fmt='.2f')
+        plt.title("Classification Report")
+        plt.tight_layout()
+        heatmap_path = save_dir_path / "classification_report_heatmap.svg"
+        plt.savefig(heatmap_path)
+        _LOGGER.info(f"📊 Report heatmap saved as '{heatmap_path.name}'")
+        plt.close()
+    except Exception as e:
+        _LOGGER.error(f"❌ Could not generate classification report heatmap: {e}")
+    # Save Confusion Matrix
+    fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=100)
+    ConfusionMatrixDisplay.from_predictions(y_true, y_pred, cmap=cmap, ax=ax_cm)
+    ax_cm.set_title("Confusion Matrix")
+    cm_path = save_dir_path / "confusion_matrix.svg"
+    plt.savefig(cm_path)
+    _LOGGER.info(f"❇️ Confusion matrix saved as '{cm_path.name}'")
+    plt.close(fig_cm)
+    # Plotting logic for ROC and PR Curves
+    if y_prob is not None and y_prob.ndim > 1 and y_prob.shape[1] >= 2:
+        # Use probabilities of the positive class
+        y_score = y_prob[:, 1]
+        # --- Save ROC Curve ---
+        fpr, tpr, _ = roc_curve(y_true, y_score)
+        auc = roc_auc_score(y_true, y_score)
+        fig_roc, ax_roc = plt.subplots(figsize=(6, 6), dpi=100)
+        ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
+        ax_roc.plot([0, 1], [0, 1], 'k--')
+        ax_roc.set_title('Receiver Operating Characteristic (ROC) Curve')
+        ax_roc.set_xlabel('False Positive Rate')
+        ax_roc.set_ylabel('True Positive Rate')
+        ax_roc.legend(loc='lower right')
+        ax_roc.grid(True)
+        roc_path = save_dir_path / "roc_curve.svg"
+        plt.savefig(roc_path)
+        _LOGGER.info(f"📈 ROC curve saved as '{roc_path.name}'")
+        plt.close(fig_roc)
+        # --- Save Precision-Recall Curve ---
+        precision, recall, _ = precision_recall_curve(y_true, y_score)
+        ap_score = average_precision_score(y_true, y_score)
+        fig_pr, ax_pr = plt.subplots(figsize=(6, 6), dpi=100)
+        ax_pr.plot(recall, precision, label=f'AP = {ap_score:.2f}')
+        ax_pr.set_title('Precision-Recall Curve')
+        ax_pr.set_xlabel('Recall')
+        ax_pr.set_ylabel('Precision')
+        ax_pr.legend(loc='lower left')
+        ax_pr.grid(True)
+        pr_path = save_dir_path / "pr_curve.svg"
+        plt.savefig(pr_path)
+        _LOGGER.info(f"📈 PR curve saved as '{pr_path.name}'")
+        plt.close(fig_pr)
+        # --- Save Calibration Plot ---
+        if y_prob.ndim > 1 and y_prob.shape[1] >= 2:
+            y_score = y_prob[:, 1] # Use probabilities of the positive class
+            fig_cal, ax_cal = plt.subplots(figsize=(8, 8), dpi=100)
+            CalibrationDisplay.from_predictions(y_true, y_score, n_bins=15, ax=ax_cal)
+            ax_cal.set_title('Calibration Plot (Reliability Curve)')
+            ax_cal.set_xlabel('Mean Predicted Probability')
+            ax_cal.set_ylabel('Fraction of Positives')
+            ax_cal.grid(True)
+            plt.tight_layout()
+            cal_path = save_dir_path / "calibration_plot.svg"
+            plt.savefig(cal_path)
+            _LOGGER.info(f"✅ Calibration plot saved as '{cal_path.name}'")
+            plt.close(fig_cal)
+def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[str, Path]):
+    """
+    Saves regression metrics and plots.
+    Args:
+        y_true (np.ndarray): Ground truth values.
+        y_pred (np.ndarray): Predicted values.
+        save_dir (str | Path): Directory to save plots and report.
+    """
+    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
+    mae = mean_absolute_error(y_true, y_pred)
+    r2 = r2_score(y_true, y_pred)
+    medae = median_absolute_error(y_true, y_pred)
+    report_lines = [
+        "--- Regression Report ---",
+        f"  Root Mean Squared Error (RMSE): {rmse:.4f}",
+        f"  Mean Absolute Error (MAE):      {mae:.4f}",
+        f"  Median Absolute Error (MedAE):  {medae:.4f}",
+        f"  Coefficient of Determination (R²): {r2:.4f}"
+    ]
+    report_string = "\n".join(report_lines)
+    print(report_string)
+    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+    # Save text report
+    report_path = save_dir_path / "regression_report.txt"
+    report_path.write_text(report_string)
+    _LOGGER.info(f"📝 Regression report saved as '{report_path.name}'")
+    # Save residual plot
+    residuals = y_true - y_pred
+    fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=100)
+    ax_res.scatter(y_pred, residuals, alpha=0.6)
+    ax_res.axhline(0, color='red', linestyle='--')
+    ax_res.set_xlabel("Predicted Values")
+    ax_res.set_ylabel("Residuals")
+    ax_res.set_title("Residual Plot")
+    ax_res.grid(True)
+    plt.tight_layout()
+    res_path = save_dir_path / "residual_plot.svg"
+    plt.savefig(res_path)
+    _LOGGER.info(f"📈 Residual plot saved as '{res_path.name}'")
+    plt.close(fig_res)
+    # Save true vs predicted plot
+    fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=100)
+    ax_tvp.scatter(y_true, y_pred, alpha=0.6)
+    ax_tvp.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--', lw=2)
+    ax_tvp.set_xlabel('True Values')
+    ax_tvp.set_ylabel('Predictions')
+    ax_tvp.set_title('True vs. Predicted Values')
+    ax_tvp.grid(True)
+    plt.tight_layout()
+    tvp_path = save_dir_path / "true_vs_predicted_plot.svg"
+    plt.savefig(tvp_path)
+    _LOGGER.info(f"📉 True vs. Predicted plot saved as '{tvp_path.name}'")
+    plt.close(fig_tvp)
+    # Save Histogram of Residuals
+    fig_hist, ax_hist = plt.subplots(figsize=(8, 6), dpi=100)
+    sns.histplot(residuals, kde=True, ax=ax_hist)
+    ax_hist.set_xlabel("Residual Value")
+    ax_hist.set_ylabel("Frequency")
+    ax_hist.set_title("Distribution of Residuals")
+    ax_hist.grid(True)
+    plt.tight_layout()
+    hist_path = save_dir_path / "residuals_histogram.svg"
+    plt.savefig(hist_path)
+    _LOGGER.info(f"📊 Residuals histogram saved as '{hist_path.name}'")
+    plt.close(fig_hist)
+def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], instances_to_explain: Union[torch.Tensor,np.ndarray],
+                      feature_names: Optional[list[str]]=None, save_dir: Optional[Union[str, Path]] = None):
+    """
+    Calculates SHAP values and saves summary plots and data.
+    Args:
+        model (nn.Module): The trained PyTorch model.
+        background_data (torch.Tensor): A sample of data for the explainer background.
+        instances_to_explain (torch.Tensor): The specific data instances to explain.
+        feature_names (list of str | None): Names of the features for plot labeling.
+        save_dir (str | Path | None): Directory to save SHAP artifacts. If None, dot plot is shown.
+    """
+    # everything to numpy
+    if isinstance(background_data, np.ndarray):
+        background_data_np = background_data
+    else:
+        background_data_np = background_data.numpy()
+    if isinstance(instances_to_explain, np.ndarray):
+        instances_to_explain_np = instances_to_explain
+    else:
+        instances_to_explain_np = instances_to_explain.numpy()
+    # --- Data Validation Step ---
+    if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
+        _LOGGER.error("❌ Input data for SHAP contains NaN values. Aborting explanation.")
+        return
+    print("\n--- SHAP Value Explanation ---")
+    model.eval()
+    model.cpu()
+    # 1. Summarize the background data.
+    # Summarize the background data using k-means. 10-50 clusters is a good starting point.
+    background_summary = shap.kmeans(background_data_np, 30)
+    # 2. Define a prediction function wrapper that SHAP can use. It must take a numpy array and return a numpy array.
+    def prediction_wrapper(x_np: np.ndarray) -> np.ndarray:
+        # Convert numpy data to torch tensor
+        x_torch = torch.from_numpy(x_np).float()
+        with torch.no_grad():
+            # Get model output
+            output = model(x_torch)
+        # Return as numpy array
+        return output.cpu().numpy().flatten()
+    # 3. Create the KernelExplainer
+    explainer = shap.KernelExplainer(prediction_wrapper, background_summary)
+    print("Calculating SHAP values with KernelExplainer...")
+    shap_values = explainer.shap_values(instances_to_explain_np, l1_reg="aic")
+    if save_dir:
+        save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+        plt.ioff()
+        # Save Bar Plot
+        bar_path = save_dir_path / "shap_bar_plot.svg"
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
+        plt.title("SHAP Feature Importance")
+        plt.tight_layout()
+        plt.savefig(bar_path)
+        _LOGGER.info(f"📊 SHAP bar plot saved as '{bar_path.name}'")
+        plt.close()
+        # Save Dot Plot
+        dot_path = save_dir_path / "shap_dot_plot.svg"
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
+        plt.title("SHAP Feature Importance")
+        plt.tight_layout()
+        plt.savefig(dot_path)
+        _LOGGER.info(f"📊 SHAP dot plot saved as '{dot_path.name}'")
+        plt.close()
+        # Save Summary Data to CSV
+        summary_path = save_dir_path / "shap_summary.csv"
+        # Ensure the array is 1D before creating the DataFrame
+        mean_abs_shap = np.abs(shap_values).mean(axis=0).flatten()
+        if feature_names is None:
+            feature_names = [f'feature_{i}' for i in range(len(mean_abs_shap))]
+        summary_df = pd.DataFrame({
+            'feature': feature_names,
+            'mean_abs_shap_value': mean_abs_shap
+        }).sort_values('mean_abs_shap_value', ascending=False)
+        summary_df.to_csv(summary_path, index=False)
+        _LOGGER.info(f"📝 SHAP summary data saved as '{summary_path.name}'")
+        plt.ion()
+    else:
+        _LOGGER.info("No save directory provided. Displaying SHAP dot plot.")
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot")
+def info():
+    _script_info(__all__)

{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-6.0.0}/ml_tools/ML_trainer.py RENAMED Viewed

@@ -8,16 +8,16 @@ import numpy as np
 from .ML_callbacks import Callback, History, TqdmProgressBar
 from .ML_evaluation import classification_metrics, regression_metrics, plot_losses, shap_summary_plot
 from ._script_info import _script_info
-from .keys import LogKeys
+from .keys import PyTorchLogKeys
 from ._logger import _LOGGER
 __all__ = [
-    "MyTrainer"
+    "MLTrainer"
 ]
-class MyTrainer:
+class MLTrainer:
     def __init__(self, model: nn.Module, train_dataset: Dataset, test_dataset: Dataset,
                  kind: Literal["regression", "classification"],
                  criterion: nn.Module, optimizer: torch.optim.Optimizer,
@@ -95,14 +95,16 @@ class MyTrainer:
             batch_size=batch_size,
             shuffle=shuffle,
             num_workers=loader_workers,
-            pin_memory=(self.device.type == "cuda")
+            pin_memory=("cuda" in self.device.type),
+            drop_last=True  # Drops the last batch if incomplete, selecting a good batch size is key.
         )
         self.test_loader = DataLoader(
             dataset=self.test_dataset,
             batch_size=batch_size,
             shuffle=False,
             num_workers=loader_workers,
-            pin_memory=(self.device.type == "cuda")
+            pin_memory=("cuda" in self.device.type)
         )
     def fit(self, epochs: int = 10, batch_size: int = 10, shuffle: bool = True):
@@ -159,8 +161,8 @@ class MyTrainer:
         for batch_idx, (features, target) in enumerate(self.train_loader): # type: ignore
             # Create a log dictionary for the batch
             batch_logs = {
-                LogKeys.BATCH_INDEX: batch_idx,
-                LogKeys.BATCH_SIZE: features.size(0)
+                PyTorchLogKeys.BATCH_INDEX: batch_idx,
+                PyTorchLogKeys.BATCH_SIZE: features.size(0)
             }
             self.callbacks_hook('on_batch_begin', batch_idx, logs=batch_logs)
@@ -178,11 +180,11 @@ class MyTrainer:
             running_loss += batch_loss * features.size(0)
             # Add the batch loss to the logs and call the end-of-batch hook
-            batch_logs[LogKeys.BATCH_LOSS] = batch_loss
+            batch_logs[PyTorchLogKeys.BATCH_LOSS] = batch_loss
             self.callbacks_hook('on_batch_end', batch_idx, logs=batch_logs)
         # Return the average loss for the entire epoch
-        return {LogKeys.TRAIN_LOSS: running_loss / len(self.train_loader.dataset)} # type: ignore
+        return {PyTorchLogKeys.TRAIN_LOSS: running_loss / len(self.train_loader.dataset)} # type: ignore
     def _validation_step(self):
         self.model.eval()
@@ -195,7 +197,7 @@ class MyTrainer:
                     output = output.view_as(target)
                 loss = self.criterion(output, target)
                 running_loss += loss.item() * features.size(0)
-        logs = {LogKeys.VAL_LOSS: running_loss / len(self.test_loader.dataset)} # type: ignore
+        logs = {PyTorchLogKeys.VAL_LOSS: running_loss / len(self.test_loader.dataset)} # type: ignore
         return logs
     def _predict_for_eval(self, dataloader: DataLoader):
@@ -230,14 +232,14 @@ class MyTrainer:
                 yield y_pred_batch, y_prob_batch, y_true_batch
-    def evaluate(self, save_dir: Optional[Union[str,Path]], data: Optional[Union[DataLoader, Dataset]] = None):
+    def evaluate(self, save_dir: Union[str,Path], data: Optional[Union[DataLoader, Dataset]] = None):
         """
         Evaluates the model on the given data.
         Args:
             data (DataLoader | Dataset | None ): The data to evaluate on.
                 Can be a DataLoader or a Dataset. If None, defaults to the trainer's internal test_dataset.
-            save_dir (str | Path | None): Directory to save all reports and plots. If None, metrics are shown but not saved.
+            save_dir (str | Path): Directory to save all reports and plots.
         """
         eval_loader = None
         if isinstance(data, DataLoader):
@@ -273,14 +275,14 @@ class MyTrainer:
         y_prob = np.concatenate(all_probs) if self.kind == "classification" else None
         if self.kind == "classification":
-            classification_metrics(y_true, y_pred, y_prob, save_dir=save_dir)
+            classification_metrics(save_dir, y_true, y_pred, y_prob)
         else:
-            regression_metrics(y_true.flatten(), y_pred.flatten(), save_dir=save_dir)
+            regression_metrics(y_true.flatten(), y_pred.flatten(), save_dir)
         print("\n--- Training History ---")
         plot_losses(self.history, save_dir=save_dir)
-    def explain(self, explain_dataset: Optional[Dataset] = None, n_samples: int = 100,
+    def explain(self, explain_dataset: Optional[Dataset] = None, n_samples: int = 1000,
                 feature_names: Optional[List[str]] = None, save_dir: Optional[Union[str,Path]] = None):
         """
         Explains model predictions using SHAP and saves all artifacts.

{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-6.0.0}/ml_tools/PSO_optimization.py RENAMED Viewed

@@ -12,7 +12,7 @@ from .path_manager import sanitize_filename, make_fullpath, list_files_by_extens
 import torch
 from tqdm import trange
 from ._logger import _LOGGER
-from .keys import ModelSaveKeys
+from .keys import EnsembleKeys
 from ._script_info import _script_info
 from .SQL import DatabaseManager
 from contextlib import nullcontext
@@ -48,9 +48,9 @@ class ObjectiveFunction():
         self.is_hybrid = False if binary_features <= 0 else True
         self.use_noise = add_noise
         self._artifact = deserialize_object(trained_model_path, verbose=False, raise_on_error=True)
-        self.model = self._get_from_artifact(ModelSaveKeys.MODEL)
-        self.feature_names: Optional[list[str]] = self._get_from_artifact(ModelSaveKeys.FEATURES) # type: ignore
-        self.target_name: Optional[str] = self._get_from_artifact(ModelSaveKeys.TARGET) # type: ignore
+        self.model = self._get_from_artifact(EnsembleKeys.MODEL)
+        self.feature_names: Optional[list[str]] = self._get_from_artifact(EnsembleKeys.FEATURES) # type: ignore
+        self.target_name: Optional[str] = self._get_from_artifact(EnsembleKeys.TARGET) # type: ignore
         self.task = task
         self.check_model() # check for classification models and None values
@@ -126,7 +126,7 @@ class ObjectiveFunction():
         if self._artifact is None:
             raise TypeError("Load model error")
         val = self._artifact.get(key)
-        if key == ModelSaveKeys.FEATURES:
+        if key == EnsembleKeys.FEATURES:
             result = val if isinstance(val, list) and val else None
         else:
             result = val if val else None

dragon-ml-toolbox 5.3.0__tar.gz → 6.0.0__tar.gz

Potentially problematic release.

dragon-ml-toolbox 5.3.0__tar.gz → 6.0.0__tar.gz