dragon-ml-toolbox 14.3.1__py3-none-any.whl → 14.8.0__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package as they appear in their registry. It is provided for informational purposes only and reflects the changes between those published versions.
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/METADATA +2 -1
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/RECORD +17 -16
- ml_tools/ML_configuration.py +116 -0
- ml_tools/ML_datasetmaster.py +42 -0
- ml_tools/ML_evaluation.py +208 -63
- ml_tools/ML_evaluation_multi.py +40 -10
- ml_tools/ML_trainer.py +38 -12
- ml_tools/ML_utilities.py +50 -1
- ml_tools/ML_vision_datasetmaster.py +198 -60
- ml_tools/ML_vision_models.py +15 -1
- ml_tools/ML_vision_transformers.py +151 -6
- ml_tools/ensemble_evaluation.py +53 -10
- ml_tools/keys.py +2 -1
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/top_level.txt +0 -0
{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 14.3.1
+Version: 14.8.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

@@ -141,6 +141,7 @@ ETL_cleaning
 ETL_engineering
 math_utilities
 ML_callbacks
+ML_configuration
 ML_datasetmaster
 ML_evaluation_multi
 ML_evaluation
{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/RECORD
CHANGED

@@ -1,25 +1,26 @@
-dragon_ml_toolbox-14.3.1.dist-info/licenses/LICENSE,sha256=…
-dragon_ml_toolbox-14.3.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=…
+dragon_ml_toolbox-14.8.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-14.8.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=gkOdNDbKYpIJezwSo2CEnISkLeYfYHv9t8b5K2-P69A,2687
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
 ml_tools/MICE_imputation.py,sha256=KLJXGQLKJ6AuWWttAG-LCCaxpS-ygM4dXPiguHDaL6Y,20815
 ml_tools/ML_callbacks.py,sha256=elD2Yr030sv_6gX_m9GVd6HTyrbmt34nFS8lrgS4HtM,15808
-ml_tools/ML_datasetmaster.py,sha256=…
-ml_tools/ML_evaluation.py,sha256=…
-ml_tools/ML_evaluation_multi.py,sha256=…
+ml_tools/ML_configuration.py,sha256=tXkm2q57bl2kK0Iqpx1G7s1pEURBL_UMmqD8mlsGPs4,4689
+ml_tools/ML_datasetmaster.py,sha256=Zi5jBnBI_U6tD8mpCVL5bQcsqsGEMAzMsCVI_wFD2QU,30175
+ml_tools/ML_evaluation.py,sha256=EvlgFeMQeZ1RSEMtNd-nv7W0d0SVcR4n6cwW5UG16DU,25358
+ml_tools/ML_evaluation_multi.py,sha256=bQZ2gJY-dBzKQxvtd-B6wVaGBdFpQGVBr7tQZFokp5E,17166
 ml_tools/ML_inference.py,sha256=YJ953bhNWsdlPRtJQh3h2ACfMIgp8dQ9KtL9Azar-5s,23489
 ml_tools/ML_models.py,sha256=PqOcNlws7vCJMbiVCKqlPuktxvskZVUHG3VfU-Yshf8,31415
 ml_tools/ML_models_advanced.py,sha256=vk3PZBSu3DVso2S1rKTxxdS43XG8Q5FnasIL3-rMajc,12410
 ml_tools/ML_optimization.py,sha256=P0zkhKAwTpkorIBtR0AOIDcyexo5ngmvFUzo3DfNO-E,22692
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_trainer.py,sha256=…
-ml_tools/ML_utilities.py,sha256=…
-ml_tools/ML_vision_datasetmaster.py,sha256=…
+ml_tools/ML_trainer.py,sha256=salZxfv3RWRCiinp5S9xeUsHysMbMQ52EecR8GyEbaM,51461
+ml_tools/ML_utilities.py,sha256=eYe2N-65FTzaOHF5gmiJl-HmicyzhqcdvlDiIivr5_g,22993
+ml_tools/ML_vision_datasetmaster.py,sha256=VHZo0gzgrXrfGcHA34WKD3gGfhlxMrOXbNdYhXb6p6M,64462
 ml_tools/ML_vision_evaluation.py,sha256=t12R7i1RkOCt9zu1_lxSBr8OH6A6Get0k8ftDLctn6I,10486
 ml_tools/ML_vision_inference.py,sha256=He3KV3VJAm8PwO-fOq4b9VO8UXFr-GmpuCnoHXf4VZI,20588
-ml_tools/ML_vision_models.py,sha256=…
-ml_tools/ML_vision_transformers.py,sha256=…
+ml_tools/ML_vision_models.py,sha256=WqiRN9JAjv--BcwkDrooXAs4Qo26JHPCHh3JSPm4kMI,26226
+ml_tools/ML_vision_transformers.py,sha256=h332O9BjDMgxrBc0I-bJwJODWlcp7nJHbX1QS2etwBk,7738
 ml_tools/PSO_optimization.py,sha256=T-HWHMRJUnPvPwixdU5jif3_rnnI36TzcL8u3oSCwuA,22960
 ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
 ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234

@@ -32,17 +33,17 @@ ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/custom_logger.py,sha256=TGc0Ww2Xlqj2XE3q4bP43hV7T3qnb5ci9f0pYHXF5TY,11226
 ml_tools/data_exploration.py,sha256=bwHzFJ-IAo5GN3T53F-1J_pXUg8VHS91sG_90utAsfg,69911
-ml_tools/ensemble_evaluation.py,sha256=…
+ml_tools/ensemble_evaluation.py,sha256=2sJ3jD6yBNPRNwSokyaLKqKHi0QhF13ChoFe5yd4zwg,28368
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
 ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
 ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
-ml_tools/keys.py,sha256=…
+ml_tools/keys.py,sha256=-OiL9G0RIOKQk6BwETKIP3LWz2s5-x6lZW2YitJa4mY,3330
 ml_tools/math_utilities.py,sha256=xeKq1quR_3DYLgowcp4Uam_4s3JltUyOnqMOGuAiYWU,8802
 ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJRs,12763
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
 ml_tools/utilities.py,sha256=aWqvYzmxlD74PD5Yqu1VuTekDJeYLQrmPIU_VeVyRp0,22526
-dragon_ml_toolbox-14.3.1.dist-info/METADATA,sha256=…
-dragon_ml_toolbox-14.3.1.dist-info/WHEEL,sha256=…
-dragon_ml_toolbox-14.3.1.dist-info/top_level.txt,sha256=…
-dragon_ml_toolbox-14.3.1.dist-info/RECORD,,
+dragon_ml_toolbox-14.8.0.dist-info/METADATA,sha256=9OndkhzBGS_XzlCPuHH88wIgndT2jhWN4fydXTGJg-8,6492
+dragon_ml_toolbox-14.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-14.8.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-14.8.0.dist-info/RECORD,,
ml_tools/ML_configuration.py
ADDED

@@ -0,0 +1,116 @@
+from typing import Optional
+from ._script_info import _script_info
+
+
+__all__ = [
+    "ClassificationMetricsFormat",
+    "MultiClassificationMetricsFormat"
+]
+
+
+class ClassificationMetricsFormat:
+    """
+    Optional configuration for classification tasks, use in the '.evaluate()' method of the MLTrainer.
+    """
+    def __init__(self,
+                 cmap: str="Blues",
+                 class_map: Optional[dict[str,int]]=None,
+                 ROC_PR_line: str='darkorange',
+                 calibration_bins: int=15,
+                 font_size: int=16) -> None:
+        """
+        Initializes the formatting configuration for single-label classification metrics.
+
+        Args:
+            cmap (str): The matplotlib colormap name for the confusion matrix
+                and report heatmap. Defaults to "Blues".
+                - Sequential options: 'Blues', 'Greens', 'Reds', 'Oranges', 'Purples'
+                - Diverging options: 'coolwarm', 'viridis', 'plasma', 'inferno'
+
+            class_map (dict[str,int] | None): A dictionary mapping
+                class string names to their integer indices (e.g., {'cat': 0, 'dog': 1}).
+                This is used to label the axes of the confusion matrix and classification
+                report correctly. Defaults to None.
+
+            ROC_PR_line (str): The color name or hex code for the line plotted
+                on the ROC and Precision-Recall curves. Defaults to 'darkorange'.
+                - Common color names: 'darkorange', 'cornflowerblue', 'crimson', 'forestgreen'
+                - Hex codes: '#FF6347', '#4682B4'
+
+            calibration_bins (int): The number of bins to use when
+                creating the calibration (reliability) plot. Defaults to 15.
+
+            font_size (int): The base font size to apply to the plots. Defaults to 16.
+
+        <br>
+
+        ## [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
+        """
+        self.cmap = cmap
+        self.class_map = class_map
+        self.ROC_PR_line = ROC_PR_line
+        self.calibration_bins = calibration_bins
+        self.font_size = font_size
+
+    def __repr__(self) -> str:
+        parts = [
+            f"cmap='{self.cmap}'",
+            f"class_map={self.class_map}",
+            f"ROC_PR_line='{self.ROC_PR_line}'",
+            f"calibration_bins={self.calibration_bins}",
+            f"font_size={self.font_size}"
+        ]
+        return f"ClassificationMetricsFormat({', '.join(parts)})"
+
+
+class MultiClassificationMetricsFormat:
+    """
+    Optional configuration for multi-label classification tasks, use in the '.evaluate()' method of the MLTrainer.
+    """
+    def __init__(self,
+                 threshold: float=0.5,
+                 ROC_PR_line: str='darkorange',
+                 cmap: str = "Blues",
+                 font_size: int = 16) -> None:
+        """
+        Initializes the formatting configuration for multi-label classification metrics.
+
+        Args:
+            threshold (float): The probability threshold (0.0 to 1.0) used
+                to convert sigmoid outputs into binary (0 or 1) predictions for
+                calculating the confusion matrix and overall metrics. Defaults to 0.5.
+
+            ROC_PR_line (str): The color name or hex code for the line plotted
+                on the ROC and Precision-Recall curves (one for each label).
+                Defaults to 'darkorange'.
+                - Common color names: 'darkorange', 'cornflowerblue', 'crimson', 'forestgreen'
+                - Hex codes: '#FF6347', '#4682B4'
+
+            cmap (str): The matplotlib colormap name for the per-label
+                confusion matrices. Defaults to "Blues".
+                - Sequential options: 'Blues', 'Greens', 'Reds', 'Oranges', 'Purples'
+                - Diverging options: 'coolwarm', 'viridis', 'plasma', 'inferno'
+
+            font_size (int): The base font size to apply to the plots. Defaults to 16.
+
+        <br>
+
+        ## [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
+        """
+        self.threshold = threshold
+        self.cmap = cmap
+        self.ROC_PR_line = ROC_PR_line
+        self.font_size = font_size
+
+    def __repr__(self) -> str:
+        parts = [
+            f"threshold={self.threshold}",
+            f"ROC_PR_line='{self.ROC_PR_line}'",
+            f"cmap='{self.cmap}'",
+            f"font_size={self.font_size}"
+        ]
+        return f"MultiClassificationMetricsFormat({', '.join(parts)})"
+
+
+def info():
+    _script_info(__all__)
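For orientation, a minimal usage sketch of the two new configuration classes. The class names and values below are invented for illustration, and the exact keyword that MLTrainer's `.evaluate()` expects for these objects is not shown in this diff:

    from ml_tools.ML_configuration import (
        ClassificationMetricsFormat,
        MultiClassificationMetricsFormat,
    )

    # Single-label task: named confusion-matrix axes, custom curve color.
    fmt = ClassificationMetricsFormat(
        cmap="Greens",
        class_map={"cat": 0, "dog": 1},   # illustrative classes
        ROC_PR_line="crimson",
        calibration_bins=10,
    )
    print(fmt)
    # ClassificationMetricsFormat(cmap='Greens', class_map={'cat': 0, 'dog': 1}, ROC_PR_line='crimson', calibration_bins=10, font_size=16)

    # Multi-label task: tighter decision threshold for the per-label confusion matrices.
    multi_fmt = MultiClassificationMetricsFormat(threshold=0.4)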
ml_tools/ML_datasetmaster.py
CHANGED

@@ -333,7 +333,20 @@ class DatasetMaker(_BaseDatasetMaker):
         # --- 5. Create Datasets ---
         self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
         self._test_ds = _PytorchDataset(X_test_final, y_test, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__} (ID: '{self.id}')>\n"
+        s += f" Target: {self.target_names[0]}\n"
+        s += f" Features: {self.number_of_features}\n"
+        s += f" Scaler: {'Fitted' if self.scaler else 'None'}\n"
 
+        if self._train_ds:
+            s += f" Train Samples: {len(self._train_ds)}\n" # type: ignore
+        if self._test_ds:
+            s += f" Test Samples: {len(self._test_ds)}\n" # type: ignore
+
+        return s
+
 
 # --- Multi-Target Class ---
 class DatasetMakerMulti(_BaseDatasetMaker):

@@ -448,6 +461,19 @@ class DatasetMakerMulti(_BaseDatasetMaker):
         self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
         self._test_ds = _PytorchDataset(X_test_final, y_test, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
 
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__} (ID: '{self.id}')>\n"
+        s += f" Targets: {self.number_of_targets}\n"
+        s += f" Features: {self.number_of_features}\n"
+        s += f" Scaler: {'Fitted' if self.scaler else 'None'}\n"
+
+        if self._train_ds:
+            s += f" Train Samples: {len(self._train_ds)}\n" # type: ignore
+        if self._test_ds:
+            s += f" Test Samples: {len(self._test_ds)}\n" # type: ignore
+
+        return s
+
 
 # --- Private Base Class ---
 class _BaseMaker(ABC):

@@ -654,6 +680,22 @@ class SequenceMaker(_BaseMaker):
             _LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
             raise RuntimeError()
         return self._train_dataset, self._test_dataset
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f" Sequence Length (Window): {self.sequence_length}\n"
+        s += f" Total Data Points: {len(self.sequence)}\n"
+        s += " --- Status ---\n"
+        s += f" Split: {self._is_split}\n"
+        s += f" Normalized: {self._is_normalized}\n"
+        s += f" Windows Generated: {self._are_windows_generated}\n"
+
+        if self._are_windows_generated:
+            train_len = len(self._train_dataset) if self._train_dataset else 0 # type: ignore
+            test_len = len(self._test_dataset) if self._test_dataset else 0 # type: ignore
+            s += f" Datasets (Train/Test): {train_len} / {test_len} windows\n"
+
+        return s
 
 
 def info():
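All three `__repr__` additions build the summary string incrementally and gate the sample counts on dataset availability; a self-contained sketch of the pattern, with the class and values invented for illustration:

    class _ReprDemo:
        def __init__(self) -> None:
            self.id = "demo"
            self.number_of_features = 12
            self._train_ds = list(range(800))   # stand-in for a train dataset
            self._test_ds = list(range(200))    # stand-in for a test dataset

        def __repr__(self) -> str:
            s = f"<{self.__class__.__name__} (ID: '{self.id}')>\n"
            s += f" Features: {self.number_of_features}\n"
            if self._train_ds:
                s += f" Train Samples: {len(self._train_ds)}\n"
            if self._test_ds:
                s += f" Test Samples: {len(self._test_ds)}\n"
            return s

    print(_ReprDemo())
    # <_ReprDemo (ID: 'demo')>
    #  Features: 12
    #  Train Samples: 800
    #  Test Samples: 200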
ml_tools/ML_evaluation.py
CHANGED

@@ -21,7 +21,7 @@ from pathlib import Path
 from typing import Union, Optional, List, Literal
 import warnings
 
-from .path_manager import make_fullpath
+from .path_manager import make_fullpath, sanitize_filename
 from ._logger import _LOGGER
 from ._script_info import _script_info
 from .keys import SHAPKeys, PyTorchLogKeys

@@ -35,6 +35,8 @@ __all__ = [
     "plot_attention_importance"
 ]
 
+DPI_value = 250
+
 
 def plot_losses(history: dict, save_dir: Union[str, Path]):
     """

@@ -48,10 +50,10 @@ def plot_losses(history: dict, save_dir: Union[str, Path]):
     val_loss = history.get(PyTorchLogKeys.VAL_LOSS, [])
 
     if not train_loss and not val_loss:
-        …
+        _LOGGER.warning("Loss history is empty or incomplete. Cannot plot.")
         return
 
-    fig, ax = plt.subplots(figsize=(10, 5), dpi=…)
+    fig, ax = plt.subplots(figsize=(10, 5), dpi=DPI_value)
 
     # Plot training loss only if data for it exists
     if train_loss:
@@ -78,8 +80,15 @@ def plot_losses(history: dict, save_dir: Union[str, Path]):
     plt.close(fig)
 
 
-def classification_metrics(save_dir: Union[str, Path], …
-    …
+def classification_metrics(save_dir: Union[str, Path],
+                           y_true: np.ndarray,
+                           y_pred: np.ndarray,
+                           y_prob: Optional[np.ndarray] = None,
+                           cmap: str = "Blues",
+                           class_map: Optional[dict[str,int]]=None,
+                           ROC_PR_line: str='darkorange',
+                           calibration_bins: int=15,
+                           font_size: int=16):
     """
     Saves classification metrics and plots.

@@ -89,12 +98,31 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pred: np.ndarray, …
         y_prob (np.ndarray, optional): Predicted probabilities for ROC curve.
         cmap (str): Colormap for the confusion matrix.
         save_dir (str | Path): Directory to save plots.
+        class_map (dict[str, int], None): A map of {class_name: index} used to order and label the confusion matrix.
     """
-    …
+    original_rc_params = plt.rcParams.copy()
+    plt.rcParams.update({'font.size': font_size})
+
+    # print("--- Classification Report ---")
+
+    # --- Parse class_map ---
+    map_labels = None
+    map_display_labels = None
+    if class_map:
+        # Sort the map by its values (the indices) to ensure correct order
+        try:
+            sorted_items = sorted(class_map.items(), key=lambda item: item[1])
+            map_labels = [item[1] for item in sorted_items]
+            map_display_labels = [item[0] for item in sorted_items]
+        except Exception as e:
+            _LOGGER.warning(f"Could not parse 'class_map': {e}")
+            map_labels = None
+            map_display_labels = None
+
     # Generate report as both text and dictionary
-    report_text: str = classification_report(y_true, y_pred) # type: ignore
-    report_dict: dict = classification_report(y_true, y_pred, output_dict=True) # type: ignore
-    print(report_text)
+    report_text: str = classification_report(y_true, y_pred, labels=map_labels, target_names=map_display_labels) # type: ignore
+    report_dict: dict = classification_report(y_true, y_pred, output_dict=True, labels=map_labels, target_names=map_display_labels) # type: ignore
+    # print(report_text)
 
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
     # Save text report

@@ -104,8 +132,15 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pred: np.ndarray, …
 
     # --- Save Classification Report Heatmap ---
     try:
-        plt.figure(figsize=(8, 6), dpi=…)
-        sns.…
+        plt.figure(figsize=(8, 6), dpi=DPI_value)
+        sns.set_theme(font_scale=1.2) # Scale seaborn font
+        sns.heatmap(pd.DataFrame(report_dict).iloc[:-1, :].T,
+                    annot=True,
+                    cmap=cmap,
+                    fmt='.2f',
+                    vmin=0.0,
+                    vmax=1.0)
+        sns.set_theme(font_scale=1.0) # Reset seaborn scale
         plt.title("Classification Report")
         plt.tight_layout()
         heatmap_path = save_dir_path / "classification_report_heatmap.svg"
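The class_map handling added above is a plain value-sort of the dict; a standalone sketch of the transformation it performs (mapping invented for illustration):

    class_map = {"cat": 0, "dog": 1, "bird": 2}

    # Sort by index so label order matches the model's class indices.
    sorted_items = sorted(class_map.items(), key=lambda item: item[1])
    map_labels = [idx for _, idx in sorted_items]              # [0, 1, 2]
    map_display_labels = [name for name, _ in sorted_items]    # ['cat', 'dog', 'bird']

Both lists then feed scikit-learn's classification_report and ConfusionMatrixDisplay via labels= and target_names=/display_labels=, which is what keeps axis labels aligned with class indices.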
@@ -114,69 +149,179 @@ def classification_metrics(save_dir: Union[str, Path], y_true: np.ndarray, y_pred: np.ndarray, …
         plt.close()
     except Exception as e:
         _LOGGER.error(f"Could not generate classification report heatmap: {e}")
 
+    # --- labels for Confusion Matrix ---
+    plot_labels = map_labels
+    plot_display_labels = map_display_labels
+
     # Save Confusion Matrix
-    fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=…)
-    ConfusionMatrixDisplay.from_predictions(y_true, …
+    fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=DPI_value)
+    disp_ = ConfusionMatrixDisplay.from_predictions(y_true,
+                                                    y_pred,
+                                                    cmap=cmap,
+                                                    ax=ax_cm,
+                                                    normalize='true',
+                                                    labels=plot_labels,
+                                                    display_labels=plot_display_labels)
+
+    disp_.im_.set_clim(vmin=0.0, vmax=1.0)
+
+    # Turn off gridlines
+    ax_cm.grid(False)
+
+    # Manually update font size of cell texts
+    for text in ax_cm.texts:
+        text.set_fontsize(font_size)
+
+    fig_cm.tight_layout()
+
     ax_cm.set_title("Confusion Matrix")
     cm_path = save_dir_path / "confusion_matrix.svg"
     plt.savefig(cm_path)
     _LOGGER.info(f"❇️ Confusion matrix saved as '{cm_path.name}'")
     plt.close(fig_cm)
 
-    …
-    ax_pr.set_title('Precision-Recall Curve')
-    ax_pr.set_xlabel('Recall')
-    ax_pr.set_ylabel('Precision')
-    ax_pr.legend(loc='lower left')
-    ax_pr.grid(True)
-    pr_path = save_dir_path / "pr_curve.svg"
-    plt.savefig(pr_path)
-    _LOGGER.info(f"📈 PR curve saved as '{pr_path.name}'")
-    plt.close(fig_pr)
-    …
+    # Plotting logic for ROC, PR, and Calibration Curves
+    if y_prob is not None and y_prob.ndim == 2:
+        num_classes = y_prob.shape[1]
+
+        # --- Determine which classes to loop over ---
+        class_indices_to_plot = []
+        plot_titles = []
+        save_suffixes = []
+
+        if num_classes == 2:
+            # Binary case: Only plot for the positive class (index 1)
+            class_indices_to_plot = [1]
+            plot_titles = [""] # No extra title
+            save_suffixes = [""] # No extra suffix
+            _LOGGER.info("Generating binary classification plots (ROC, PR, Calibration).")
+
+        elif num_classes > 2:
+            _LOGGER.info(f"Generating One-vs-Rest plots for {num_classes} classes.")
+            # Multiclass case: Plot for every class (One-vs-Rest)
+            class_indices_to_plot = list(range(num_classes))
+
+            # --- Use class_map names if available ---
+            use_generic_names = True
+            if map_display_labels and len(map_display_labels) == num_classes:
+                try:
+                    # Ensure labels are safe for filenames
+                    safe_names = [sanitize_filename(name) for name in map_display_labels]
+                    plot_titles = [f" ({name} vs. Rest)" for name in map_display_labels]
+                    save_suffixes = [f"_{safe_names[i]}" for i in class_indices_to_plot]
+                    use_generic_names = False
+                except Exception as e:
+                    _LOGGER.warning(f"Failed to use 'class_map' for plot titles: {e}. Reverting to generic names.")
+                    use_generic_names = True
+
+            if use_generic_names:
+                plot_titles = [f" (Class {i} vs. Rest)" for i in class_indices_to_plot]
+                save_suffixes = [f"_class_{i}" for i in class_indices_to_plot]
+
+        else:
+            # Should not happen, but good to check
+            _LOGGER.warning(f"Probability array has invalid shape {y_prob.shape}. Skipping ROC/PR/Calibration plots.")
+
+        # --- Loop and generate plots ---
+        for i, class_index in enumerate(class_indices_to_plot):
+            plot_title = plot_titles[i]
+            save_suffix = save_suffixes[i]
+
+            # Get scores for the current class
+            y_score = y_prob[:, class_index]
+
+            # Binarize y_true for the current class
+            y_true_binary = (y_true == class_index).astype(int)
+
+            # --- Save ROC Curve ---
+            fpr, tpr, _ = roc_curve(y_true_binary, y_score)
+
+            # Calculate AUC.
+            # Note: For multiclass, roc_auc_score(y_true, y_prob, multi_class='ovr') could average, but plotting individual curves is more informative.
+            # Here we calculate the specific AUC for the binarized problem.
+            auc = roc_auc_score(y_true_binary, y_score)
+
+            fig_roc, ax_roc = plt.subplots(figsize=(6, 6), dpi=DPI_value)
+            ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=ROC_PR_line)
+            ax_roc.plot([0, 1], [0, 1], 'k--')
+            ax_roc.set_title(f'Receiver Operating Characteristic{plot_title}')
+            ax_roc.set_xlabel('False Positive Rate')
+            ax_roc.set_ylabel('True Positive Rate')
+            ax_roc.legend(loc='lower right')
+            ax_roc.grid(True)
+            roc_path = save_dir_path / f"roc_curve{save_suffix}.svg"
+            plt.savefig(roc_path)
+            plt.close(fig_roc)
+
+            # --- Save Precision-Recall Curve ---
+            precision, recall, _ = precision_recall_curve(y_true_binary, y_score)
+            ap_score = average_precision_score(y_true_binary, y_score)
+            fig_pr, ax_pr = plt.subplots(figsize=(6, 6), dpi=DPI_value)
+            ax_pr.plot(recall, precision, label=f'Avg Precision = {ap_score:.2f}', color=ROC_PR_line)
+            ax_pr.set_title(f'Precision-Recall Curve{plot_title}')
+            ax_pr.set_xlabel('Recall')
+            ax_pr.set_ylabel('Precision')
+            ax_pr.legend(loc='lower left')
+            ax_pr.grid(True)
+            pr_path = save_dir_path / f"pr_curve{save_suffix}.svg"
+            plt.savefig(pr_path)
+            plt.close(fig_pr)
+
+            # --- Save Calibration Plot ---
+            fig_cal, ax_cal = plt.subplots(figsize=(8, 8), dpi=DPI_value)
+
+            # --- Step 1: Get binned data *without* plotting ---
+            with plt.ioff(): # Suppress showing the temporary plot
+                fig_temp, ax_temp = plt.subplots()
+                cal_display_temp = CalibrationDisplay.from_predictions(
+                    y_true_binary, # Use binarized labels
+                    y_score,
+                    n_bins=calibration_bins,
+                    ax=ax_temp,
+                    name="temp" # Add a name to suppress potential warnings
+                )
+                # Get the x, y coordinates of the binned data
+                line_x, line_y = cal_display_temp.line_.get_data() # type: ignore
+                plt.close(fig_temp) # Close the temporary plot
+
+            # --- Step 2: Build the plot from scratch ---
+            ax_cal.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
+
+            sns.regplot(
+                x=line_x,
+                y=line_y,
+                ax=ax_cal,
+                scatter=False,
+                label=f"Calibration Curve ({calibration_bins} bins)",
+                line_kws={
+                    'color': ROC_PR_line,
+                    'linestyle': '--',
+                    'linewidth': 2,
+                }
+            )
+
+            ax_cal.set_title(f'Reliability Curve{plot_title}')
             ax_cal.set_xlabel('Mean Predicted Probability')
             ax_cal.set_ylabel('Fraction of Positives')
+
+            # --- Step 3: Set final limits *after* plotting ---
+            ax_cal.set_ylim(0.0, 1.0)
+            ax_cal.set_xlim(0.0, 1.0)
+
+            ax_cal.legend(loc='lower right')
             ax_cal.grid(True)
             plt.tight_layout()
 
-    cal_path = save_dir_path / "calibration_plot.svg"
+            cal_path = save_dir_path / f"calibration_plot{save_suffix}.svg"
             plt.savefig(cal_path)
-    _LOGGER.info(f"📈 Calibration plot saved as '{cal_path.name}'")
             plt.close(fig_cal)
+
+        _LOGGER.info(f"📈 Saved {len(class_indices_to_plot)} sets of ROC, Precision-Recall, and Calibration plots.")
+
+    # restore RC params
+    plt.rcParams.update(original_rc_params)
 
 
 def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[str, Path]):
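Taken together, the widened signature can be exercised as follows; a minimal sketch with invented data and an assumed output directory. For a 3-column y_prob it emits one ROC, PR, and calibration plot per class (One-vs-Rest), suffixed with the sanitized class names:

    import numpy as np
    from ml_tools.ML_evaluation import classification_metrics

    rng = np.random.default_rng(0)
    y_true = rng.integers(0, 3, size=200)            # toy labels
    y_prob = rng.dirichlet(np.ones(3), size=200)     # rows sum to 1, shape (200, 3)
    y_pred = y_prob.argmax(axis=1)

    classification_metrics(
        save_dir="reports/eval",                     # assumed path
        y_true=y_true,
        y_pred=y_pred,
        y_prob=y_prob,                               # enables ROC/PR/calibration plots
        class_map={"cat": 0, "dog": 1, "bird": 2},   # orders and names the matrix axes
        ROC_PR_line="cornflowerblue",
    )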
@@ -211,7 +356,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[str, Path]):
 
     # Save residual plot
     residuals = y_true - y_pred
-    fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=…)
+    fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=DPI_value)
     ax_res.scatter(y_pred, residuals, alpha=0.6)
     ax_res.axhline(0, color='red', linestyle='--')
     ax_res.set_xlabel("Predicted Values")

@@ -225,7 +370,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[str, Path]):
     plt.close(fig_res)
 
     # Save true vs predicted plot
-    fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=…)
+    fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=DPI_value)
     ax_tvp.scatter(y_true, y_pred, alpha=0.6)
     ax_tvp.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--', lw=2)
     ax_tvp.set_xlabel('True Values')

@@ -239,7 +384,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[str, Path]):
     plt.close(fig_tvp)
 
     # Save Histogram of Residuals
-    fig_hist, ax_hist = plt.subplots(figsize=(8, 6), dpi=…)
+    fig_hist, ax_hist = plt.subplots(figsize=(8, 6), dpi=DPI_value)
     sns.histplot(residuals, kde=True, ax=ax_hist)
     ax_hist.set_xlabel("Residual Value")
     ax_hist.set_ylabel("Frequency")
@@ -276,7 +421,7 @@ def shap_summary_plot(model,
         slow and memory-intensive.
     """
 
-    …
+    _LOGGER.info(f"📊 Running SHAP Value Explanation Using {explainer_type.upper()} Explainer")
 
     model.eval()
     # model.cpu() # Run explanations on CPU

@@ -348,9 +493,9 @@ def shap_summary_plot(model,
         _LOGGER.error(f"Invalid explainer_type: '{explainer_type}'. Must be 'deep' or 'kernel'.")
         raise ValueError()
 
-    if not isinstance(shap_values, list) and shap_values.ndim == 3 and shap_values.shape[2] == 1:
+    if not isinstance(shap_values, list) and shap_values.ndim == 3 and shap_values.shape[2] == 1: # type: ignore
         # _LOGGER.info("Squeezing SHAP values from (N, F, 1) to (N, F) for regression plot.")
-        shap_values = shap_values.squeeze(-1)
+        shap_values = shap_values.squeeze(-1) # type: ignore
 
     # --- 3. Plotting and Saving ---
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")

@@ -455,7 +600,7 @@ def plot_attention_importance(weights: List[torch.Tensor], feature_names: Option…
     # --- Step 3: Create and save the plot for top N features ---
     plot_df = summary_df.head(top_n).sort_values('mean_attention', ascending=True)
 
-    plt.figure(figsize=(10, 8), dpi=…)
+    plt.figure(figsize=(10, 8), dpi=DPI_value)
 
     # Create horizontal bar plot with error bars
     plt.barh(