dragon-ml-toolbox 13.4.0__py3-none-any.whl → 13.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


dragon_ml_toolbox-13.5.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 13.4.0
+Version: 13.5.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox-13.5.0.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
-dragon_ml_toolbox-13.4.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-13.4.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-13.5.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-13.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
 ml_tools/MICE_imputation.py,sha256=X273Qlgoqqg7KTmoKd75YDyAPB0UIbTzGP3xsCmRh3E,11717
 ml_tools/ML_callbacks.py,sha256=elD2Yr030sv_6gX_m9GVd6HTyrbmt34nFS8lrgS4HtM,15808
 ml_tools/ML_datasetmaster.py,sha256=6caWbq6eu1RE9V51gmceD71PtMctJRjFuLvkkK5ChiY,36271
-ml_tools/ML_evaluation.py,sha256=3u5dOhS77gn3kAshKr2GwSa5xZBF0YM77ZkFevqNPvA,18528
-ml_tools/ML_evaluation_multi.py,sha256=L6Ub_uObXsI7ToVCF6DtmAFekHRcga5wWMOnRYRR-BY,16121
+ml_tools/ML_evaluation.py,sha256=li77AuP53pCzgrj6p-jTCNtPFgS9Y9XnMWIZn1ulTBM,18946
+ml_tools/ML_evaluation_multi.py,sha256=rJKdgtq-9I7oaI7PRzq7aIZ84XdNV0xzlVePZW4nj0k,16095
 ml_tools/ML_inference.py,sha256=yq2gdN6s_OUYC5ZLQrIJC5BA5H33q8UKODXwb-_0M2c,23549
 ml_tools/ML_models.py,sha256=UVWJHPLVIvFno_csCHH1FwBfTwQ5nX0V8F1TbOByZ4I,31388
 ml_tools/ML_optimization.py,sha256=P0zkhKAwTpkorIBtR0AOIDcyexo5ngmvFUzo3DfNO-E,22692
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_trainer.py,sha256=9BP6JFClqGfe7GL-FGG3n5e-no9ssjEOLol7P6baGrI,29019
+ml_tools/ML_trainer.py,sha256=ZxeOagXW5adFhYIH-oMTlcrLU6VHe4R1EROI7yypNwQ,29665
 ml_tools/ML_utilities.py,sha256=EnKpPTnJ2qjZmz7kvows4Uu5CfSA7ByRmI1v2-KarKw,9337
 ml_tools/PSO_optimization.py,sha256=T-HWHMRJUnPvPwixdU5jif3_rnnI36TzcL8u3oSCwuA,22960
 ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
@@ -35,7 +35,7 @@ ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJR
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
 ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
-dragon_ml_toolbox-13.4.0.dist-info/METADATA,sha256=Ixk5If3BJhjyJy9_mirNJ2QckMELXFQiJa9_8RWfreI,6166
-dragon_ml_toolbox-13.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-13.4.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-13.4.0.dist-info/RECORD,,
+dragon_ml_toolbox-13.5.0.dist-info/METADATA,sha256=EwOjL8T9Vnk1cg7vsDY4JaK9ovZtIkeIN2LcAiN-nvg,6166
+dragon_ml_toolbox-13.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-13.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-13.5.0.dist-info/RECORD,,
ml_tools/ML_evaluation.py CHANGED
@@ -258,7 +258,7 @@ def shap_summary_plot(model,
                       feature_names: Optional[list[str]],
                       save_dir: Union[str, Path],
                       device: torch.device = torch.device('cpu'),
-                      explainer_type: Literal['deep', 'kernel'] = 'deep'):
+                      explainer_type: Literal['deep', 'kernel'] = 'kernel'):
     """
     Calculates SHAP values and saves summary plots and data.

@@ -270,7 +270,7 @@ def shap_summary_plot(model,
         save_dir (str | Path): Directory to save SHAP artifacts.
         device (torch.device): The torch device for SHAP calculations.
         explainer_type (Literal['deep', 'kernel']): The explainer to use.
-            - 'deep': (Default) Uses shap.DeepExplainer. Fast and efficient for
+            - 'deep': Uses shap.DeepExplainer. Fast and efficient for
              PyTorch models.
            - 'kernel': Uses shap.KernelExplainer. Model-agnostic but EXTREMELY
              slow and memory-intensive.
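
This release flips the default explainer from 'deep' to 'kernel' (here and in the multi-target and trainer variants below), so call sites that relied on the old default now take the slow KernelExplainer path. A minimal sketch of a version-proof call that pins the explainer explicitly; the model and tensors are illustrative, and the background_data/instances_to_explain keyword names are taken from identifiers visible elsewhere in this diff, not from the full signature:

import torch
from ml_tools.ML_evaluation import shap_summary_plot

# Toy regression model and data, purely illustrative.
model = torch.nn.Sequential(torch.nn.Linear(8, 16), torch.nn.ReLU(), torch.nn.Linear(16, 1))
background_data = torch.randn(100, 8)      # background sample for the explainer
instances_to_explain = torch.randn(50, 8)  # rows to explain

shap_summary_plot(
    model,
    background_data=background_data,
    instances_to_explain=instances_to_explain,
    feature_names=[f"feat_{i}" for i in range(8)],
    save_dir="shap_artifacts",
    explainer_type="deep",  # pin explicitly; 13.5.0 changes the default to "kernel"
)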
@@ -285,7 +285,7 @@ def shap_summary_plot(model,
     instances_to_explain_np = None

     if explainer_type == 'deep':
-        # --- 1. Use DeepExplainer (Preferred) ---
+        # --- 1. Use DeepExplainer ---

         # Ensure data is torch.Tensor
         if isinstance(background_data, np.ndarray):
@@ -309,10 +309,9 @@
         instances_to_explain_np = instances_to_explain.cpu().numpy()

     elif explainer_type == 'kernel':
-        # --- 2. Use KernelExplainer (Slow Fallback) ---
+        # --- 2. Use KernelExplainer ---
         _LOGGER.warning(
-            "Using KernelExplainer. This is memory-intensive and slow. "
-            "Consider reducing 'n_samples' if the process terminates unexpectedly."
+            "KernelExplainer is memory-intensive and slow. Consider reducing the number of instances to explain if the process terminates unexpectedly."
         )

         # Ensure data is np.ndarray
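
The reworded warning points at the number of explained rows rather than an 'n_samples' argument. A small sketch of the mitigation it suggests, subsampling instances before a KernelExplainer run (the array and sizes are illustrative; the trainer docstring below recommends staying under 100):

import numpy as np

rng = np.random.default_rng(0)
instances = rng.standard_normal((5000, 8)).astype(np.float32)

# KernelExplainer cost grows with every explained row, so keep this set small.
keep = rng.choice(len(instances), size=64, replace=False)
instances_to_explain = instances[keep]  # 64 rows instead of 5000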
@@ -348,14 +347,26 @@
     else:
         _LOGGER.error(f"Invalid explainer_type: '{explainer_type}'. Must be 'deep' or 'kernel'.")
         raise ValueError()
+
+    if not isinstance(shap_values, list) and shap_values.ndim == 3 and shap_values.shape[2] == 1:
+        # _LOGGER.info("Squeezing SHAP values from (N, F, 1) to (N, F) for regression plot.")
+        shap_values = shap_values.squeeze(-1)

     # --- 3. Plotting and Saving ---
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
     plt.ioff()

+    # Convert instances to a DataFrame. robust way to ensure SHAP correctly maps values to feature names.
+    if feature_names is None:
+        # Create generic names if none were provided
+        num_features = instances_to_explain_np.shape[1]
+        feature_names = [f'feature_{i}' for i in range(num_features)]
+
+    instances_df = pd.DataFrame(instances_to_explain_np, columns=feature_names)
+
     # Save Bar Plot
     bar_path = save_dir_path / "shap_bar_plot.svg"
-    shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
+    shap.summary_plot(shap_values, instances_df, plot_type="bar", show=False)
     ax = plt.gca()
     ax.set_xlabel("SHAP Value Impact", labelpad=10)
     plt.title("SHAP Feature Importance")
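
Two normalization steps are new in this hunk: single-output SHAP arrays of shape (N, F, 1) are squeezed to (N, F), and the explained instances are wrapped in a DataFrame so feature names travel with the values instead of being passed separately. A standalone sketch of both steps on random data:

import numpy as np
import pandas as pd

shap_values = np.random.rand(100, 8, 1)    # (N, F, 1): single-output regression
if not isinstance(shap_values, list) and shap_values.ndim == 3 and shap_values.shape[2] == 1:
    shap_values = shap_values.squeeze(-1)  # -> (N, F), the layout summary_plot expects

instances = np.random.rand(100, 8)
feature_names = [f'feature_{i}' for i in range(instances.shape[1])]
instances_df = pd.DataFrame(instances, columns=feature_names)

print(shap_values.shape)         # (100, 8)
print(instances_df.columns[:2])  # Index(['feature_0', 'feature_1'], dtype='object')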
@@ -366,7 +377,7 @@

     # Save Dot Plot
     dot_path = save_dir_path / "shap_dot_plot.svg"
-    shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
+    shap.summary_plot(shap_values, instances_df, plot_type="dot", show=False)
     ax = plt.gca()
     ax.set_xlabel("SHAP Value Impact", labelpad=10)
     if plt.gcf().axes and len(plt.gcf().axes) > 1:
@@ -389,9 +400,6 @@
     mean_abs_shap = np.abs(shap_values).mean(axis=0)

     mean_abs_shap = mean_abs_shap.flatten()
-
-    if feature_names is None:
-        feature_names = [f'feature_{i}' for i in range(len(mean_abs_shap))]

     summary_df = pd.DataFrame({
         SHAPKeys.FEATURE_COLUMN: feature_names,
@@ -401,7 +409,7 @@
     summary_df.to_csv(summary_path, index=False)

     _LOGGER.info(f"📝 SHAP summary data saved as '{summary_path.name}'")
-    plt.ion()
+    plt.ion()


 def plot_attention_importance(weights: List[torch.Tensor], feature_names: Optional[List[str]], save_dir: Union[str, Path], top_n: int = 10):
ml_tools/ML_evaluation_multi.py CHANGED
@@ -235,7 +235,7 @@ def multi_target_shap_summary_plot(
         target_names: List[str],
         save_dir: Union[str, Path],
         device: torch.device = torch.device('cpu'),
-        explainer_type: Literal['deep', 'kernel'] = 'deep'
+        explainer_type: Literal['deep', 'kernel'] = 'kernel'
         ):
     """
     Calculates SHAP values for a multi-target model and saves summary plots and data for each target.
@@ -249,7 +249,7 @@
         save_dir (str | Path): Directory to save SHAP artifacts.
         device (torch.device): The torch device for SHAP calculations.
         explainer_type (Literal['deep', 'kernel']): The explainer to use.
-            - 'deep': (Default) Uses shap.DeepExplainer. Fast and efficient.
+            - 'deep': Uses shap.DeepExplainer. Fast and efficient.
             - 'kernel': Uses shap.KernelExplainer. Model-agnostic but slow and memory-intensive.
     """
     _LOGGER.info(f"--- Multi-Target SHAP Value Explanation (Using: {explainer_type.upper()}Explainer) ---")
@@ -260,7 +260,7 @@
     instances_to_explain_np = None

     if explainer_type == 'deep':
-        # --- 1. Use DeepExplainer (Preferred) ---
+        # --- 1. Use DeepExplainer ---

         # Ensure data is torch.Tensor
         if isinstance(background_data, np.ndarray):
@@ -285,10 +285,9 @@
         instances_to_explain_np = instances_to_explain.cpu().numpy()

     elif explainer_type == 'kernel':
-        # --- 2. Use KernelExplainer (Slow Fallback) ---
+        # --- 2. Use KernelExplainer ---
         _LOGGER.warning(
-            "Using KernelExplainer. This is memory-intensive and slow. "
-            "Consider reducing 'n_samples' if the process terminates."
+            "KernelExplainer is memory-intensive and slow. Consider reducing the number of instances to explain if the process terminates unexpectedly."
         )

         # Convert all data to numpy
ml_tools/ML_trainer.py CHANGED
@@ -9,7 +9,7 @@ from .ML_callbacks import Callback, History, TqdmProgressBar, ModelCheckpoint
 from .ML_evaluation import classification_metrics, regression_metrics, plot_losses, shap_summary_plot, plot_attention_importance
 from .ML_evaluation_multi import multi_target_regression_metrics, multi_label_classification_metrics, multi_target_shap_summary_plot
 from ._script_info import _script_info
-from .keys import PyTorchLogKeys, PyTorchCheckpointKeys
+from .keys import PyTorchLogKeys, PyTorchCheckpointKeys, DatasetKeys
 from ._logger import _LOGGER
 from .path_manager import make_fullpath

@@ -408,7 +408,7 @@ class MLTrainer:
                 n_samples: int = 300,
                 feature_names: Optional[List[str]] = None,
                 target_names: Optional[List[str]] = None,
-                explainer_type: Literal['deep', 'kernel'] = 'deep'):
+                explainer_type: Literal['deep', 'kernel'] = 'kernel'):
         """
         Explains model predictions using SHAP and saves all artifacts.

@@ -422,11 +422,11 @@
             explain_dataset (Dataset | None): A specific dataset to explain.
                 If None, the trainer's test dataset is used.
             n_samples (int): The number of samples to use for both background and explanation.
-            feature_names (list[str] | None): Feature names.
+            feature_names (list[str] | None): Feature names. If None, the names will be extracted from the Dataset and raise an error on failure.
             target_names (list[str] | None): Target names for multi-target tasks.
             save_dir (str | Path): Directory to save all SHAP artifacts.
             explainer_type (Literal['deep', 'kernel']): The explainer to use.
-                - 'deep': (Default) Uses shap.DeepExplainer. Fast and efficient for PyTorch models.
+                - 'deep': Uses shap.DeepExplainer. Fast and efficient for PyTorch models.
                 - 'kernel': Uses shap.KernelExplainer. Model-agnostic but EXTREMELY slow and memory-intensive. Use with a very low 'n_samples' < 100.
         """
         # Internal helper to create a dataloader and get a random sample
@@ -474,10 +474,10 @@
         # attempt to get feature names
         if feature_names is None:
             # _LOGGER.info("`feature_names` not provided. Attempting to extract from dataset...")
-            if hasattr(target_dataset, "feature_names"):
+            if hasattr(target_dataset, DatasetKeys.FEATURE_NAMES):
                 feature_names = target_dataset.feature_names # type: ignore
             else:
-                _LOGGER.error("Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a `feature_names` attribute.")
+                _LOGGER.error(f"Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a '{DatasetKeys.FEATURE_NAMES}' attribute.")
                 raise ValueError()

         # move model to device
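
DatasetKeys, newly imported above, centralizes attribute names that 13.4.0 hard-coded as string literals. The class itself lives in ml_tools/keys.py and is not part of this diff; a hypothetical reconstruction, consistent with the literals it replaces:

# Hypothetical sketch, NOT the actual ml_tools/keys.py source.
class DatasetKeys:
    FEATURE_NAMES = "feature_names"  # replaces the literal "feature_names" from 13.4.0
    TARGET_NAMES = "target_names"    # replaces the literal 'target_names' from 13.4.0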
@@ -498,7 +498,7 @@
         # try to get target names
         if target_names is None:
             target_names = []
-            if hasattr(target_dataset, 'target_names'):
+            if hasattr(target_dataset, DatasetKeys.TARGET_NAMES):
                 target_names = target_dataset.target_names # type: ignore
             else:
                 # Infer number of targets from the model's output layer
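
Both lookups rely on plain attributes of the dataset object. A minimal sketch of a custom Dataset that satisfies them, so the trainer can resolve feature and target names without explicit arguments (the class name and data are illustrative):

import torch
from torch.utils.data import Dataset

class TabularDataset(Dataset):  # hypothetical example class
    def __init__(self, X: torch.Tensor, y: torch.Tensor,
                 feature_names: list[str], target_names: list[str]):
        self.X, self.y = X, y
        self.feature_names = feature_names  # found via hasattr(ds, DatasetKeys.FEATURE_NAMES)
        self.target_names = target_names    # found via hasattr(ds, DatasetKeys.TARGET_NAMES)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

ds = TabularDataset(torch.randn(32, 4), torch.randn(32, 2),
                    [f"x{i}" for i in range(4)], ["y0", "y1"])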
@@ -549,7 +549,7 @@
             yield attention_weights

     def explain_attention(self, save_dir: Union[str, Path],
-                          feature_names: Optional[List[str]],
+                          feature_names: Optional[List[str]] = None,
                           explain_dataset: Optional[Dataset] = None,
                           plot_n_features: int = 10):
         """
@@ -559,7 +559,7 @@

         Args:
             save_dir (str | Path): Directory to save the plot and summary data.
-            feature_names (List[str] | None): Names for the features for plot labeling. If not given, generic names will be used.
+            feature_names (List[str] | None): Names for the features for plot labeling. If None, the names will be extracted from the Dataset and raise an error on failure.
             explain_dataset (Dataset, optional): A specific dataset to explain. If None, the trainer's test dataset is used.
             plot_n_features (int): Number of top features to plot.
         """
@@ -580,6 +580,14 @@
             _LOGGER.error("The explanation dataset is empty or invalid. Skipping attention analysis.")
             return

+        # Get feature names
+        if feature_names is None:
+            if hasattr(dataset_to_use, DatasetKeys.FEATURE_NAMES):
+                feature_names = dataset_to_use.feature_names # type: ignore
+            else:
+                _LOGGER.error(f"Could not extract `feature_names` from the dataset for attention plot. It must be provided if the dataset object does not have a '{DatasetKeys.FEATURE_NAMES}' attribute.")
+                raise ValueError()
+
         explain_loader = DataLoader(
             dataset=dataset_to_use, batch_size=32, shuffle=False,
             num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
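
With feature_names now defaulting to None and resolved from the dataset, the common call shrinks to one argument. A short sketch, where trainer is a hypothetical, already-constructed MLTrainer whose explanation dataset exposes feature_names:

# 13.5.0: names are pulled from the dataset's feature_names attribute.
trainer.explain_attention(save_dir="attention_artifacts")

# 13.4.0 equivalent: the parameter had no default and had to be passed explicitly.
# trainer.explain_attention(save_dir="attention_artifacts", feature_names=names)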