dragon-ml-toolbox 5.3.0.tar.gz → 5.3.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-5.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-5.3.1}/PKG-INFO +1 -1
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_callbacks.py +2 -2
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_evaluation.py +49 -12
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_trainer.py +4 -2
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/pyproject.toml +1 -1
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/LICENSE +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/README.md +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/setup.cfg +0 -0
{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_callbacks.py

@@ -124,7 +124,7 @@ class EarlyStopping(Callback):
             inferred from the name of the monitored quantity.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=
+    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta: float=0.0, patience: int=5, mode: Literal['auto', 'min', 'max']='auto', verbose: int=0):
         super().__init__()
         self.monitor = monitor
         self.patience = patience
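A brief usage sketch of the signature completed above; the argument values are illustrative and the import path is inferred from the package layout, so treat both as assumptions.

```python
# Hypothetical usage of the 5.3.1 EarlyStopping signature; values are examples.
from ml_tools.ML_callbacks import EarlyStopping

early_stop = EarlyStopping(
    min_delta=0.001,  # smallest change in the monitored loss counted as improvement
    patience=5,       # epochs without improvement before training stops
    mode="auto",      # min/max inferred from the monitored quantity's name
    verbose=1,
)
```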
@@ -202,7 +202,7 @@ class ModelCheckpoint(Callback):
         verbose (int): Verbosity mode.
     """
     def __init__(self, save_dir: Union[str,Path], monitor: str = LogKeys.VAL_LOSS,
-                 save_best_only: bool =
+                 save_best_only: bool = True, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 0):
         super().__init__()
         self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
         if not self.save_dir.is_dir():
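Likewise, a hedged sketch of constructing ModelCheckpoint with the defaults that 5.3.1 makes explicit; the directory name is an example, not taken from the diff.

```python
# Hypothetical ModelCheckpoint construction; save_dir is an example path.
from ml_tools.ML_callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint(
    save_dir="checkpoints",  # resolved and created via make_fullpath
    save_best_only=True,     # keep only the best weights for the monitored metric
    mode="auto",
    verbose=1,
)
```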
{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_evaluation.py

@@ -195,7 +195,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Optiona
     plt.close(fig_tvp)
 
 
-def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain: torch.Tensor,
+def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], instances_to_explain: Union[torch.Tensor,np.ndarray],
                       feature_names: Optional[list[str]]=None, save_dir: Optional[Union[str, Path]] = None):
     """
     Calculates SHAP values and saves summary plots and data.
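The hunk above widens the accepted input types from tensors only to tensors or NumPy arrays. A minimal calling sketch under that contract; the model, shapes, and output directory are stand-ins:

```python
# Illustrative call: a NumPy background with a tensor batch; both are now accepted.
import numpy as np
import torch
from ml_tools.ML_evaluation import shap_summary_plot

model = torch.nn.Linear(4, 1)        # stand-in regression model
background = np.random.rand(100, 4)  # np.ndarray is accepted as of 5.3.1
instances = torch.rand(20, 4)        # torch.Tensor still works

shap_summary_plot(model, background, instances,
                  feature_names=[f"f{i}" for i in range(4)],
                  save_dir="shap_artifacts")
```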
@@ -207,24 +207,54 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
         feature_names (list of str | None): Names of the features for plot labeling.
         save_dir (str | Path | None): Directory to save SHAP artifacts. If None, dot plot is shown.
     """
+    # everything to numpy
+    if isinstance(background_data, np.ndarray):
+        background_data_np = background_data
+    else:
+        background_data_np = background_data.numpy()
+
+    if isinstance(instances_to_explain, np.ndarray):
+        instances_to_explain_np = instances_to_explain
+    else:
+        instances_to_explain_np = instances_to_explain.numpy()
+
+    # --- Data Validation Step ---
+    if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
+        _LOGGER.error("❌ Input data for SHAP contains NaN values. Aborting explanation.")
+        return
+
     print("\n--- SHAP Value Explanation ---")
-    print("Calculating SHAP values... ")
 
     model.eval()
     model.cpu()
 
-
-
-
-
-
-
+    # 1. Summarize the background data.
+    # Summarize the background data using k-means. 10-50 clusters is a good starting point.
+    background_summary = shap.kmeans(background_data_np, 30)
+
+    # 2. Define a prediction function wrapper that SHAP can use. It must take a numpy array and return a numpy array.
+    def prediction_wrapper(x_np: np.ndarray) -> np.ndarray:
+        # Convert numpy data to torch tensor
+        x_torch = torch.from_numpy(x_np).float()
+        with torch.no_grad():
+            # Get model output
+            output = model(x_torch)
+        # Return as numpy array
+        return output.cpu().numpy().flatten()
 
+    # 3. Create the KernelExplainer
+    explainer = shap.KernelExplainer(prediction_wrapper, background_summary)
+
+    print("Calculating SHAP values with KernelExplainer...")
+    shap_values = explainer.shap_values(instances_to_explain_np, l1_reg="aic")
+
     if save_dir:
         save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+        plt.ioff()
+
         # Save Bar Plot
         bar_path = save_dir_path / "shap_bar_plot.svg"
-        shap.summary_plot(
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
         plt.title("SHAP Feature Importance")
         plt.tight_layout()
         plt.savefig(bar_path)
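This hunk moves the function to the model-agnostic KernelExplainer workflow: compress the background with k-means, wrap the model as an ndarray-to-ndarray function, then explain. A self-contained sketch of the same pattern follows; shap.kmeans, shap.KernelExplainer, and the l1_reg argument are real shap APIs, while the model and data are stand-ins.

```python
# Standalone sketch of the KernelExplainer pattern used above; model/data are stand-ins.
import numpy as np
import shap
import torch

model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1))
model.eval()

background = np.random.rand(200, 4)
instances = np.random.rand(10, 4)

def predict(x_np: np.ndarray) -> np.ndarray:
    # ndarray -> tensor -> model -> ndarray, as KernelExplainer requires
    with torch.no_grad():
        return model(torch.from_numpy(x_np).float()).numpy().flatten()

background_summary = shap.kmeans(background, 30)  # 30 centroids stand in for 200 rows
explainer = shap.KernelExplainer(predict, background_summary)
shap_values = explainer.shap_values(instances, l1_reg="aic")
print(np.abs(shap_values).mean(axis=0))  # per-feature mean |SHAP|
```

KernelExplainer's cost grows with the number of background rows, which is why the new code summarizes them with k-means before explaining.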
@@ -233,7 +263,7 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
 
         # Save Dot Plot
         dot_path = save_dir_path / "shap_dot_plot.svg"
-        shap.summary_plot(
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
         plt.title("SHAP Feature Importance")
         plt.tight_layout()
         plt.savefig(dot_path)
@@ -242,18 +272,25 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
 
         # Save Summary Data to CSV
         summary_path = save_dir_path / "shap_summary.csv"
-
+        # Ensure the array is 1D before creating the DataFrame
+        mean_abs_shap = np.abs(shap_values).mean(axis=0).flatten()
+
         if feature_names is None:
             feature_names = [f'feature_{i}' for i in range(len(mean_abs_shap))]
+
         summary_df = pd.DataFrame({
             'feature': feature_names,
             'mean_abs_shap_value': mean_abs_shap
         }).sort_values('mean_abs_shap_value', ascending=False)
+
         summary_df.to_csv(summary_path, index=False)
+
         _LOGGER.info(f"📝 SHAP summary data saved as '{summary_path.name}'")
+        plt.ion()
+
     else:
         _LOGGER.info("No save directory provided. Displaying SHAP dot plot.")
-        shap.summary_plot(
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot")
 
 
 def info():
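A small worked example of the ranking this hunk writes to shap_summary.csv: the per-feature mean of absolute SHAP values, flattened to 1-D before building the DataFrame. The numbers are made up.

```python
# Worked example of the mean-|SHAP| ranking; values are invented for illustration.
import numpy as np
import pandas as pd

shap_values = np.array([[0.2, -0.5, 0.1],
                        [-0.4, 0.3, 0.0]])  # 2 instances x 3 features
mean_abs_shap = np.abs(shap_values).mean(axis=0).flatten()  # -> [0.3, 0.4, 0.05]

summary_df = pd.DataFrame({
    "feature": [f"feature_{i}" for i in range(len(mean_abs_shap))],
    "mean_abs_shap_value": mean_abs_shap,
}).sort_values("mean_abs_shap_value", ascending=False)
print(summary_df)  # feature_1 first, then feature_0, then feature_2
```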
{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_trainer.py

@@ -95,14 +95,16 @@ class MyTrainer:
             batch_size=batch_size,
             shuffle=shuffle,
             num_workers=loader_workers,
-            pin_memory=(self.device.type
+            pin_memory=("cuda" in self.device.type),
+            drop_last=True # Drops the last batch if incomplete, selecting a good batch size is key.
         )
+
         self.test_loader = DataLoader(
             dataset=self.test_dataset,
             batch_size=batch_size,
             shuffle=False,
             num_workers=loader_workers,
-            pin_memory=(self.device.type
+            pin_memory=("cuda" in self.device.type)
         )
 
     def fit(self, epochs: int = 10, batch_size: int = 10, shuffle: bool = True):
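A minimal sketch of the two loader settings this hunk introduces, with a stand-in dataset sized so drop_last visibly discards a partial batch; only the training loader drops it, matching the diff.

```python
# Stand-in dataset: 103 samples with batch_size=10 leaves a trailing batch of 3.
import torch
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = TensorDataset(torch.rand(103, 4), torch.rand(103, 1))

train_loader = DataLoader(
    dataset,
    batch_size=10,
    shuffle=True,
    pin_memory=("cuda" in device.type),  # page-locked host memory speeds GPU copies
    drop_last=True,                      # 10 full batches; the final 3 samples are skipped
)
print(len(train_loader))  # 10, not 11
```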