dragon-ml-toolbox 5.3.0__py3-none-any.whl → 5.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dragon-ml-toolbox might be problematic.

dragon_ml_toolbox-5.3.0.dist-info/METADATA → dragon_ml_toolbox-5.3.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 5.3.0
+Version: 5.3.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox-5.3.0.dist-info/RECORD → dragon_ml_toolbox-5.3.1.dist-info/RECORD CHANGED
@@ -1,15 +1,15 @@
-dragon_ml_toolbox-5.3.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-5.3.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
+dragon_ml_toolbox-5.3.1.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-5.3.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
 ml_tools/ETL_engineering.py,sha256=4wwZXi9_U7xfCY70jGBaKniOeZ0m75ppxWpQBd_DmLc,39369
 ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
 ml_tools/MICE_imputation.py,sha256=oFHg-OytOzPYTzBR_wIRHhP71cMn3aupDeT59ABsXlQ,11576
-ml_tools/ML_callbacks.py,sha256=eOCSc-1_e5vC2dQN1ydHGKDLeJ3DqB-eLRLuXp2DpFM,13257
+ml_tools/ML_callbacks.py,sha256=hOGWYM6ndaH0ibaHgM14j74MtWFalToY-oTnB2jsQ4A,13268
 ml_tools/ML_datasetmaster.py,sha256=bbKCNA_b_uDIfxP9YIYKZm-VSfUSD15LvegFxpE9DIQ,34315
-ml_tools/ML_evaluation.py,sha256=4dVqe6JF1Ukmk1sAcY8E5EG1oB1_oy2HXE5OT-pZwCs,10273
+ml_tools/ML_evaluation.py,sha256=LX6UkUC80y43lYKBkw03CptZ3PJGkZXfmZZHL-2kd1s,11590
 ml_tools/ML_inference.py,sha256=Fh-X2UQn3AznWBjf-7iPSxwE-EzkGQm1VEIRUAkURmE,5336
 ml_tools/ML_models.py,sha256=SJhKHGAN2VTBqzcHUOpFWuVZ2Y7U1M4P_axG_LNYWcI,6460
 ml_tools/ML_optimization.py,sha256=zGKpWW4SL1-3iiHglDP-dkuADL73T0kxs3Dc-Lyishs,9671
-ml_tools/ML_trainer.py,sha256=t58Ka6ryaYm0Fi5xje-e-fkmz9DwDLIeJLbh04n_gDg,15034
+ml_tools/ML_trainer.py,sha256=ENOxTq07kWYn7ZolMfXYLSy-cLZOdty0dRmutA84SV4,15146
 ml_tools/PSO_optimization.py,sha256=stH2Ux1sftQgX5EwLc85kHcoT4Rmz6zv7sH2yzf4Zrw,22710
 ml_tools/RNN_forecast.py,sha256=2CyjBLSYYc3xLHxwLXUmP5Qv8AmV1OB_EndETNX1IBk,1956
 ml_tools/SQL.py,sha256=9zzS6AFEJM9aj6nE31hDe8S9TqLonk-J1amwZoiHNbk,10468
@@ -26,7 +26,7 @@ ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
 ml_tools/optimization_tools.py,sha256=MuT4OG7_r1QqLUti-yYix7QeCpglezD0oe9BDCq0QXk,5086
 ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
 ml_tools/utilities.py,sha256=T5xbxzBr14odUj7KncSeg-tJzqjmSDLOOmxEaGYLLi4,18447
-dragon_ml_toolbox-5.3.0.dist-info/METADATA,sha256=Lu_JBMfkCPssLk-a2v4b-oZu86cFK1OIB4HtHspVRIk,6643
-dragon_ml_toolbox-5.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-5.3.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-5.3.0.dist-info/RECORD,,
+dragon_ml_toolbox-5.3.1.dist-info/METADATA,sha256=XMn0E2Bh_6X97SScFy08jxJvo_KYeS5yuApaHTDPeqY,6643
+dragon_ml_toolbox-5.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-5.3.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-5.3.1.dist-info/RECORD,,
ml_tools/ML_callbacks.py CHANGED
@@ -124,7 +124,7 @@ class EarlyStopping(Callback):
             inferred from the name of the monitored quantity.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=3, mode: Literal['auto', 'min', 'max']='auto', verbose: int=1):
+    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta: float=0.0, patience: int=5, mode: Literal['auto', 'min', 'max']='auto', verbose: int=0):
         super().__init__()
         self.monitor = monitor
         self.patience = patience
@@ -202,7 +202,7 @@ class ModelCheckpoint(Callback):
         verbose (int): Verbosity mode.
     """
     def __init__(self, save_dir: Union[str,Path], monitor: str = LogKeys.VAL_LOSS,
-                 save_best_only: bool = False, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 1):
+                 save_best_only: bool = True, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 0):
         super().__init__()
         self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
         if not self.save_dir.is_dir():
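Reviewer note: the defaults changed here are user-facing. EarlyStopping now waits 5 epochs (was 3) and is silent by default, and ModelCheckpoint now keeps only the best weights instead of saving every time. A minimal sketch of the new defaults, assuming only the constructor signatures visible in this diff; the surrounding trainer wiring is not shown here:

    # Minimal sketch; only the constructor signatures are taken from this diff.
    from pathlib import Path
    from ml_tools.ML_callbacks import EarlyStopping, ModelCheckpoint

    # 5.3.1 defaults: patience=5, verbose=0 (previously patience=3, verbose=1).
    early_stop = EarlyStopping(min_delta=0.001)

    # 5.3.1 defaults: save_best_only=True, verbose=0 (previously False and 1).
    # Pass save_best_only=False explicitly to restore the old save-every-improvement behavior.
    checkpoint = ModelCheckpoint(save_dir=Path("checkpoints"))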
ml_tools/ML_evaluation.py CHANGED
@@ -195,7 +195,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Optiona
     plt.close(fig_tvp)
 
 
-def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain: torch.Tensor,
+def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], instances_to_explain: Union[torch.Tensor,np.ndarray],
                       feature_names: Optional[list[str]]=None, save_dir: Optional[Union[str, Path]] = None):
     """
     Calculates SHAP values and saves summary plots and data.
@@ -207,24 +207,54 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
         feature_names (list of str | None): Names of the features for plot labeling.
         save_dir (str | Path | None): Directory to save SHAP artifacts. If None, dot plot is shown.
     """
+    # everything to numpy
+    if isinstance(background_data, np.ndarray):
+        background_data_np = background_data
+    else:
+        background_data_np = background_data.numpy()
+
+    if isinstance(instances_to_explain, np.ndarray):
+        instances_to_explain_np = instances_to_explain
+    else:
+        instances_to_explain_np = instances_to_explain.numpy()
+
+    # --- Data Validation Step ---
+    if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
+        _LOGGER.error("❌ Input data for SHAP contains NaN values. Aborting explanation.")
+        return
+
     print("\n--- SHAP Value Explanation ---")
-    print("Calculating SHAP values... ")
 
     model.eval()
     model.cpu()
 
-    explainer = shap.DeepExplainer(model, background_data)
-    shap_values = explainer.shap_values(instances_to_explain)
-
-    shap_values_for_plot = shap_values[1] if isinstance(shap_values, list) else shap_values
-    if isinstance(shap_values, list):
-        _LOGGER.info("Using SHAP values for the positive class (class 1) for plots.")
+    # 1. Summarize the background data.
+    # Summarize the background data using k-means. 10-50 clusters is a good starting point.
+    background_summary = shap.kmeans(background_data_np, 30)
+
+    # 2. Define a prediction function wrapper that SHAP can use. It must take a numpy array and return a numpy array.
+    def prediction_wrapper(x_np: np.ndarray) -> np.ndarray:
+        # Convert numpy data to torch tensor
+        x_torch = torch.from_numpy(x_np).float()
+        with torch.no_grad():
+            # Get model output
+            output = model(x_torch)
+        # Return as numpy array
+        return output.cpu().numpy().flatten()
 
+    # 3. Create the KernelExplainer
+    explainer = shap.KernelExplainer(prediction_wrapper, background_summary)
+
+    print("Calculating SHAP values with KernelExplainer...")
+    shap_values = explainer.shap_values(instances_to_explain_np, l1_reg="aic")
+
     if save_dir:
         save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+        plt.ioff()
+
         # Save Bar Plot
         bar_path = save_dir_path / "shap_bar_plot.svg"
-        shap.summary_plot(shap_values_for_plot, instances_to_explain, feature_names=feature_names, plot_type="bar", show=False)
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
         plt.title("SHAP Feature Importance")
         plt.tight_layout()
         plt.savefig(bar_path)
@@ -233,7 +263,7 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
 
         # Save Dot Plot
         dot_path = save_dir_path / "shap_dot_plot.svg"
-        shap.summary_plot(shap_values_for_plot, instances_to_explain, feature_names=feature_names, plot_type="dot", show=False)
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
         plt.title("SHAP Feature Importance")
         plt.tight_layout()
         plt.savefig(dot_path)
@@ -242,18 +272,25 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
 
         # Save Summary Data to CSV
         summary_path = save_dir_path / "shap_summary.csv"
-        mean_abs_shap = np.abs(shap_values_for_plot).mean(axis=0)
+        # Ensure the array is 1D before creating the DataFrame
+        mean_abs_shap = np.abs(shap_values).mean(axis=0).flatten()
+
         if feature_names is None:
             feature_names = [f'feature_{i}' for i in range(len(mean_abs_shap))]
+
         summary_df = pd.DataFrame({
             'feature': feature_names,
             'mean_abs_shap_value': mean_abs_shap
         }).sort_values('mean_abs_shap_value', ascending=False)
+
        summary_df.to_csv(summary_path, index=False)
+
         _LOGGER.info(f"📝 SHAP summary data saved as '{summary_path.name}'")
+        plt.ion()
+
     else:
         _LOGGER.info("No save directory provided. Displaying SHAP dot plot.")
-        shap.summary_plot(shap_values_for_plot, instances_to_explain, feature_names=feature_names, plot_type="dot")
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot")
 
 
 def info():
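Reviewer note: shap_summary_plot has been reworked from shap.DeepExplainer to shap.KernelExplainer driven through a NumPy prediction wrapper, so it now accepts torch tensors or NumPy arrays and aborts early on NaN inputs. A usage sketch against a toy model; the model, data, and save path below are illustrative stand-ins, not part of the package:

    # Illustrative call only; the toy model and random data are stand-ins.
    import numpy as np
    import torch
    from ml_tools.ML_evaluation import shap_summary_plot

    # Toy single-output model standing in for a trained network.
    model = torch.nn.Sequential(
        torch.nn.Linear(8, 16), torch.nn.ReLU(), torch.nn.Linear(16, 1)
    )

    # As of 5.3.1, plain NumPy arrays are accepted alongside torch tensors.
    background = np.random.rand(100, 8).astype(np.float32)  # summarized to 30 k-means clusters internally
    instances = np.random.rand(25, 8).astype(np.float32)

    shap_summary_plot(
        model,
        background_data=background,
        instances_to_explain=instances,
        feature_names=[f"feature_{i}" for i in range(8)],
        save_dir="shap_artifacts",  # writes bar/dot SVG plots plus shap_summary.csv
    )

KernelExplainer is model-agnostic but considerably slower than DeepExplainer, which is the usual trade-off for this swap; summarizing the background with shap.kmeans keeps the cost manageable.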
ml_tools/ML_trainer.py CHANGED
@@ -95,14 +95,16 @@ class MyTrainer:
             batch_size=batch_size,
             shuffle=shuffle,
             num_workers=loader_workers,
-            pin_memory=(self.device.type == "cuda")
+            pin_memory=("cuda" in self.device.type),
+            drop_last=True # Drops the last batch if incomplete, selecting a good batch size is key.
         )
+
         self.test_loader = DataLoader(
             dataset=self.test_dataset,
             batch_size=batch_size,
             shuffle=False,
             num_workers=loader_workers,
-            pin_memory=(self.device.type == "cuda")
+            pin_memory=("cuda" in self.device.type)
         )
 
     def fit(self, epochs: int = 10, batch_size: int = 10, shuffle: bool = True):
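Reviewer note: the train loader now sets drop_last=True, so a trailing batch smaller than batch_size is silently skipped each epoch (the test loader keeps every sample). A standalone PyTorch sketch of that behavior, independent of MyTrainer:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    # 10 samples with batch_size=4: two full batches, and the final batch of 2 is dropped.
    dataset = TensorDataset(torch.arange(10).float().unsqueeze(1))
    loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)
    print(len(loader))  # -> 2

Choosing a batch_size that divides (or nearly divides) the training-set size avoids discarding data.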