dragon-ml-toolbox 5.3.0.tar.gz → 5.3.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-5.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-5.3.1}/PKG-INFO +1 -1
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_callbacks.py +2 -2
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_evaluation.py +49 -12
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_trainer.py +4 -2
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/pyproject.toml +1 -1
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/LICENSE +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/README.md +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/setup.cfg +0 -0
{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_callbacks.py

@@ -124,7 +124,7 @@ class EarlyStopping(Callback):
             inferred from the name of the monitored quantity.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=
+    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta: float=0.0, patience: int=5, mode: Literal['auto', 'min', 'max']='auto', verbose: int=0):
         super().__init__()
         self.monitor = monitor
         self.patience = patience
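A brief usage sketch of the signature completed above; the argument values are illustrative and the import path is inferred from the package layout, so treat both as assumptions.

```python
# Hypothetical usage of the 5.3.1 EarlyStopping signature; values are examples.
from ml_tools.ML_callbacks import EarlyStopping

early_stop = EarlyStopping(
    min_delta=0.001,  # smallest change in the monitored loss counted as improvement
    patience=5,       # epochs without improvement before training stops
    mode="auto",      # min/max inferred from the monitored quantity's name
    verbose=1,
)
```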
@@ -202,7 +202,7 @@ class ModelCheckpoint(Callback):
         verbose (int): Verbosity mode.
     """
     def __init__(self, save_dir: Union[str,Path], monitor: str = LogKeys.VAL_LOSS,
-                 save_best_only: bool =
+                 save_best_only: bool = True, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 0):
         super().__init__()
         self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
         if not self.save_dir.is_dir():
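Likewise, a hedged sketch of constructing ModelCheckpoint with the defaults that 5.3.1 makes explicit; the directory name is an example, not taken from the diff.

```python
# Hypothetical ModelCheckpoint construction; save_dir is an example path.
from ml_tools.ML_callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint(
    save_dir="checkpoints",  # resolved and created via make_fullpath
    save_best_only=True,     # keep only the best weights for the monitored metric
    mode="auto",
    verbose=1,
)
```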
{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_evaluation.py

@@ -195,7 +195,7 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Optiona
     plt.close(fig_tvp)
 
 
-def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain: torch.Tensor,
+def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], instances_to_explain: Union[torch.Tensor,np.ndarray],
                       feature_names: Optional[list[str]]=None, save_dir: Optional[Union[str, Path]] = None):
     """
     Calculates SHAP values and saves summary plots and data.
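The hunk above widens the accepted input types from tensors only to tensors or NumPy arrays. A minimal calling sketch under that contract; the model, shapes, and output directory are stand-ins:

```python
# Illustrative call: a NumPy background with a tensor batch; both are now accepted.
import numpy as np
import torch
from ml_tools.ML_evaluation import shap_summary_plot

model = torch.nn.Linear(4, 1)        # stand-in regression model
background = np.random.rand(100, 4)  # np.ndarray is accepted as of 5.3.1
instances = torch.rand(20, 4)        # torch.Tensor still works

shap_summary_plot(model, background, instances,
                  feature_names=[f"f{i}" for i in range(4)],
                  save_dir="shap_artifacts")
```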
@@ -207,24 +207,54 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
         feature_names (list of str | None): Names of the features for plot labeling.
         save_dir (str | Path | None): Directory to save SHAP artifacts. If None, dot plot is shown.
     """
+    # everything to numpy
+    if isinstance(background_data, np.ndarray):
+        background_data_np = background_data
+    else:
+        background_data_np = background_data.numpy()
+
+    if isinstance(instances_to_explain, np.ndarray):
+        instances_to_explain_np = instances_to_explain
+    else:
+        instances_to_explain_np = instances_to_explain.numpy()
+
+    # --- Data Validation Step ---
+    if np.isnan(background_data_np).any() or np.isnan(instances_to_explain_np).any():
+        _LOGGER.error("❌ Input data for SHAP contains NaN values. Aborting explanation.")
+        return
+
     print("\n--- SHAP Value Explanation ---")
-    print("Calculating SHAP values... ")
 
     model.eval()
     model.cpu()
 
-
-
-
-
-
-
+    # 1. Summarize the background data.
+    # Summarize the background data using k-means. 10-50 clusters is a good starting point.
+    background_summary = shap.kmeans(background_data_np, 30)
+
+    # 2. Define a prediction function wrapper that SHAP can use. It must take a numpy array and return a numpy array.
+    def prediction_wrapper(x_np: np.ndarray) -> np.ndarray:
+        # Convert numpy data to torch tensor
+        x_torch = torch.from_numpy(x_np).float()
+        with torch.no_grad():
+            # Get model output
+            output = model(x_torch)
+        # Return as numpy array
+        return output.cpu().numpy().flatten()
 
+    # 3. Create the KernelExplainer
+    explainer = shap.KernelExplainer(prediction_wrapper, background_summary)
+
+    print("Calculating SHAP values with KernelExplainer...")
+    shap_values = explainer.shap_values(instances_to_explain_np, l1_reg="aic")
+
     if save_dir:
         save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+        plt.ioff()
+
         # Save Bar Plot
         bar_path = save_dir_path / "shap_bar_plot.svg"
-        shap.summary_plot(
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="bar", show=False)
         plt.title("SHAP Feature Importance")
         plt.tight_layout()
         plt.savefig(bar_path)
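This hunk moves the function to the model-agnostic KernelExplainer workflow: compress the background with k-means, wrap the model as an ndarray-to-ndarray function, then explain. A self-contained sketch of the same pattern follows; shap.kmeans, shap.KernelExplainer, and the l1_reg argument are real shap APIs, while the model and data are stand-ins.

```python
# Standalone sketch of the KernelExplainer pattern used above; model/data are stand-ins.
import numpy as np
import shap
import torch

model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1))
model.eval()

background = np.random.rand(200, 4)
instances = np.random.rand(10, 4)

def predict(x_np: np.ndarray) -> np.ndarray:
    # ndarray -> tensor -> model -> ndarray, as KernelExplainer requires
    with torch.no_grad():
        return model(torch.from_numpy(x_np).float()).numpy().flatten()

background_summary = shap.kmeans(background, 30)  # 30 centroids stand in for 200 rows
explainer = shap.KernelExplainer(predict, background_summary)
shap_values = explainer.shap_values(instances, l1_reg="aic")
print(np.abs(shap_values).mean(axis=0))  # per-feature mean |SHAP|
```

KernelExplainer's cost grows with the number of background rows, which is why the new code summarizes them with k-means before explaining.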
@@ -233,7 +263,7 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
 
         # Save Dot Plot
         dot_path = save_dir_path / "shap_dot_plot.svg"
-        shap.summary_plot(
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot", show=False)
         plt.title("SHAP Feature Importance")
         plt.tight_layout()
         plt.savefig(dot_path)
@@ -242,18 +272,25 @@ def shap_summary_plot(model, background_data: torch.Tensor, instances_to_explain
 
         # Save Summary Data to CSV
         summary_path = save_dir_path / "shap_summary.csv"
-
+        # Ensure the array is 1D before creating the DataFrame
+        mean_abs_shap = np.abs(shap_values).mean(axis=0).flatten()
+
         if feature_names is None:
             feature_names = [f'feature_{i}' for i in range(len(mean_abs_shap))]
+
         summary_df = pd.DataFrame({
             'feature': feature_names,
             'mean_abs_shap_value': mean_abs_shap
         }).sort_values('mean_abs_shap_value', ascending=False)
+
         summary_df.to_csv(summary_path, index=False)
+
         _LOGGER.info(f"📝 SHAP summary data saved as '{summary_path.name}'")
+        plt.ion()
+
     else:
         _LOGGER.info("No save directory provided. Displaying SHAP dot plot.")
-        shap.summary_plot(
+        shap.summary_plot(shap_values, instances_to_explain_np, feature_names=feature_names, plot_type="dot")
 
 
 def info():
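A small worked example of the ranking this hunk writes to shap_summary.csv: the per-feature mean of absolute SHAP values, flattened to 1-D before building the DataFrame. The numbers are made up.

```python
# Worked example of the mean-|SHAP| ranking; values are invented for illustration.
import numpy as np
import pandas as pd

shap_values = np.array([[0.2, -0.5, 0.1],
                        [-0.4, 0.3, 0.0]])  # 2 instances x 3 features
mean_abs_shap = np.abs(shap_values).mean(axis=0).flatten()  # -> [0.3, 0.4, 0.05]

summary_df = pd.DataFrame({
    "feature": [f"feature_{i}" for i in range(len(mean_abs_shap))],
    "mean_abs_shap_value": mean_abs_shap,
}).sort_values("mean_abs_shap_value", ascending=False)
print(summary_df)  # feature_1 first, then feature_0, then feature_2
```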
{dragon_ml_toolbox-5.3.0 → dragon_ml_toolbox-5.3.1}/ml_tools/ML_trainer.py

@@ -95,14 +95,16 @@ class MyTrainer:
             batch_size=batch_size,
             shuffle=shuffle,
             num_workers=loader_workers,
-            pin_memory=(self.device.type
+            pin_memory=("cuda" in self.device.type),
+            drop_last=True # Drops the last batch if incomplete, selecting a good batch size is key.
         )
+
         self.test_loader = DataLoader(
             dataset=self.test_dataset,
             batch_size=batch_size,
             shuffle=False,
             num_workers=loader_workers,
-            pin_memory=(self.device.type
+            pin_memory=("cuda" in self.device.type)
         )
 
     def fit(self, epochs: int = 10, batch_size: int = 10, shuffle: bool = True):
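A minimal sketch of the two loader settings this hunk introduces, with a stand-in dataset sized so drop_last visibly discards a partial batch; only the training loader drops it, matching the diff.

```python
# Stand-in dataset: 103 samples with batch_size=10 leaves a trailing batch of 3.
import torch
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = TensorDataset(torch.rand(103, 4), torch.rand(103, 1))

train_loader = DataLoader(
    dataset,
    batch_size=10,
    shuffle=True,
    pin_memory=("cuda" in device.type),  # page-locked host memory speeds GPU copies
    drop_last=True,                      # 10 full batches; the final 3 samples are skipped
)
print(len(train_loader))  # 10, not 11
```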