dragon-ml-toolbox 20.11.0__py3-none-any.whl → 20.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dragon_ml_toolbox-20.13.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dragon-ml-toolbox
- Version: 20.11.0
+ Version: 20.13.0
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
  License-Expression: MIT
dragon_ml_toolbox-20.13.0.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
- dragon_ml_toolbox-20.11.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
- dragon_ml_toolbox-20.11.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+ dragon_ml_toolbox-20.13.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+ dragon_ml_toolbox-20.13.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
  ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
  ml_tools/ETL_cleaning/__init__.py,sha256=gLRHF-qzwpqKTvbbn9chIQELeUDh_XGpBRX28j-5IqI,545
@@ -46,7 +46,7 @@ ml_tools/ML_evaluation/_loss.py,sha256=1a4O25i3Ya_3naNZNL7ELLUL46BY86g1scA7d7q2U
  ml_tools/ML_evaluation/_regression.py,sha256=UZA7_fg85ZKJQWszioWDtmkplSiXeHJk2fBYR5bRXHY,11225
  ml_tools/ML_evaluation/_sequence.py,sha256=gUk9Uvmy7MrXkfrriMnfypkgJU5XERHdqekTa2gBaOM,8004
  ml_tools/ML_evaluation/_vision.py,sha256=abBHQ6Z2GunHNusL3wcLgfI1FVNA6hBUBTq1eOA8FSA,11489
- ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py,sha256=RMWkSmqHbb0Lj7W_uQJInEexOjXYmhMkcVHZT77wUrc,18847
+ ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py,sha256=eCP19o4sxfG0XlAVKiuuIxdtxO5lqCc0SuhWXx6eObY,20079
  ml_tools/ML_evaluation_captum/__init__.py,sha256=DZDoZXexCI49JNl_tTmFfYW4hTUYK5QQLex01wMfhnk,333
  ml_tools/ML_finalize_handler/_ML_finalize_handler.py,sha256=g-vkHJDTGXZsKOUA-Yfg7EuA1SmaHjzesCPiAyRMg2k,7054
  ml_tools/ML_finalize_handler/__init__.py,sha256=VQyLbCQUcliAAFiOAsnPhyJ7UVYgbSqAbAnpqeOnRSg,198
@@ -86,7 +86,7 @@ ml_tools/ML_trainer/__init__.py,sha256=42kueHa7Z0b_yLbywNCgIxlW6WmgLBqkTFwKH7vFL
  ml_tools/ML_trainer/_base_trainer.py,sha256=0ATm672NRsjJ6nv_NEl6-OEd9Bst1-s5OPxfG4qe8Lg,18075
  ml_tools/ML_trainer/_dragon_detection_trainer.py,sha256=B5F93PPnp2fYQmj1SYFRnAPVA39JwZUtJRMCdpSQF7k,16235
  ml_tools/ML_trainer/_dragon_sequence_trainer.py,sha256=Tj4YGgMrCkLnnNUlT_8wcdJFFcFhsdux308QPiqj-tw,23509
- ml_tools/ML_trainer/_dragon_trainer.py,sha256=bvSen_liut6B7gbg53MxOXKpJUkRaHtXDXW2SXBWPYQ,58553
+ ml_tools/ML_trainer/_dragon_trainer.py,sha256=KLDLoUcCLIteoGtrLQDTbnD9Cf73V7TUiGT7CGSdeks,58574
  ml_tools/ML_utilities/__init__.py,sha256=71T3RDKDgHVvFrEr0G7tjuwbDVk_4JZGzwZtejC3PuE,739
  ml_tools/ML_utilities/_artifact_finder.py,sha256=X4xz_rmi0jVan8Sun_6431TcQiNM-GDHm-DHLA1zYms,15816
  ml_tools/ML_utilities/_inspection.py,sha256=mXTnjGmdDpBfY99xfekyrGbSvrWHBcVndivMbqPD4PI,13186
@@ -119,7 +119,7 @@ ml_tools/ensemble_learning/_ensemble_learning.py,sha256=MHDZBR20_nStlSSeThFI3bSu
  ml_tools/excel_handler/__init__.py,sha256=AaWM3n_dqBhJLTs3OEA57ex5YykKXNOwVCyHlVsdnqI,530
  ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8WxDOfQ4sgxxLs,13965
  ml_tools/keys/__init__.py,sha256=-0c2pmrhyfROc-oQpEjJGLBMhSagA3CyFijQaaqZRqU,399
- ml_tools/keys/_keys.py,sha256=Kr73o9SaH5Y3DT0z0H-1eLwlBplJmjisjoO_EoUNkAg,9388
+ ml_tools/keys/_keys.py,sha256=YE_Ux2FYObfWurcQvfCvA3ZehwOvKvtCvIViUuYAYNM,9447
  ml_tools/math_utilities/__init__.py,sha256=K7Obkkc4rPKj4EbRZf1BsXHfiCg7FXYv_aN9Yc2Z_Vg,400
  ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
  ml_tools/optimization_tools/__init__.py,sha256=KD8JXpfGuPndO4AHnjJGu6uV1GRwhOfboD0KZV45kzw,658
@@ -143,7 +143,7 @@ ml_tools/utilities/__init__.py,sha256=h4lE3SQstg-opcQj6QSKhu-HkqSbmHExsWoM9vC5D9
  ml_tools/utilities/_translate.py,sha256=U8hRPa3PmTpIf9n9yR3gBGmp_hkcsjQLwjAHSHc0WHs,10325
  ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
  ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
- dragon_ml_toolbox-20.11.0.dist-info/METADATA,sha256=KiKepG9k7M1RbCXxEutcr7EkvDPaWIRiKoSvrTR1HSw,7889
- dragon_ml_toolbox-20.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dragon_ml_toolbox-20.11.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
- dragon_ml_toolbox-20.11.0.dist-info/RECORD,,
+ dragon_ml_toolbox-20.13.0.dist-info/METADATA,sha256=bTnTpMlvOFu2IlYpmc0QphbYeqbslxzptluUbEWaO-s,7889
+ dragon_ml_toolbox-20.13.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ dragon_ml_toolbox-20.13.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+ dragon_ml_toolbox-20.13.0.dist-info/RECORD,,
dragon_ml_toolbox-20.13.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (80.10.2)
  Root-Is-Purelib: true
  Tag: py3-none-any
ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py CHANGED
@@ -30,7 +30,8 @@ def captum_feature_importance(model: nn.Module,
  save_dir: Union[str, Path],
  target_names: Optional[list[str]] = None,
  n_steps: int = 50,
- device: Union[str, torch.device] = 'cpu'):
+ device: Union[str, torch.device] = 'cpu',
+ verbose: int = 0):
  """
  Calculates feature importance using Captum's Integrated Gradients.
@@ -49,7 +50,7 @@ def captum_feature_importance(model: nn.Module,
  - If `None`, generic names (e.g., "Output_0") will be generated based on model output shape.
  n_steps (int): Number of steps for the integral approximation. Higher means more accurate but slower.
  device (str | torch.device): Torch device.
-
+ verbose (int): Verbosity level; values greater than 0 enable the convergence-delta warning.
  <br>

  ### NOTE:
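
For orientation, the hunks above only extend the public signature with a `verbose` flag; the helper itself wraps Captum's `IntegratedGradients`, as its docstring states. A minimal sketch of the underlying Captum call follows — the model, input shapes, and zero baseline are illustrative assumptions, not the toolbox's actual internals:

```python
# Sketch of the Captum call that captum_feature_importance wraps.
# Model, shapes, and baseline choice are assumptions for illustration.
import torch
import torch.nn as nn
from captum.attr import IntegratedGradients

model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 1))  # stand-in model
model.eval()

inputs = torch.randn(100, 8)          # 100 samples, 8 features (assumed)
baselines = torch.zeros_like(inputs)  # zero baseline, a common default

ig = IntegratedGradients(model)
attributions, delta = ig.attribute(
    inputs,
    baselines=baselines,
    n_steps=50,                     # the n_steps parameter documented above
    return_convergence_delta=True,  # feeds the convergence check further down
)

# Aggregate to per-feature importance the same way the helper does:
# mean absolute attribution across samples.
mean_abs_attr = attributions.abs().mean(dim=0)  # shape: (8,)
```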
@@ -127,7 +128,8 @@ def captum_feature_importance(model: nn.Module,
  save_dir=save_dir_path,
  n_steps=n_steps,
  file_suffix=f"_{clean_name}",
- target_name=name # Pass original name for plotting
+ target_name=name, # Pass original name for plotting
+ verbose=verbose
  )
@@ -139,7 +141,8 @@ def _process_single_target(ig: 'IntegratedGradients', # type: ignore
  save_dir: Path,
  n_steps: int,
  file_suffix: str,
- target_name: str = ""):
+ target_name: str = "",
+ verbose: int = 0):
  """
  Private helper to run the attribution, aggregation, and saving for a single context.
  """
@@ -153,8 +156,8 @@ def _process_single_target(ig: 'IntegratedGradients', # type: ignore
  return_convergence_delta=True)
  # Check convergence quality
  mean_delta = torch.mean(torch.abs(delta)).item()
- if mean_delta > 0.1:
-     _LOGGER.warning(f"Captum Convergence Delta is high ({mean_delta:.4f}). The attribution approximation may be inaccurate. Consider increasing 'n_steps'.")
+ if mean_delta > 0.1 and verbose > 0:
+     _LOGGER.warning(f"Captum Convergence Delta is high ({mean_delta:.4f}). Consider increasing 'n_steps'.")

  except Exception as e:
      _LOGGER.error(f"Captum attribution failed for target '{target_index}': {e}")
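
The `delta` checked here measures how far the attributions fall short of Integrated Gradients' completeness axiom: per sample, the attributions should sum to `F(x) - F(baseline)`. The change above does not alter that check, it only gates the warning behind `verbose`. Continuing the sketch from before, the gap can be verified by hand:

```python
# The convergence delta is the per-sample gap in the completeness axiom:
#   sum_i attribution_i  ≈  F(x) - F(baseline)
with torch.no_grad():
    gap = attributions.sum(dim=1) - (model(inputs) - model(baselines)).squeeze(1)
mean_delta = gap.abs().mean().item()

# New behavior in 20.13.0: only warn when the caller asked for verbosity.
verbose = 1
if mean_delta > 0.1 and verbose > 0:
    print(f"Captum Convergence Delta is high ({mean_delta:.4f}). Consider increasing 'n_steps'.")
```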
@@ -198,11 +201,36 @@ def _process_single_target(ig: 'IntegratedGradients', # type: ignore
  min_len = min(len(mean_abs_attr), len(feature_names))
  mean_abs_attr = mean_abs_attr[:min_len]
  feature_names = feature_names[:min_len]
+
+ # Calculate percentages (before min-max scaling, to preserve relative importance)
+ total_attr_sum = np.sum(mean_abs_attr)
+ if total_attr_sum > 0:
+     attr_percentages = (mean_abs_attr / total_attr_sum) * 100.0
+ else:
+     attr_percentages = np.zeros_like(mean_abs_attr)
+
+ # Min-max scaling
+ target_min = 0.01
+ target_max = 1.0
+
+ _min = np.min(mean_abs_attr)
+ _max = np.max(mean_abs_attr)
+
+ if _max > _min:
+     # 1. Normalize to [0, 1]
+     mean_abs_attr = (mean_abs_attr - _min) / (_max - _min)
+     # 2. Scale to [target_min, target_max]
+     mean_abs_attr = mean_abs_attr * (target_max - target_min) + target_min
+ else:
+     # Fallback: all values are identical; use target_min if all zero, else target_max
+     fill_val = target_min if _max == 0 else target_max
+     mean_abs_attr = np.full_like(mean_abs_attr, fill_val)

  # --- Save Data to CSV ---
  summary_df = pd.DataFrame({
  CaptumKeys.FEATURE_COLUMN: feature_names,
- CaptumKeys.IMPORTANCE_COLUMN: mean_abs_attr
+ CaptumKeys.IMPORTANCE_COLUMN: mean_abs_attr,
+ CaptumKeys.PERCENT_COLUMN: attr_percentages
  }).sort_values(CaptumKeys.IMPORTANCE_COLUMN, ascending=False)

  csv_name = f"{CaptumKeys.SAVENAME}{file_suffix}.csv"
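
This new block is the substantive change in the release. Note the ordering: percentages are computed from the raw mean absolute attributions first, because the subsequent min-max rescale into [0.01, 1.0] shifts values and destroys proportionality. A standalone sketch with made-up numbers shows both steps:

```python
# Worked example of the new percentage + min-max scaling logic (hypothetical values).
import numpy as np

mean_abs_attr = np.array([0.02, 0.08, 0.10])  # raw mean |attribution| per feature

# 1. Percentages first, so relative proportions survive the rescale below.
attr_percentages = mean_abs_attr / mean_abs_attr.sum() * 100.0
print(attr_percentages)  # [10. 40. 50.]

# 2. Min-max scale the raw values into [0.01, 1.0] for the importance column.
target_min, target_max = 0.01, 1.0
lo, hi = mean_abs_attr.min(), mean_abs_attr.max()
scaled = (mean_abs_attr - lo) / (hi - lo)                 # -> [0, 1]
scaled = scaled * (target_max - target_min) + target_min  # -> [0.01, 1.0]
print(scaled)  # [0.01   0.7525 1.    ]
```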
@@ -210,10 +238,13 @@ def _process_single_target(ig: 'IntegratedGradients', # type: ignore
  summary_df.to_csv(csv_path, index=False)

  # --- Generate Plot ---
- plot_df = summary_df.head(20).sort_values(CaptumKeys.IMPORTANCE_COLUMN, ascending=True)
- plt.figure(figsize=(10, 8), dpi=200)
- plt.barh(plot_df[CaptumKeys.FEATURE_COLUMN], plot_df[CaptumKeys.IMPORTANCE_COLUMN], color='mediumpurple')
- plt.xlabel("Mean Absolute Attribution")
+ plot_df = summary_df.head(20).sort_values(CaptumKeys.PERCENT_COLUMN, ascending=True)
+ plt.figure(figsize=(10, 8), dpi=300)
+ plt.barh(plot_df[CaptumKeys.FEATURE_COLUMN], plot_df[CaptumKeys.PERCENT_COLUMN], color='mediumpurple')
+ # plt.xlim(0, 1.05) # standardized scale # Removed to reflect actual percentages
+ plt.xlim(left=0) # start at 0
+ # plt.xlabel("Scaled Mean Absolute Attribution")
+ plt.xlabel("Relative Importance (%)")

  title = "Feature Importance"
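
The chart now ranks and sizes the top-20 bars by the percentage column rather than the scaled attributions, at higher DPI and with an open-ended x-axis. A sketch that rebuilds the same chart from a saved summary CSV — the file path is hypothetical, and the column strings are copied from the CaptumKeys changes at the end of this diff:

```python
# Rebuild the updated chart from a saved captum summary (hypothetical path;
# column names taken from the CaptumKeys diff below).
import matplotlib.pyplot as plt
import pandas as pd

summary_df = pd.read_csv("captum_summary.csv")

plot_df = summary_df.head(20).sort_values("Relative Importance(%)", ascending=True)
plt.figure(figsize=(10, 8), dpi=300)
plt.barh(plot_df["Feature"], plot_df["Relative Importance(%)"], color="mediumpurple")
plt.xlim(left=0)  # percentages start at 0; no fixed upper bound
plt.xlabel("Relative Importance (%)")
plt.title("Feature Importance")
plt.tight_layout()
plt.show()
```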
ml_tools/ML_trainer/_dragon_trainer.py CHANGED
@@ -33,8 +33,7 @@ from ..ML_configuration import (FormatRegressionMetrics,
  FinalizeMultiTargetRegression,
  FinalizeRegression)

- from ..path_manager import make_fullpath
- from ..keys._keys import PyTorchLogKeys, PyTorchCheckpointKeys, DatasetKeys, MLTaskKeys, MagicWords, DragonTrainerKeys, ScalerKeys
+ from ..keys._keys import PyTorchLogKeys, PyTorchCheckpointKeys, DatasetKeys, MLTaskKeys, DragonTrainerKeys, ScalerKeys
  from .._core import get_logger

  from ._base_trainer import _BaseDragonTrainer
@@ -824,7 +823,8 @@ class DragonTrainer(_BaseDragonTrainer):
  n_samples: int = 100,
  feature_names: Optional[list[str]] = None,
  target_names: Optional[list[str]] = None,
- n_steps: int = 50):
+ n_steps: int = 50,
+ verbose: int = 0):
  """
  Explains model predictions using Captum's Integrated Gradients.
@@ -927,7 +927,8 @@ class DragonTrainer(_BaseDragonTrainer):
  save_dir=save_dir,
  target_names=target_names,
  n_steps=n_steps,
- device=self.device
+ device=self.device,
+ verbose=verbose
  )

  def _attention_helper(self, dataloader: DataLoader):
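
The trainer-side changes are plumbing: apart from dropping two unused imports (`make_fullpath`, `MagicWords`), the new `verbose` parameter is simply forwarded to `captum_feature_importance`. A minimal sketch of that delegation pattern — the stub names are hypothetical, since the hunk at line 824 does not show the method's real name:

```python
# Delegation pattern used by the trainer: accept `verbose` and pass it through.
def captum_feature_importance_stub(*, n_steps: int = 50, verbose: int = 0) -> None:
    print(f"running Integrated Gradients: n_steps={n_steps}, verbose={verbose}")

class TrainerSketch:
    def explain(self, n_steps: int = 50, verbose: int = 0) -> None:
        # Mirror the diff: thread the caller's verbosity down to the helper.
        captum_feature_importance_stub(n_steps=n_steps, verbose=verbose)

TrainerSketch().explain(verbose=1)  # running Integrated Gradients: n_steps=50, verbose=1
```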
ml_tools/keys/_keys.py CHANGED
@@ -99,8 +99,9 @@ class SHAPKeys:

  class CaptumKeys:
      """Keys for Captum functions"""
-     FEATURE_COLUMN = "feature"
-     IMPORTANCE_COLUMN = "importance"
+     FEATURE_COLUMN = "Feature"
+     IMPORTANCE_COLUMN = "Scaled Mean Attribution"
+     PERCENT_COLUMN = "Relative Importance(%)"
      SAVENAME = "captum_summary"
      PLOT_NAME = "captum_importance_plot"
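
Because these constants are baked into the saved CSV as column headers, the renames are breaking for any downstream code that read `"feature"`/`"importance"` by literal name. Reading a summary through the constants keeps consumers aligned with future renames (the CSV path below is hypothetical):

```python
# Read a saved summary via the renamed constants rather than string literals.
import pandas as pd
from ml_tools.keys._keys import CaptumKeys  # module path as listed in this wheel's RECORD

df = pd.read_csv("captum_summary.csv")  # hypothetical output location
top5 = df.nlargest(5, CaptumKeys.PERCENT_COLUMN)[
    [CaptumKeys.FEATURE_COLUMN, CaptumKeys.IMPORTANCE_COLUMN, CaptumKeys.PERCENT_COLUMN]
]
print(top5.to_string(index=False))
```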