explainiverse 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
explainiverse/__init__.py CHANGED
@@ -33,7 +33,7 @@ from explainiverse.adapters.sklearn_adapter import SklearnAdapter
33
33
  from explainiverse.adapters import TORCH_AVAILABLE
34
34
  from explainiverse.engine.suite import ExplanationSuite
35
35
 
36
- __version__ = "0.2.2"
36
+ __version__ = "0.2.4"
37
37
 
38
38
  __all__ = [
39
39
  # Core
@@ -369,6 +369,8 @@ def _create_default_registry() -> ExplainerRegistry:
369
369
  from explainiverse.explainers.global_explainers.ale import ALEExplainer
370
370
  from explainiverse.explainers.global_explainers.sage import SAGEExplainer
371
371
  from explainiverse.explainers.counterfactual.dice_wrapper import CounterfactualExplainer
372
+ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
373
+ from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
372
374
 
373
375
  registry = ExplainerRegistry()
374
376
 
@@ -461,6 +463,40 @@ def _create_default_registry() -> ExplainerRegistry:
461
463
  )
462
464
  )
463
465
 
466
+ # Register Integrated Gradients (for neural networks)
467
+ registry.register(
468
+ name="integrated_gradients",
469
+ explainer_class=IntegratedGradientsExplainer,
470
+ meta=ExplainerMeta(
471
+ scope="local",
472
+ model_types=["neural"],
473
+ data_types=["tabular", "image"],
474
+ task_types=["classification", "regression"],
475
+ description="Integrated Gradients - axiomatic attributions for neural networks (requires PyTorch)",
476
+ paper_reference="Sundararajan et al., 2017 - 'Axiomatic Attribution for Deep Networks' (ICML)",
477
+ complexity="O(n_steps * forward_pass)",
478
+ requires_training_data=False,
479
+ supports_batching=True
480
+ )
481
+ )
482
+
483
+ # Register GradCAM (for CNNs)
484
+ registry.register(
485
+ name="gradcam",
486
+ explainer_class=GradCAMExplainer,
487
+ meta=ExplainerMeta(
488
+ scope="local",
489
+ model_types=["neural"],
490
+ data_types=["image"],
491
+ task_types=["classification"],
492
+ description="GradCAM/GradCAM++ - visual explanations for CNNs via gradient-weighted activations (requires PyTorch)",
493
+ paper_reference="Selvaraju et al., 2017 - 'Grad-CAM: Visual Explanations from Deep Networks' (ICCV)",
494
+ complexity="O(forward_pass + backward_pass)",
495
+ requires_training_data=False,
496
+ supports_batching=True
497
+ )
498
+ )
499
+
464
500
  # =========================================================================
465
501
  # Global Explainers (model-level)
466
502
  # =========================================================================
@@ -8,6 +8,7 @@ Local Explainers (instance-level):
8
8
  - TreeSHAP: Optimized exact SHAP for tree-based models
9
9
  - Anchors: High-precision rule-based explanations
10
10
  - Counterfactual: Diverse counterfactual explanations
11
+ - Integrated Gradients: Gradient-based attributions for neural networks
11
12
 
12
13
  Global Explainers (model-level):
13
14
  - Permutation Importance: Feature importance via permutation
@@ -25,6 +26,8 @@ from explainiverse.explainers.global_explainers.permutation_importance import Pe
25
26
  from explainiverse.explainers.global_explainers.partial_dependence import PartialDependenceExplainer
26
27
  from explainiverse.explainers.global_explainers.ale import ALEExplainer
27
28
  from explainiverse.explainers.global_explainers.sage import SAGEExplainer
29
+ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
30
+ from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
28
31
 
29
32
  __all__ = [
30
33
  # Local explainers
@@ -33,6 +36,8 @@ __all__ = [
33
36
  "TreeShapExplainer",
34
37
  "AnchorsExplainer",
35
38
  "CounterfactualExplainer",
39
+ "IntegratedGradientsExplainer",
40
+ "GradCAMExplainer",
36
41
  # Global explainers
37
42
  "PermutationImportanceExplainer",
38
43
  "PartialDependenceExplainer",
@@ -0,0 +1,12 @@
1
+ # src/explainiverse/explainers/gradient/__init__.py
2
+ """
3
+ Gradient-based explainers for neural networks.
4
+
5
+ These explainers require models that support gradient computation,
6
+ typically via the PyTorchAdapter.
7
+ """
8
+
9
+ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
10
+ from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
11
+
12
+ __all__ = ["IntegratedGradientsExplainer", "GradCAMExplainer"]
@@ -0,0 +1,390 @@
1
+ # src/explainiverse/explainers/gradient/gradcam.py
2
+ """
3
+ GradCAM and GradCAM++ - Visual Explanations for CNNs.
4
+
5
+ GradCAM produces visual explanations by highlighting important regions
6
+ in an image that contribute to the model's prediction. It uses gradients
7
+ flowing into the final convolutional layer to produce a coarse localization map.
8
+
9
+ GradCAM++ improves upon GradCAM by using a weighted combination of positive
10
+ partial derivatives, providing better localization for multiple instances
11
+ of the same class.
12
+
13
+ References:
14
+ GradCAM: Selvaraju et al., 2017 - "Grad-CAM: Visual Explanations from
15
+ Deep Networks via Gradient-based Localization"
16
+ https://arxiv.org/abs/1610.02391
17
+
18
+ GradCAM++: Chattopadhay et al., 2018 - "Grad-CAM++: Generalized Gradient-based
19
+ Visual Explanations for Deep Convolutional Networks"
20
+ https://arxiv.org/abs/1710.11063
21
+
22
+ Example:
23
+ from explainiverse.explainers.gradient import GradCAMExplainer
24
+ from explainiverse.adapters import PyTorchAdapter
25
+
26
+ # For a CNN model
27
+ adapter = PyTorchAdapter(cnn_model, task="classification")
28
+
29
+ explainer = GradCAMExplainer(
30
+ model=adapter,
31
+ target_layer="layer4", # Last conv layer
32
+ class_names=class_names
33
+ )
34
+
35
+ explanation = explainer.explain(image)
36
+ heatmap = explanation.explanation_data["heatmap"]
37
+ """
38
+
39
+ import numpy as np
40
+ from typing import List, Optional, Tuple, Union
41
+
42
+ from explainiverse.core.explainer import BaseExplainer
43
+ from explainiverse.core.explanation import Explanation
44
+
45
+
46
+ class GradCAMExplainer(BaseExplainer):
47
+ """
48
+ GradCAM and GradCAM++ explainer for CNNs.
49
+
50
+ Produces visual heatmaps showing which regions of an input image
51
+ are most important for the model's prediction.
52
+
53
+ Attributes:
54
+ model: PyTorchAdapter wrapping a CNN model
55
+ target_layer: Name of the convolutional layer to use
56
+ class_names: List of class names
57
+ method: "gradcam" or "gradcam++"
58
+ """
59
+
60
+ def __init__(
61
+ self,
62
+ model,
63
+ target_layer: str,
64
+ class_names: Optional[List[str]] = None,
65
+ method: str = "gradcam"
66
+ ):
67
+ """
68
+ Initialize the GradCAM explainer.
69
+
70
+ Args:
71
+ model: A PyTorchAdapter wrapping a CNN model.
72
+ target_layer: Name of the target convolutional layer.
73
+ Usually the last conv layer before the classifier.
74
+ Use adapter.list_layers() to see available layers.
75
+ class_names: List of class names for classification.
76
+ method: "gradcam" for standard GradCAM, "gradcam++" for improved version.
77
+ """
78
+ super().__init__(model)
79
+
80
+ # Validate model has layer access
81
+ if not hasattr(model, 'get_layer_gradients'):
82
+ raise TypeError(
83
+ "Model adapter must have get_layer_gradients() method. "
84
+ "Use PyTorchAdapter for PyTorch models."
85
+ )
86
+
87
+ self.target_layer = target_layer
88
+ self.class_names = list(class_names) if class_names else None
89
+ self.method = method.lower()
90
+
91
+ if self.method not in ["gradcam", "gradcam++"]:
92
+ raise ValueError(f"Method must be 'gradcam' or 'gradcam++', got '{method}'")
93
+
94
+ def _compute_gradcam(
95
+ self,
96
+ activations: np.ndarray,
97
+ gradients: np.ndarray
98
+ ) -> np.ndarray:
99
+ """
100
+ Compute standard GradCAM heatmap.
101
+
102
+ GradCAM = ReLU(sum_k(alpha_k * A^k))
103
+ where alpha_k = global_avg_pool(gradients for channel k)
104
+ """
105
+ # Global average pooling of gradients to get weights
106
+ # activations shape: (batch, channels, height, width)
107
+ # gradients shape: (batch, channels, height, width)
108
+
109
+ # For each channel, compute the average gradient (importance weight)
110
+ weights = np.mean(gradients, axis=(2, 3), keepdims=True) # (batch, channels, 1, 1)
111
+
112
+ # Weighted combination of activation maps
113
+ cam = np.sum(weights * activations, axis=1) # (batch, height, width)
114
+
115
+ # Apply ReLU (we only care about positive influence)
116
+ cam = np.maximum(cam, 0)
117
+
118
+ return cam
119
+
120
+ def _compute_gradcam_plusplus(
121
+ self,
122
+ activations: np.ndarray,
123
+ gradients: np.ndarray
124
+ ) -> np.ndarray:
125
+ """
126
+ Compute GradCAM++ heatmap.
127
+
128
+ GradCAM++ uses higher-order derivatives to weight the gradients,
129
+ providing better localization especially for multiple instances.
130
+ """
131
+ # Element-wise powers of the gradients — stand-ins for the higher-order derivative terms in the GradCAM++ alpha formula
132
+ grad_2 = gradients ** 2
133
+ grad_3 = gradients ** 3
134
+
135
+ # Sum over spatial dimensions for denominator
136
+ sum_activations = np.sum(activations, axis=(2, 3), keepdims=True)
137
+
138
+ # Avoid division by zero
139
+ eps = 1e-8
140
+
141
+ # Alpha coefficients (pixel-wise weights)
142
+ alpha_num = grad_2
143
+ alpha_denom = 2 * grad_2 + sum_activations * grad_3 + eps
144
+ alpha = alpha_num / alpha_denom
145
+
146
+ # Set alpha to 0 where gradients are 0
147
+ alpha = np.where(gradients != 0, alpha, 0)
148
+
149
+ # Weights are sum of (alpha * ReLU(gradients))
150
+ weights = np.sum(alpha * np.maximum(gradients, 0), axis=(2, 3), keepdims=True)
151
+
152
+ # Weighted combination
153
+ cam = np.sum(weights * activations, axis=1)
154
+
155
+ # Apply ReLU
156
+ cam = np.maximum(cam, 0)
157
+
158
+ return cam
159
+
160
+ def _normalize_heatmap(self, heatmap: np.ndarray) -> np.ndarray:
161
+ """Normalize heatmap to [0, 1] range."""
162
+ heatmap = heatmap.squeeze()
163
+
164
+ min_val = heatmap.min()
165
+ max_val = heatmap.max()
166
+
167
+ if max_val - min_val > 1e-8:
168
+ heatmap = (heatmap - min_val) / (max_val - min_val)
169
+ else:
170
+ heatmap = np.zeros_like(heatmap)
171
+
172
+ return heatmap
173
+
174
+ def _resize_heatmap(
175
+ self,
176
+ heatmap: np.ndarray,
177
+ target_size: Tuple[int, int]
178
+ ) -> np.ndarray:
179
+ """
180
+ Resize heatmap to match input image size.
181
+
182
+ Uses simple bilinear-like interpolation without requiring scipy/cv2.
183
+ """
184
+ h, w = heatmap.shape
185
+ target_h, target_w = target_size
186
+
187
+ # Create coordinate grids
188
+ y_ratio = h / target_h
189
+ x_ratio = w / target_w
190
+
191
+ y_coords = np.arange(target_h) * y_ratio
192
+ x_coords = np.arange(target_w) * x_ratio
193
+
194
+ # Get integer indices and fractions
195
+ y_floor = np.floor(y_coords).astype(int)
196
+ x_floor = np.floor(x_coords).astype(int)
197
+
198
+ y_ceil = np.minimum(y_floor + 1, h - 1)
199
+ x_ceil = np.minimum(x_floor + 1, w - 1)
200
+
201
+ y_frac = y_coords - y_floor
202
+ x_frac = x_coords - x_floor
203
+
204
+ # Bilinear interpolation
205
+ resized = np.zeros((target_h, target_w))
206
+ for i in range(target_h):
207
+ for j in range(target_w):
208
+ top_left = heatmap[y_floor[i], x_floor[j]]
209
+ top_right = heatmap[y_floor[i], x_ceil[j]]
210
+ bottom_left = heatmap[y_ceil[i], x_floor[j]]
211
+ bottom_right = heatmap[y_ceil[i], x_ceil[j]]
212
+
213
+ top = top_left * (1 - x_frac[j]) + top_right * x_frac[j]
214
+ bottom = bottom_left * (1 - x_frac[j]) + bottom_right * x_frac[j]
215
+
216
+ resized[i, j] = top * (1 - y_frac[i]) + bottom * y_frac[i]
217
+
218
+ return resized
219
+
220
+ def explain(
221
+ self,
222
+ image: np.ndarray,
223
+ target_class: Optional[int] = None,
224
+ resize_to_input: bool = True
225
+ ) -> Explanation:
226
+ """
227
+ Generate GradCAM explanation for an image.
228
+
229
+ Args:
230
+ image: Input image as numpy array. Expected shapes:
231
+ - (C, H, W) for single image
232
+ - (1, C, H, W) for batched single image
233
+ - (H, W, C) will be transposed automatically
234
+ target_class: Class to explain. If None, uses predicted class.
235
+ resize_to_input: If True, resize heatmap to match input size.
236
+
237
+ Returns:
238
+ Explanation object with heatmap and metadata.
239
+ """
240
+ image = np.array(image, dtype=np.float32)
241
+
242
+ # Handle different input shapes
243
+ if image.ndim == 3:
244
+ # Could be (C, H, W) or (H, W, C)
245
+ if image.shape[0] in [1, 3, 4]: # Likely (C, H, W)
246
+ image = image[np.newaxis, ...] # Add batch dim
247
+ else: # Likely (H, W, C)
248
+ image = np.transpose(image, (2, 0, 1))[np.newaxis, ...]
249
+ elif image.ndim == 4:
250
+ pass # Already (N, C, H, W)
251
+ else:
252
+ raise ValueError(f"Expected 3D or 4D input, got shape {image.shape}")
253
+
254
+ input_size = (image.shape[2], image.shape[3]) # (H, W)
255
+
256
+ # Get activations and gradients for target layer
257
+ activations, gradients = self.model.get_layer_gradients(
258
+ image,
259
+ layer_name=self.target_layer,
260
+ target_class=target_class
261
+ )
262
+
263
+ # Ensure 4D: (batch, channels, height, width)
264
+ if activations.ndim == 2:
265
+ # Fully connected layer output, reshape
266
+ side = int(np.sqrt(activations.shape[1]))
267
+ activations = activations.reshape(1, 1, side, side)
268
+ gradients = gradients.reshape(1, 1, side, side)
269
+ elif activations.ndim == 3:
270
+ activations = activations[np.newaxis, ...]
271
+ gradients = gradients[np.newaxis, ...]
272
+
273
+ # Compute CAM based on method
274
+ if self.method == "gradcam":
275
+ cam = self._compute_gradcam(activations, gradients)
276
+ else: # gradcam++
277
+ cam = self._compute_gradcam_plusplus(activations, gradients)
278
+
279
+ # Normalize to [0, 1]
280
+ heatmap = self._normalize_heatmap(cam)
281
+
282
+ # Optionally resize to input size
283
+ if resize_to_input and heatmap.shape != input_size:
284
+ heatmap = self._resize_heatmap(heatmap, input_size)
285
+
286
+ # Determine target class info
287
+ if target_class is None:
288
+ predictions = self.model.predict(image)
289
+ target_class = int(np.argmax(predictions))
290
+
291
+ if self.class_names and target_class < len(self.class_names):
292
+ label_name = self.class_names[target_class]
293
+ else:
294
+ label_name = f"class_{target_class}"
295
+
296
+ return Explanation(
297
+ explainer_name=f"GradCAM" if self.method == "gradcam" else "GradCAM++",
298
+ target_class=label_name,
299
+ explanation_data={
300
+ "heatmap": heatmap.tolist(),
301
+ "heatmap_shape": list(heatmap.shape),
302
+ "target_layer": self.target_layer,
303
+ "method": self.method,
304
+ "input_shape": list(image.shape)
305
+ }
306
+ )
307
+
308
+ def explain_batch(
309
+ self,
310
+ images: np.ndarray,
311
+ target_class: Optional[int] = None
312
+ ) -> List[Explanation]:
313
+ """
314
+ Generate explanations for multiple images.
315
+
316
+ Args:
317
+ images: Batch of images (N, C, H, W).
318
+ target_class: Target class for all images.
319
+
320
+ Returns:
321
+ List of Explanation objects.
322
+ """
323
+ images = np.array(images)
324
+
325
+ return [
326
+ self.explain(images[i], target_class=target_class)
327
+ for i in range(images.shape[0])
328
+ ]
329
+
330
+ def get_overlay(
331
+ self,
332
+ image: np.ndarray,
333
+ heatmap: np.ndarray,
334
+ alpha: float = 0.5,
335
+ colormap: str = "jet"
336
+ ) -> np.ndarray:
337
+ """
338
+ Create an overlay of the heatmap on the original image.
339
+
340
+ This is a simple implementation without matplotlib/cv2 dependencies.
341
+ For better visualizations, use the heatmap with your preferred
342
+ visualization library.
343
+
344
+ Args:
345
+ image: Original image (H, W, 3) in [0, 255] or [0, 1] range.
346
+ heatmap: GradCAM heatmap (H, W) in [0, 1] range.
347
+ alpha: Transparency of the heatmap overlay.
348
+ colormap: Color scheme (currently only "jet" supported).
349
+
350
+ Returns:
351
+ Overlaid image as numpy array (H, W, 3) in [0, 1] range.
352
+ """
353
+ image = np.array(image)
354
+ heatmap = np.array(heatmap)
355
+
356
+ # Normalize image to [0, 1]
357
+ if image.max() > 1:
358
+ image = image / 255.0
359
+
360
+ # Handle channel-first format
361
+ if image.ndim == 3 and image.shape[0] in [1, 3]:
362
+ image = np.transpose(image, (1, 2, 0))
363
+
364
+ # Simple jet colormap approximation
365
+ def jet_colormap(x):
366
+ """Simple jet colormap: blue -> cyan -> green -> yellow -> red"""
367
+ r = np.clip(1.5 - np.abs(4 * x - 3), 0, 1)
368
+ g = np.clip(1.5 - np.abs(4 * x - 2), 0, 1)
369
+ b = np.clip(1.5 - np.abs(4 * x - 1), 0, 1)
370
+ return np.stack([r, g, b], axis=-1)
371
+
372
+ # Apply colormap to heatmap
373
+ colored_heatmap = jet_colormap(heatmap)
374
+
375
+ # Ensure same size
376
+ if colored_heatmap.shape[:2] != image.shape[:2]:
377
+ colored_heatmap = self._resize_heatmap(
378
+ colored_heatmap.mean(axis=-1),
379
+ image.shape[:2]
380
+ )
381
+ colored_heatmap = jet_colormap(colored_heatmap)
382
+
383
+ # Blend
384
+ if image.ndim == 2:
385
+ image = np.stack([image] * 3, axis=-1)
386
+
387
+ overlay = (1 - alpha) * image + alpha * colored_heatmap
388
+ overlay = np.clip(overlay, 0, 1)
389
+
390
+ return overlay
@@ -0,0 +1,348 @@
1
+ # src/explainiverse/explainers/gradient/integrated_gradients.py
2
+ """
3
+ Integrated Gradients - Axiomatic Attribution for Deep Networks.
4
+
5
+ Integrated Gradients computes feature attributions by accumulating gradients
6
+ along a straight-line path from a baseline to the input. It satisfies two
7
+ key axioms:
8
+ - Sensitivity: If a feature differs between input and baseline and changes
9
+ the prediction, it receives non-zero attribution.
10
+ - Implementation Invariance: Attributions are identical for functionally
11
+ equivalent networks.
12
+
13
+ Reference:
14
+ Sundararajan, M., Taly, A., & Yan, Q. (2017). Axiomatic Attribution for
15
+ Deep Networks. ICML 2017. https://arxiv.org/abs/1703.01365
16
+
17
+ Example:
18
+ from explainiverse.explainers.gradient import IntegratedGradientsExplainer
19
+ from explainiverse.adapters import PyTorchAdapter
20
+
21
+ adapter = PyTorchAdapter(model, task="classification")
22
+
23
+ explainer = IntegratedGradientsExplainer(
24
+ model=adapter,
25
+ feature_names=feature_names,
26
+ n_steps=50
27
+ )
28
+
29
+ explanation = explainer.explain(instance)
30
+ """
31
+
32
+ import numpy as np
33
+ from typing import List, Optional, Union, Callable
34
+
35
+ from explainiverse.core.explainer import BaseExplainer
36
+ from explainiverse.core.explanation import Explanation
37
+
38
+
39
+ class IntegratedGradientsExplainer(BaseExplainer):
40
+ """
41
+ Integrated Gradients explainer for neural networks.
42
+
43
+ Computes attributions by integrating gradients along the path from
44
+ a baseline (default: zero vector) to the input. The integral is
45
+ approximated using the Riemann sum.
46
+
47
+ Attributes:
48
+ model: Model adapter with predict_with_gradients() method
49
+ feature_names: List of feature names
50
+ class_names: List of class names (for classification)
51
+ n_steps: Number of steps for integral approximation
52
+ baseline: Baseline input (default: zeros)
53
+ method: Integration method ("riemann_middle", "riemann_left", "riemann_right", "riemann_trapezoid")
54
+ """
55
+
56
+ def __init__(
57
+ self,
58
+ model,
59
+ feature_names: List[str],
60
+ class_names: Optional[List[str]] = None,
61
+ n_steps: int = 50,
62
+ baseline: Optional[np.ndarray] = None,
63
+ method: str = "riemann_middle"
64
+ ):
65
+ """
66
+ Initialize the Integrated Gradients explainer.
67
+
68
+ Args:
69
+ model: A model adapter with predict_with_gradients() method.
70
+ Use PyTorchAdapter for PyTorch models.
71
+ feature_names: List of input feature names.
72
+ class_names: List of class names (for classification tasks).
73
+ n_steps: Number of steps for approximating the integral.
74
+ More steps = more accurate but slower. Default: 50.
75
+ baseline: Baseline input for comparison. If None, uses zeros.
76
+ Can also be "random" for random baseline or a callable.
77
+ method: Integration method:
78
+ - "riemann_middle": Middle Riemann sum (default, most accurate)
79
+ - "riemann_left": Left Riemann sum
80
+ - "riemann_right": Right Riemann sum
81
+ - "riemann_trapezoid": Trapezoidal rule
82
+ """
83
+ super().__init__(model)
84
+
85
+ # Validate model has gradient capability
86
+ if not hasattr(model, 'predict_with_gradients'):
87
+ raise TypeError(
88
+ "Model adapter must have predict_with_gradients() method. "
89
+ "Use PyTorchAdapter for PyTorch models."
90
+ )
91
+
92
+ self.feature_names = list(feature_names)
93
+ self.class_names = list(class_names) if class_names else None
94
+ self.n_steps = n_steps
95
+ self.baseline = baseline
96
+ self.method = method
97
+
98
+ def _get_baseline(self, instance: np.ndarray) -> np.ndarray:
99
+ """Get the baseline for a given input shape."""
100
+ if self.baseline is None:
101
+ # Default: zero baseline
102
+ return np.zeros_like(instance)
103
+ elif isinstance(self.baseline, str) and self.baseline == "random":
104
+ # Random baseline (useful for images)
105
+ return np.random.uniform(
106
+ low=instance.min(),
107
+ high=instance.max(),
108
+ size=instance.shape
109
+ ).astype(instance.dtype)
110
+ elif callable(self.baseline):
111
+ return self.baseline(instance)
112
+ else:
113
+ return np.array(self.baseline).reshape(instance.shape)
114
+
115
+ def _get_interpolation_alphas(self) -> np.ndarray:
116
+ """Get interpolation points based on method."""
117
+ if self.method == "riemann_left":
118
+ return np.linspace(0, 1 - 1/self.n_steps, self.n_steps)
119
+ elif self.method == "riemann_right":
120
+ return np.linspace(1/self.n_steps, 1, self.n_steps)
121
+ elif self.method == "riemann_middle":
122
+ return np.linspace(0.5/self.n_steps, 1 - 0.5/self.n_steps, self.n_steps)
123
+ elif self.method == "riemann_trapezoid":
124
+ return np.linspace(0, 1, self.n_steps + 1)
125
+ else:
126
+ raise ValueError(f"Unknown method: {self.method}")
127
+
128
+ def _compute_integrated_gradients(
129
+ self,
130
+ instance: np.ndarray,
131
+ baseline: np.ndarray,
132
+ target_class: Optional[int] = None
133
+ ) -> np.ndarray:
134
+ """
135
+ Compute integrated gradients for a single instance.
136
+
137
+ The integral is approximated as:
138
+ IG_i = (x_i - x'_i) * sum_{k=1}^{m} grad_i(x' + k/m * (x - x')) / m
139
+
140
+ where x is the input, x' is the baseline, and m is n_steps.
141
+ """
142
+ # Get interpolation points
143
+ alphas = self._get_interpolation_alphas()
144
+
145
+ # Compute path from baseline to input
146
+ # Shape: (n_steps, n_features)
147
+ delta = instance - baseline
148
+ interpolated_inputs = baseline + alphas[:, np.newaxis] * delta
149
+
150
+ # Compute gradients at each interpolation point
151
+ all_gradients = []
152
+ for interp_input in interpolated_inputs:
153
+ _, gradients = self.model.predict_with_gradients(
154
+ interp_input.reshape(1, -1),
155
+ target_class=target_class
156
+ )
157
+ all_gradients.append(gradients.flatten())
158
+
159
+ all_gradients = np.array(all_gradients) # Shape: (n_steps, n_features)
160
+
161
+ # Approximate the integral
162
+ if self.method == "riemann_trapezoid":
163
+ # Trapezoidal rule: (f(0) + 2*f(1) + ... + 2*f(n-1) + f(n)) / (2n)
164
+ weights = np.ones(self.n_steps + 1)
165
+ weights[0] = 0.5
166
+ weights[-1] = 0.5
167
+ avg_gradients = np.average(all_gradients, axis=0, weights=weights)
168
+ else:
169
+ # Standard Riemann sum: average of gradients
170
+ avg_gradients = np.mean(all_gradients, axis=0)
171
+
172
+ # Scale by input - baseline difference
173
+ integrated_gradients = delta * avg_gradients
174
+
175
+ return integrated_gradients
176
+
177
+ def explain(
178
+ self,
179
+ instance: np.ndarray,
180
+ target_class: Optional[int] = None,
181
+ baseline: Optional[np.ndarray] = None,
182
+ return_convergence_delta: bool = False
183
+ ) -> Explanation:
184
+ """
185
+ Generate Integrated Gradients explanation for an instance.
186
+
187
+ Args:
188
+ instance: 1D numpy array of input features.
189
+ target_class: For classification, which class to explain.
190
+ If None, uses the predicted class.
191
+ baseline: Override the default baseline for this explanation.
192
+ return_convergence_delta: If True, include the convergence delta
193
+ (difference between sum of attributions
194
+ and prediction difference).
195
+
196
+ Returns:
197
+ Explanation object with feature attributions.
198
+ """
199
+ instance = np.array(instance).flatten().astype(np.float32)
200
+
201
+ # Get baseline
202
+ if baseline is not None:
203
+ bl = np.array(baseline).flatten().astype(np.float32)
204
+ else:
205
+ bl = self._get_baseline(instance)
206
+
207
+ # Determine target class if not specified
208
+ if target_class is None and self.class_names:
209
+ predictions = self.model.predict(instance.reshape(1, -1))
210
+ target_class = int(np.argmax(predictions))
211
+
212
+ # Compute integrated gradients
213
+ ig_attributions = self._compute_integrated_gradients(
214
+ instance, bl, target_class
215
+ )
216
+
217
+ # Build attributions dict
218
+ attributions = {
219
+ fname: float(ig_attributions[i])
220
+ for i, fname in enumerate(self.feature_names)
221
+ }
222
+
223
+ # Determine class name
224
+ if self.class_names and target_class is not None:
225
+ label_name = self.class_names[target_class]
226
+ else:
227
+ label_name = f"class_{target_class}" if target_class is not None else "output"
228
+
229
+ explanation_data = {
230
+ "feature_attributions": attributions,
231
+ "attributions_raw": ig_attributions.tolist(),
232
+ "baseline": bl.tolist(),
233
+ "n_steps": self.n_steps,
234
+ "method": self.method
235
+ }
236
+
237
+ # Optionally compute convergence delta
238
+ if return_convergence_delta:
239
+ # The sum of attributions should equal F(x) - F(baseline)
240
+ pred_input = self.model.predict(instance.reshape(1, -1))
241
+ pred_baseline = self.model.predict(bl.reshape(1, -1))
242
+
243
+ if target_class is not None:
244
+ pred_diff = pred_input[0, target_class] - pred_baseline[0, target_class]
245
+ else:
246
+ pred_diff = pred_input[0, 0] - pred_baseline[0, 0]
247
+
248
+ attribution_sum = np.sum(ig_attributions)
249
+ convergence_delta = abs(pred_diff - attribution_sum)
250
+
251
+ explanation_data["convergence_delta"] = float(convergence_delta)
252
+ explanation_data["prediction_difference"] = float(pred_diff)
253
+ explanation_data["attribution_sum"] = float(attribution_sum)
254
+
255
+ return Explanation(
256
+ explainer_name="IntegratedGradients",
257
+ target_class=label_name,
258
+ explanation_data=explanation_data
259
+ )
260
+
261
+ def explain_batch(
262
+ self,
263
+ X: np.ndarray,
264
+ target_class: Optional[int] = None
265
+ ) -> List[Explanation]:
266
+ """
267
+ Generate explanations for multiple instances.
268
+
269
+ Note: This is not optimized for batching - it processes
270
+ instances sequentially. For large batches, consider using
271
+ the batched gradient computation in a custom implementation.
272
+
273
+ Args:
274
+ X: 2D numpy array of instances (n_samples, n_features).
275
+ target_class: Target class for all instances.
276
+
277
+ Returns:
278
+ List of Explanation objects.
279
+ """
280
+ X = np.array(X)
281
+ if X.ndim == 1:
282
+ X = X.reshape(1, -1)
283
+
284
+ return [
285
+ self.explain(X[i], target_class=target_class)
286
+ for i in range(X.shape[0])
287
+ ]
288
+
289
+ def compute_attributions_with_noise(
290
+ self,
291
+ instance: np.ndarray,
292
+ target_class: Optional[int] = None,
293
+ n_samples: int = 5,
294
+ noise_scale: float = 0.1
295
+ ) -> Explanation:
296
+ """
297
+ Compute attributions averaged over noisy baselines (SmoothGrad-style).
298
+
299
+ This can help reduce noise in the attributions by averaging over
300
+ multiple baselines sampled around the zero baseline.
301
+
302
+ Args:
303
+ instance: Input instance.
304
+ target_class: Target class for attribution.
305
+ n_samples: Number of noisy baselines to average.
306
+ noise_scale: Standard deviation of Gaussian noise.
307
+
308
+ Returns:
309
+ Explanation with averaged attributions.
310
+ """
311
+ instance = np.array(instance).flatten().astype(np.float32)
312
+
313
+ all_attributions = []
314
+ for _ in range(n_samples):
315
+ # Create noisy baseline
316
+ noise = np.random.normal(0, noise_scale, instance.shape).astype(np.float32)
317
+ noisy_baseline = noise # Noise around zero
318
+
319
+ ig = self._compute_integrated_gradients(
320
+ instance, noisy_baseline, target_class
321
+ )
322
+ all_attributions.append(ig)
323
+
324
+ # Average attributions
325
+ avg_attributions = np.mean(all_attributions, axis=0)
326
+ std_attributions = np.std(all_attributions, axis=0)
327
+
328
+ attributions = {
329
+ fname: float(avg_attributions[i])
330
+ for i, fname in enumerate(self.feature_names)
331
+ }
332
+
333
+ if self.class_names and target_class is not None:
334
+ label_name = self.class_names[target_class]
335
+ else:
336
+ label_name = f"class_{target_class}" if target_class is not None else "output"
337
+
338
+ return Explanation(
339
+ explainer_name="IntegratedGradients_Smooth",
340
+ target_class=label_name,
341
+ explanation_data={
342
+ "feature_attributions": attributions,
343
+ "attributions_raw": avg_attributions.tolist(),
344
+ "attributions_std": std_attributions.tolist(),
345
+ "n_samples": n_samples,
346
+ "noise_scale": noise_scale
347
+ }
348
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: explainiverse
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
5
5
  Home-page: https://github.com/jemsbhai/explainiverse
6
6
  License: MIT
@@ -31,7 +31,7 @@ Description-Content-Type: text/markdown
31
31
  # Explainiverse
32
32
 
33
33
  **Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI).
34
- It provides a standardized interface for model-agnostic explainability with 9 state-of-the-art XAI methods, evaluation metrics, and a plugin registry for easy extensibility.
34
+ It provides a standardized interface for model-agnostic explainability with 11 state-of-the-art XAI methods, evaluation metrics, and a plugin registry for easy extensibility.
35
35
 
36
36
  ---
37
37
 
@@ -43,6 +43,8 @@ It provides a standardized interface for model-agnostic explainability with 9 st
43
43
  - **LIME** - Local Interpretable Model-agnostic Explanations ([Ribeiro et al., 2016](https://arxiv.org/abs/1602.04938))
44
44
  - **SHAP** - SHapley Additive exPlanations via KernelSHAP ([Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874))
45
45
  - **TreeSHAP** - Exact SHAP values for tree models, 10x+ faster ([Lundberg et al., 2018](https://arxiv.org/abs/1802.03888))
46
+ - **Integrated Gradients** - Axiomatic attributions for neural networks ([Sundararajan et al., 2017](https://arxiv.org/abs/1703.01365))
47
+ - **GradCAM/GradCAM++** - Visual explanations for CNNs ([Selvaraju et al., 2017](https://arxiv.org/abs/1610.02391))
46
48
  - **Anchors** - High-precision rule-based explanations ([Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491))
47
49
  - **Counterfactual** - DiCE-style diverse counterfactual explanations ([Mothilal et al., 2020](https://arxiv.org/abs/1905.07697))
48
50
 
@@ -109,7 +111,7 @@ adapter = SklearnAdapter(model, class_names=iris.target_names.tolist())
109
111
 
110
112
  # List available explainers
111
113
  print(default_registry.list_explainers())
112
- # ['lime', 'shap', 'treeshap', 'anchors', 'counterfactual', 'permutation_importance', 'partial_dependence', 'ale', 'sage']
114
+ # ['lime', 'shap', 'treeshap', 'integrated_gradients', 'gradcam', 'anchors', 'counterfactual', 'permutation_importance', 'partial_dependence', 'ale', 'sage']
113
115
 
114
116
  # Create and use an explainer
115
117
  explainer = default_registry.create(
@@ -128,11 +130,11 @@ print(explanation.explanation_data["feature_attributions"])
128
130
  ```python
129
131
  # Find local explainers for tabular data
130
132
  local_tabular = default_registry.filter(scope="local", data_type="tabular")
131
- print(local_tabular) # ['lime', 'shap', 'treeshap', 'anchors', 'counterfactual']
133
+ print(local_tabular) # ['lime', 'shap', 'treeshap', 'integrated_gradients', 'anchors', 'counterfactual']
132
134
 
133
- # Find explainers optimized for tree models
134
- tree_explainers = default_registry.filter(model_type="tree")
135
- print(tree_explainers) # ['treeshap']
135
+ # Find explainers for images/CNNs
136
+ image_explainers = default_registry.filter(data_type="image")
137
+ print(image_explainers) # ['lime', 'integrated_gradients', 'gradcam']
136
138
 
137
139
  # Get recommendations
138
140
  recommendations = default_registry.recommend(
@@ -200,6 +202,61 @@ predictions, gradients = adapter.predict_with_gradients(X)
200
202
  activations = adapter.get_layer_output(X, layer_name="0")
201
203
  ```
202
204
 
205
+ ### Integrated Gradients for Neural Networks
206
+
207
+ ```python
208
+ from explainiverse.explainers import IntegratedGradientsExplainer
209
+ from explainiverse import PyTorchAdapter
210
+
211
+ # Wrap your PyTorch model
212
+ adapter = PyTorchAdapter(model, task="classification", class_names=class_names)
213
+
214
+ # Create IG explainer
215
+ explainer = IntegratedGradientsExplainer(
216
+ model=adapter,
217
+ feature_names=feature_names,
218
+ class_names=class_names,
219
+ n_steps=50 # More steps = more accurate
220
+ )
221
+
222
+ # Explain a prediction
223
+ explanation = explainer.explain(X_test[0])
224
+ print(explanation.explanation_data["feature_attributions"])
225
+
226
+ # Check convergence (sum of attributions ≈ F(x) - F(baseline))
227
+ explanation = explainer.explain(X_test[0], return_convergence_delta=True)
228
+ print(f"Convergence delta: {explanation.explanation_data['convergence_delta']}")
229
+ ```
230
+
231
+ ### GradCAM for CNN Visual Explanations
232
+
233
+ ```python
234
+ from explainiverse.explainers import GradCAMExplainer
235
+ from explainiverse import PyTorchAdapter
236
+
237
+ # Wrap your CNN model
238
+ adapter = PyTorchAdapter(cnn_model, task="classification", class_names=class_names)
239
+
240
+ # Inspect the model's layers to locate the last convolutional layer (printed/browsed manually)
241
+ layers = adapter.list_layers()
242
+ target_layer = "layer4" # Adjust based on your model architecture
243
+
244
+ # Create GradCAM explainer
245
+ explainer = GradCAMExplainer(
246
+ model=adapter,
247
+ target_layer=target_layer,
248
+ class_names=class_names,
249
+ method="gradcam" # or "gradcam++" for improved version
250
+ )
251
+
252
+ # Explain an image prediction
253
+ explanation = explainer.explain(image) # image shape: (C, H, W) or (N, C, H, W)
254
+ heatmap = explanation.explanation_data["heatmap"]
255
+
256
+ # Create overlay visualization
257
+ overlay = explainer.get_overlay(original_image, heatmap, alpha=0.5)
258
+ ```
259
+
203
260
  ### Using Specific Explainers
204
261
 
205
262
  ```python
@@ -300,13 +357,13 @@ poetry run pytest tests/test_new_explainers.py -v
300
357
  ## Roadmap
301
358
 
302
359
  - [x] LIME, SHAP (KernelSHAP)
303
- - [x] TreeSHAP (optimized for tree models) ✅ NEW
360
+ - [x] TreeSHAP (optimized for tree models) ✅
304
361
  - [x] Anchors, Counterfactuals
305
362
  - [x] Permutation Importance, PDP, ALE, SAGE
306
363
  - [x] Explainer Registry with filtering
307
- - [x] PyTorch Adapter ✅ NEW
308
- - [ ] Integrated Gradients (gradient-based for neural nets)
309
- - [ ] GradCAM for CNNs
364
+ - [x] PyTorch Adapter ✅
365
+ - [x] Integrated Gradients
366
+ - [x] GradCAM/GradCAM++ for CNNs ✅ NEW
310
367
  - [ ] TensorFlow adapter
311
368
  - [ ] Interactive visualization dashboard
312
369
 
@@ -1,4 +1,4 @@
1
- explainiverse/__init__.py,sha256=-4H6WbfGwpeoNpO9w0CEahKQBPsvIYe_lK5e10cZWD0,1612
1
+ explainiverse/__init__.py,sha256=EEo8Stx-Lau5WZLQ5wqA4ESGY9HA_j-dqnpyp9MgV90,1612
2
2
  explainiverse/adapters/__init__.py,sha256=HcQGISyp-YQ4jEj2IYveX_c9X5otLcTNWRnVRRhzRik,781
3
3
  explainiverse/adapters/base_adapter.py,sha256=Nqt0GeDn_-PjTyJcZsE8dRTulavqFQsv8sMYWS_ps-M,603
4
4
  explainiverse/adapters/pytorch_adapter.py,sha256=GTilJAR1VF_OgWG88qZoqlqefHaSXB3i9iOwCJkyHTg,13318
@@ -6,12 +6,12 @@ explainiverse/adapters/sklearn_adapter.py,sha256=pzIBtMuqrG-6ZbUqUCMt7rSk3Ow0Fgr
6
6
  explainiverse/core/__init__.py,sha256=P3jHMnH5coFqTTO1w-gT-rurkCM1-9r3pF-055pbXMg,474
7
7
  explainiverse/core/explainer.py,sha256=Z9on-9VblYDlQx9oBm1BHpmAf_NsQajZ3qr-u48Aejo,784
8
8
  explainiverse/core/explanation.py,sha256=6zxFh_TH8tFHc-r_H5-WHQ05Sp1Kp2TxLz3gyFek5jo,881
9
- explainiverse/core/registry.py,sha256=_BXWi1fJY3cGjYA1Xn1DwvY91jbpJrpX6_8EVzrRT20,19876
9
+ explainiverse/core/registry.py,sha256=BjyPhcxWC8zaiRflDF3JCaUMPsBmHHDNfwM13bTWxjE,21476
10
10
  explainiverse/engine/__init__.py,sha256=1sZO8nH1mmwK2e-KUavBQm7zYDWUe27nyWoFy9tgsiA,197
11
11
  explainiverse/engine/suite.py,sha256=sq8SK_6Pf0qRckTmVJ7Mdosu9bhkjAGPGN8ymLGFP9E,4914
12
12
  explainiverse/evaluation/__init__.py,sha256=Y50L_b4HKthg4epwcayPHXh0l4i4MUuzvaNlqPmUNZY,212
13
13
  explainiverse/evaluation/metrics.py,sha256=tSBXtyA_-0zOGCGjlPZU6LdGKRH_QpWfgKa78sdlovs,7453
14
- explainiverse/explainers/__init__.py,sha256=Op-Z_BTJ7BdqA_9gTnruomN2-rKtrkPCt1Zq1iCzxr0,1758
14
+ explainiverse/explainers/__init__.py,sha256=epXDNoYIAxW0KNtGBCkjS28FmDSBYry59HdTY9vJXCs,2057
15
15
  explainiverse/explainers/attribution/__init__.py,sha256=YeVs9bS_IWDtqGbp6T37V6Zp5ZDWzLdAXHxxyFGpiQM,431
16
16
  explainiverse/explainers/attribution/lime_wrapper.py,sha256=OnXIV7t6yd-vt38sIi7XmHFbgzlZfCEbRlFyGGd5XiE,3245
17
17
  explainiverse/explainers/attribution/shap_wrapper.py,sha256=tKie5AvN7mb55PWOYdMvW0lUAYjfHPzYosEloEY2ZzI,3210
@@ -23,9 +23,12 @@ explainiverse/explainers/global_explainers/ale.py,sha256=tgG3XTppCf8LiD7uKzBt4DI
23
23
  explainiverse/explainers/global_explainers/partial_dependence.py,sha256=dH6yMjpwZads3pACR3rSykTbssLGHH7e6HfMlpl-S3I,6745
24
24
  explainiverse/explainers/global_explainers/permutation_importance.py,sha256=bcgKz1S_D3lrBMgpqEF_Z6qw8Knxl_cfR50hrSO2tBc,4410
25
25
  explainiverse/explainers/global_explainers/sage.py,sha256=57Xw1SK529x5JXWt0TVrcFYUUP3C65LfUwgoM-Z3gaw,5839
26
+ explainiverse/explainers/gradient/__init__.py,sha256=lVPiSGV_swSwV8k7Z4c6XETwDdTRO09D6bv8TSMsNd8,441
27
+ explainiverse/explainers/gradient/gradcam.py,sha256=ywW_8PhALwegkpSUDQMFvvVFkA5NnMMW6BB5tb3i8bw,13721
28
+ explainiverse/explainers/gradient/integrated_gradients.py,sha256=feBgY3Vw2rDti7fxRZtLkxse75m2dbP_R05ARqo2BRM,13367
26
29
  explainiverse/explainers/rule_based/__init__.py,sha256=gKzlFCAzwurAMLJcuYgal4XhDj1thteBGcaHWmN7iWk,243
27
30
  explainiverse/explainers/rule_based/anchors_wrapper.py,sha256=ML7W6aam-eMGZHy5ilol8qupZvNBJpYAFatEEPnuMyo,13254
28
- explainiverse-0.2.2.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
29
- explainiverse-0.2.2.dist-info/METADATA,sha256=kis3ejJCLRhBJWf5p13FzY2ZeSbnWfJxk6LS1hd7A1w,9497
30
- explainiverse-0.2.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
31
- explainiverse-0.2.2.dist-info/RECORD,,
31
+ explainiverse-0.2.4.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
32
+ explainiverse-0.2.4.dist-info/METADATA,sha256=5LUzE3WCwdp0QRyonWTkRyHnFmPjGcSlnzbs011f8ZA,11483
33
+ explainiverse-0.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
34
+ explainiverse-0.2.4.dist-info/RECORD,,