explainiverse 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
explainiverse/__init__.py CHANGED
@@ -33,7 +33,7 @@ from explainiverse.adapters.sklearn_adapter import SklearnAdapter
  from explainiverse.adapters import TORCH_AVAILABLE
  from explainiverse.engine.suite import ExplanationSuite

- __version__ = "0.4.0"
+ __version__ = "0.5.0"

  __all__ = [
      # Core
@@ -372,6 +372,7 @@ def _create_default_registry() -> ExplainerRegistry:
      from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
      from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
      from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
+     from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer
      from explainiverse.explainers.example_based.protodash import ProtoDashExplainer

      registry = ExplainerRegistry()
@@ -533,6 +534,23 @@ def _create_default_registry() -> ExplainerRegistry:
          )
      )

+     # Register SmoothGrad (for neural networks)
+     registry.register(
+         name="smoothgrad",
+         explainer_class=SmoothGradExplainer,
+         meta=ExplainerMeta(
+             scope="local",
+             model_types=["neural"],
+             data_types=["tabular", "image"],
+             task_types=["classification", "regression"],
+             description="SmoothGrad - noise-averaged gradients for smoother saliency maps (requires PyTorch)",
+             paper_reference="Smilkov et al., 2017 - 'SmoothGrad: removing noise by adding noise' (ICML Workshop)",
+             complexity="O(n_samples * forward_pass)",
+             requires_training_data=False,
+             supports_batching=True
+         )
+     )
+
      # =========================================================================
      # Global Explainers (model-level)
      # =========================================================================
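The registration above only wires SmoothGrad into the default registry; the estimator itself is the noise-averaged gradient implemented in the new module further down. As a rough standalone sketch of that idea (plain NumPy, independent of explainiverse; grad_fn stands in for the adapter's predict_with_gradients):

    import numpy as np

    def smoothgrad(grad_fn, x, n_samples=50, noise_scale=0.15):
        # SmoothGrad(x) = (1/n) * sum_i grad_fn(x + eps_i), eps_i ~ N(0, sigma^2 I)
        grads = [grad_fn(x + np.random.normal(0.0, noise_scale, x.shape))
                 for _ in range(n_samples)]
        return np.mean(grads, axis=0)

    # Toy check: for f(x) = sum(x**2) the gradient is 2*x, and the zero-mean
    # noise averages out, so the estimate approaches 2*x as n_samples grows.
    x = np.array([1.0, -2.0, 3.0])
    print(smoothgrad(lambda v: 2 * v, x, n_samples=5000))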
explainiverse/explainers/gradient/__init__.py CHANGED
@@ -9,10 +9,12 @@ typically via the PyTorchAdapter.
  from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
  from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
  from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
+ from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer

  __all__ = [
      "IntegratedGradientsExplainer",
      "GradCAMExplainer",
      "DeepLIFTExplainer",
      "DeepLIFTShapExplainer",
+     "SmoothGradExplainer",
  ]
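With the re-export above, the class is importable straight from the gradient subpackage. A usage sketch adapted from the new module's own docstring (model, feature_names, and instance are placeholders for your trained torch.nn.Module, column names, and input row):

    from explainiverse.adapters import PyTorchAdapter
    from explainiverse.explainers.gradient import SmoothGradExplainer

    adapter = PyTorchAdapter(model, task="classification")

    explainer = SmoothGradExplainer(
        model=adapter,
        feature_names=feature_names,
        n_samples=50,        # more samples: less variance, more compute
        noise_scale=0.15,    # ~10-20% of the input range is typical
        noise_type="gaussian"
    )

    explanation = explainer.explain(instance)
    print(explanation.explanation_data["feature_attributions"])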
explainiverse/explainers/gradient/smoothgrad.py ADDED
@@ -0,0 +1,424 @@
+ # src/explainiverse/explainers/gradient/smoothgrad.py
+ """
+ SmoothGrad - Removing Noise by Adding Noise.
+
+ SmoothGrad reduces noise in gradient-based saliency maps by averaging
+ gradients computed on noisy copies of the input. This produces smoother,
+ more visually coherent attributions that are often easier to interpret.
+
+ Key Properties:
+     - Simple: Just averages gradients over noisy inputs
+     - Effective: Significantly reduces noise in saliency maps
+     - Flexible: Works with any gradient-based method
+     - Fast: Only requires multiple forward/backward passes (parallelizable)
+
+ Variants:
+     - SmoothGrad: Average of gradients
+     - SmoothGrad-Squared: Average of squared gradients (sharper)
+     - VarGrad: Variance of gradients (uncertainty quantification)
+
+ Reference:
+     Smilkov, D., Thorat, N., Kim, B., Viégas, F., & Wattenberg, M. (2017).
+     SmoothGrad: removing noise by adding noise.
+     ICML Workshop on Visualization for Deep Learning.
+     https://arxiv.org/abs/1706.03825
+
+ Example:
+     from explainiverse.explainers.gradient import SmoothGradExplainer
+     from explainiverse.adapters import PyTorchAdapter
+
+     adapter = PyTorchAdapter(model, task="classification")
+
+     explainer = SmoothGradExplainer(
+         model=adapter,
+         feature_names=feature_names,
+         n_samples=50,
+         noise_scale=0.15
+     )
+
+     explanation = explainer.explain(instance)
+ """
+
+ import numpy as np
+ from typing import List, Optional
+
+ from explainiverse.core.explainer import BaseExplainer
+ from explainiverse.core.explanation import Explanation
+
+
+ class SmoothGradExplainer(BaseExplainer):
+     """
+     SmoothGrad explainer for neural networks.
+
+     Computes attributions by averaging gradients over noisy copies of the
+     input. The noise helps smooth out local fluctuations in the gradient
+     landscape, producing more interpretable saliency maps.
+
+     Algorithm:
+         SmoothGrad(x) = (1/n) * Σ_{i=1}^{n} ∂f(x + ε_i)/∂x
+         where ε_i ~ N(0, σ²I) or U(-σ, σ)
+
+     Attributes:
+         model: Model adapter with predict_with_gradients() method
+         feature_names: List of feature names
+         class_names: List of class names (for classification)
+         n_samples: Number of noisy samples to average
+         noise_scale: Standard deviation (Gaussian) or half-range (Uniform)
+         noise_type: Type of noise distribution ("gaussian" or "uniform")
+
+     Example:
+         >>> explainer = SmoothGradExplainer(adapter, feature_names, n_samples=50)
+         >>> explanation = explainer.explain(instance)
+         >>> print(explanation.explanation_data["feature_attributions"])
+     """
+
+     def __init__(
+         self,
+         model,
+         feature_names: List[str],
+         class_names: Optional[List[str]] = None,
+         n_samples: int = 50,
+         noise_scale: float = 0.15,
+         noise_type: str = "gaussian"
+     ):
+         """
+         Initialize the SmoothGrad explainer.
+
+         Args:
+             model: A model adapter with predict_with_gradients() method.
+                 Use PyTorchAdapter for PyTorch models.
+             feature_names: List of input feature names.
+             class_names: List of class names (for classification tasks).
+             n_samples: Number of noisy samples to average. More samples
+                 reduce variance but increase computation. Default: 50.
+             noise_scale: Scale of the noise to add:
+                 - For "gaussian": standard deviation (default: 0.15)
+                 - For "uniform": half-range, noise in [-scale, scale]
+                 Typically set to 10-20% of the input range.
+             noise_type: Type of noise distribution:
+                 - "gaussian": Normal distribution N(0, σ²) (default)
+                 - "uniform": Uniform distribution U(-σ, σ)
+
+         Raises:
+             TypeError: If model doesn't have predict_with_gradients method.
+             ValueError: If n_samples < 1, noise_scale < 0, or invalid noise_type.
+         """
+         super().__init__(model)
+
+         # Validate model has gradient capability
+         if not hasattr(model, 'predict_with_gradients'):
+             raise TypeError(
+                 "Model adapter must have predict_with_gradients() method. "
+                 "Use PyTorchAdapter for PyTorch models."
+             )
+
+         # Validate parameters
+         if n_samples < 1:
+             raise ValueError(f"n_samples must be >= 1, got {n_samples}")
+
+         if noise_scale < 0:
+             raise ValueError(f"noise_scale must be >= 0, got {noise_scale}")
+
+         if noise_type not in ["gaussian", "uniform"]:
+             raise ValueError(
+                 f"noise_type must be 'gaussian' or 'uniform', got '{noise_type}'"
+             )
+
+         self.feature_names = list(feature_names)
+         self.class_names = list(class_names) if class_names else None
+         self.n_samples = n_samples
+         self.noise_scale = noise_scale
+         self.noise_type = noise_type
+
+     def _generate_noise(self, shape: tuple) -> np.ndarray:
+         """
+         Generate noise samples based on the configured noise type.
+
+         Args:
+             shape: Shape of the noise array to generate.
+
+         Returns:
+             Numpy array of noise samples.
+         """
+         if self.noise_type == "gaussian":
+             return np.random.normal(0, self.noise_scale, shape).astype(np.float32)
+         else:  # uniform
+             return np.random.uniform(
+                 -self.noise_scale,
+                 self.noise_scale,
+                 shape
+             ).astype(np.float32)
+
+     def _compute_smoothgrad(
+         self,
+         instance: np.ndarray,
+         target_class: Optional[int] = None,
+         method: str = "smoothgrad",
+         absolute_value: bool = False
+     ) -> tuple:
+         """
+         Compute SmoothGrad attributions for a single instance.
+
+         Args:
+             instance: Input instance (1D array).
+             target_class: Target class for gradient computation.
+             method: Aggregation method:
+                 - "smoothgrad": Average of gradients (default)
+                 - "smoothgrad_squared": Average of squared gradients
+                 - "vargrad": Variance of gradients
+             absolute_value: If True, take absolute value of final attributions.
+
+         Returns:
+             Tuple of (attributions, std_attributions) arrays.
+         """
+         instance = instance.flatten().astype(np.float32)
+
+         # Collect gradients for all noisy samples
+         all_gradients = []
+
+         for _ in range(self.n_samples):
+             # Add noise to input
+             if self.noise_scale > 0:
+                 noise = self._generate_noise(instance.shape)
+                 noisy_input = instance + noise
+             else:
+                 noisy_input = instance.copy()
+
+             # Compute gradient
+             _, gradients = self.model.predict_with_gradients(
+                 noisy_input.reshape(1, -1),
+                 target_class=target_class
+             )
+             all_gradients.append(gradients.flatten())
+
+         all_gradients = np.array(all_gradients)  # Shape: (n_samples, n_features)
+
+         # Compute attributions based on method
+         if method == "smoothgrad":
+             attributions = np.mean(all_gradients, axis=0)
+             std_attributions = np.std(all_gradients, axis=0)
+         elif method == "smoothgrad_squared":
+             # Average of squared gradients
+             squared_gradients = all_gradients ** 2
+             attributions = np.mean(squared_gradients, axis=0)
+             std_attributions = np.std(squared_gradients, axis=0)
+         elif method == "vargrad":
+             # Variance of gradients
+             attributions = np.var(all_gradients, axis=0)
+             std_attributions = np.zeros_like(attributions)  # No std for variance
+         else:
+             raise ValueError(
+                 f"Unknown method: '{method}'. "
+                 f"Use 'smoothgrad', 'smoothgrad_squared', or 'vargrad'."
+             )
+
+         # Apply absolute value if requested
+         if absolute_value:
+             attributions = np.abs(attributions)
+
+         return attributions, std_attributions
+
+     def explain(
+         self,
+         instance: np.ndarray,
+         target_class: Optional[int] = None,
+         method: str = "smoothgrad",
+         absolute_value: bool = False
+     ) -> Explanation:
+         """
+         Generate SmoothGrad explanation for an instance.
+
+         Args:
+             instance: 1D numpy array of input features.
+             target_class: For classification, which class to explain.
+                 If None, uses the predicted class.
+             method: Aggregation method:
+                 - "smoothgrad": Average of gradients (default)
+                 - "smoothgrad_squared": Average of squared gradients (sharper)
+                 - "vargrad": Variance of gradients (uncertainty)
+             absolute_value: If True, return absolute values of attributions.
+                 Useful for feature importance without direction.
+
+         Returns:
+             Explanation object with feature attributions.
+
+         Example:
+             >>> explanation = explainer.explain(instance)
+             >>> print(explanation.explanation_data["feature_attributions"])
+         """
+         instance = np.array(instance).flatten().astype(np.float32)
+
+         # Determine target class if not specified
+         if target_class is None and self.class_names:
+             predictions = self.model.predict(instance.reshape(1, -1))
+             target_class = int(np.argmax(predictions))
+
+         # Compute SmoothGrad
+         attributions, std_attributions = self._compute_smoothgrad(
+             instance, target_class, method, absolute_value
+         )
+
+         # Build attributions dict
+         attributions_dict = {
+             fname: float(attributions[i])
+             for i, fname in enumerate(self.feature_names)
+         }
+
+         # Determine explainer name based on method
+         if method == "smoothgrad":
+             explainer_name = "SmoothGrad"
+         elif method == "smoothgrad_squared":
+             explainer_name = "SmoothGrad_Squared"
+         elif method == "vargrad":
+             explainer_name = "VarGrad"
+         else:
+             explainer_name = f"SmoothGrad_{method}"
+
+         # Determine class name
+         if self.class_names and target_class is not None:
+             label_name = self.class_names[target_class]
+         else:
+             label_name = f"class_{target_class}" if target_class is not None else "output"
+
+         explanation_data = {
+             "feature_attributions": attributions_dict,
+             "attributions_raw": attributions.tolist(),
+             "attributions_std": std_attributions.tolist(),
+             "n_samples": self.n_samples,
+             "noise_scale": self.noise_scale,
+             "noise_type": self.noise_type,
+             "method": method,
+             "absolute_value": absolute_value
+         }
+
+         return Explanation(
+             explainer_name=explainer_name,
+             target_class=label_name,
+             explanation_data=explanation_data
+         )
+
+     def explain_batch(
+         self,
+         X: np.ndarray,
+         target_class: Optional[int] = None,
+         method: str = "smoothgrad",
+         absolute_value: bool = False
+     ) -> List[Explanation]:
+         """
+         Generate explanations for multiple instances.
+
+         Args:
+             X: 2D numpy array of instances (n_samples, n_features),
+                 or 1D array for single instance.
+             target_class: Target class for all instances. If None,
+                 uses predicted class for each instance.
+             method: Aggregation method (see explain()).
+             absolute_value: If True, return absolute values.
+
+         Returns:
+             List of Explanation objects.
+
+         Example:
+             >>> explanations = explainer.explain_batch(X_test[:10])
+             >>> for exp in explanations:
+             ...     print(exp.target_class)
+         """
+         X = np.array(X)
+         if X.ndim == 1:
+             X = X.reshape(1, -1)
+
+         return [
+             self.explain(
+                 X[i],
+                 target_class=target_class,
+                 method=method,
+                 absolute_value=absolute_value
+             )
+             for i in range(X.shape[0])
+         ]
+
+     def compute_with_baseline_comparison(
+         self,
+         instance: np.ndarray,
+         target_class: Optional[int] = None
+     ) -> dict:
+         """
+         Compare SmoothGrad with raw gradient for analysis.
+
+         Useful for understanding the smoothing effect and validating
+         that SmoothGrad is reducing noise appropriately.
+
+         Args:
+             instance: Input instance.
+             target_class: Target class for gradient computation.
+
+         Returns:
+             Dictionary containing:
+                 - smoothgrad: SmoothGrad attributions
+                 - raw_gradient: Single gradient (no noise)
+                 - smoothgrad_squared: Squared variant
+                 - vargrad: Variance of gradients
+                 - correlation: Correlation between smoothgrad and raw
+         """
+         instance = np.array(instance).flatten().astype(np.float32)
+
+         # Determine target class
+         if target_class is None and self.class_names:
+             predictions = self.model.predict(instance.reshape(1, -1))
+             target_class = int(np.argmax(predictions))
+
+         # Raw gradient (no noise)
+         _, raw_gradient = self.model.predict_with_gradients(
+             instance.reshape(1, -1),
+             target_class=target_class
+         )
+         raw_gradient = raw_gradient.flatten()
+
+         # SmoothGrad variants
+         smoothgrad, _ = self._compute_smoothgrad(instance, target_class, "smoothgrad")
+         smoothgrad_squared, _ = self._compute_smoothgrad(instance, target_class, "smoothgrad_squared")
+         vargrad, _ = self._compute_smoothgrad(instance, target_class, "vargrad")
+
+         # Compute correlation
+         correlation = np.corrcoef(smoothgrad, raw_gradient)[0, 1]
+
+         return {
+             "smoothgrad": smoothgrad.tolist(),
+             "raw_gradient": raw_gradient.tolist(),
+             "smoothgrad_squared": smoothgrad_squared.tolist(),
+             "vargrad": vargrad.tolist(),
+             "correlation": float(correlation),
+             "n_samples": self.n_samples,
+             "noise_scale": self.noise_scale
+         }
+
+     def adaptive_noise_scale(
+         self,
+         instance: np.ndarray,
+         percentile: float = 15.0
+     ) -> float:
+         """
+         Compute adaptive noise scale based on input statistics.
+
+         The original SmoothGrad paper suggests using noise scale
+         proportional to the input range. This method computes an
+         appropriate scale based on the instance.
+
+         Args:
+             instance: Input instance.
+             percentile: Percentage of input range to use as noise scale.
+                 Default: 15% (recommended in paper).
+
+         Returns:
+             Recommended noise scale for this instance.
+         """
+         instance = np.array(instance).flatten()
+         input_range = instance.max() - instance.min()
+
+         # Avoid zero scale for constant inputs
+         if input_range == 0:
+             input_range = np.abs(instance).max()
+             if input_range == 0:
+                 input_range = 1.0
+
+         return float(input_range * percentile / 100.0)
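Taken together, the new methods support a workflow along these lines. A hedged sketch that reuses only the signatures defined in this file (explainer as constructed earlier; instance and X_test are placeholder NumPy arrays):

    # Pick a noise scale at ~15% of this instance's input range,
    # the paper's recommendation encoded in adaptive_noise_scale().
    explainer.noise_scale = explainer.adaptive_noise_scale(instance, percentile=15.0)

    # The three aggregation variants exposed by explain():
    exp = explainer.explain(instance, method="smoothgrad")
    exp_sq = explainer.explain(instance, method="smoothgrad_squared", absolute_value=True)
    exp_var = explainer.explain(instance, method="vargrad")

    # Sanity-check the smoothing against a single raw gradient.
    report = explainer.compute_with_baseline_comparison(instance)
    print(report["correlation"])

    # Explain several rows at once.
    for e in explainer.explain_batch(X_test[:10]):
        print(e.target_class)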