explainiverse-0.4.0-py3-none-any.whl → explainiverse-0.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- explainiverse/__init__.py +1 -1
- explainiverse/core/registry.py +36 -0
- explainiverse/explainers/gradient/__init__.py +4 -0
- explainiverse/explainers/gradient/saliency.py +293 -0
- explainiverse/explainers/gradient/smoothgrad.py +424 -0
- explainiverse-0.6.0.dist-info/METADATA +652 -0
- {explainiverse-0.4.0.dist-info → explainiverse-0.6.0.dist-info}/RECORD +9 -7
- explainiverse-0.4.0.dist-info/METADATA +0 -391
- {explainiverse-0.4.0.dist-info → explainiverse-0.6.0.dist-info}/LICENSE +0 -0
- {explainiverse-0.4.0.dist-info → explainiverse-0.6.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,424 @@

# src/explainiverse/explainers/gradient/smoothgrad.py
"""
SmoothGrad - Removing Noise by Adding Noise.

SmoothGrad reduces noise in gradient-based saliency maps by averaging
gradients computed on noisy copies of the input. This produces smoother,
more visually coherent attributions that are often easier to interpret.

Key Properties:
    - Simple: Just averages gradients over noisy inputs
    - Effective: Significantly reduces noise in saliency maps
    - Flexible: Works with any gradient-based method
    - Fast: Only requires multiple forward/backward passes (parallelizable)

Variants:
    - SmoothGrad: Average of gradients
    - SmoothGrad-Squared: Average of squared gradients (sharper)
    - VarGrad: Variance of gradients (uncertainty quantification)

Reference:
    Smilkov, D., Thorat, N., Kim, B., Viégas, F., & Wattenberg, M. (2017).
    SmoothGrad: removing noise by adding noise.
    ICML Workshop on Visualization for Deep Learning.
    https://arxiv.org/abs/1706.03825

Example:
    from explainiverse.explainers.gradient import SmoothGradExplainer
    from explainiverse.adapters import PyTorchAdapter

    adapter = PyTorchAdapter(model, task="classification")

    explainer = SmoothGradExplainer(
        model=adapter,
        feature_names=feature_names,
        n_samples=50,
        noise_scale=0.15
    )

    explanation = explainer.explain(instance)
"""

import numpy as np
from typing import List, Optional

from explainiverse.core.explainer import BaseExplainer
from explainiverse.core.explanation import Explanation


class SmoothGradExplainer(BaseExplainer):
    """
    SmoothGrad explainer for neural networks.

    Computes attributions by averaging gradients over noisy copies of the
    input. The noise helps smooth out local fluctuations in the gradient
    landscape, producing more interpretable saliency maps.

    Algorithm:
        SmoothGrad(x) = (1/n) * Σ_{i=1}^{n} ∂f(x + ε_i)/∂x
        where ε_i ~ N(0, σ²I) or U(-σ, σ)

    Attributes:
        model: Model adapter with predict_with_gradients() method
        feature_names: List of feature names
        class_names: List of class names (for classification)
        n_samples: Number of noisy samples to average
        noise_scale: Standard deviation (Gaussian) or half-range (Uniform)
        noise_type: Type of noise distribution ("gaussian" or "uniform")

    Example:
        >>> explainer = SmoothGradExplainer(adapter, feature_names, n_samples=50)
        >>> explanation = explainer.explain(instance)
        >>> print(explanation.explanation_data["feature_attributions"])
    """

    def __init__(
        self,
        model,
        feature_names: List[str],
        class_names: Optional[List[str]] = None,
        n_samples: int = 50,
        noise_scale: float = 0.15,
        noise_type: str = "gaussian"
    ):
        """
        Initialize the SmoothGrad explainer.

        Args:
            model: A model adapter with predict_with_gradients() method.
                Use PyTorchAdapter for PyTorch models.
            feature_names: List of input feature names.
            class_names: List of class names (for classification tasks).
            n_samples: Number of noisy samples to average. More samples
                reduce variance but increase computation. Default: 50.
            noise_scale: Scale of the noise to add:
                - For "gaussian": standard deviation (default: 0.15)
                - For "uniform": half-range, noise in [-scale, scale]
                Typically set to 10-20% of the input range.
            noise_type: Type of noise distribution:
                - "gaussian": Normal distribution N(0, σ²) (default)
                - "uniform": Uniform distribution U(-σ, σ)

        Raises:
            TypeError: If model doesn't have predict_with_gradients method.
            ValueError: If n_samples < 1, noise_scale < 0, or invalid noise_type.
        """
        super().__init__(model)

        # Validate model has gradient capability
        if not hasattr(model, 'predict_with_gradients'):
            raise TypeError(
                "Model adapter must have predict_with_gradients() method. "
                "Use PyTorchAdapter for PyTorch models."
            )

        # Validate parameters
        if n_samples < 1:
            raise ValueError(f"n_samples must be >= 1, got {n_samples}")

        if noise_scale < 0:
            raise ValueError(f"noise_scale must be >= 0, got {noise_scale}")

        if noise_type not in ["gaussian", "uniform"]:
            raise ValueError(
                f"noise_type must be 'gaussian' or 'uniform', got '{noise_type}'"
            )

        self.feature_names = list(feature_names)
        self.class_names = list(class_names) if class_names else None
        self.n_samples = n_samples
        self.noise_scale = noise_scale
        self.noise_type = noise_type

    def _generate_noise(self, shape: tuple) -> np.ndarray:
        """
        Generate noise samples based on the configured noise type.

        Args:
            shape: Shape of the noise array to generate.

        Returns:
            Numpy array of noise samples.
        """
        if self.noise_type == "gaussian":
            return np.random.normal(0, self.noise_scale, shape).astype(np.float32)
        else:  # uniform
            return np.random.uniform(
                -self.noise_scale,
                self.noise_scale,
                shape
            ).astype(np.float32)

    def _compute_smoothgrad(
        self,
        instance: np.ndarray,
        target_class: Optional[int] = None,
        method: str = "smoothgrad",
        absolute_value: bool = False
    ) -> tuple:
        """
        Compute SmoothGrad attributions for a single instance.

        Args:
            instance: Input instance (1D array).
            target_class: Target class for gradient computation.
            method: Aggregation method:
                - "smoothgrad": Average of gradients (default)
                - "smoothgrad_squared": Average of squared gradients
                - "vargrad": Variance of gradients
            absolute_value: If True, take absolute value of final attributions.

        Returns:
            Tuple of (attributions, std_attributions) arrays.
        """
        instance = instance.flatten().astype(np.float32)

        # Collect gradients for all noisy samples
        all_gradients = []

        for _ in range(self.n_samples):
            # Add noise to input
            if self.noise_scale > 0:
                noise = self._generate_noise(instance.shape)
                noisy_input = instance + noise
            else:
                noisy_input = instance.copy()

            # Compute gradient
            _, gradients = self.model.predict_with_gradients(
                noisy_input.reshape(1, -1),
                target_class=target_class
            )
            all_gradients.append(gradients.flatten())

        all_gradients = np.array(all_gradients)  # Shape: (n_samples, n_features)

        # Compute attributions based on method
        if method == "smoothgrad":
            attributions = np.mean(all_gradients, axis=0)
            std_attributions = np.std(all_gradients, axis=0)
        elif method == "smoothgrad_squared":
            # Average of squared gradients
            squared_gradients = all_gradients ** 2
            attributions = np.mean(squared_gradients, axis=0)
            std_attributions = np.std(squared_gradients, axis=0)
        elif method == "vargrad":
            # Variance of gradients
            attributions = np.var(all_gradients, axis=0)
            std_attributions = np.zeros_like(attributions)  # No std for variance
        else:
            raise ValueError(
                f"Unknown method: '{method}'. "
                f"Use 'smoothgrad', 'smoothgrad_squared', or 'vargrad'."
            )

        # Apply absolute value if requested
        if absolute_value:
            attributions = np.abs(attributions)

        return attributions, std_attributions

    def explain(
        self,
        instance: np.ndarray,
        target_class: Optional[int] = None,
        method: str = "smoothgrad",
        absolute_value: bool = False
    ) -> Explanation:
        """
        Generate SmoothGrad explanation for an instance.

        Args:
            instance: 1D numpy array of input features.
            target_class: For classification, which class to explain.
                If None, uses the predicted class.
            method: Aggregation method:
                - "smoothgrad": Average of gradients (default)
                - "smoothgrad_squared": Average of squared gradients (sharper)
                - "vargrad": Variance of gradients (uncertainty)
            absolute_value: If True, return absolute values of attributions.
                Useful for feature importance without direction.

        Returns:
            Explanation object with feature attributions.

        Example:
            >>> explanation = explainer.explain(instance)
            >>> print(explanation.explanation_data["feature_attributions"])
        """
        instance = np.array(instance).flatten().astype(np.float32)

        # Determine target class if not specified
        if target_class is None and self.class_names:
            predictions = self.model.predict(instance.reshape(1, -1))
            target_class = int(np.argmax(predictions))

        # Compute SmoothGrad
        attributions, std_attributions = self._compute_smoothgrad(
            instance, target_class, method, absolute_value
        )

        # Build attributions dict
        attributions_dict = {
            fname: float(attributions[i])
            for i, fname in enumerate(self.feature_names)
        }

        # Determine explainer name based on method
        if method == "smoothgrad":
            explainer_name = "SmoothGrad"
        elif method == "smoothgrad_squared":
            explainer_name = "SmoothGrad_Squared"
        elif method == "vargrad":
            explainer_name = "VarGrad"
        else:
            explainer_name = f"SmoothGrad_{method}"

        # Determine class name
        if self.class_names and target_class is not None:
            label_name = self.class_names[target_class]
        else:
            label_name = f"class_{target_class}" if target_class is not None else "output"

        explanation_data = {
            "feature_attributions": attributions_dict,
            "attributions_raw": attributions.tolist(),
            "attributions_std": std_attributions.tolist(),
            "n_samples": self.n_samples,
            "noise_scale": self.noise_scale,
            "noise_type": self.noise_type,
            "method": method,
            "absolute_value": absolute_value
        }

        return Explanation(
            explainer_name=explainer_name,
            target_class=label_name,
            explanation_data=explanation_data
        )

    def explain_batch(
        self,
        X: np.ndarray,
        target_class: Optional[int] = None,
        method: str = "smoothgrad",
        absolute_value: bool = False
    ) -> List[Explanation]:
        """
        Generate explanations for multiple instances.

        Args:
            X: 2D numpy array of instances (n_samples, n_features),
                or 1D array for single instance.
            target_class: Target class for all instances. If None,
                uses predicted class for each instance.
            method: Aggregation method (see explain()).
            absolute_value: If True, return absolute values.

        Returns:
            List of Explanation objects.

        Example:
            >>> explanations = explainer.explain_batch(X_test[:10])
            >>> for exp in explanations:
            ...     print(exp.target_class)
        """
        X = np.array(X)
        if X.ndim == 1:
            X = X.reshape(1, -1)

        return [
            self.explain(
                X[i],
                target_class=target_class,
                method=method,
                absolute_value=absolute_value
            )
            for i in range(X.shape[0])
        ]

    def compute_with_baseline_comparison(
        self,
        instance: np.ndarray,
        target_class: Optional[int] = None
    ) -> dict:
        """
        Compare SmoothGrad with raw gradient for analysis.

        Useful for understanding the smoothing effect and validating
        that SmoothGrad is reducing noise appropriately.

        Args:
            instance: Input instance.
            target_class: Target class for gradient computation.

        Returns:
            Dictionary containing:
                - smoothgrad: SmoothGrad attributions
                - raw_gradient: Single gradient (no noise)
                - smoothgrad_squared: Squared variant
                - vargrad: Variance of gradients
                - correlation: Correlation between smoothgrad and raw
        """
        instance = np.array(instance).flatten().astype(np.float32)

        # Determine target class
        if target_class is None and self.class_names:
            predictions = self.model.predict(instance.reshape(1, -1))
            target_class = int(np.argmax(predictions))

        # Raw gradient (no noise)
        _, raw_gradient = self.model.predict_with_gradients(
            instance.reshape(1, -1),
            target_class=target_class
        )
        raw_gradient = raw_gradient.flatten()

        # SmoothGrad variants
        smoothgrad, _ = self._compute_smoothgrad(instance, target_class, "smoothgrad")
        smoothgrad_squared, _ = self._compute_smoothgrad(instance, target_class, "smoothgrad_squared")
        vargrad, _ = self._compute_smoothgrad(instance, target_class, "vargrad")

        # Compute correlation
        correlation = np.corrcoef(smoothgrad, raw_gradient)[0, 1]

        return {
            "smoothgrad": smoothgrad.tolist(),
            "raw_gradient": raw_gradient.tolist(),
            "smoothgrad_squared": smoothgrad_squared.tolist(),
            "vargrad": vargrad.tolist(),
            "correlation": float(correlation),
            "n_samples": self.n_samples,
            "noise_scale": self.noise_scale
        }

    def adaptive_noise_scale(
        self,
        instance: np.ndarray,
        percentile: float = 15.0
    ) -> float:
        """
        Compute adaptive noise scale based on input statistics.

        The original SmoothGrad paper suggests using noise scale
        proportional to the input range. This method computes an
        appropriate scale based on the instance.

        Args:
            instance: Input instance.
            percentile: Percentage of input range to use as noise scale.
                Default: 15% (recommended in paper).

        Returns:
            Recommended noise scale for this instance.
        """
        instance = np.array(instance).flatten()
        input_range = instance.max() - instance.min()

        # Avoid zero scale for constant inputs
        if input_range == 0:
            input_range = np.abs(instance).max()
            if input_range == 0:
                input_range = 1.0

        return float(input_range * percentile / 100.0)
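
For reference, below is a minimal end-to-end sketch of how the new explainer might be exercised. The SmoothGradExplainer and PyTorchAdapter signatures follow the module docstring above; the toy model, feature names, and input instance are hypothetical placeholders, not part of the package.

    # Usage sketch (assumptions: toy nn.Sequential classifier and made-up
    # feature/class names; explainer API as documented in smoothgrad.py).
    import numpy as np
    import torch.nn as nn

    from explainiverse.adapters import PyTorchAdapter
    from explainiverse.explainers.gradient import SmoothGradExplainer

    # Hypothetical 4-feature, 3-class model
    model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 3))
    adapter = PyTorchAdapter(model, task="classification")

    explainer = SmoothGradExplainer(
        model=adapter,
        feature_names=["f0", "f1", "f2", "f3"],
        class_names=["a", "b", "c"],
        n_samples=50,
        noise_scale=0.15,
    )

    instance = np.random.rand(4).astype(np.float32)

    # Per the paper's guidance, the noise scale can also be derived from the
    # input range instead of the fixed default:
    explainer.noise_scale = explainer.adaptive_noise_scale(instance)

    explanation = explainer.explain(instance, method="smoothgrad")
    print(explanation.explanation_data["feature_attributions"])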