explainiverse 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- explainiverse/__init__.py +15 -3
- explainiverse/adapters/__init__.py +11 -1
- explainiverse/adapters/pytorch_adapter.py +396 -0
- explainiverse/core/registry.py +18 -0
- explainiverse/explainers/__init__.py +3 -0
- explainiverse/explainers/gradient/__init__.py +11 -0
- explainiverse/explainers/gradient/integrated_gradients.py +348 -0
- {explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/METADATA +107 -11
- {explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/RECORD +11 -8
- {explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/LICENSE +0 -0
- {explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/WHEEL +0 -0
explainiverse/__init__.py CHANGED

@@ -2,8 +2,9 @@
 """
 Explainiverse - A unified, extensible explainability framework.
 
-Supports multiple XAI methods including LIME, SHAP, Anchors,
-Permutation Importance, PDP, ALE, and SAGE through a
+Supports multiple XAI methods including LIME, SHAP, TreeSHAP, Anchors,
+Counterfactuals, Permutation Importance, PDP, ALE, and SAGE through a
+consistent interface.
 
 Quick Start:
     from explainiverse import default_registry
@@ -14,6 +15,10 @@ Quick Start:
     # Create an explainer
     explainer = default_registry.create("lime", model=adapter, training_data=X, ...)
     explanation = explainer.explain(instance)
+
+For PyTorch models:
+    from explainiverse import PyTorchAdapter  # Requires torch
+    adapter = PyTorchAdapter(model, task="classification")
 """
 
 from explainiverse.core.explainer import BaseExplainer
@@ -25,9 +30,10 @@ from explainiverse.core.registry import (
     get_default_registry,
 )
 from explainiverse.adapters.sklearn_adapter import SklearnAdapter
+from explainiverse.adapters import TORCH_AVAILABLE
 from explainiverse.engine.suite import ExplanationSuite
 
-__version__ = "0.2.1"
+__version__ = "0.2.3"
 
 __all__ = [
     # Core
@@ -40,6 +46,12 @@ __all__ = [
     "get_default_registry",
     # Adapters
     "SklearnAdapter",
+    "TORCH_AVAILABLE",
     # Engine
     "ExplanationSuite",
 ]
+
+# Conditionally export PyTorchAdapter if torch is available
+if TORCH_AVAILABLE:
+    from explainiverse.adapters import PyTorchAdapter
+    __all__.append("PyTorchAdapter")
explainiverse/adapters/__init__.py CHANGED

@@ -1,9 +1,19 @@
 # src/explainiverse/adapters/__init__.py
 """
 Model adapters - wrappers that provide a consistent interface for different ML frameworks.
+
+Available adapters:
+- SklearnAdapter: For scikit-learn models (always available)
+- PyTorchAdapter: For PyTorch nn.Module models (requires torch)
 """
 
 from explainiverse.adapters.base_adapter import BaseModelAdapter
 from explainiverse.adapters.sklearn_adapter import SklearnAdapter
 
-__all__ = ["BaseModelAdapter", "SklearnAdapter"]
+# Conditionally import PyTorchAdapter if torch is available
+try:
+    from explainiverse.adapters.pytorch_adapter import PyTorchAdapter, TORCH_AVAILABLE
+    __all__ = ["BaseModelAdapter", "SklearnAdapter", "PyTorchAdapter", "TORCH_AVAILABLE"]
+except ImportError:
+    TORCH_AVAILABLE = False
+    __all__ = ["BaseModelAdapter", "SklearnAdapter", "TORCH_AVAILABLE"]
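Together with the package `__init__.py` change above, this implements an optional-dependency pattern: `TORCH_AVAILABLE` is always importable, while `PyTorchAdapter` is exported only when torch is installed. A minimal sketch of guarding on it (assumes explainiverse 0.2.3; torch optional):

```python
# Sketch: downstream code can branch on the always-present flag.
from explainiverse import TORCH_AVAILABLE

print(TORCH_AVAILABLE)  # True only if `import torch` succeeded at import time

if TORCH_AVAILABLE:
    import torch.nn as nn
    from explainiverse import PyTorchAdapter  # exported only in this case

    model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
    adapter = PyTorchAdapter(model, task="classification")
else:
    print("torch not installed; PyTorchAdapter is not exported")
```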
explainiverse/adapters/pytorch_adapter.py ADDED

@@ -0,0 +1,396 @@
+# src/explainiverse/adapters/pytorch_adapter.py
+"""
+PyTorch Model Adapter for Explainiverse.
+
+Provides a unified interface for PyTorch neural networks, enabling
+compatibility with all explainers in the framework.
+
+Example:
+    import torch.nn as nn
+    from explainiverse.adapters import PyTorchAdapter
+
+    model = nn.Sequential(
+        nn.Linear(10, 64),
+        nn.ReLU(),
+        nn.Linear(64, 3)
+    )
+
+    adapter = PyTorchAdapter(
+        model,
+        task="classification",
+        class_names=["cat", "dog", "bird"]
+    )
+
+    probs = adapter.predict(X)  # Returns numpy array
+"""
+
+import numpy as np
+from typing import List, Optional, Union, Callable
+
+from .base_adapter import BaseModelAdapter
+
+# Check if PyTorch is available
+try:
+    import torch
+    import torch.nn as nn
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+    torch = None
+    nn = None
+
+
+def _check_torch_available():
+    """Raise ImportError if PyTorch is not installed."""
+    if not TORCH_AVAILABLE:
+        raise ImportError(
+            "PyTorch is required for PyTorchAdapter. "
+            "Install it with: pip install torch"
+        )
+
+
+class PyTorchAdapter(BaseModelAdapter):
+    """
+    Adapter for PyTorch neural network models.
+
+    Wraps a PyTorch nn.Module to provide a consistent interface for
+    explainability methods. Handles device management, tensor/numpy
+    conversions, and supports both classification and regression tasks.
+
+    Attributes:
+        model: The PyTorch model (nn.Module)
+        task: "classification" or "regression"
+        device: torch.device for computation
+        class_names: List of class names (for classification)
+        feature_names: List of feature names
+        output_activation: Optional activation function for outputs
+
+    Example:
+        >>> model = MyNeuralNetwork()
+        >>> adapter = PyTorchAdapter(model, task="classification")
+        >>> probs = adapter.predict(X_numpy)  # Returns probabilities
+    """
+
+    def __init__(
+        self,
+        model,
+        task: str = "classification",
+        feature_names: Optional[List[str]] = None,
+        class_names: Optional[List[str]] = None,
+        device: Optional[str] = None,
+        output_activation: Optional[str] = "auto",
+        batch_size: int = 32
+    ):
+        """
+        Initialize the PyTorch adapter.
+
+        Args:
+            model: A PyTorch nn.Module model.
+            task: "classification" or "regression".
+            feature_names: List of input feature names.
+            class_names: List of output class names (classification only).
+            device: Device to run on ("cpu", "cuda", "cuda:0", etc.).
+                If None, auto-detects based on model parameters.
+            output_activation: Activation for output layer:
+                - "auto": softmax for classification, none for regression
+                - "softmax": Apply softmax (classification)
+                - "sigmoid": Apply sigmoid (binary classification)
+                - "none" or None: No activation (raw logits/values)
+            batch_size: Batch size for large inputs (default: 32).
+        """
+        _check_torch_available()
+
+        if not isinstance(model, nn.Module):
+            raise TypeError(
+                f"Expected nn.Module, got {type(model).__name__}. "
+                "For sklearn models, use SklearnAdapter instead."
+            )
+
+        super().__init__(model, feature_names)
+
+        self.task = task
+        self.class_names = list(class_names) if class_names else None
+        self.batch_size = batch_size
+
+        # Determine device
+        if device is not None:
+            self.device = torch.device(device)
+        else:
+            # Auto-detect from model parameters
+            try:
+                param = next(model.parameters())
+                self.device = param.device
+            except StopIteration:
+                # Model has no parameters, use CPU
+                self.device = torch.device("cpu")
+
+        # Move model to device and set to eval mode
+        self.model = model.to(self.device)
+        self.model.eval()
+
+        # Configure output activation
+        if output_activation == "auto":
+            if task == "classification":
+                self.output_activation = "softmax"
+            else:
+                self.output_activation = None
+        else:
+            self.output_activation = output_activation if output_activation != "none" else None
+
+    def _to_tensor(self, data: np.ndarray) -> "torch.Tensor":
+        """Convert numpy array to tensor on the correct device."""
+        if isinstance(data, torch.Tensor):
+            return data.to(self.device).float()
+        return torch.tensor(data, dtype=torch.float32, device=self.device)
+
+    def _to_numpy(self, tensor: "torch.Tensor") -> np.ndarray:
+        """Convert tensor to numpy array."""
+        return tensor.detach().cpu().numpy()
+
+    def _apply_activation(self, output: "torch.Tensor") -> "torch.Tensor":
+        """Apply output activation function."""
+        if self.output_activation == "softmax":
+            return torch.softmax(output, dim=-1)
+        elif self.output_activation == "sigmoid":
+            return torch.sigmoid(output)
+        return output
+
+    def predict(self, data: np.ndarray) -> np.ndarray:
+        """
+        Generate predictions for input data.
+
+        Args:
+            data: Input data as numpy array. Shape: (n_samples, n_features)
+                or (n_samples, channels, height, width) for images.
+
+        Returns:
+            Predictions as numpy array:
+            - Classification: probabilities of shape (n_samples, n_classes)
+            - Regression: values of shape (n_samples, n_outputs)
+        """
+        data = np.array(data)
+
+        # Handle single instance
+        if data.ndim == 1:
+            data = data.reshape(1, -1)
+
+        n_samples = data.shape[0]
+        outputs = []
+
+        with torch.no_grad():
+            for i in range(0, n_samples, self.batch_size):
+                batch = data[i:i + self.batch_size]
+                tensor_batch = self._to_tensor(batch)
+
+                output = self.model(tensor_batch)
+                output = self._apply_activation(output)
+                outputs.append(self._to_numpy(output))
+
+        return np.vstack(outputs)
+
+    def predict_with_gradients(
+        self,
+        data: np.ndarray,
+        target_class: Optional[int] = None
+    ) -> tuple:
+        """
+        Generate predictions and compute gradients w.r.t. inputs.
+
+        This is essential for gradient-based attribution methods like
+        Integrated Gradients, GradCAM, and Saliency Maps.
+
+        Args:
+            data: Input data as numpy array.
+            target_class: Class index for gradient computation.
+                If None, uses the predicted class.
+
+        Returns:
+            Tuple of (predictions, gradients) as numpy arrays.
+        """
+        data = np.array(data)
+        if data.ndim == 1:
+            data = data.reshape(1, -1)
+
+        # Convert to tensor with gradient tracking
+        tensor_data = self._to_tensor(data)
+        tensor_data.requires_grad_(True)
+
+        # Forward pass
+        output = self.model(tensor_data)
+        activated_output = self._apply_activation(output)
+
+        # Determine target for gradient
+        if self.task == "classification":
+            if target_class is None:
+                target_class = output.argmax(dim=-1)
+            elif isinstance(target_class, int):
+                target_class = torch.tensor([target_class] * data.shape[0], device=self.device)
+
+            # Select target class scores for gradient
+            target_scores = output.gather(1, target_class.view(-1, 1)).squeeze()
+        else:
+            # Regression: gradient w.r.t. output
+            target_scores = output.squeeze()
+
+        # Backward pass
+        if target_scores.dim() == 0:
+            target_scores.backward()
+        else:
+            target_scores.sum().backward()
+
+        gradients = tensor_data.grad
+
+        return (
+            self._to_numpy(activated_output),
+            self._to_numpy(gradients)
+        )
+
+    def get_layer_output(
+        self,
+        data: np.ndarray,
+        layer_name: str
+    ) -> np.ndarray:
+        """
+        Get intermediate layer activations.
+
+        Useful for methods like GradCAM that need feature map activations.
+
+        Args:
+            data: Input data as numpy array.
+            layer_name: Name of the layer to extract (as registered in model).
+
+        Returns:
+            Layer activations as numpy array.
+        """
+        data = np.array(data)
+        if data.ndim == 1:
+            data = data.reshape(1, -1)
+
+        activations = {}
+
+        def hook_fn(module, input, output):
+            activations['output'] = output
+
+        # Find and hook the layer
+        layer = dict(self.model.named_modules()).get(layer_name)
+        if layer is None:
+            available = list(dict(self.model.named_modules()).keys())
+            raise ValueError(
+                f"Layer '{layer_name}' not found. Available layers: {available}"
+            )
+
+        handle = layer.register_forward_hook(hook_fn)
+
+        try:
+            with torch.no_grad():
+                tensor_data = self._to_tensor(data)
+                _ = self.model(tensor_data)
+        finally:
+            handle.remove()
+
+        return self._to_numpy(activations['output'])
+
+    def get_layer_gradients(
+        self,
+        data: np.ndarray,
+        layer_name: str,
+        target_class: Optional[int] = None
+    ) -> tuple:
+        """
+        Get gradients of output w.r.t. a specific layer's activations.
+
+        Essential for GradCAM and similar visualization methods.
+
+        Args:
+            data: Input data as numpy array.
+            layer_name: Name of the layer for gradient computation.
+            target_class: Target class for gradient (classification).
+
+        Returns:
+            Tuple of (layer_activations, layer_gradients) as numpy arrays.
+        """
+        data = np.array(data)
+        if data.ndim == 1:
+            data = data.reshape(1, -1)
+
+        activations = {}
+        gradients = {}
+
+        def forward_hook(module, input, output):
+            activations['output'] = output
+
+        def backward_hook(module, grad_input, grad_output):
+            gradients['output'] = grad_output[0]
+
+        # Find and hook the layer
+        layer = dict(self.model.named_modules()).get(layer_name)
+        if layer is None:
+            available = list(dict(self.model.named_modules()).keys())
+            raise ValueError(
+                f"Layer '{layer_name}' not found. Available layers: {available}"
+            )
+
+        forward_handle = layer.register_forward_hook(forward_hook)
+        backward_handle = layer.register_full_backward_hook(backward_hook)
+
+        try:
+            tensor_data = self._to_tensor(data)
+            tensor_data.requires_grad_(True)
+
+            output = self.model(tensor_data)
+
+            if self.task == "classification":
+                if target_class is None:
+                    target_class = output.argmax(dim=-1)
+                elif isinstance(target_class, int):
+                    target_class = torch.tensor([target_class] * data.shape[0], device=self.device)
+
+                target_scores = output.gather(1, target_class.view(-1, 1)).squeeze()
+            else:
+                target_scores = output.squeeze()
+
+            if target_scores.dim() == 0:
+                target_scores.backward()
+            else:
+                target_scores.sum().backward()
+        finally:
+            forward_handle.remove()
+            backward_handle.remove()
+
+        return (
+            self._to_numpy(activations['output']),
+            self._to_numpy(gradients['output'])
+        )
+
+    def list_layers(self) -> List[str]:
+        """
+        List all named layers/modules in the model.
+
+        Returns:
+            List of layer names that can be used with get_layer_output/gradients.
+        """
+        return [name for name, _ in self.model.named_modules() if name]
+
+    def to(self, device: str) -> "PyTorchAdapter":
+        """
+        Move the model to a different device.
+
+        Args:
+            device: Target device ("cpu", "cuda", "cuda:0", etc.)
+
+        Returns:
+            Self for chaining.
+        """
+        self.device = torch.device(device)
+        self.model = self.model.to(self.device)
+        return self
+
+    def train_mode(self) -> "PyTorchAdapter":
+        """Set model to training mode (enables dropout, batchnorm updates)."""
+        self.model.train()
+        return self
+
+    def eval_mode(self) -> "PyTorchAdapter":
+        """Set model to evaluation mode (disables dropout, freezes batchnorm)."""
+        self.model.eval()
+        return self
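A minimal usage sketch for the adapter added above (assumes explainiverse 0.2.3 with torch installed; the toy model and random data are illustrative):

```python
import numpy as np
import torch.nn as nn
from explainiverse import PyTorchAdapter

# Toy 3-class classifier over 10 features (mirrors the docstring example).
model = nn.Sequential(nn.Linear(10, 64), nn.ReLU(), nn.Linear(64, 3))
adapter = PyTorchAdapter(model, task="classification",
                         class_names=["cat", "dog", "bird"])

X = np.random.rand(5, 10).astype(np.float32)

probs = adapter.predict(X)                  # (5, 3); softmax applied ("auto")
assert np.allclose(probs.sum(axis=1), 1.0, atol=1e-5)

# Gradients of the predicted-class logit w.r.t. the inputs, per
# predict_with_gradients() above.
preds, grads = adapter.predict_with_gradients(X)
print(preds.shape, grads.shape)             # (5, 3) (5, 10)

# nn.Sequential names its submodules "0", "1", "2".
print(adapter.list_layers())                # ['0', '1', '2']
acts = adapter.get_layer_output(X, layer_name="1")  # post-ReLU activations
print(acts.shape)                           # (5, 64)
```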
explainiverse/core/registry.py CHANGED

@@ -369,6 +369,7 @@ def _create_default_registry() -> ExplainerRegistry:
     from explainiverse.explainers.global_explainers.ale import ALEExplainer
     from explainiverse.explainers.global_explainers.sage import SAGEExplainer
     from explainiverse.explainers.counterfactual.dice_wrapper import CounterfactualExplainer
+    from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
 
     registry = ExplainerRegistry()
 
@@ -461,6 +462,23 @@
         )
     )
 
+    # Register Integrated Gradients (for neural networks)
+    registry.register(
+        name="integrated_gradients",
+        explainer_class=IntegratedGradientsExplainer,
+        meta=ExplainerMeta(
+            scope="local",
+            model_types=["neural"],
+            data_types=["tabular", "image"],
+            task_types=["classification", "regression"],
+            description="Integrated Gradients - axiomatic attributions for neural networks (requires PyTorch)",
+            paper_reference="Sundararajan et al., 2017 - 'Axiomatic Attribution for Deep Networks' (ICML)",
+            complexity="O(n_steps * forward_pass)",
+            requires_training_data=False,
+            supports_batching=True
+        )
+    )
+
     # =========================================================================
     # Global Explainers (model-level)
     # =========================================================================
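With this registration in place, the new explainer should be constructible through the default registry. A sketch, assuming `create()` forwards keyword arguments to `IntegratedGradientsExplainer.__init__` the same way the README's `create("lime", ...)` call does:

```python
import numpy as np
import torch.nn as nn
from explainiverse import PyTorchAdapter, default_registry

model = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 2))
adapter = PyTorchAdapter(model, task="classification")

# "integrated_gradients" now appears alongside the other registered names.
print(default_registry.list_explainers())

# Assumed: kwargs are passed through to the explainer's constructor.
explainer = default_registry.create(
    "integrated_gradients",
    model=adapter,
    feature_names=["f0", "f1", "f2", "f3"],
    n_steps=25,
)
explanation = explainer.explain(np.random.rand(4))
print(explanation.explanation_data["feature_attributions"])
```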
explainiverse/explainers/__init__.py CHANGED

@@ -8,6 +8,7 @@ Local Explainers (instance-level):
 - TreeSHAP: Optimized exact SHAP for tree-based models
 - Anchors: High-precision rule-based explanations
 - Counterfactual: Diverse counterfactual explanations
+- Integrated Gradients: Gradient-based attributions for neural networks
 
 Global Explainers (model-level):
 - Permutation Importance: Feature importance via permutation
@@ -25,6 +26,7 @@ from explainiverse.explainers.global_explainers.permutation_importance import Pe
 from explainiverse.explainers.global_explainers.partial_dependence import PartialDependenceExplainer
 from explainiverse.explainers.global_explainers.ale import ALEExplainer
 from explainiverse.explainers.global_explainers.sage import SAGEExplainer
+from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
 
 __all__ = [
     # Local explainers
@@ -33,6 +35,7 @@ __all__ = [
     "TreeShapExplainer",
     "AnchorsExplainer",
     "CounterfactualExplainer",
+    "IntegratedGradientsExplainer",
     # Global explainers
     "PermutationImportanceExplainer",
     "PartialDependenceExplainer",
explainiverse/explainers/gradient/__init__.py ADDED

@@ -0,0 +1,11 @@
+# src/explainiverse/explainers/gradient/__init__.py
+"""
+Gradient-based explainers for neural networks.
+
+These explainers require models that support gradient computation,
+typically via the PyTorchAdapter.
+"""
+
+from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
+
+__all__ = ["IntegratedGradientsExplainer"]
explainiverse/explainers/gradient/integrated_gradients.py ADDED

@@ -0,0 +1,348 @@
+# src/explainiverse/explainers/gradient/integrated_gradients.py
+"""
+Integrated Gradients - Axiomatic Attribution for Deep Networks.
+
+Integrated Gradients computes feature attributions by accumulating gradients
+along a straight-line path from a baseline to the input. It satisfies two
+key axioms:
+- Sensitivity: If a feature differs between input and baseline and changes
+  the prediction, it receives non-zero attribution.
+- Implementation Invariance: Attributions are identical for functionally
+  equivalent networks.
+
+Reference:
+    Sundararajan, M., Taly, A., & Yan, Q. (2017). Axiomatic Attribution for
+    Deep Networks. ICML 2017. https://arxiv.org/abs/1703.01365
+
+Example:
+    from explainiverse.explainers.gradient import IntegratedGradientsExplainer
+    from explainiverse.adapters import PyTorchAdapter
+
+    adapter = PyTorchAdapter(model, task="classification")
+
+    explainer = IntegratedGradientsExplainer(
+        model=adapter,
+        feature_names=feature_names,
+        n_steps=50
+    )
+
+    explanation = explainer.explain(instance)
+"""
+
+import numpy as np
+from typing import List, Optional, Union, Callable
+
+from explainiverse.core.explainer import BaseExplainer
+from explainiverse.core.explanation import Explanation
+
+
+class IntegratedGradientsExplainer(BaseExplainer):
+    """
+    Integrated Gradients explainer for neural networks.
+
+    Computes attributions by integrating gradients along the path from
+    a baseline (default: zero vector) to the input. The integral is
+    approximated using the Riemann sum.
+
+    Attributes:
+        model: Model adapter with predict_with_gradients() method
+        feature_names: List of feature names
+        class_names: List of class names (for classification)
+        n_steps: Number of steps for integral approximation
+        baseline: Baseline input (default: zeros)
+        method: Integration method ("riemann_middle", "riemann_left", "riemann_right", "riemann_trapezoid")
+    """
+
+    def __init__(
+        self,
+        model,
+        feature_names: List[str],
+        class_names: Optional[List[str]] = None,
+        n_steps: int = 50,
+        baseline: Optional[np.ndarray] = None,
+        method: str = "riemann_middle"
+    ):
+        """
+        Initialize the Integrated Gradients explainer.
+
+        Args:
+            model: A model adapter with predict_with_gradients() method.
+                Use PyTorchAdapter for PyTorch models.
+            feature_names: List of input feature names.
+            class_names: List of class names (for classification tasks).
+            n_steps: Number of steps for approximating the integral.
+                More steps = more accurate but slower. Default: 50.
+            baseline: Baseline input for comparison. If None, uses zeros.
+                Can also be "random" for random baseline or a callable.
+            method: Integration method:
+                - "riemann_middle": Middle Riemann sum (default, most accurate)
+                - "riemann_left": Left Riemann sum
+                - "riemann_right": Right Riemann sum
+                - "riemann_trapezoid": Trapezoidal rule
+        """
+        super().__init__(model)
+
+        # Validate model has gradient capability
+        if not hasattr(model, 'predict_with_gradients'):
+            raise TypeError(
+                "Model adapter must have predict_with_gradients() method. "
+                "Use PyTorchAdapter for PyTorch models."
+            )
+
+        self.feature_names = list(feature_names)
+        self.class_names = list(class_names) if class_names else None
+        self.n_steps = n_steps
+        self.baseline = baseline
+        self.method = method
+
+    def _get_baseline(self, instance: np.ndarray) -> np.ndarray:
+        """Get the baseline for a given input shape."""
+        if self.baseline is None:
+            # Default: zero baseline
+            return np.zeros_like(instance)
+        elif isinstance(self.baseline, str) and self.baseline == "random":
+            # Random baseline (useful for images)
+            return np.random.uniform(
+                low=instance.min(),
+                high=instance.max(),
+                size=instance.shape
+            ).astype(instance.dtype)
+        elif callable(self.baseline):
+            return self.baseline(instance)
+        else:
+            return np.array(self.baseline).reshape(instance.shape)
+
+    def _get_interpolation_alphas(self) -> np.ndarray:
+        """Get interpolation points based on method."""
+        if self.method == "riemann_left":
+            return np.linspace(0, 1 - 1/self.n_steps, self.n_steps)
+        elif self.method == "riemann_right":
+            return np.linspace(1/self.n_steps, 1, self.n_steps)
+        elif self.method == "riemann_middle":
+            return np.linspace(0.5/self.n_steps, 1 - 0.5/self.n_steps, self.n_steps)
+        elif self.method == "riemann_trapezoid":
+            return np.linspace(0, 1, self.n_steps + 1)
+        else:
+            raise ValueError(f"Unknown method: {self.method}")
+
+    def _compute_integrated_gradients(
+        self,
+        instance: np.ndarray,
+        baseline: np.ndarray,
+        target_class: Optional[int] = None
+    ) -> np.ndarray:
+        """
+        Compute integrated gradients for a single instance.
+
+        The integral is approximated as:
+            IG_i = (x_i - x'_i) * sum_{k=1}^{m} grad_i(x' + k/m * (x - x')) / m
+
+        where x is the input, x' is the baseline, and m is n_steps.
+        """
+        # Get interpolation points
+        alphas = self._get_interpolation_alphas()
+
+        # Compute path from baseline to input
+        # Shape: (n_steps, n_features)
+        delta = instance - baseline
+        interpolated_inputs = baseline + alphas[:, np.newaxis] * delta
+
+        # Compute gradients at each interpolation point
+        all_gradients = []
+        for interp_input in interpolated_inputs:
+            _, gradients = self.model.predict_with_gradients(
+                interp_input.reshape(1, -1),
+                target_class=target_class
+            )
+            all_gradients.append(gradients.flatten())
+
+        all_gradients = np.array(all_gradients)  # Shape: (n_steps, n_features)
+
+        # Approximate the integral
+        if self.method == "riemann_trapezoid":
+            # Trapezoidal rule: (f(0) + 2*f(1) + ... + 2*f(n-1) + f(n)) / (2n)
+            weights = np.ones(self.n_steps + 1)
+            weights[0] = 0.5
+            weights[-1] = 0.5
+            avg_gradients = np.average(all_gradients, axis=0, weights=weights)
+        else:
+            # Standard Riemann sum: average of gradients
+            avg_gradients = np.mean(all_gradients, axis=0)
+
+        # Scale by input - baseline difference
+        integrated_gradients = delta * avg_gradients
+
+        return integrated_gradients
+
+    def explain(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        baseline: Optional[np.ndarray] = None,
+        return_convergence_delta: bool = False
+    ) -> Explanation:
+        """
+        Generate Integrated Gradients explanation for an instance.
+
+        Args:
+            instance: 1D numpy array of input features.
+            target_class: For classification, which class to explain.
+                If None, uses the predicted class.
+            baseline: Override the default baseline for this explanation.
+            return_convergence_delta: If True, include the convergence delta
+                (difference between sum of attributions
+                and prediction difference).
+
+        Returns:
+            Explanation object with feature attributions.
+        """
+        instance = np.array(instance).flatten().astype(np.float32)
+
+        # Get baseline
+        if baseline is not None:
+            bl = np.array(baseline).flatten().astype(np.float32)
+        else:
+            bl = self._get_baseline(instance)
+
+        # Determine target class if not specified
+        if target_class is None and self.class_names:
+            predictions = self.model.predict(instance.reshape(1, -1))
+            target_class = int(np.argmax(predictions))
+
+        # Compute integrated gradients
+        ig_attributions = self._compute_integrated_gradients(
+            instance, bl, target_class
+        )
+
+        # Build attributions dict
+        attributions = {
+            fname: float(ig_attributions[i])
+            for i, fname in enumerate(self.feature_names)
+        }
+
+        # Determine class name
+        if self.class_names and target_class is not None:
+            label_name = self.class_names[target_class]
+        else:
+            label_name = f"class_{target_class}" if target_class is not None else "output"
+
+        explanation_data = {
+            "feature_attributions": attributions,
+            "attributions_raw": ig_attributions.tolist(),
+            "baseline": bl.tolist(),
+            "n_steps": self.n_steps,
+            "method": self.method
+        }
+
+        # Optionally compute convergence delta
+        if return_convergence_delta:
+            # The sum of attributions should equal F(x) - F(baseline)
+            pred_input = self.model.predict(instance.reshape(1, -1))
+            pred_baseline = self.model.predict(bl.reshape(1, -1))
+
+            if target_class is not None:
+                pred_diff = pred_input[0, target_class] - pred_baseline[0, target_class]
+            else:
+                pred_diff = pred_input[0, 0] - pred_baseline[0, 0]
+
+            attribution_sum = np.sum(ig_attributions)
+            convergence_delta = abs(pred_diff - attribution_sum)
+
+            explanation_data["convergence_delta"] = float(convergence_delta)
+            explanation_data["prediction_difference"] = float(pred_diff)
+            explanation_data["attribution_sum"] = float(attribution_sum)
+
+        return Explanation(
+            explainer_name="IntegratedGradients",
+            target_class=label_name,
+            explanation_data=explanation_data
+        )
+
+    def explain_batch(
+        self,
+        X: np.ndarray,
+        target_class: Optional[int] = None
+    ) -> List[Explanation]:
+        """
+        Generate explanations for multiple instances.
+
+        Note: This is not optimized for batching - it processes
+        instances sequentially. For large batches, consider using
+        the batched gradient computation in a custom implementation.
+
+        Args:
+            X: 2D numpy array of instances (n_samples, n_features).
+            target_class: Target class for all instances.
+
+        Returns:
+            List of Explanation objects.
+        """
+        X = np.array(X)
+        if X.ndim == 1:
+            X = X.reshape(1, -1)
+
+        return [
+            self.explain(X[i], target_class=target_class)
+            for i in range(X.shape[0])
+        ]
+
+    def compute_attributions_with_noise(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        n_samples: int = 5,
+        noise_scale: float = 0.1
+    ) -> Explanation:
+        """
+        Compute attributions averaged over noisy baselines (SmoothGrad-style).
+
+        This can help reduce noise in the attributions by averaging over
+        multiple baselines sampled around the zero baseline.
+
+        Args:
+            instance: Input instance.
+            target_class: Target class for attribution.
+            n_samples: Number of noisy baselines to average.
+            noise_scale: Standard deviation of Gaussian noise.
+
+        Returns:
+            Explanation with averaged attributions.
+        """
+        instance = np.array(instance).flatten().astype(np.float32)
+
+        all_attributions = []
+        for _ in range(n_samples):
+            # Create noisy baseline
+            noise = np.random.normal(0, noise_scale, instance.shape).astype(np.float32)
+            noisy_baseline = noise  # Noise around zero
+
+            ig = self._compute_integrated_gradients(
+                instance, noisy_baseline, target_class
+            )
+            all_attributions.append(ig)
+
+        # Average attributions
+        avg_attributions = np.mean(all_attributions, axis=0)
+        std_attributions = np.std(all_attributions, axis=0)
+
+        attributions = {
+            fname: float(avg_attributions[i])
+            for i, fname in enumerate(self.feature_names)
+        }
+
+        if self.class_names and target_class is not None:
+            label_name = self.class_names[target_class]
+        else:
+            label_name = f"class_{target_class}" if target_class is not None else "output"
+
+        return Explanation(
+            explainer_name="IntegratedGradients_Smooth",
+            target_class=label_name,
+            explanation_data={
+                "feature_attributions": attributions,
+                "attributions_raw": avg_attributions.tolist(),
+                "attributions_std": std_attributions.tolist(),
+                "n_samples": n_samples,
+                "noise_scale": noise_scale
+            }
+        )
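The `_compute_integrated_gradients` helper above implements IG_i = (x_i - x'_i) * sum_k grad_i(x' + alpha_k * (x - x')) / m. Its completeness property (attributions summing to F(x) - F(x')) can be sanity-checked on an analytic function without the package; a self-contained NumPy sketch, not part of the release:

```python
import numpy as np

# Check the middle-Riemann IG approximation on F(x) = sum(x**2),
# whose gradient is known analytically: grad_F(x) = 2x.
def grad_F(x):
    return 2.0 * x

x = np.array([1.0, -2.0, 0.5])
baseline = np.zeros_like(x)
m = 50

# Same alphas as the "riemann_middle" branch above.
alphas = np.linspace(0.5 / m, 1 - 0.5 / m, m)
points = baseline + alphas[:, None] * (x - baseline)
avg_grad = np.mean([grad_F(p) for p in points], axis=0)
ig = (x - baseline) * avg_grad

# Completeness: attributions sum to F(x) - F(baseline) = 5.25.
print(ig.sum(), np.sum(x**2) - np.sum(baseline**2))  # 5.25 5.25
```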
{explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: explainiverse
-Version: 0.2.1
+Version: 0.2.3
 Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
 Home-page: https://github.com/jemsbhai/explainiverse
 License: MIT
@@ -17,11 +17,13 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Provides-Extra: torch
 Requires-Dist: lime (>=0.2.0.1,<0.3.0.0)
 Requires-Dist: numpy (>=1.24,<2.0)
 Requires-Dist: scikit-learn (>=1.1,<1.6)
 Requires-Dist: scipy (>=1.10,<2.0)
 Requires-Dist: shap (>=0.48.0,<0.49.0)
+Requires-Dist: torch (>=2.0) ; extra == "torch"
 Requires-Dist: xgboost (>=1.7,<3.0)
 Project-URL: Repository, https://github.com/jemsbhai/explainiverse
 Description-Content-Type: text/markdown
@@ -29,7 +31,7 @@ Description-Content-Type: text/markdown
 # Explainiverse
 
 **Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI).
-It provides a standardized interface for model-agnostic explainability with 8 state-of-the-art XAI methods, evaluation metrics, and a plugin registry for easy extensibility.
+It provides a standardized interface for model-agnostic explainability with 10 state-of-the-art XAI methods, evaluation metrics, and a plugin registry for easy extensibility.
 
 ---
 
@@ -40,6 +42,8 @@ It provides a standardized interface for model-agnostic explainability with 8 st
 **Local Explainers** (instance-level explanations):
 - **LIME** - Local Interpretable Model-agnostic Explanations ([Ribeiro et al., 2016](https://arxiv.org/abs/1602.04938))
 - **SHAP** - SHapley Additive exPlanations via KernelSHAP ([Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874))
+- **TreeSHAP** - Exact SHAP values for tree models, 10x+ faster ([Lundberg et al., 2018](https://arxiv.org/abs/1802.03888))
+- **Integrated Gradients** - Axiomatic attributions for neural networks ([Sundararajan et al., 2017](https://arxiv.org/abs/1703.01365))
 - **Anchors** - High-precision rule-based explanations ([Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491))
 - **Counterfactual** - DiCE-style diverse counterfactual explanations ([Mothilal et al., 2020](https://arxiv.org/abs/1905.07697))
 
@@ -62,7 +66,7 @@ It provides a standardized interface for model-agnostic explainability with 8 st
 ### 🧪 Standardized Interface
 - Consistent `BaseExplainer` API
 - Unified `Explanation` output format
-- Model adapters for sklearn and
+- Model adapters for sklearn and PyTorch
 
 ---
 
@@ -74,6 +78,12 @@ From PyPI:
 pip install explainiverse
 ```
 
+With PyTorch support (for neural network explanations):
+
+```bash
+pip install explainiverse[torch]
+```
+
 For development:
 
 ```bash
@@ -100,7 +110,7 @@ adapter = SklearnAdapter(model, class_names=iris.target_names.tolist())
 
 # List available explainers
 print(default_registry.list_explainers())
-# ['lime', 'shap', 'anchors', 'counterfactual', 'permutation_importance', 'partial_dependence', 'ale', 'sage']
+# ['lime', 'shap', 'treeshap', 'integrated_gradients', 'anchors', 'counterfactual', 'permutation_importance', 'partial_dependence', 'ale', 'sage']
 
 # Create and use an explainer
 explainer = default_registry.create(
@@ -119,11 +129,11 @@ print(explanation.explanation_data["feature_attributions"])
 ```python
 # Find local explainers for tabular data
 local_tabular = default_registry.filter(scope="local", data_type="tabular")
-print(local_tabular)  # ['lime', 'shap', 'anchors', 'counterfactual']
+print(local_tabular)  # ['lime', 'shap', 'treeshap', 'integrated_gradients', 'anchors', 'counterfactual']
 
-# Find
-
-print(
+# Find explainers optimized for tree models
+tree_explainers = default_registry.filter(model_type="tree")
+print(tree_explainers)  # ['treeshap']
 
 # Get recommendations
 recommendations = default_registry.recommend(
@@ -133,6 +143,90 @@ recommendations = default_registry.recommend(
 )
 ```
 
+### TreeSHAP for Tree Models (10x+ Faster)
+
+```python
+from explainiverse.explainers import TreeShapExplainer
+from sklearn.ensemble import RandomForestClassifier
+
+# Train a tree-based model
+model = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
+
+# TreeSHAP works directly with the model (no adapter needed)
+explainer = TreeShapExplainer(
+    model=model,
+    feature_names=feature_names,
+    class_names=class_names
+)
+
+# Single instance explanation
+explanation = explainer.explain(X_test[0])
+print(explanation.explanation_data["feature_attributions"])
+
+# Batch explanations (efficient)
+explanations = explainer.explain_batch(X_test[:10])
+
+# Feature interactions
+interactions = explainer.explain_interactions(X_test[0])
+print(interactions.explanation_data["interaction_matrix"])
+```
+
+### PyTorch Adapter for Neural Networks
+
+```python
+from explainiverse import PyTorchAdapter
+import torch.nn as nn
+
+# Define a PyTorch model
+model = nn.Sequential(
+    nn.Linear(10, 64),
+    nn.ReLU(),
+    nn.Linear(64, 3)
+)
+
+# Wrap with adapter
+adapter = PyTorchAdapter(
+    model,
+    task="classification",
+    class_names=["cat", "dog", "bird"]
+)
+
+# Use with any explainer
+predictions = adapter.predict(X)  # Returns numpy array
+
+# Get gradients for attribution methods
+predictions, gradients = adapter.predict_with_gradients(X)
+
+# Access intermediate layers
+activations = adapter.get_layer_output(X, layer_name="0")
+```
+
+### Integrated Gradients for Neural Networks
+
+```python
+from explainiverse.explainers import IntegratedGradientsExplainer
+from explainiverse import PyTorchAdapter
+
+# Wrap your PyTorch model
+adapter = PyTorchAdapter(model, task="classification", class_names=class_names)
+
+# Create IG explainer
+explainer = IntegratedGradientsExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    n_steps=50  # More steps = more accurate
+)
+
+# Explain a prediction
+explanation = explainer.explain(X_test[0])
+print(explanation.explanation_data["feature_attributions"])
+
+# Check convergence (sum of attributions ≈ F(x) - F(baseline))
+explanation = explainer.explain(X_test[0], return_convergence_delta=True)
+print(f"Convergence delta: {explanation.explanation_data['convergence_delta']}")
+```
+
 ### Using Specific Explainers
 
 ```python
@@ -233,12 +327,14 @@ poetry run pytest tests/test_new_explainers.py -v
 ## Roadmap
 
 - [x] LIME, SHAP (KernelSHAP)
+- [x] TreeSHAP (optimized for tree models) ✅
 - [x] Anchors, Counterfactuals
 - [x] Permutation Importance, PDP, ALE, SAGE
 - [x] Explainer Registry with filtering
-- [
-- [
-- [ ]
+- [x] PyTorch Adapter ✅
+- [x] Integrated Gradients ✅ NEW
+- [ ] GradCAM for CNNs
+- [ ] TensorFlow adapter
 - [ ] Interactive visualization dashboard
 
 ---
{explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/RECORD CHANGED

@@ -1,16 +1,17 @@
-explainiverse/__init__.py,sha256=
-explainiverse/adapters/__init__.py,sha256=
+explainiverse/__init__.py,sha256=NmrLPOGZZPZTq1vY0G4gid5ZJWxsVGd3CfTXVIDvjaQ,1612
+explainiverse/adapters/__init__.py,sha256=HcQGISyp-YQ4jEj2IYveX_c9X5otLcTNWRnVRRhzRik,781
 explainiverse/adapters/base_adapter.py,sha256=Nqt0GeDn_-PjTyJcZsE8dRTulavqFQsv8sMYWS_ps-M,603
+explainiverse/adapters/pytorch_adapter.py,sha256=GTilJAR1VF_OgWG88qZoqlqefHaSXB3i9iOwCJkyHTg,13318
 explainiverse/adapters/sklearn_adapter.py,sha256=pzIBtMuqrG-6ZbUqUCMt7rSk3Ow0FgrY268FSweFvw4,958
 explainiverse/core/__init__.py,sha256=P3jHMnH5coFqTTO1w-gT-rurkCM1-9r3pF-055pbXMg,474
 explainiverse/core/explainer.py,sha256=Z9on-9VblYDlQx9oBm1BHpmAf_NsQajZ3qr-u48Aejo,784
 explainiverse/core/explanation.py,sha256=6zxFh_TH8tFHc-r_H5-WHQ05Sp1Kp2TxLz3gyFek5jo,881
-explainiverse/core/registry.py,sha256=
+explainiverse/core/registry.py,sha256=AC8XDIdX2IGyx0KkmDajAjdo5YsrM3dcKvYoQu1vNCk,20711
 explainiverse/engine/__init__.py,sha256=1sZO8nH1mmwK2e-KUavBQm7zYDWUe27nyWoFy9tgsiA,197
 explainiverse/engine/suite.py,sha256=sq8SK_6Pf0qRckTmVJ7Mdosu9bhkjAGPGN8ymLGFP9E,4914
 explainiverse/evaluation/__init__.py,sha256=Y50L_b4HKthg4epwcayPHXh0l4i4MUuzvaNlqPmUNZY,212
 explainiverse/evaluation/metrics.py,sha256=tSBXtyA_-0zOGCGjlPZU6LdGKRH_QpWfgKa78sdlovs,7453
-explainiverse/explainers/__init__.py,sha256=
+explainiverse/explainers/__init__.py,sha256=3yhamu1E2hpb0vE_hg3xK621YJdZYcy7gsSGgCT4Km4,1962
 explainiverse/explainers/attribution/__init__.py,sha256=YeVs9bS_IWDtqGbp6T37V6Zp5ZDWzLdAXHxxyFGpiQM,431
 explainiverse/explainers/attribution/lime_wrapper.py,sha256=OnXIV7t6yd-vt38sIi7XmHFbgzlZfCEbRlFyGGd5XiE,3245
 explainiverse/explainers/attribution/shap_wrapper.py,sha256=tKie5AvN7mb55PWOYdMvW0lUAYjfHPzYosEloEY2ZzI,3210
@@ -22,9 +23,11 @@ explainiverse/explainers/global_explainers/ale.py,sha256=tgG3XTppCf8LiD7uKzBt4DI
 explainiverse/explainers/global_explainers/partial_dependence.py,sha256=dH6yMjpwZads3pACR3rSykTbssLGHH7e6HfMlpl-S3I,6745
 explainiverse/explainers/global_explainers/permutation_importance.py,sha256=bcgKz1S_D3lrBMgpqEF_Z6qw8Knxl_cfR50hrSO2tBc,4410
 explainiverse/explainers/global_explainers/sage.py,sha256=57Xw1SK529x5JXWt0TVrcFYUUP3C65LfUwgoM-Z3gaw,5839
+explainiverse/explainers/gradient/__init__.py,sha256=Z4uSZcBhnHGp7DCd7bhcIMj_3f_uuCFw5AGA1JX6myQ,350
+explainiverse/explainers/gradient/integrated_gradients.py,sha256=feBgY3Vw2rDti7fxRZtLkxse75m2dbP_R05ARqo2BRM,13367
 explainiverse/explainers/rule_based/__init__.py,sha256=gKzlFCAzwurAMLJcuYgal4XhDj1thteBGcaHWmN7iWk,243
 explainiverse/explainers/rule_based/anchors_wrapper.py,sha256=ML7W6aam-eMGZHy5ilol8qupZvNBJpYAFatEEPnuMyo,13254
-explainiverse-0.2.1.dist-info/LICENSE,sha256=
-explainiverse-0.2.1.dist-info/METADATA,sha256=
-explainiverse-0.2.1.dist-info/WHEEL,sha256=
-explainiverse-0.2.1.dist-info/RECORD,,
+explainiverse-0.2.3.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
+explainiverse-0.2.3.dist-info/METADATA,sha256=TGuHUB9HZEcTbkQ7vmXl6ygm9arV5tlzufCHMoFqmdk,10465
+explainiverse-0.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+explainiverse-0.2.3.dist-info/RECORD,,

{explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/LICENSE RENAMED
File without changes

{explainiverse-0.2.1.dist-info → explainiverse-0.2.3.dist-info}/WHEEL RENAMED
File without changes