explainiverse 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
explainiverse/__init__.py CHANGED
@@ -2,8 +2,9 @@
2
2
  """
3
3
  Explainiverse - A unified, extensible explainability framework.
4
4
 
5
- Supports multiple XAI methods including LIME, SHAP, Anchors, Counterfactuals,
6
- Permutation Importance, PDP, ALE, and SAGE through a consistent interface.
5
+ Supports multiple XAI methods including LIME, SHAP, TreeSHAP, Anchors,
6
+ Counterfactuals, Permutation Importance, PDP, ALE, and SAGE through a
7
+ consistent interface.
7
8
 
8
9
  Quick Start:
9
10
  from explainiverse import default_registry
@@ -14,6 +15,10 @@ Quick Start:
14
15
  # Create an explainer
15
16
  explainer = default_registry.create("lime", model=adapter, training_data=X, ...)
16
17
  explanation = explainer.explain(instance)
18
+
19
+ For PyTorch models:
20
+ from explainiverse import PyTorchAdapter # Requires torch
21
+ adapter = PyTorchAdapter(model, task="classification")
17
22
  """
18
23
 
19
24
  from explainiverse.core.explainer import BaseExplainer
@@ -25,9 +30,10 @@ from explainiverse.core.registry import (
25
30
  get_default_registry,
26
31
  )
27
32
  from explainiverse.adapters.sklearn_adapter import SklearnAdapter
33
+ from explainiverse.adapters import TORCH_AVAILABLE
28
34
  from explainiverse.engine.suite import ExplanationSuite
29
35
 
30
- __version__ = "0.2.1"
36
+ __version__ = "0.2.3"
31
37
 
32
38
  __all__ = [
33
39
  # Core
@@ -40,6 +46,12 @@ __all__ = [
40
46
  "get_default_registry",
41
47
  # Adapters
42
48
  "SklearnAdapter",
49
+ "TORCH_AVAILABLE",
43
50
  # Engine
44
51
  "ExplanationSuite",
45
52
  ]
53
+
54
+ # Conditionally export PyTorchAdapter if torch is available
55
+ if TORCH_AVAILABLE:
56
+ from explainiverse.adapters import PyTorchAdapter
57
+ __all__.append("PyTorchAdapter")
@@ -1,9 +1,19 @@
1
1
  # src/explainiverse/adapters/__init__.py
2
2
  """
3
3
  Model adapters - wrappers that provide a consistent interface for different ML frameworks.
4
+
5
+ Available adapters:
6
+ - SklearnAdapter: For scikit-learn models (always available)
7
+ - PyTorchAdapter: For PyTorch nn.Module models (requires torch)
4
8
  """
5
9
 
6
10
  from explainiverse.adapters.base_adapter import BaseModelAdapter
7
11
  from explainiverse.adapters.sklearn_adapter import SklearnAdapter
8
12
 
9
- __all__ = ["BaseModelAdapter", "SklearnAdapter"]
13
+ # Conditionally import PyTorchAdapter if torch is available
14
+ try:
15
+ from explainiverse.adapters.pytorch_adapter import PyTorchAdapter, TORCH_AVAILABLE
16
+ __all__ = ["BaseModelAdapter", "SklearnAdapter", "PyTorchAdapter", "TORCH_AVAILABLE"]
17
+ except ImportError:
18
+ TORCH_AVAILABLE = False
19
+ __all__ = ["BaseModelAdapter", "SklearnAdapter", "TORCH_AVAILABLE"]
@@ -0,0 +1,396 @@
1
+ # src/explainiverse/adapters/pytorch_adapter.py
2
+ """
3
+ PyTorch Model Adapter for Explainiverse.
4
+
5
+ Provides a unified interface for PyTorch neural networks, enabling
6
+ compatibility with all explainers in the framework.
7
+
8
+ Example:
9
+ import torch.nn as nn
10
+ from explainiverse.adapters import PyTorchAdapter
11
+
12
+ model = nn.Sequential(
13
+ nn.Linear(10, 64),
14
+ nn.ReLU(),
15
+ nn.Linear(64, 3)
16
+ )
17
+
18
+ adapter = PyTorchAdapter(
19
+ model,
20
+ task="classification",
21
+ class_names=["cat", "dog", "bird"]
22
+ )
23
+
24
+ probs = adapter.predict(X) # Returns numpy array
25
+ """
26
+
27
+ import numpy as np
28
+ from typing import List, Optional, Union, Callable
29
+
30
+ from .base_adapter import BaseModelAdapter
31
+
32
+ # Check if PyTorch is available
33
+ try:
34
+ import torch
35
+ import torch.nn as nn
36
+ TORCH_AVAILABLE = True
37
+ except ImportError:
38
+ TORCH_AVAILABLE = False
39
+ torch = None
40
+ nn = None
41
+
42
+
43
def _check_torch_available():
    """Fail fast with an actionable message when PyTorch is not installed.

    Raises:
        ImportError: If the optional ``torch`` dependency is missing.
    """
    if TORCH_AVAILABLE:
        return
    raise ImportError(
        "PyTorch is required for PyTorchAdapter. "
        "Install it with: pip install torch"
    )
50
+
51
+
52
class PyTorchAdapter(BaseModelAdapter):
    """
    Adapter for PyTorch neural network models.

    Wraps a PyTorch nn.Module to provide a consistent interface for
    explainability methods. Handles device management, tensor/numpy
    conversions, and supports both classification and regression tasks.

    Attributes:
        model: The PyTorch model (nn.Module)
        task: "classification" or "regression"
        device: torch.device for computation
        class_names: List of class names (for classification)
        feature_names: List of feature names
        output_activation: Optional activation function for outputs

    Example:
        >>> model = MyNeuralNetwork()
        >>> adapter = PyTorchAdapter(model, task="classification")
        >>> probs = adapter.predict(X_numpy)  # Returns probabilities
    """

    def __init__(
        self,
        model,
        task: str = "classification",
        feature_names: Optional[List[str]] = None,
        class_names: Optional[List[str]] = None,
        device: Optional[str] = None,
        output_activation: Optional[str] = "auto",
        batch_size: int = 32
    ):
        """
        Initialize the PyTorch adapter.

        Args:
            model: A PyTorch nn.Module model.
            task: "classification" or "regression".
            feature_names: List of input feature names.
            class_names: List of output class names (classification only).
            device: Device to run on ("cpu", "cuda", "cuda:0", etc.).
                If None, auto-detects based on model parameters.
            output_activation: Activation for output layer:
                - "auto": softmax for classification, none for regression
                - "softmax": Apply softmax (classification)
                - "sigmoid": Apply sigmoid (binary classification)
                - "none" or None: No activation (raw logits/values)
            batch_size: Batch size for large inputs (default: 32).

        Raises:
            ImportError: If torch is not installed.
            TypeError: If ``model`` is not an nn.Module.
        """
        _check_torch_available()

        if not isinstance(model, nn.Module):
            raise TypeError(
                f"Expected nn.Module, got {type(model).__name__}. "
                "For sklearn models, use SklearnAdapter instead."
            )

        super().__init__(model, feature_names)

        self.task = task
        self.class_names = list(class_names) if class_names else None
        self.batch_size = batch_size

        # Determine device
        if device is not None:
            self.device = torch.device(device)
        else:
            # Auto-detect from model parameters
            try:
                param = next(model.parameters())
                self.device = param.device
            except StopIteration:
                # Model has no parameters, use CPU
                self.device = torch.device("cpu")

        # Move model to device and set to eval mode.
        # eval() disables dropout/batchnorm updates; use train_mode() to undo.
        self.model = model.to(self.device)
        self.model.eval()

        # Configure output activation
        if output_activation == "auto":
            if task == "classification":
                self.output_activation = "softmax"
            else:
                self.output_activation = None
        else:
            self.output_activation = output_activation if output_activation != "none" else None

    def _to_tensor(self, data: np.ndarray) -> "torch.Tensor":
        """Convert numpy array to tensor on the correct device.

        Note: always casts to float32, so float64 inputs lose precision.
        """
        if isinstance(data, torch.Tensor):
            return data.to(self.device).float()
        return torch.tensor(data, dtype=torch.float32, device=self.device)

    def _to_numpy(self, tensor: "torch.Tensor") -> np.ndarray:
        """Convert tensor to numpy array (detached, moved to CPU)."""
        return tensor.detach().cpu().numpy()

    def _apply_activation(self, output: "torch.Tensor") -> "torch.Tensor":
        """Apply the configured output activation function (no-op if None)."""
        if self.output_activation == "softmax":
            return torch.softmax(output, dim=-1)
        elif self.output_activation == "sigmoid":
            return torch.sigmoid(output)
        return output

    def predict(self, data: np.ndarray) -> np.ndarray:
        """
        Generate predictions for input data.

        Runs the model in mini-batches of ``self.batch_size`` under
        ``torch.no_grad()`` and stacks the results.

        Args:
            data: Input data as numpy array. Shape: (n_samples, n_features)
                or (n_samples, channels, height, width) for images.

        Returns:
            Predictions as numpy array:
            - Classification: probabilities of shape (n_samples, n_classes)
            - Regression: values of shape (n_samples, n_outputs)
        """
        data = np.array(data)

        # Handle single instance
        if data.ndim == 1:
            data = data.reshape(1, -1)

        n_samples = data.shape[0]
        outputs = []

        with torch.no_grad():
            for i in range(0, n_samples, self.batch_size):
                batch = data[i:i + self.batch_size]
                tensor_batch = self._to_tensor(batch)

                output = self.model(tensor_batch)
                output = self._apply_activation(output)
                outputs.append(self._to_numpy(output))

        return np.vstack(outputs)

    def predict_with_gradients(
        self,
        data: np.ndarray,
        target_class: Optional[int] = None
    ) -> tuple:
        """
        Generate predictions and compute gradients w.r.t. inputs.

        This is essential for gradient-based attribution methods like
        Integrated Gradients, GradCAM, and Saliency Maps.

        Note: gradients are taken of the *raw* model output (pre-activation
        scores), while the returned predictions have the configured
        activation applied.

        Args:
            data: Input data as numpy array.
            target_class: Class index for gradient computation.
                If None, uses the predicted class.

        Returns:
            Tuple of (predictions, gradients) as numpy arrays.
        """
        data = np.array(data)
        if data.ndim == 1:
            data = data.reshape(1, -1)

        # Convert to tensor with gradient tracking
        tensor_data = self._to_tensor(data)
        tensor_data.requires_grad_(True)

        # Forward pass
        output = self.model(tensor_data)
        activated_output = self._apply_activation(output)

        # Determine target for gradient
        if self.task == "classification":
            if target_class is None:
                target_class = output.argmax(dim=-1)
            elif isinstance(target_class, int):
                # NOTE(review): numpy integers fail the isinstance(int) check
                # and would hit .view() below — confirm callers pass plain int.
                target_class = torch.tensor([target_class] * data.shape[0], device=self.device)

            # Select target class scores for gradient
            # squeeze() collapses a batch of 1 to a 0-d scalar tensor.
            target_scores = output.gather(1, target_class.view(-1, 1)).squeeze()
        else:
            # Regression: gradient w.r.t. output
            target_scores = output.squeeze()

        # Backward pass; summing over the batch gives per-sample input
        # gradients because the samples are independent.
        if target_scores.dim() == 0:
            target_scores.backward()
        else:
            target_scores.sum().backward()

        gradients = tensor_data.grad

        return (
            self._to_numpy(activated_output),
            self._to_numpy(gradients)
        )

    def get_layer_output(
        self,
        data: np.ndarray,
        layer_name: str
    ) -> np.ndarray:
        """
        Get intermediate layer activations.

        Useful for methods like GradCAM that need feature map activations.

        Args:
            data: Input data as numpy array.
            layer_name: Name of the layer to extract (as registered in model).

        Returns:
            Layer activations as numpy array.

        Raises:
            ValueError: If ``layer_name`` does not match a named module.
        """
        data = np.array(data)
        if data.ndim == 1:
            data = data.reshape(1, -1)

        activations = {}

        def hook_fn(module, input, output):
            activations['output'] = output

        # Find and hook the layer
        layer = dict(self.model.named_modules()).get(layer_name)
        if layer is None:
            available = list(dict(self.model.named_modules()).keys())
            raise ValueError(
                f"Layer '{layer_name}' not found. Available layers: {available}"
            )

        handle = layer.register_forward_hook(hook_fn)

        # finally ensures the hook is removed even if the forward pass raises.
        try:
            with torch.no_grad():
                tensor_data = self._to_tensor(data)
                _ = self.model(tensor_data)
        finally:
            handle.remove()

        return self._to_numpy(activations['output'])

    def get_layer_gradients(
        self,
        data: np.ndarray,
        layer_name: str,
        target_class: Optional[int] = None
    ) -> tuple:
        """
        Get gradients of output w.r.t. a specific layer's activations.

        Essential for GradCAM and similar visualization methods.

        Args:
            data: Input data as numpy array.
            layer_name: Name of the layer for gradient computation.
            target_class: Target class for gradient (classification).

        Returns:
            Tuple of (layer_activations, layer_gradients) as numpy arrays.

        Raises:
            ValueError: If ``layer_name`` does not match a named module.
        """
        data = np.array(data)
        if data.ndim == 1:
            data = data.reshape(1, -1)

        activations = {}
        gradients = {}

        def forward_hook(module, input, output):
            activations['output'] = output

        # grad_output[0] is the gradient w.r.t. the module's output tensor.
        def backward_hook(module, grad_input, grad_output):
            gradients['output'] = grad_output[0]

        # Find and hook the layer
        layer = dict(self.model.named_modules()).get(layer_name)
        if layer is None:
            available = list(dict(self.model.named_modules()).keys())
            raise ValueError(
                f"Layer '{layer_name}' not found. Available layers: {available}"
            )

        forward_handle = layer.register_forward_hook(forward_hook)
        backward_handle = layer.register_full_backward_hook(backward_hook)

        # finally ensures both hooks are removed even on error.
        try:
            tensor_data = self._to_tensor(data)
            tensor_data.requires_grad_(True)

            output = self.model(tensor_data)

            if self.task == "classification":
                if target_class is None:
                    target_class = output.argmax(dim=-1)
                elif isinstance(target_class, int):
                    target_class = torch.tensor([target_class] * data.shape[0], device=self.device)

                target_scores = output.gather(1, target_class.view(-1, 1)).squeeze()
            else:
                target_scores = output.squeeze()

            if target_scores.dim() == 0:
                target_scores.backward()
            else:
                target_scores.sum().backward()
        finally:
            forward_handle.remove()
            backward_handle.remove()

        return (
            self._to_numpy(activations['output']),
            self._to_numpy(gradients['output'])
        )

    def list_layers(self) -> List[str]:
        """
        List all named layers/modules in the model.

        The root module (empty name) is excluded.

        Returns:
            List of layer names that can be used with get_layer_output/gradients.
        """
        return [name for name, _ in self.model.named_modules() if name]

    def to(self, device: str) -> "PyTorchAdapter":
        """
        Move the model to a different device.

        Args:
            device: Target device ("cpu", "cuda", "cuda:0", etc.)

        Returns:
            Self for chaining.
        """
        self.device = torch.device(device)
        self.model = self.model.to(self.device)
        return self

    def train_mode(self) -> "PyTorchAdapter":
        """Set model to training mode (enables dropout, batchnorm updates)."""
        self.model.train()
        return self

    def eval_mode(self) -> "PyTorchAdapter":
        """Set model to evaluation mode (disables dropout, freezes batchnorm)."""
        self.model.eval()
        return self
@@ -369,6 +369,7 @@ def _create_default_registry() -> ExplainerRegistry:
369
369
  from explainiverse.explainers.global_explainers.ale import ALEExplainer
370
370
  from explainiverse.explainers.global_explainers.sage import SAGEExplainer
371
371
  from explainiverse.explainers.counterfactual.dice_wrapper import CounterfactualExplainer
372
+ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
372
373
 
373
374
  registry = ExplainerRegistry()
374
375
 
@@ -461,6 +462,23 @@ def _create_default_registry() -> ExplainerRegistry:
461
462
  )
462
463
  )
463
464
 
465
+ # Register Integrated Gradients (for neural networks)
466
+ registry.register(
467
+ name="integrated_gradients",
468
+ explainer_class=IntegratedGradientsExplainer,
469
+ meta=ExplainerMeta(
470
+ scope="local",
471
+ model_types=["neural"],
472
+ data_types=["tabular", "image"],
473
+ task_types=["classification", "regression"],
474
+ description="Integrated Gradients - axiomatic attributions for neural networks (requires PyTorch)",
475
+ paper_reference="Sundararajan et al., 2017 - 'Axiomatic Attribution for Deep Networks' (ICML)",
476
+ complexity="O(n_steps * forward_pass)",
477
+ requires_training_data=False,
478
+ supports_batching=True
479
+ )
480
+ )
481
+
464
482
  # =========================================================================
465
483
  # Global Explainers (model-level)
466
484
  # =========================================================================
@@ -8,6 +8,7 @@ Local Explainers (instance-level):
8
8
  - TreeSHAP: Optimized exact SHAP for tree-based models
9
9
  - Anchors: High-precision rule-based explanations
10
10
  - Counterfactual: Diverse counterfactual explanations
11
+ - Integrated Gradients: Gradient-based attributions for neural networks
11
12
 
12
13
  Global Explainers (model-level):
13
14
  - Permutation Importance: Feature importance via permutation
@@ -25,6 +26,7 @@ from explainiverse.explainers.global_explainers.permutation_importance import Pe
25
26
  from explainiverse.explainers.global_explainers.partial_dependence import PartialDependenceExplainer
26
27
  from explainiverse.explainers.global_explainers.ale import ALEExplainer
27
28
  from explainiverse.explainers.global_explainers.sage import SAGEExplainer
29
+ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
28
30
 
29
31
  __all__ = [
30
32
  # Local explainers
@@ -33,6 +35,7 @@ __all__ = [
33
35
  "TreeShapExplainer",
34
36
  "AnchorsExplainer",
35
37
  "CounterfactualExplainer",
38
+ "IntegratedGradientsExplainer",
36
39
  # Global explainers
37
40
  "PermutationImportanceExplainer",
38
41
  "PartialDependenceExplainer",
@@ -0,0 +1,11 @@
1
+ # src/explainiverse/explainers/gradient/__init__.py
2
+ """
3
+ Gradient-based explainers for neural networks.
4
+
5
+ These explainers require models that support gradient computation,
6
+ typically via the PyTorchAdapter.
7
+ """
8
+
9
+ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
10
+
11
+ __all__ = ["IntegratedGradientsExplainer"]
@@ -0,0 +1,348 @@
1
+ # src/explainiverse/explainers/gradient/integrated_gradients.py
2
+ """
3
+ Integrated Gradients - Axiomatic Attribution for Deep Networks.
4
+
5
+ Integrated Gradients computes feature attributions by accumulating gradients
6
+ along a straight-line path from a baseline to the input. It satisfies two
7
+ key axioms:
8
+ - Sensitivity: If a feature differs between input and baseline and changes
9
+ the prediction, it receives non-zero attribution.
10
+ - Implementation Invariance: Attributions are identical for functionally
11
+ equivalent networks.
12
+
13
+ Reference:
14
+ Sundararajan, M., Taly, A., & Yan, Q. (2017). Axiomatic Attribution for
15
+ Deep Networks. ICML 2017. https://arxiv.org/abs/1703.01365
16
+
17
+ Example:
18
+ from explainiverse.explainers.gradient import IntegratedGradientsExplainer
19
+ from explainiverse.adapters import PyTorchAdapter
20
+
21
+ adapter = PyTorchAdapter(model, task="classification")
22
+
23
+ explainer = IntegratedGradientsExplainer(
24
+ model=adapter,
25
+ feature_names=feature_names,
26
+ n_steps=50
27
+ )
28
+
29
+ explanation = explainer.explain(instance)
30
+ """
31
+
32
+ import numpy as np
33
+ from typing import List, Optional, Union, Callable
34
+
35
+ from explainiverse.core.explainer import BaseExplainer
36
+ from explainiverse.core.explanation import Explanation
37
+
38
+
39
class IntegratedGradientsExplainer(BaseExplainer):
    """
    Integrated Gradients explainer for neural networks.

    Computes attributions by integrating gradients along the straight-line
    path from a baseline (default: zero vector) to the input. The integral
    is approximated using a Riemann sum.

    Attributes:
        model: Model adapter with predict() and predict_with_gradients().
        feature_names: List of feature names.
        class_names: List of class names (for classification).
        n_steps: Number of steps for integral approximation.
        baseline: Baseline input (default: zeros).
        method: Integration method ("riemann_middle", "riemann_left",
            "riemann_right", "riemann_trapezoid").
    """

    # Integration schemes understood by _get_interpolation_alphas().
    _METHODS = ("riemann_middle", "riemann_left", "riemann_right", "riemann_trapezoid")

    def __init__(
        self,
        model,
        feature_names: List[str],
        class_names: Optional[List[str]] = None,
        n_steps: int = 50,
        baseline: Optional[np.ndarray] = None,
        method: str = "riemann_middle"
    ):
        """
        Initialize the Integrated Gradients explainer.

        Args:
            model: A model adapter with predict_with_gradients() method.
                Use PyTorchAdapter for PyTorch models.
            feature_names: List of input feature names.
            class_names: List of class names (for classification tasks).
            n_steps: Number of steps for approximating the integral.
                More steps = more accurate but slower. Default: 50.
            baseline: Baseline input for comparison. If None, uses zeros.
                Can also be "random" for random baseline or a callable.
            method: Integration method:
                - "riemann_middle": Middle Riemann sum (default, most accurate)
                - "riemann_left": Left Riemann sum
                - "riemann_right": Right Riemann sum
                - "riemann_trapezoid": Trapezoidal rule

        Raises:
            TypeError: If the adapter lacks predict_with_gradients().
            ValueError: If n_steps < 1 or method is not recognized.
        """
        super().__init__(model)

        # Validate model has gradient capability
        if not hasattr(model, 'predict_with_gradients'):
            raise TypeError(
                "Model adapter must have predict_with_gradients() method. "
                "Use PyTorchAdapter for PyTorch models."
            )

        # Fail fast: n_steps=0 would otherwise yield NaN (mean of an empty
        # gradient stack), and a bad method would only surface at explain().
        if n_steps < 1:
            raise ValueError(f"n_steps must be >= 1, got {n_steps}")
        if method not in self._METHODS:
            raise ValueError(f"Unknown method: {method}")

        self.feature_names = list(feature_names)
        self.class_names = list(class_names) if class_names else None
        self.n_steps = n_steps
        self.baseline = baseline
        self.method = method

    def _get_baseline(self, instance: np.ndarray) -> np.ndarray:
        """Get the baseline for a given input shape.

        Supports: None (zeros), the string "random" (uniform noise within the
        instance's value range), a callable taking the instance, or an array.
        """
        if self.baseline is None:
            # Default: zero baseline
            return np.zeros_like(instance)
        elif isinstance(self.baseline, str) and self.baseline == "random":
            # Random baseline (useful for images)
            return np.random.uniform(
                low=instance.min(),
                high=instance.max(),
                size=instance.shape
            ).astype(instance.dtype)
        elif callable(self.baseline):
            return self.baseline(instance)
        else:
            return np.array(self.baseline).reshape(instance.shape)

    def _get_interpolation_alphas(self) -> np.ndarray:
        """Get interpolation points in [0, 1] based on the integration method.

        Note: "riemann_trapezoid" returns n_steps + 1 points (both endpoints).
        """
        if self.method == "riemann_left":
            return np.linspace(0, 1 - 1/self.n_steps, self.n_steps)
        elif self.method == "riemann_right":
            return np.linspace(1/self.n_steps, 1, self.n_steps)
        elif self.method == "riemann_middle":
            return np.linspace(0.5/self.n_steps, 1 - 0.5/self.n_steps, self.n_steps)
        elif self.method == "riemann_trapezoid":
            return np.linspace(0, 1, self.n_steps + 1)
        else:
            raise ValueError(f"Unknown method: {self.method}")

    def _resolve_target_class(
        self,
        instance: np.ndarray,
        target_class: Optional[int]
    ) -> Optional[int]:
        """Pin the class to explain so it stays fixed along the whole path.

        Bug fix: previously the target class was only resolved when
        class_names was provided. Without it, None was forwarded to the
        adapter, which re-argmaxes at *every* interpolation point — so a
        path crossing a decision boundary mixed gradients of different
        classes. Here we resolve it once from the prediction on the input.

        Returns None for single-output (regression-style) models.
        """
        if target_class is not None:
            return int(target_class)
        predictions = np.asarray(self.model.predict(instance.reshape(1, -1)))
        if predictions.ndim == 2 and predictions.shape[1] > 1:
            return int(np.argmax(predictions))
        return None

    def _label_for(self, target_class: Optional[int]) -> str:
        """Human-readable label for the explained output.

        Falls back to "class_<i>" when the index is outside class_names
        (instead of raising IndexError), and to "output" when no target
        class applies (regression).
        """
        if target_class is None:
            return "output"
        if self.class_names and 0 <= target_class < len(self.class_names):
            return self.class_names[target_class]
        return f"class_{target_class}"

    def _compute_integrated_gradients(
        self,
        instance: np.ndarray,
        baseline: np.ndarray,
        target_class: Optional[int] = None
    ) -> np.ndarray:
        """
        Compute integrated gradients for a single instance.

        The integral is approximated as:
            IG_i = (x_i - x'_i) * sum_{k=1}^{m} grad_i(x' + k/m * (x - x')) / m

        where x is the input, x' is the baseline, and m is n_steps.
        """
        # Get interpolation points
        alphas = self._get_interpolation_alphas()

        # Compute path from baseline to input
        # Shape: (n_points, n_features)
        delta = instance - baseline
        interpolated_inputs = baseline + alphas[:, np.newaxis] * delta

        # Compute gradients at each interpolation point
        all_gradients = []
        for interp_input in interpolated_inputs:
            _, gradients = self.model.predict_with_gradients(
                interp_input.reshape(1, -1),
                target_class=target_class
            )
            all_gradients.append(gradients.flatten())

        all_gradients = np.array(all_gradients)  # Shape: (n_points, n_features)

        # Approximate the integral
        if self.method == "riemann_trapezoid":
            # Trapezoidal rule: endpoints weighted 1/2
            weights = np.ones(self.n_steps + 1)
            weights[0] = 0.5
            weights[-1] = 0.5
            avg_gradients = np.average(all_gradients, axis=0, weights=weights)
        else:
            # Standard Riemann sum: average of gradients
            avg_gradients = np.mean(all_gradients, axis=0)

        # Scale by input - baseline difference
        return delta * avg_gradients

    def explain(
        self,
        instance: np.ndarray,
        target_class: Optional[int] = None,
        baseline: Optional[np.ndarray] = None,
        return_convergence_delta: bool = False
    ) -> Explanation:
        """
        Generate Integrated Gradients explanation for an instance.

        Args:
            instance: 1D numpy array of input features.
            target_class: For classification, which class to explain.
                If None, uses the predicted class.
            baseline: Override the default baseline for this explanation.
            return_convergence_delta: If True, include the convergence delta
                (difference between the sum of attributions and the
                prediction difference F(x) - F(baseline)).

        Returns:
            Explanation object with feature attributions.
        """
        instance = np.array(instance).flatten().astype(np.float32)

        # Get baseline
        if baseline is not None:
            bl = np.array(baseline).flatten().astype(np.float32)
        else:
            bl = self._get_baseline(instance)

        # Resolve the target once so it is held fixed along the path.
        target_class = self._resolve_target_class(instance, target_class)

        # Compute integrated gradients
        ig_attributions = self._compute_integrated_gradients(
            instance, bl, target_class
        )

        # Build attributions dict keyed by feature name
        attributions = {
            fname: float(ig_attributions[i])
            for i, fname in enumerate(self.feature_names)
        }

        explanation_data = {
            "feature_attributions": attributions,
            "attributions_raw": ig_attributions.tolist(),
            "baseline": bl.tolist(),
            "n_steps": self.n_steps,
            "method": self.method
        }

        # Optionally compute convergence delta
        if return_convergence_delta:
            # The sum of attributions should equal F(x) - F(baseline).
            # Note: computed on the adapter's (possibly activated) outputs.
            pred_input = self.model.predict(instance.reshape(1, -1))
            pred_baseline = self.model.predict(bl.reshape(1, -1))

            if target_class is not None:
                pred_diff = pred_input[0, target_class] - pred_baseline[0, target_class]
            else:
                pred_diff = pred_input[0, 0] - pred_baseline[0, 0]

            attribution_sum = np.sum(ig_attributions)
            convergence_delta = abs(pred_diff - attribution_sum)

            explanation_data["convergence_delta"] = float(convergence_delta)
            explanation_data["prediction_difference"] = float(pred_diff)
            explanation_data["attribution_sum"] = float(attribution_sum)

        return Explanation(
            explainer_name="IntegratedGradients",
            target_class=self._label_for(target_class),
            explanation_data=explanation_data
        )

    def explain_batch(
        self,
        X: np.ndarray,
        target_class: Optional[int] = None
    ) -> List[Explanation]:
        """
        Generate explanations for multiple instances.

        Note: This is not optimized for batching - it processes
        instances sequentially. For large batches, consider using
        the batched gradient computation in a custom implementation.

        Args:
            X: 2D numpy array of instances (n_samples, n_features).
            target_class: Target class for all instances.

        Returns:
            List of Explanation objects.
        """
        X = np.array(X)
        if X.ndim == 1:
            X = X.reshape(1, -1)

        return [
            self.explain(X[i], target_class=target_class)
            for i in range(X.shape[0])
        ]

    def compute_attributions_with_noise(
        self,
        instance: np.ndarray,
        target_class: Optional[int] = None,
        n_samples: int = 5,
        noise_scale: float = 0.1
    ) -> Explanation:
        """
        Compute attributions averaged over noisy baselines (SmoothGrad-style).

        This can help reduce noise in the attributions by averaging over
        multiple baselines sampled around the zero baseline.

        Args:
            instance: Input instance.
            target_class: Target class for attribution.
            n_samples: Number of noisy baselines to average.
            noise_scale: Standard deviation of Gaussian noise.

        Returns:
            Explanation with averaged attributions.
        """
        instance = np.array(instance).flatten().astype(np.float32)

        # Same fix as explain(): hold one class fixed across all baselines.
        target_class = self._resolve_target_class(instance, target_class)

        all_attributions = []
        for _ in range(n_samples):
            # Create noisy baseline (Gaussian noise around zero)
            noise = np.random.normal(0, noise_scale, instance.shape).astype(np.float32)
            noisy_baseline = noise

            ig = self._compute_integrated_gradients(
                instance, noisy_baseline, target_class
            )
            all_attributions.append(ig)

        # Average attributions and report per-feature spread
        avg_attributions = np.mean(all_attributions, axis=0)
        std_attributions = np.std(all_attributions, axis=0)

        attributions = {
            fname: float(avg_attributions[i])
            for i, fname in enumerate(self.feature_names)
        }

        return Explanation(
            explainer_name="IntegratedGradients_Smooth",
            target_class=self._label_for(target_class),
            explanation_data={
                "feature_attributions": attributions,
                "attributions_raw": avg_attributions.tolist(),
                "attributions_std": std_attributions.tolist(),
                "n_samples": n_samples,
                "noise_scale": noise_scale
            }
        )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: explainiverse
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
5
5
  Home-page: https://github.com/jemsbhai/explainiverse
6
6
  License: MIT
@@ -17,11 +17,13 @@ Classifier: Programming Language :: Python :: 3.10
17
17
  Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Provides-Extra: torch
20
21
  Requires-Dist: lime (>=0.2.0.1,<0.3.0.0)
21
22
  Requires-Dist: numpy (>=1.24,<2.0)
22
23
  Requires-Dist: scikit-learn (>=1.1,<1.6)
23
24
  Requires-Dist: scipy (>=1.10,<2.0)
24
25
  Requires-Dist: shap (>=0.48.0,<0.49.0)
26
+ Requires-Dist: torch (>=2.0) ; extra == "torch"
25
27
  Requires-Dist: xgboost (>=1.7,<3.0)
26
28
  Project-URL: Repository, https://github.com/jemsbhai/explainiverse
27
29
  Description-Content-Type: text/markdown
@@ -29,7 +31,7 @@ Description-Content-Type: text/markdown
29
31
  # Explainiverse
30
32
 
31
33
  **Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI).
32
- It provides a standardized interface for model-agnostic explainability with 8 state-of-the-art XAI methods, evaluation metrics, and a plugin registry for easy extensibility.
34
+ It provides a standardized interface for model-agnostic explainability with 10 state-of-the-art XAI methods, evaluation metrics, and a plugin registry for easy extensibility.
33
35
 
34
36
  ---
35
37
 
@@ -40,6 +42,8 @@ It provides a standardized interface for model-agnostic explainability with 8 st
40
42
  **Local Explainers** (instance-level explanations):
41
43
  - **LIME** - Local Interpretable Model-agnostic Explanations ([Ribeiro et al., 2016](https://arxiv.org/abs/1602.04938))
42
44
  - **SHAP** - SHapley Additive exPlanations via KernelSHAP ([Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874))
45
+ - **TreeSHAP** - Exact SHAP values for tree models, 10x+ faster ([Lundberg et al., 2018](https://arxiv.org/abs/1802.03888))
46
+ - **Integrated Gradients** - Axiomatic attributions for neural networks ([Sundararajan et al., 2017](https://arxiv.org/abs/1703.01365))
43
47
  - **Anchors** - High-precision rule-based explanations ([Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491))
44
48
  - **Counterfactual** - DiCE-style diverse counterfactual explanations ([Mothilal et al., 2020](https://arxiv.org/abs/1905.07697))
45
49
 
@@ -62,7 +66,7 @@ It provides a standardized interface for model-agnostic explainability with 8 st
62
66
  ### 🧪 Standardized Interface
63
67
  - Consistent `BaseExplainer` API
64
68
  - Unified `Explanation` output format
65
- - Model adapters for sklearn and more
69
+ - Model adapters for sklearn and PyTorch
66
70
 
67
71
  ---
68
72
 
@@ -74,6 +78,12 @@ From PyPI:
74
78
  pip install explainiverse
75
79
  ```
76
80
 
81
+ With PyTorch support (for neural network explanations):
82
+
83
+ ```bash
84
 + pip install "explainiverse[torch]"
85
+ ```
86
+
77
87
  For development:
78
88
 
79
89
  ```bash
@@ -100,7 +110,7 @@ adapter = SklearnAdapter(model, class_names=iris.target_names.tolist())
100
110
 
101
111
  # List available explainers
102
112
  print(default_registry.list_explainers())
103
- # ['lime', 'shap', 'anchors', 'counterfactual', 'permutation_importance', 'partial_dependence', 'ale', 'sage']
113
+ # ['lime', 'shap', 'treeshap', 'integrated_gradients', 'anchors', 'counterfactual', 'permutation_importance', 'partial_dependence', 'ale', 'sage']
104
114
 
105
115
  # Create and use an explainer
106
116
  explainer = default_registry.create(
@@ -119,11 +129,11 @@ print(explanation.explanation_data["feature_attributions"])
119
129
  ```python
120
130
  # Find local explainers for tabular data
121
131
  local_tabular = default_registry.filter(scope="local", data_type="tabular")
122
- print(local_tabular) # ['lime', 'shap', 'anchors', 'counterfactual']
132
+ print(local_tabular) # ['lime', 'shap', 'treeshap', 'integrated_gradients', 'anchors', 'counterfactual']
123
133
 
124
- # Find global explainers
125
- global_explainers = default_registry.filter(scope="global")
126
- print(global_explainers) # ['permutation_importance', 'partial_dependence', 'ale', 'sage']
134
+ # Find explainers optimized for tree models
135
+ tree_explainers = default_registry.filter(model_type="tree")
136
+ print(tree_explainers) # ['treeshap']
127
137
 
128
138
  # Get recommendations
129
139
  recommendations = default_registry.recommend(
@@ -133,6 +143,90 @@ recommendations = default_registry.recommend(
133
143
  )
134
144
  ```
135
145
 
146
+ ### TreeSHAP for Tree Models (10x+ Faster)
147
+
148
+ ```python
149
+ from explainiverse.explainers import TreeShapExplainer
150
+ from sklearn.ensemble import RandomForestClassifier
151
+
152
+ # Train a tree-based model
153
+ model = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
154
+
155
+ # TreeSHAP works directly with the model (no adapter needed)
156
+ explainer = TreeShapExplainer(
157
+ model=model,
158
+ feature_names=feature_names,
159
+ class_names=class_names
160
+ )
161
+
162
+ # Single instance explanation
163
+ explanation = explainer.explain(X_test[0])
164
+ print(explanation.explanation_data["feature_attributions"])
165
+
166
+ # Batch explanations (efficient)
167
+ explanations = explainer.explain_batch(X_test[:10])
168
+
169
+ # Feature interactions
170
+ interactions = explainer.explain_interactions(X_test[0])
171
+ print(interactions.explanation_data["interaction_matrix"])
172
+ ```
173
+
174
+ ### PyTorch Adapter for Neural Networks
175
+
176
+ ```python
177
+ from explainiverse import PyTorchAdapter
178
+ import torch.nn as nn
179
+
180
+ # Define a PyTorch model
181
+ model = nn.Sequential(
182
+ nn.Linear(10, 64),
183
+ nn.ReLU(),
184
+ nn.Linear(64, 3)
185
+ )
186
+
187
+ # Wrap with adapter
188
+ adapter = PyTorchAdapter(
189
+ model,
190
+ task="classification",
191
+ class_names=["cat", "dog", "bird"]
192
+ )
193
+
194
+ # Use with any explainer
195
+ predictions = adapter.predict(X) # Returns numpy array
196
+
197
+ # Get gradients for attribution methods
198
+ predictions, gradients = adapter.predict_with_gradients(X)
199
+
200
+ # Access intermediate layers
201
+ activations = adapter.get_layer_output(X, layer_name="0")
202
+ ```
203
+
204
+ ### Integrated Gradients for Neural Networks
205
+
206
+ ```python
207
+ from explainiverse.explainers import IntegratedGradientsExplainer
208
+ from explainiverse import PyTorchAdapter
209
+
210
+ # Wrap your PyTorch model
211
+ adapter = PyTorchAdapter(model, task="classification", class_names=class_names)
212
+
213
+ # Create IG explainer
214
+ explainer = IntegratedGradientsExplainer(
215
+ model=adapter,
216
+ feature_names=feature_names,
217
+ class_names=class_names,
218
+ n_steps=50 # More steps = more accurate
219
+ )
220
+
221
+ # Explain a prediction
222
+ explanation = explainer.explain(X_test[0])
223
+ print(explanation.explanation_data["feature_attributions"])
224
+
225
+ # Check convergence (sum of attributions ≈ F(x) - F(baseline))
226
+ explanation = explainer.explain(X_test[0], return_convergence_delta=True)
227
+ print(f"Convergence delta: {explanation.explanation_data['convergence_delta']}")
228
+ ```
229
+
136
230
  ### Using Specific Explainers
137
231
 
138
232
  ```python
@@ -233,12 +327,14 @@ poetry run pytest tests/test_new_explainers.py -v
233
327
  ## Roadmap
234
328
 
235
329
  - [x] LIME, SHAP (KernelSHAP)
330
+ - [x] TreeSHAP (optimized for tree models) ✅
236
331
  - [x] Anchors, Counterfactuals
237
332
  - [x] Permutation Importance, PDP, ALE, SAGE
238
333
  - [x] Explainer Registry with filtering
239
- - [ ] TreeSHAP (optimized for tree models)
240
- - [ ] Integrated Gradients (gradient-based for neural nets)
241
- - [ ] PyTorch/TensorFlow adapters
334
+ - [x] PyTorch Adapter
335
 + - [x] Integrated Gradients ✅
336
+ - [ ] GradCAM for CNNs
337
+ - [ ] TensorFlow adapter
242
338
  - [ ] Interactive visualization dashboard
243
339
 
244
340
  ---
@@ -1,16 +1,17 @@
1
- explainiverse/__init__.py,sha256=XXf936B0hTTgCgBHnBAGYcNvYsU31yQrYDDaEABk8kQ,1207
2
- explainiverse/adapters/__init__.py,sha256=fNlWQ0VDjNqi4G4lwaJRTtL0wGVgvEE-4pZt6vOOjYU,322
1
+ explainiverse/__init__.py,sha256=NmrLPOGZZPZTq1vY0G4gid5ZJWxsVGd3CfTXVIDvjaQ,1612
2
+ explainiverse/adapters/__init__.py,sha256=HcQGISyp-YQ4jEj2IYveX_c9X5otLcTNWRnVRRhzRik,781
3
3
  explainiverse/adapters/base_adapter.py,sha256=Nqt0GeDn_-PjTyJcZsE8dRTulavqFQsv8sMYWS_ps-M,603
4
+ explainiverse/adapters/pytorch_adapter.py,sha256=GTilJAR1VF_OgWG88qZoqlqefHaSXB3i9iOwCJkyHTg,13318
4
5
  explainiverse/adapters/sklearn_adapter.py,sha256=pzIBtMuqrG-6ZbUqUCMt7rSk3Ow0FgrY268FSweFvw4,958
5
6
  explainiverse/core/__init__.py,sha256=P3jHMnH5coFqTTO1w-gT-rurkCM1-9r3pF-055pbXMg,474
6
7
  explainiverse/core/explainer.py,sha256=Z9on-9VblYDlQx9oBm1BHpmAf_NsQajZ3qr-u48Aejo,784
7
8
  explainiverse/core/explanation.py,sha256=6zxFh_TH8tFHc-r_H5-WHQ05Sp1Kp2TxLz3gyFek5jo,881
8
- explainiverse/core/registry.py,sha256=_BXWi1fJY3cGjYA1Xn1DwvY91jbpJrpX6_8EVzrRT20,19876
9
+ explainiverse/core/registry.py,sha256=AC8XDIdX2IGyx0KkmDajAjdo5YsrM3dcKvYoQu1vNCk,20711
9
10
  explainiverse/engine/__init__.py,sha256=1sZO8nH1mmwK2e-KUavBQm7zYDWUe27nyWoFy9tgsiA,197
10
11
  explainiverse/engine/suite.py,sha256=sq8SK_6Pf0qRckTmVJ7Mdosu9bhkjAGPGN8ymLGFP9E,4914
11
12
  explainiverse/evaluation/__init__.py,sha256=Y50L_b4HKthg4epwcayPHXh0l4i4MUuzvaNlqPmUNZY,212
12
13
  explainiverse/evaluation/metrics.py,sha256=tSBXtyA_-0zOGCGjlPZU6LdGKRH_QpWfgKa78sdlovs,7453
13
- explainiverse/explainers/__init__.py,sha256=Op-Z_BTJ7BdqA_9gTnruomN2-rKtrkPCt1Zq1iCzxr0,1758
14
+ explainiverse/explainers/__init__.py,sha256=3yhamu1E2hpb0vE_hg3xK621YJdZYcy7gsSGgCT4Km4,1962
14
15
  explainiverse/explainers/attribution/__init__.py,sha256=YeVs9bS_IWDtqGbp6T37V6Zp5ZDWzLdAXHxxyFGpiQM,431
15
16
  explainiverse/explainers/attribution/lime_wrapper.py,sha256=OnXIV7t6yd-vt38sIi7XmHFbgzlZfCEbRlFyGGd5XiE,3245
16
17
  explainiverse/explainers/attribution/shap_wrapper.py,sha256=tKie5AvN7mb55PWOYdMvW0lUAYjfHPzYosEloEY2ZzI,3210
@@ -22,9 +23,11 @@ explainiverse/explainers/global_explainers/ale.py,sha256=tgG3XTppCf8LiD7uKzBt4DI
22
23
  explainiverse/explainers/global_explainers/partial_dependence.py,sha256=dH6yMjpwZads3pACR3rSykTbssLGHH7e6HfMlpl-S3I,6745
23
24
  explainiverse/explainers/global_explainers/permutation_importance.py,sha256=bcgKz1S_D3lrBMgpqEF_Z6qw8Knxl_cfR50hrSO2tBc,4410
24
25
  explainiverse/explainers/global_explainers/sage.py,sha256=57Xw1SK529x5JXWt0TVrcFYUUP3C65LfUwgoM-Z3gaw,5839
26
+ explainiverse/explainers/gradient/__init__.py,sha256=Z4uSZcBhnHGp7DCd7bhcIMj_3f_uuCFw5AGA1JX6myQ,350
27
+ explainiverse/explainers/gradient/integrated_gradients.py,sha256=feBgY3Vw2rDti7fxRZtLkxse75m2dbP_R05ARqo2BRM,13367
25
28
  explainiverse/explainers/rule_based/__init__.py,sha256=gKzlFCAzwurAMLJcuYgal4XhDj1thteBGcaHWmN7iWk,243
26
29
  explainiverse/explainers/rule_based/anchors_wrapper.py,sha256=ML7W6aam-eMGZHy5ilol8qupZvNBJpYAFatEEPnuMyo,13254
27
- explainiverse-0.2.1.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
28
- explainiverse-0.2.1.dist-info/METADATA,sha256=k6JKGA2LJZaxxPVj_7e4cP99PoFMPEU0jBAY-fk2m3U,7731
29
- explainiverse-0.2.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
30
- explainiverse-0.2.1.dist-info/RECORD,,
30
+ explainiverse-0.2.3.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
31
+ explainiverse-0.2.3.dist-info/METADATA,sha256=TGuHUB9HZEcTbkQ7vmXl6ygm9arV5tlzufCHMoFqmdk,10465
32
+ explainiverse-0.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
33
+ explainiverse-0.2.3.dist-info/RECORD,,