explainiverse 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
explainiverse/__init__.py CHANGED
@@ -33,7 +33,7 @@ from explainiverse.adapters.sklearn_adapter import SklearnAdapter
33
33
  from explainiverse.adapters import TORCH_AVAILABLE
34
34
  from explainiverse.engine.suite import ExplanationSuite
35
35
 
36
- __version__ = "0.2.4"
36
+ __version__ = "0.2.5"
37
37
 
38
38
  __all__ = [
39
39
  # Core
@@ -371,6 +371,7 @@ def _create_default_registry() -> ExplainerRegistry:
371
371
  from explainiverse.explainers.counterfactual.dice_wrapper import CounterfactualExplainer
372
372
  from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
373
373
  from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
374
+ from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
374
375
 
375
376
  registry = ExplainerRegistry()
376
377
 
@@ -497,6 +498,40 @@ def _create_default_registry() -> ExplainerRegistry:
497
498
  )
498
499
  )
499
500
 
501
+ # Register DeepLIFT (for neural networks)
502
+ registry.register(
503
+ name="deeplift",
504
+ explainer_class=DeepLIFTExplainer,
505
+ meta=ExplainerMeta(
506
+ scope="local",
507
+ model_types=["neural"],
508
+ data_types=["tabular", "image"],
509
+ task_types=["classification", "regression"],
510
+ description="DeepLIFT - reference-based attributions via activation differences (requires PyTorch)",
511
+ paper_reference="Shrikumar et al., 2017 - 'Learning Important Features Through Propagating Activation Differences' (ICML)",
512
+ complexity="O(forward_pass + backward_pass)",
513
+ requires_training_data=False,
514
+ supports_batching=True
515
+ )
516
+ )
517
+
518
+ # Register DeepSHAP (DeepLIFT + SHAP)
519
+ registry.register(
520
+ name="deepshap",
521
+ explainer_class=DeepLIFTShapExplainer,
522
+ meta=ExplainerMeta(
523
+ scope="local",
524
+ model_types=["neural"],
525
+ data_types=["tabular", "image"],
526
+ task_types=["classification", "regression"],
527
+ description="DeepSHAP - DeepLIFT averaged over background samples for SHAP values (requires PyTorch)",
528
+ paper_reference="Lundberg & Lee, 2017 - combines DeepLIFT with SHAP",
529
+ complexity="O(n_background * forward_pass)",
530
+ requires_training_data=True,
531
+ supports_batching=True
532
+ )
533
+ )
534
+
500
535
  # =========================================================================
501
536
  # Global Explainers (model-level)
502
537
  # =========================================================================
@@ -28,6 +28,7 @@ from explainiverse.explainers.global_explainers.ale import ALEExplainer
28
28
  from explainiverse.explainers.global_explainers.sage import SAGEExplainer
29
29
  from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
30
30
  from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
31
+ from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
31
32
 
32
33
  __all__ = [
33
34
  # Local explainers
@@ -38,6 +39,8 @@ __all__ = [
38
39
  "CounterfactualExplainer",
39
40
  "IntegratedGradientsExplainer",
40
41
  "GradCAMExplainer",
42
+ "DeepLIFTExplainer",
43
+ "DeepLIFTShapExplainer",
41
44
  # Global explainers
42
45
  "PermutationImportanceExplainer",
43
46
  "PartialDependenceExplainer",
@@ -8,5 +8,11 @@ typically via the PyTorchAdapter.
8
8
 
9
9
  from explainiverse.explainers.gradient.integrated_gradients import IntegratedGradientsExplainer
10
10
  from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
11
+ from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
11
12
 
12
- __all__ = ["IntegratedGradientsExplainer", "GradCAMExplainer"]
13
+ __all__ = [
14
+ "IntegratedGradientsExplainer",
15
+ "GradCAMExplainer",
16
+ "DeepLIFTExplainer",
17
+ "DeepLIFTShapExplainer",
18
+ ]
@@ -0,0 +1,745 @@
1
+ # src/explainiverse/explainers/gradient/deeplift.py
2
+ """
3
+ DeepLIFT - Deep Learning Important FeaTures.
4
+
5
+ DeepLIFT explains the difference in output from some reference output
6
+ in terms of the difference of the input from some reference input.
7
+ Unlike standard gradients which show the effect of infinitesimal changes,
8
+ DeepLIFT considers the actual change in activations from a reference.
9
+
10
+ Key Properties:
11
+ - Summation-to-delta: Sum of attributions equals output - reference_output
12
+ - Handles saturation: Works correctly even when gradients are zero
13
+ - Fast: Requires only one forward and one backward pass (vs. many for IG)
14
+
15
+ Rules:
16
+ - Rescale Rule: Simple proportional attribution (default)
17
+ - RevealCancel Rule: Separates positive/negative contributions (advanced)
18
+
19
+ Reference:
20
+ Shrikumar, A., Greenside, P., & Kundaje, A. (2017). Learning Important
21
+ Features Through Propagating Activation Differences. ICML 2017.
22
+ https://arxiv.org/abs/1704.02685
23
+
24
+ Example:
25
+ from explainiverse.explainers.gradient import DeepLIFTExplainer
26
+ from explainiverse.adapters import PyTorchAdapter
27
+
28
+ adapter = PyTorchAdapter(model, task="classification")
29
+
30
+ explainer = DeepLIFTExplainer(
31
+ model=adapter,
32
+ feature_names=feature_names
33
+ )
34
+
35
+ explanation = explainer.explain(instance)
36
+ """
37
+
38
+ import numpy as np
39
+ from typing import List, Optional, Union, Callable, Tuple
40
+
41
+ from explainiverse.core.explainer import BaseExplainer
42
+ from explainiverse.core.explanation import Explanation
43
+
44
+
45
+ # Check if PyTorch is available
46
+ try:
47
+ import torch
48
+ import torch.nn as nn
49
+ TORCH_AVAILABLE = True
50
+ except ImportError:
51
+ TORCH_AVAILABLE = False
52
+ torch = None
53
+ nn = None
54
+
55
+
56
+ class DeepLIFTExplainer(BaseExplainer):
57
+ """
58
+ DeepLIFT explainer for neural networks.
59
+
60
+ Computes attributions by propagating the difference between the
61
+ network's output and a reference output back to the inputs, using
62
+ the difference between input activations and reference activations.
63
+
64
+ DeepLIFT is faster than Integrated Gradients (single forward/backward
65
+ pass) while providing similar quality attributions for most networks.
66
+
67
+ Attributes:
68
+ model: Model adapter with gradient computation capability
69
+ feature_names: List of feature names
70
+ class_names: List of class names (for classification)
71
+ baseline: Reference input for comparison
72
+ multiply_by_inputs: If True, multiply attributions by (input - baseline)
73
+ eps: Small constant to avoid division by zero
74
+
75
+ Example:
76
+ >>> explainer = DeepLIFTExplainer(adapter, feature_names)
77
+ >>> explanation = explainer.explain(instance)
78
+ >>> print(explanation.get_attributions())
79
+ """
80
+
81
+ def __init__(
82
+ self,
83
+ model,
84
+ feature_names: List[str],
85
+ class_names: Optional[List[str]] = None,
86
+ baseline: Optional[Union[np.ndarray, str, Callable]] = None,
87
+ multiply_by_inputs: bool = True,
88
+ eps: float = 1e-10
89
+ ):
90
+ """
91
+ Initialize the DeepLIFT explainer.
92
+
93
+ Args:
94
+ model: A model adapter with predict_with_gradients() method.
95
+ Use PyTorchAdapter for PyTorch models.
96
+ feature_names: List of input feature names.
97
+ class_names: List of class names (for classification tasks).
98
+ baseline: Reference input for comparison:
99
+ - None: Use zeros (default)
100
+ - "random": Sample uniformly between the instance's min and max values
101
+ - "mean": Placeholder for dataset mean (set via set_baseline)
102
+ - np.ndarray: Specific baseline values
103
+ - Callable: Function that takes instance and returns baseline
104
+ multiply_by_inputs: If True (default), compute
105
+ (input - baseline) * multipliers. If False, return raw
106
+ multipliers (useful for debugging).
107
+ eps: Small constant to prevent division by zero in
108
+ multiplier computation (stored on the explainer; not currently read by any method in this module). Default: 1e-10
109
+ """
110
+ super().__init__(model)
111
+
112
+ # Validate model has required methods
113
+ if not hasattr(model, 'predict_with_gradients'):
114
+ raise TypeError(
115
+ "Model adapter must have predict_with_gradients() method. "
116
+ "Use PyTorchAdapter for PyTorch models."
117
+ )
118
+
119
+ self.feature_names = list(feature_names)
120
+ self.class_names = list(class_names) if class_names else None
121
+ self.baseline = baseline
122
+ self.multiply_by_inputs = multiply_by_inputs
123
+ self.eps = eps
124
+
125
+ # For advanced usage: store reference for layer-wise computation
126
+ self._reference_activations = None
127
+
128
+ def _get_baseline(self, instance: np.ndarray) -> np.ndarray:
129
+ """
130
+ Get the baseline/reference input for a given instance.
131
+
132
+ Args:
133
+ instance: The input instance
134
+
135
+ Returns:
136
+ Baseline array with same shape as instance
137
+ """
138
+ if self.baseline is None:
139
+ # Default: zero baseline
140
+ return np.zeros_like(instance)
141
+ elif isinstance(self.baseline, str):
142
+ if self.baseline == "random":
143
+ # Random baseline from uniform distribution
144
+ return np.random.uniform(
145
+ low=instance.min(),
146
+ high=instance.max(),
147
+ size=instance.shape
148
+ ).astype(instance.dtype)
149
+ elif self.baseline == "mean":
150
+ # This requires set_baseline to have been called with data
151
+ raise ValueError(
152
+ "Baseline 'mean' requires calling set_baseline() with "
153
+ "training data first."
154
+ )
155
+ else:
156
+ raise ValueError(f"Unknown baseline type: {self.baseline}")
157
+ elif callable(self.baseline):
158
+ return self.baseline(instance)
159
+ else:
160
+ baseline = np.array(self.baseline)
161
+ if baseline.shape != instance.shape:
162
+ baseline = baseline.reshape(instance.shape)
163
+ return baseline.astype(instance.dtype)
164
+
165
+ def set_baseline(self, data: np.ndarray, method: str = "mean") -> "DeepLIFTExplainer":
166
+ """
167
+ Set the baseline from training data.
168
+
169
+ Args:
170
+ data: Training data array of shape (n_samples, n_features)
171
+ method: Method to compute baseline:
172
+ - "mean": Use mean of training data
173
+ - "median": Use median of training data
174
+ - "zeros": Use zeros (same as default)
175
+
176
+ Returns:
177
+ Self for method chaining
178
+ """
179
+ data = np.array(data)
180
+ if data.ndim == 1:
181
+ data = data.reshape(1, -1)
182
+
183
+ if method == "mean":
184
+ self.baseline = np.mean(data, axis=0).astype(np.float32)
185
+ elif method == "median":
186
+ self.baseline = np.median(data, axis=0).astype(np.float32)
187
+ elif method == "zeros":
188
+ self.baseline = None
189
+ else:
190
+ raise ValueError(f"Unknown method: {method}")
191
+
192
+ return self
193
+
194
+ def _compute_deeplift_rescale(
195
+ self,
196
+ instance: np.ndarray,
197
+ baseline: np.ndarray,
198
+ target_class: Optional[int] = None
199
+ ) -> np.ndarray:
200
+ """
201
+ Compute DeepLIFT attributions using the Rescale rule.
202
+
203
+ The Rescale rule computes multipliers as:
204
+ m = (activation - reference_activation) / (input - reference_input)
205
+
206
+ For layers where input equals reference (delta = 0), we use the
207
+ gradient as the multiplier (the limit as delta -> 0).
208
+
209
+ This implementation uses the gradient formulation from Ancona et al.
210
+ which shows that DeepLIFT-Rescale can be computed efficiently using
211
+ modified gradients.
212
+
213
+ Args:
214
+ instance: Input instance
215
+ baseline: Reference/baseline input
216
+ target_class: Target class for attribution
217
+
218
+ Returns:
219
+ Array of attribution scores for each input feature
220
+ """
221
+ instance = instance.flatten().astype(np.float32)
222
+ baseline = baseline.flatten().astype(np.float32)
223
+
224
+ # Compute delta (difference from reference)
225
+ delta = instance - baseline
226
+
227
+ # For DeepLIFT with Rescale rule, we need to compute multipliers
228
+ # that satisfy: attribution = delta * multiplier
229
+ # where sum(attributions) = f(x) - f(x')
230
+
231
+ # Method 1: Gradient at midpoint approximation
232
+ # DeepLIFT ≈ delta * gradient(baseline + 0.5 * delta)
233
+ # This is a good approximation for smooth functions
234
+
235
+ midpoint = baseline + 0.5 * delta
236
+ _, gradients = self.model.predict_with_gradients(
237
+ midpoint.reshape(1, -1),
238
+ target_class=target_class
239
+ )
240
+ gradients = gradients.flatten()
241
+
242
+ if self.multiply_by_inputs:
243
+ attributions = delta * gradients
244
+ else:
245
+ attributions = gradients
246
+
247
+ return attributions
248
+
249
+ def _compute_deeplift_exact(
250
+ self,
251
+ instance: np.ndarray,
252
+ baseline: np.ndarray,
253
+ target_class: Optional[int] = None
254
+ ) -> np.ndarray:
255
+ """
256
+ Compute DeepLIFT attributions using path-averaged gradient multipliers.
257
+
258
+ This method approximates the DeepLIFT multipliers by evaluating gradients
259
+ at multiple points along the path and averaging, providing more
260
+ accurate results at the cost of additional computation.
261
+
262
+ For networks with ReLU activations, this is equivalent to:
263
+ 1. Forward pass on input to get activations
264
+ 2. Forward pass on baseline to get reference activations
265
+ 3. Backward pass computing multipliers based on activation differences
266
+
267
+ Args:
268
+ instance: Input instance
269
+ baseline: Reference/baseline input
270
+ target_class: Target class for attribution
271
+
272
+ Returns:
273
+ Array of attribution scores for each input feature
274
+ """
275
+ instance = instance.flatten().astype(np.float32)
276
+ baseline = baseline.flatten().astype(np.float32)
277
+
278
+ delta = instance - baseline
279
+
280
+ # Use multiple evaluation points for more accurate multipliers
281
+ # This is essentially Integrated Gradients with few steps, but
282
+ # the key insight is that DeepLIFT's Rescale rule gives the same
283
+ # result as IG for piecewise linear functions (like ReLU networks)
284
+
285
+ n_points = 10
286
+ alphas = np.linspace(0, 1, n_points)
287
+
288
+ all_gradients = []
289
+ for alpha in alphas:
290
+ point = baseline + alpha * delta
291
+ _, grads = self.model.predict_with_gradients(
292
+ point.reshape(1, -1),
293
+ target_class=target_class
294
+ )
295
+ all_gradients.append(grads.flatten())
296
+
297
+ # Average gradients (uniform mean over the sampled path points; endpoints are not half-weighted as a trapezoidal rule would)
298
+ avg_gradients = np.mean(all_gradients, axis=0)
299
+
300
+ if self.multiply_by_inputs:
301
+ attributions = delta * avg_gradients
302
+ else:
303
+ attributions = avg_gradients
304
+
305
+ return attributions
306
+
307
+ def explain(
308
+ self,
309
+ instance: np.ndarray,
310
+ target_class: Optional[int] = None,
311
+ baseline: Optional[np.ndarray] = None,
312
+ method: str = "rescale",
313
+ return_convergence_delta: bool = False
314
+ ) -> Explanation:
315
+ """
316
+ Generate DeepLIFT explanation for an instance.
317
+
318
+ Args:
319
+ instance: 1D numpy array of input features.
320
+ target_class: For classification, which class to explain.
321
+ If None and class_names was provided, uses the predicted class.
322
+ baseline: Override the default baseline for this explanation.
323
+ method: Attribution method:
324
+ - "rescale": Fast rescale rule (default, recommended)
325
+ - "rescale_exact": More accurate but slower rescale
326
+ return_convergence_delta: If True, include the convergence delta
327
+ (difference between sum of attributions and prediction
328
+ difference). Should be close to 0 for correct attributions.
329
+
330
+ Returns:
331
+ Explanation object with feature attributions.
332
+ """
333
+ instance = np.array(instance).flatten().astype(np.float32)
334
+
335
+ # Get baseline
336
+ if baseline is not None:
337
+ bl = np.array(baseline).flatten().astype(np.float32)
338
+ else:
339
+ bl = self._get_baseline(instance)
340
+
341
+ # Determine target class if not specified
342
+ if target_class is None and self.class_names:
343
+ predictions = self.model.predict(instance.reshape(1, -1))
344
+ target_class = int(np.argmax(predictions))
345
+
346
+ # Compute DeepLIFT attributions
347
+ if method == "rescale":
348
+ attributions_raw = self._compute_deeplift_rescale(
349
+ instance, bl, target_class
350
+ )
351
+ elif method == "rescale_exact":
352
+ attributions_raw = self._compute_deeplift_exact(
353
+ instance, bl, target_class
354
+ )
355
+ else:
356
+ raise ValueError(f"Unknown method: {method}. Use 'rescale' or 'rescale_exact'.")
357
+
358
+ # Build attributions dict
359
+ attributions = {
360
+ fname: float(attributions_raw[i])
361
+ for i, fname in enumerate(self.feature_names)
362
+ }
363
+
364
+ # Determine class name
365
+ if self.class_names and target_class is not None:
366
+ label_name = self.class_names[target_class]
367
+ else:
368
+ label_name = f"class_{target_class}" if target_class is not None else "output"
369
+
370
+ explanation_data = {
371
+ "feature_attributions": attributions,
372
+ "attributions_raw": attributions_raw.tolist(),
373
+ "baseline": bl.tolist(),
374
+ "method": method,
375
+ "multiply_by_inputs": self.multiply_by_inputs
376
+ }
377
+
378
+ # Optionally compute convergence delta (summation-to-delta property)
379
+ if return_convergence_delta:
380
+ pred_input = self.model.predict(instance.reshape(1, -1))
381
+ pred_baseline = self.model.predict(bl.reshape(1, -1))
382
+
383
+ if target_class is not None:
384
+ pred_diff = pred_input[0, target_class] - pred_baseline[0, target_class]
385
+ else:
386
+ pred_diff = pred_input[0, 0] - pred_baseline[0, 0]
387
+
388
+ attribution_sum = np.sum(attributions_raw)
389
+ convergence_delta = abs(pred_diff - attribution_sum)
390
+
391
+ explanation_data["convergence_delta"] = float(convergence_delta)
392
+ explanation_data["prediction_difference"] = float(pred_diff)
393
+ explanation_data["attribution_sum"] = float(attribution_sum)
394
+
395
+ return Explanation(
396
+ explainer_name="DeepLIFT",
397
+ target_class=label_name,
398
+ explanation_data=explanation_data
399
+ )
400
+
401
+ def explain_batch(
402
+ self,
403
+ X: np.ndarray,
404
+ target_class: Optional[int] = None,
405
+ method: str = "rescale"
406
+ ) -> List[Explanation]:
407
+ """
408
+ Generate explanations for multiple instances.
409
+
410
+ Args:
411
+ X: 2D numpy array of instances (n_samples, n_features).
412
+ target_class: Target class for all instances. If None,
413
+ uses predicted class for each instance (when class_names is set).
414
+ method: Attribution method ("rescale" or "rescale_exact").
415
+
416
+ Returns:
417
+ List of Explanation objects.
418
+ """
419
+ X = np.array(X)
420
+ if X.ndim == 1:
421
+ X = X.reshape(1, -1)
422
+
423
+ return [
424
+ self.explain(X[i], target_class=target_class, method=method)
425
+ for i in range(X.shape[0])
426
+ ]
427
+
428
+ def explain_with_multiple_baselines(
429
+ self,
430
+ instance: np.ndarray,
431
+ baselines: np.ndarray,
432
+ target_class: Optional[int] = None,
433
+ method: str = "rescale"
434
+ ) -> Explanation:
435
+ """
436
+ Compute DeepLIFT with multiple reference baselines and average.
437
+
438
+ Using multiple baselines can provide more robust attributions,
439
+ especially when the choice of single baseline is uncertain.
440
+ This is similar to DeepSHAP (DeepLIFT + SHAP) approach.
441
+
442
+ Args:
443
+ instance: Input instance to explain.
444
+ baselines: Array of baseline instances (n_baselines, n_features).
445
+ target_class: Target class for attribution.
446
+ method: Attribution method.
447
+
448
+ Returns:
449
+ Explanation with averaged attributions across all baselines.
450
+ """
451
+ instance = np.array(instance).flatten().astype(np.float32)
452
+ baselines = np.array(baselines)
453
+
454
+ if baselines.ndim == 1:
455
+ baselines = baselines.reshape(1, -1)
456
+
457
+ # Compute attributions for each baseline
458
+ all_attributions = []
459
+ for bl in baselines:
460
+ if method == "rescale":
461
+ attr = self._compute_deeplift_rescale(
462
+ instance, bl.flatten(), target_class
463
+ )
464
+ else:
465
+ attr = self._compute_deeplift_exact(
466
+ instance, bl.flatten(), target_class
467
+ )
468
+ all_attributions.append(attr)
469
+
470
+ # Average attributions
471
+ avg_attributions = np.mean(all_attributions, axis=0)
472
+ std_attributions = np.std(all_attributions, axis=0)
473
+
474
+ attributions = {
475
+ fname: float(avg_attributions[i])
476
+ for i, fname in enumerate(self.feature_names)
477
+ }
478
+
479
+ if self.class_names and target_class is not None:
480
+ label_name = self.class_names[target_class]
481
+ else:
482
+ label_name = f"class_{target_class}" if target_class is not None else "output"
483
+
484
+ return Explanation(
485
+ explainer_name="DeepLIFT_MultiBaseline",
486
+ target_class=label_name,
487
+ explanation_data={
488
+ "feature_attributions": attributions,
489
+ "attributions_raw": avg_attributions.tolist(),
490
+ "attributions_std": std_attributions.tolist(),
491
+ "n_baselines": len(baselines),
492
+ "method": method
493
+ }
494
+ )
495
+
496
+ def compare_with_integrated_gradients(
497
+ self,
498
+ instance: np.ndarray,
499
+ target_class: Optional[int] = None,
500
+ baseline: Optional[np.ndarray] = None,
501
+ ig_steps: int = 50
502
+ ) -> dict:
503
+ """
504
+ Compare DeepLIFT attributions with Integrated Gradients.
505
+
506
+ Useful for validating that DeepLIFT provides similar results
507
+ to IG (they should be very similar for ReLU networks).
508
+
509
+ Args:
510
+ instance: Input instance.
511
+ target_class: Target class for attribution.
512
+ baseline: Baseline for comparison.
513
+ ig_steps: Number of steps for Integrated Gradients.
514
+
515
+ Returns:
516
+ Dictionary with both attributions and comparison metrics.
517
+ """
518
+ instance = np.array(instance).flatten().astype(np.float32)
519
+
520
+ if baseline is not None:
521
+ bl = np.array(baseline).flatten().astype(np.float32)
522
+ else:
523
+ bl = self._get_baseline(instance)
524
+
525
+ # Determine target class
526
+ if target_class is None and self.class_names:
527
+ predictions = self.model.predict(instance.reshape(1, -1))
528
+ target_class = int(np.argmax(predictions))
529
+
530
+ # DeepLIFT attributions (fast)
531
+ dl_attr = self._compute_deeplift_rescale(instance, bl, target_class)
532
+
533
+ # Integrated Gradients (more computation)
534
+ delta = instance - bl
535
+ alphas = np.linspace(0, 1, ig_steps)
536
+
537
+ all_gradients = []
538
+ for alpha in alphas:
539
+ point = bl + alpha * delta
540
+ _, grads = self.model.predict_with_gradients(
541
+ point.reshape(1, -1),
542
+ target_class=target_class
543
+ )
544
+ all_gradients.append(grads.flatten())
545
+
546
+ avg_gradients = np.mean(all_gradients, axis=0)
547
+ ig_attr = delta * avg_gradients
548
+
549
+ # Compute comparison metrics
550
+ correlation = np.corrcoef(dl_attr, ig_attr)[0, 1]
551
+ mse = np.mean((dl_attr - ig_attr) ** 2)
552
+ max_diff = np.max(np.abs(dl_attr - ig_attr))
553
+
554
+ return {
555
+ "deeplift_attributions": dl_attr.tolist(),
556
+ "integrated_gradients_attributions": ig_attr.tolist(),
557
+ "correlation": float(correlation),
558
+ "mse": float(mse),
559
+ "max_difference": float(max_diff),
560
+ "ig_steps": ig_steps
561
+ }
562
+
563
+
564
+ class DeepLIFTShapExplainer(DeepLIFTExplainer):
565
+ """
566
+ DeepSHAP explainer - DeepLIFT combined with Shapley values.
567
+
568
+ This is essentially DeepLIFT averaged over a distribution of baselines
569
+ (usually samples from the training data), which approximates SHAP values.
570
+
571
+ DeepSHAP inherits all benefits of DeepLIFT (speed, handling saturation)
572
+ while providing the game-theoretic guarantees of Shapley values.
573
+
574
+ Reference:
575
+ Lundberg, S. M., & Lee, S. I. (2017). A Unified Approach to
576
+ Interpreting Model Predictions. NeurIPS 2017.
577
+ https://arxiv.org/abs/1705.07874
578
+
579
+ Example:
580
+ >>> explainer = DeepLIFTShapExplainer(adapter, feature_names)
581
+ >>> explainer.set_background(X_train[:100]) # Set background samples
582
+ >>> explanation = explainer.explain(instance)
583
+ """
584
+
585
+ def __init__(
586
+ self,
587
+ model,
588
+ feature_names: List[str],
589
+ class_names: Optional[List[str]] = None,
590
+ background_data: Optional[np.ndarray] = None,
591
+ n_background_samples: int = 100,
592
+ eps: float = 1e-10
593
+ ):
594
+ """
595
+ Initialize DeepSHAP explainer.
596
+
597
+ Args:
598
+ model: Model adapter with gradient computation capability.
599
+ feature_names: List of input feature names.
600
+ class_names: List of class names (for classification).
601
+ background_data: Background dataset for computing expectations.
602
+ If None, must call set_background() before explain().
603
+ n_background_samples: Number of background samples to use.
604
+ More samples = more accurate but slower.
605
+ eps: Small constant for numerical stability.
606
+ """
607
+ super().__init__(
608
+ model=model,
609
+ feature_names=feature_names,
610
+ class_names=class_names,
611
+ baseline=None,
612
+ multiply_by_inputs=True,
613
+ eps=eps
614
+ )
615
+
616
+ self.n_background_samples = n_background_samples
617
+ self._background_data = None
618
+
619
+ if background_data is not None:
620
+ self.set_background(background_data)
621
+
622
+ def set_background(self, data: np.ndarray) -> "DeepLIFTShapExplainer":
623
+ """
624
+ Set the background dataset for DeepSHAP.
625
+
626
+ The background dataset is used to compute the expected output
627
+ and expected attributions, which are the baseline for Shapley
628
+ value computation.
629
+
630
+ Args:
631
+ data: Background data of shape (n_samples, n_features).
632
+ Typically a subset of the training data.
633
+
634
+ Returns:
635
+ Self for method chaining.
636
+ """
637
+ data = np.array(data).astype(np.float32)
638
+ if data.ndim == 1:
639
+ data = data.reshape(1, -1)
640
+
641
+ # Subsample if necessary
642
+ if len(data) > self.n_background_samples:
643
+ indices = np.random.choice(
644
+ len(data),
645
+ size=self.n_background_samples,
646
+ replace=False
647
+ )
648
+ data = data[indices]
649
+
650
+ self._background_data = data
651
+ return self
652
+
653
+ def explain(
654
+ self,
655
+ instance: np.ndarray,
656
+ target_class: Optional[int] = None,
657
+ method: str = "rescale",
658
+ return_convergence_delta: bool = False
659
+ ) -> Explanation:
660
+ """
661
+ Generate DeepSHAP explanation by averaging DeepLIFT over backgrounds.
662
+
663
+ Args:
664
+ instance: Input instance to explain.
665
+ target_class: Target class for attribution.
666
+ method: DeepLIFT method ("rescale" or "rescale_exact").
667
+ return_convergence_delta: Include convergence delta in output.
668
+
669
+ Returns:
670
+ Explanation with SHAP-style attributions.
671
+ """
672
+ if self._background_data is None:
673
+ raise ValueError(
674
+ "Background data not set. Call set_background() first."
675
+ )
676
+
677
+ instance = np.array(instance).flatten().astype(np.float32)
678
+
679
+ # Determine target class
680
+ if target_class is None and self.class_names:
681
+ predictions = self.model.predict(instance.reshape(1, -1))
682
+ target_class = int(np.argmax(predictions))
683
+
684
+ # Compute DeepLIFT attributions for each background sample
685
+ all_attributions = []
686
+ for baseline in self._background_data:
687
+ if method == "rescale":
688
+ attr = self._compute_deeplift_rescale(
689
+ instance, baseline.flatten(), target_class
690
+ )
691
+ else:
692
+ attr = self._compute_deeplift_exact(
693
+ instance, baseline.flatten(), target_class
694
+ )
695
+ all_attributions.append(attr)
696
+
697
+ # Average to get SHAP values
698
+ shap_values = np.mean(all_attributions, axis=0)
699
+ std_values = np.std(all_attributions, axis=0)
700
+
701
+ # Build attributions dict
702
+ attributions = {
703
+ fname: float(shap_values[i])
704
+ for i, fname in enumerate(self.feature_names)
705
+ }
706
+
707
+ # Determine class name
708
+ if self.class_names and target_class is not None:
709
+ label_name = self.class_names[target_class]
710
+ else:
711
+ label_name = f"class_{target_class}" if target_class is not None else "output"
712
+
713
+ explanation_data = {
714
+ "feature_attributions": attributions,
715
+ "attributions_raw": shap_values.tolist(),
716
+ "attributions_std": std_values.tolist(),
717
+ "n_background_samples": len(self._background_data),
718
+ "method": method
719
+ }
720
+
721
+ # Compute expected values for context
722
+ if return_convergence_delta:
723
+ pred_input = self.model.predict(instance.reshape(1, -1))
724
+ pred_background = self.model.predict(self._background_data)
725
+
726
+ if target_class is not None:
727
+ expected_output = np.mean(pred_background[:, target_class])
728
+ actual_output = pred_input[0, target_class]
729
+ else:
730
+ expected_output = np.mean(pred_background[:, 0])
731
+ actual_output = pred_input[0, 0]
732
+
733
+ pred_diff = actual_output - expected_output
734
+ attribution_sum = np.sum(shap_values)
735
+ convergence_delta = abs(pred_diff - attribution_sum)
736
+
737
+ explanation_data["expected_output"] = float(expected_output)
738
+ explanation_data["actual_output"] = float(actual_output)
739
+ explanation_data["convergence_delta"] = float(convergence_delta)
740
+
741
+ return Explanation(
742
+ explainer_name="DeepSHAP",
743
+ target_class=label_name,
744
+ explanation_data=explanation_data
745
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: explainiverse
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
5
5
  Home-page: https://github.com/jemsbhai/explainiverse
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- explainiverse/__init__.py,sha256=EEo8Stx-Lau5WZLQ5wqA4ESGY9HA_j-dqnpyp9MgV90,1612
1
+ explainiverse/__init__.py,sha256=UZDDS7fWplzGgUdRw_Fi9I6WmTGZ1QM3fo3ZPOPujg8,1612
2
2
  explainiverse/adapters/__init__.py,sha256=HcQGISyp-YQ4jEj2IYveX_c9X5otLcTNWRnVRRhzRik,781
3
3
  explainiverse/adapters/base_adapter.py,sha256=Nqt0GeDn_-PjTyJcZsE8dRTulavqFQsv8sMYWS_ps-M,603
4
4
  explainiverse/adapters/pytorch_adapter.py,sha256=GTilJAR1VF_OgWG88qZoqlqefHaSXB3i9iOwCJkyHTg,13318
@@ -6,12 +6,12 @@ explainiverse/adapters/sklearn_adapter.py,sha256=pzIBtMuqrG-6ZbUqUCMt7rSk3Ow0Fgr
6
6
  explainiverse/core/__init__.py,sha256=P3jHMnH5coFqTTO1w-gT-rurkCM1-9r3pF-055pbXMg,474
7
7
  explainiverse/core/explainer.py,sha256=Z9on-9VblYDlQx9oBm1BHpmAf_NsQajZ3qr-u48Aejo,784
8
8
  explainiverse/core/explanation.py,sha256=6zxFh_TH8tFHc-r_H5-WHQ05Sp1Kp2TxLz3gyFek5jo,881
9
- explainiverse/core/registry.py,sha256=BjyPhcxWC8zaiRflDF3JCaUMPsBmHHDNfwM13bTWxjE,21476
9
+ explainiverse/core/registry.py,sha256=f1GAo2tg6Sjyz-uOPyLukYYSUgMmpb95pI3B6O-5jjo,22992
10
10
  explainiverse/engine/__init__.py,sha256=1sZO8nH1mmwK2e-KUavBQm7zYDWUe27nyWoFy9tgsiA,197
11
11
  explainiverse/engine/suite.py,sha256=sq8SK_6Pf0qRckTmVJ7Mdosu9bhkjAGPGN8ymLGFP9E,4914
12
12
  explainiverse/evaluation/__init__.py,sha256=Y50L_b4HKthg4epwcayPHXh0l4i4MUuzvaNlqPmUNZY,212
13
13
  explainiverse/evaluation/metrics.py,sha256=tSBXtyA_-0zOGCGjlPZU6LdGKRH_QpWfgKa78sdlovs,7453
14
- explainiverse/explainers/__init__.py,sha256=epXDNoYIAxW0KNtGBCkjS28FmDSBYry59HdTY9vJXCs,2057
14
+ explainiverse/explainers/__init__.py,sha256=d7DTbUXzdVdN0l5GQnoJ4zzutI0TXNvx0UzwNXoWY9w,2207
15
15
  explainiverse/explainers/attribution/__init__.py,sha256=YeVs9bS_IWDtqGbp6T37V6Zp5ZDWzLdAXHxxyFGpiQM,431
16
16
  explainiverse/explainers/attribution/lime_wrapper.py,sha256=OnXIV7t6yd-vt38sIi7XmHFbgzlZfCEbRlFyGGd5XiE,3245
17
17
  explainiverse/explainers/attribution/shap_wrapper.py,sha256=tKie5AvN7mb55PWOYdMvW0lUAYjfHPzYosEloEY2ZzI,3210
@@ -23,12 +23,13 @@ explainiverse/explainers/global_explainers/ale.py,sha256=tgG3XTppCf8LiD7uKzBt4DI
23
23
  explainiverse/explainers/global_explainers/partial_dependence.py,sha256=dH6yMjpwZads3pACR3rSykTbssLGHH7e6HfMlpl-S3I,6745
24
24
  explainiverse/explainers/global_explainers/permutation_importance.py,sha256=bcgKz1S_D3lrBMgpqEF_Z6qw8Knxl_cfR50hrSO2tBc,4410
25
25
  explainiverse/explainers/global_explainers/sage.py,sha256=57Xw1SK529x5JXWt0TVrcFYUUP3C65LfUwgoM-Z3gaw,5839
26
- explainiverse/explainers/gradient/__init__.py,sha256=lVPiSGV_swSwV8k7Z4c6XETwDdTRO09D6bv8TSMsNd8,441
26
+ explainiverse/explainers/gradient/__init__.py,sha256=ZJtESEQ8NcZwyZ09jA3Gkjw93pAloL_vfn07LTLVIzM,603
27
+ explainiverse/explainers/gradient/deeplift.py,sha256=MWOlslizUeoZs31moy2iBgp02N08nBsVU-RoEpODg3M,27775
27
28
  explainiverse/explainers/gradient/gradcam.py,sha256=ywW_8PhALwegkpSUDQMFvvVFkA5NnMMW6BB5tb3i8bw,13721
28
29
  explainiverse/explainers/gradient/integrated_gradients.py,sha256=feBgY3Vw2rDti7fxRZtLkxse75m2dbP_R05ARqo2BRM,13367
29
30
  explainiverse/explainers/rule_based/__init__.py,sha256=gKzlFCAzwurAMLJcuYgal4XhDj1thteBGcaHWmN7iWk,243
30
31
  explainiverse/explainers/rule_based/anchors_wrapper.py,sha256=ML7W6aam-eMGZHy5ilol8qupZvNBJpYAFatEEPnuMyo,13254
31
- explainiverse-0.2.4.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
32
- explainiverse-0.2.4.dist-info/METADATA,sha256=5LUzE3WCwdp0QRyonWTkRyHnFmPjGcSlnzbs011f8ZA,11483
33
- explainiverse-0.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
34
- explainiverse-0.2.4.dist-info/RECORD,,
32
+ explainiverse-0.2.5.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
33
+ explainiverse-0.2.5.dist-info/METADATA,sha256=ih7rkKiPaVBmdYHwMRKB3QfrOaCm68m3iHztt6SO5S0,11483
34
+ explainiverse-0.2.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
35
+ explainiverse-0.2.5.dist-info/RECORD,,