explainiverse 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {explainiverse-0.5.0 → explainiverse-0.6.0}/PKG-INFO +41 -6
  2. {explainiverse-0.5.0 → explainiverse-0.6.0}/README.md +40 -5
  3. {explainiverse-0.5.0 → explainiverse-0.6.0}/pyproject.toml +1 -1
  4. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/__init__.py +1 -1
  5. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/registry.py +18 -0
  6. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/__init__.py +2 -0
  7. explainiverse-0.6.0/src/explainiverse/explainers/gradient/saliency.py +293 -0
  8. {explainiverse-0.5.0 → explainiverse-0.6.0}/LICENSE +0 -0
  9. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/__init__.py +0 -0
  10. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/base_adapter.py +0 -0
  11. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/pytorch_adapter.py +0 -0
  12. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/sklearn_adapter.py +0 -0
  13. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/__init__.py +0 -0
  14. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/explainer.py +0 -0
  15. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/explanation.py +0 -0
  16. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/engine/__init__.py +0 -0
  17. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/engine/suite.py +0 -0
  18. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/__init__.py +0 -0
  19. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/_utils.py +0 -0
  20. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/faithfulness.py +0 -0
  21. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/metrics.py +0 -0
  22. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/stability.py +0 -0
  23. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/__init__.py +0 -0
  24. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/__init__.py +0 -0
  25. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/lime_wrapper.py +0 -0
  26. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/shap_wrapper.py +0 -0
  27. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/treeshap_wrapper.py +0 -0
  28. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/counterfactual/__init__.py +0 -0
  29. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/counterfactual/dice_wrapper.py +0 -0
  30. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/example_based/__init__.py +0 -0
  31. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/example_based/protodash.py +0 -0
  32. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/__init__.py +0 -0
  33. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/ale.py +0 -0
  34. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/partial_dependence.py +0 -0
  35. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/permutation_importance.py +0 -0
  36. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/sage.py +0 -0
  37. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/deeplift.py +0 -0
  38. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/gradcam.py +0 -0
  39. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/integrated_gradients.py +0 -0
  40. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/smoothgrad.py +0 -0
  41. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/rule_based/__init__.py +0 -0
  42. {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/rule_based/anchors_wrapper.py +0 -0
{explainiverse-0.5.0 → explainiverse-0.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: explainiverse
-Version: 0.5.0
+Version: 0.6.0
 Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
 Home-page: https://github.com/jemsbhai/explainiverse
 License: MIT
@@ -35,7 +35,7 @@ Description-Content-Type: text/markdown
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
-**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **15 state-of-the-art explanation methods** across local, global, gradient-based, and example-based paradigms, along with **comprehensive evaluation metrics** for assessing explanation quality.
+**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **16 state-of-the-art explanation methods** across local, global, gradient-based, and example-based paradigms, along with **comprehensive evaluation metrics** for assessing explanation quality.
 
 ---
 
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
 
 | Feature | Description |
 |---------|-------------|
-| **15 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, GradCAM/GradCAM++, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
+| **16 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
 | **8 Evaluation Metrics** | Faithfulness (PGI, PGU, Comprehensiveness, Sufficiency, Correlation) and Stability (RIS, ROS, Lipschitz) |
 | **Unified API** | Consistent `BaseExplainer` interface with standardized `Explanation` output |
 | **Plugin Registry** | Filter explainers by scope, model type, data type; automatic recommendations |
@@ -64,6 +64,7 @@ Description-Content-Type: text/markdown
 | **DeepLIFT** | Gradient | [Shrikumar et al., 2017](https://arxiv.org/abs/1704.02685) |
 | **DeepSHAP** | Gradient + Shapley | [Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874) |
 | **SmoothGrad** | Gradient | [Smilkov et al., 2017](https://arxiv.org/abs/1706.03825) |
+| **Saliency Maps** | Gradient | [Simonyan et al., 2014](https://arxiv.org/abs/1312.6034) |
 | **GradCAM / GradCAM++** | Gradient (CNN) | [Selvaraju et al., 2017](https://arxiv.org/abs/1610.02391) |
 | **Anchors** | Rule-Based | [Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491) |
 | **Counterfactual** | Contrastive | [Mothilal et al., 2020](https://arxiv.org/abs/1905.07697) |
@@ -233,6 +234,41 @@ deepshap = DeepLIFTShapExplainer(
 explanation = deepshap.explain(X[0])
 ```
 
+### Saliency Maps
+
+```python
+from explainiverse.explainers.gradient import SaliencyExplainer
+
+# Saliency Maps - simplest and fastest gradient method
+explainer = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=True  # Default: absolute gradient magnitudes
+)
+
+# Standard saliency (absolute gradients)
+explanation = explainer.explain(X[0], method="saliency")
+
+# Input × Gradient (gradient scaled by input values)
+explanation = explainer.explain(X[0], method="input_times_gradient")
+
+# Signed saliency (keep gradient direction)
+explainer_signed = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=False
+)
+explanation = explainer_signed.explain(X[0])
+
+# Compare all variants
+variants = explainer.compute_all_variants(X[0])
+print(variants["saliency_absolute"])
+print(variants["saliency_signed"])
+print(variants["input_times_gradient"])
+```
+
 ### SmoothGrad
 
 ```python
@@ -552,7 +588,7 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 ### Completed ✅
 - [x] Core framework (BaseExplainer, Explanation, Registry)
 - [x] Perturbation methods: LIME, KernelSHAP, TreeSHAP
-- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, GradCAM/GradCAM++
+- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++
 - [x] Rule-based: Anchors
 - [x] Counterfactual: DiCE-style
 - [x] Global: Permutation Importance, PDP, ALE, SAGE
@@ -562,7 +598,6 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 - [x] PyTorch adapter with gradient support
 
 ### In Progress 🚧
-- [ ] Saliency Maps (vanilla gradients)
 - [ ] TCAV (Testing with Concept Activation Vectors)
 - [ ] Layer-wise Relevance Propagation (LRP)
 
@@ -585,7 +620,7 @@ If you use Explainiverse in your research, please cite:
   author = {Syed, Muntaser},
   year = {2025},
   url = {https://github.com/jemsbhai/explainiverse},
-  version = {0.5.0}
+  version = {0.6.0}
 }
 ```
 
{explainiverse-0.5.0 → explainiverse-0.6.0}/README.md

@@ -4,7 +4,7 @@
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
-**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **15 state-of-the-art explanation methods** across local, global, gradient-based, and example-based paradigms, along with **comprehensive evaluation metrics** for assessing explanation quality.
+**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **16 state-of-the-art explanation methods** across local, global, gradient-based, and example-based paradigms, along with **comprehensive evaluation metrics** for assessing explanation quality.
 
 ---
 
@@ -12,7 +12,7 @@
 
 | Feature | Description |
 |---------|-------------|
-| **15 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, GradCAM/GradCAM++, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
+| **16 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
 | **8 Evaluation Metrics** | Faithfulness (PGI, PGU, Comprehensiveness, Sufficiency, Correlation) and Stability (RIS, ROS, Lipschitz) |
 | **Unified API** | Consistent `BaseExplainer` interface with standardized `Explanation` output |
 | **Plugin Registry** | Filter explainers by scope, model type, data type; automatic recommendations |
@@ -33,6 +33,7 @@
 | **DeepLIFT** | Gradient | [Shrikumar et al., 2017](https://arxiv.org/abs/1704.02685) |
 | **DeepSHAP** | Gradient + Shapley | [Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874) |
 | **SmoothGrad** | Gradient | [Smilkov et al., 2017](https://arxiv.org/abs/1706.03825) |
+| **Saliency Maps** | Gradient | [Simonyan et al., 2014](https://arxiv.org/abs/1312.6034) |
 | **GradCAM / GradCAM++** | Gradient (CNN) | [Selvaraju et al., 2017](https://arxiv.org/abs/1610.02391) |
 | **Anchors** | Rule-Based | [Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491) |
 | **Counterfactual** | Contrastive | [Mothilal et al., 2020](https://arxiv.org/abs/1905.07697) |
@@ -202,6 +203,41 @@ deepshap = DeepLIFTShapExplainer(
 explanation = deepshap.explain(X[0])
 ```
 
+### Saliency Maps
+
+```python
+from explainiverse.explainers.gradient import SaliencyExplainer
+
+# Saliency Maps - simplest and fastest gradient method
+explainer = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=True  # Default: absolute gradient magnitudes
+)
+
+# Standard saliency (absolute gradients)
+explanation = explainer.explain(X[0], method="saliency")
+
+# Input × Gradient (gradient scaled by input values)
+explanation = explainer.explain(X[0], method="input_times_gradient")
+
+# Signed saliency (keep gradient direction)
+explainer_signed = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=False
+)
+explanation = explainer_signed.explain(X[0])
+
+# Compare all variants
+variants = explainer.compute_all_variants(X[0])
+print(variants["saliency_absolute"])
+print(variants["saliency_signed"])
+print(variants["input_times_gradient"])
+```
+
 ### SmoothGrad
 
 ```python
@@ -521,7 +557,7 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 ### Completed ✅
 - [x] Core framework (BaseExplainer, Explanation, Registry)
 - [x] Perturbation methods: LIME, KernelSHAP, TreeSHAP
-- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, GradCAM/GradCAM++
+- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++
 - [x] Rule-based: Anchors
 - [x] Counterfactual: DiCE-style
 - [x] Global: Permutation Importance, PDP, ALE, SAGE
@@ -531,7 +567,6 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 - [x] PyTorch adapter with gradient support
 
 ### In Progress 🚧
-- [ ] Saliency Maps (vanilla gradients)
 - [ ] TCAV (Testing with Concept Activation Vectors)
 - [ ] Layer-wise Relevance Propagation (LRP)
 
@@ -554,7 +589,7 @@ If you use Explainiverse in your research, please cite:
   author = {Syed, Muntaser},
   year = {2025},
   url = {https://github.com/jemsbhai/explainiverse},
-  version = {0.5.0}
+  version = {0.6.0}
 }
 ```
 
{explainiverse-0.5.0 → explainiverse-0.6.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "explainiverse"
-version = "0.5.0"
+version = "0.6.0"
 description = "Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more"
 authors = ["Muntaser Syed <jemsbhai@gmail.com>"]
 license = "MIT"
{explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/__init__.py

@@ -33,7 +33,7 @@ from explainiverse.adapters.sklearn_adapter import SklearnAdapter
 from explainiverse.adapters import TORCH_AVAILABLE
 from explainiverse.engine.suite import ExplanationSuite
 
-__version__ = "0.5.0"
+__version__ = "0.6.0"
 
 __all__ = [
     # Core
{explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/registry.py

@@ -373,6 +373,7 @@ def _create_default_registry() -> ExplainerRegistry:
     from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
     from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
     from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer
+    from explainiverse.explainers.gradient.saliency import SaliencyExplainer
     from explainiverse.explainers.example_based.protodash import ProtoDashExplainer
 
     registry = ExplainerRegistry()
@@ -551,6 +552,23 @@ def _create_default_registry() -> ExplainerRegistry:
         )
     )
 
+    # Register Saliency Maps (for neural networks)
+    registry.register(
+        name="saliency",
+        explainer_class=SaliencyExplainer,
+        meta=ExplainerMeta(
+            scope="local",
+            model_types=["neural"],
+            data_types=["tabular", "image"],
+            task_types=["classification", "regression"],
+            description="Saliency Maps - gradient-based feature attribution (requires PyTorch)",
+            paper_reference="Simonyan et al., 2014 - 'Deep Inside Convolutional Networks' (ICLR Workshop)",
+            complexity="O(forward_pass + backward_pass)",
+            requires_training_data=False,
+            supports_batching=True
+        )
+    )
+
     # =========================================================================
     # Global Explainers (model-level)
     # =========================================================================
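The hunk above wires `SaliencyExplainer` into `_create_default_registry()`. The same `register(...)` call is presumably the pattern a third-party plugin would use to enter the registry. Below is a minimal sketch of that pattern, assuming `ExplainerRegistry` and `ExplainerMeta` are importable from `explainiverse.core.registry` (inferred from this file's path, not shown in the diff); `SignedSaliencyExplainer` is a hypothetical subclass used only for illustration:

```python
# Hedged sketch of the plugin pattern, mirroring the register(...) call above.
# The import path is an assumption inferred from src/explainiverse/core/registry.py.
from explainiverse.core.registry import ExplainerRegistry, ExplainerMeta
from explainiverse.explainers.gradient.saliency import SaliencyExplainer


class SignedSaliencyExplainer(SaliencyExplainer):
    """Hypothetical variant that always keeps the gradient sign."""

    def __init__(self, model, feature_names, class_names=None):
        super().__init__(model, feature_names, class_names, absolute_value=False)


registry = ExplainerRegistry()
registry.register(
    name="saliency_signed",
    explainer_class=SignedSaliencyExplainer,
    meta=ExplainerMeta(
        scope="local",
        model_types=["neural"],
        data_types=["tabular", "image"],
        task_types=["classification", "regression"],
        description="Signed saliency (illustration of the plugin pattern only)",
        paper_reference="Simonyan et al., 2014",
        complexity="O(forward_pass + backward_pass)",
        requires_training_data=False,
        supports_batching=True
    )
)
```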
{explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/__init__.py

@@ -10,6 +10,7 @@ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGra
 from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
 from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
 from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer
+from explainiverse.explainers.gradient.saliency import SaliencyExplainer
 
 __all__ = [
     "IntegratedGradientsExplainer",
@@ -17,4 +18,5 @@ __all__ = [
     "DeepLIFTExplainer",
     "DeepLIFTShapExplainer",
     "SmoothGradExplainer",
+    "SaliencyExplainer",
 ]
explainiverse-0.6.0/src/explainiverse/explainers/gradient/saliency.py

@@ -0,0 +1,293 @@
+# src/explainiverse/explainers/gradient/saliency.py
+"""
+Saliency Maps - Gradient-Based Feature Attribution.
+
+Saliency Maps compute feature attributions using the gradient of the output
+with respect to the input. This is one of the simplest and fastest gradient-based
+attribution methods, requiring only a single forward and backward pass.
+
+Key Properties:
+    - Simple: Just compute the gradient of output w.r.t. input
+    - Fast: Single forward + backward pass
+    - Foundation: Base method that other gradient methods build upon
+    - Variants: Absolute saliency, signed saliency, input × gradient
+
+Variants:
+    - Saliency (absolute): |∂f(x)/∂x| - magnitude of sensitivity
+    - Saliency (signed): ∂f(x)/∂x - direction and magnitude
+    - Input × Gradient: x ⊙ ∂f(x)/∂x - scaled by input values
+
+Reference:
+    Simonyan, K., Vedaldi, A., & Zisserman, A. (2014).
+    Deep Inside Convolutional Networks: Visualising Image Classification
+    Models and Saliency Maps.
+    ICLR Workshop 2014.
+    https://arxiv.org/abs/1312.6034
+
+Example:
+    from explainiverse.explainers.gradient import SaliencyExplainer
+    from explainiverse.adapters import PyTorchAdapter
+
+    adapter = PyTorchAdapter(model, task="classification")
+
+    explainer = SaliencyExplainer(
+        model=adapter,
+        feature_names=feature_names
+    )
+
+    explanation = explainer.explain(instance)
+"""
+
+import numpy as np
+from typing import List, Optional
+
+from explainiverse.core.explainer import BaseExplainer
+from explainiverse.core.explanation import Explanation
+
+
+class SaliencyExplainer(BaseExplainer):
+    """
+    Saliency Maps explainer for neural networks.
+
+    Computes attributions using the gradient of the model output with respect
+    to the input features. This is the simplest gradient-based attribution
+    method and serves as the foundation for more sophisticated techniques.
+
+    Algorithm:
+        Saliency(x) = ∂f(x)/∂x (signed)
+        Saliency(x) = |∂f(x)/∂x| (absolute, default)
+        InputTimesGradient(x) = x ⊙ ∂f(x)/∂x
+
+    Attributes:
+        model: Model adapter with predict_with_gradients() method
+        feature_names: List of feature names
+        class_names: List of class names (for classification)
+        absolute_value: Whether to take absolute value of gradients
+
+    Example:
+        >>> explainer = SaliencyExplainer(adapter, feature_names)
+        >>> explanation = explainer.explain(instance)
+        >>> print(explanation.explanation_data["feature_attributions"])
+    """
+
+    def __init__(
+        self,
+        model,
+        feature_names: List[str],
+        class_names: Optional[List[str]] = None,
+        absolute_value: bool = True
+    ):
+        """
+        Initialize the Saliency explainer.
+
+        Args:
+            model: A model adapter with predict_with_gradients() method.
+                Use PyTorchAdapter for PyTorch models.
+            feature_names: List of input feature names.
+            class_names: List of class names (for classification tasks).
+            absolute_value: If True (default), return absolute value of
+                gradients. Set to False for signed saliency.
+
+        Raises:
+            TypeError: If model doesn't have predict_with_gradients method.
+        """
+        super().__init__(model)
+
+        # Validate model has gradient capability
+        if not hasattr(model, 'predict_with_gradients'):
+            raise TypeError(
+                "Model adapter must have predict_with_gradients() method. "
+                "Use PyTorchAdapter for PyTorch models."
+            )
+
+        self.feature_names = list(feature_names)
+        self.class_names = list(class_names) if class_names else None
+        self.absolute_value = absolute_value
+
+    def _compute_saliency(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        method: str = "saliency"
+    ) -> np.ndarray:
+        """
+        Compute saliency attributions for a single instance.
+
+        Args:
+            instance: Input instance (1D array).
+            target_class: Target class for gradient computation.
+            method: Attribution method:
+                - "saliency": Raw gradient (default)
+                - "input_times_gradient": Gradient multiplied by input
+
+        Returns:
+            Array of attribution scores for each input feature.
+        """
+        instance = instance.flatten().astype(np.float32)
+
+        # Compute gradient
+        _, gradients = self.model.predict_with_gradients(
+            instance.reshape(1, -1),
+            target_class=target_class
+        )
+        gradients = gradients.flatten()
+
+        # Apply method
+        if method == "saliency":
+            attributions = gradients
+        elif method == "input_times_gradient":
+            attributions = instance * gradients
+        else:
+            raise ValueError(
+                f"Unknown method: '{method}'. "
+                f"Use 'saliency' or 'input_times_gradient'."
+            )
+
+        # Apply absolute value if configured
+        if self.absolute_value and method == "saliency":
+            attributions = np.abs(attributions)
+
+        return attributions
+
+    def explain(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        method: str = "saliency"
+    ) -> Explanation:
+        """
+        Generate Saliency explanation for an instance.
+
+        Args:
+            instance: 1D numpy array of input features.
+            target_class: For classification, which class to explain.
+                If None, uses the predicted class.
+            method: Attribution method:
+                - "saliency": Gradient-based saliency (default)
+                - "input_times_gradient": Gradient × input
+
+        Returns:
+            Explanation object with feature attributions.
+
+        Example:
+            >>> explanation = explainer.explain(instance)
+            >>> print(explanation.explanation_data["feature_attributions"])
+        """
+        instance = np.array(instance).flatten().astype(np.float32)
+
+        # Determine target class if not specified
+        if target_class is None and self.class_names:
+            predictions = self.model.predict(instance.reshape(1, -1))
+            target_class = int(np.argmax(predictions))
+
+        # Compute saliency
+        attributions = self._compute_saliency(instance, target_class, method)
+
+        # Build attributions dict
+        attributions_dict = {
+            fname: float(attributions[i])
+            for i, fname in enumerate(self.feature_names)
+        }
+
+        # Determine explainer name based on method
+        if method == "saliency":
+            explainer_name = "Saliency"
+        elif method == "input_times_gradient":
+            explainer_name = "InputTimesGradient"
+        else:
+            explainer_name = f"Saliency_{method}"
+
+        # Determine class name
+        if self.class_names and target_class is not None:
+            label_name = self.class_names[target_class]
+        else:
+            label_name = f"class_{target_class}" if target_class is not None else "output"
+
+        explanation_data = {
+            "feature_attributions": attributions_dict,
+            "attributions_raw": attributions.tolist(),
+            "method": method,
+            "absolute_value": self.absolute_value if method == "saliency" else False
+        }
+
+        return Explanation(
+            explainer_name=explainer_name,
+            target_class=label_name,
+            explanation_data=explanation_data
+        )
+
+    def explain_batch(
+        self,
+        X: np.ndarray,
+        target_class: Optional[int] = None,
+        method: str = "saliency"
+    ) -> List[Explanation]:
+        """
+        Generate explanations for multiple instances.
+
+        Args:
+            X: 2D numpy array of instances (n_samples, n_features),
+                or 1D array for single instance.
+            target_class: Target class for all instances. If None,
+                uses predicted class for each instance.
+            method: Attribution method (see explain()).
+
+        Returns:
+            List of Explanation objects.
+
+        Example:
+            >>> explanations = explainer.explain_batch(X_test[:10])
+            >>> for exp in explanations:
+            ...     print(exp.target_class)
+        """
+        X = np.array(X)
+        if X.ndim == 1:
+            X = X.reshape(1, -1)
+
+        return [
+            self.explain(X[i], target_class=target_class, method=method)
+            for i in range(X.shape[0])
+        ]
+
+    def compute_all_variants(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None
+    ) -> dict:
+        """
+        Compute all saliency variants for comparison.
+
+        Useful for analyzing which variant provides the best explanation
+        for a given instance or model architecture.
+
+        Args:
+            instance: Input instance.
+            target_class: Target class for gradient computation.
+
+        Returns:
+            Dictionary containing:
+                - saliency_absolute: |∂f/∂x|
+                - saliency_signed: ∂f/∂x
+                - input_times_gradient: x ⊙ ∂f/∂x
+        """
+        instance = np.array(instance).flatten().astype(np.float32)
+
+        # Determine target class
+        if target_class is None and self.class_names:
+            predictions = self.model.predict(instance.reshape(1, -1))
+            target_class = int(np.argmax(predictions))
+
+        # Compute gradient (only once)
+        _, gradients = self.model.predict_with_gradients(
+            instance.reshape(1, -1),
+            target_class=target_class
+        )
+        gradients = gradients.flatten()
+
+        return {
+            "saliency_absolute": np.abs(gradients).tolist(),
+            "saliency_signed": gradients.tolist(),
+            "input_times_gradient": (instance * gradients).tolist(),
+            "feature_names": self.feature_names,
+            "target_class": target_class
+        }
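For reviewers who want to exercise the new module end to end, here is a minimal sketch assembled from the APIs shown in this diff (`SaliencyExplainer`, `compute_all_variants`, and the `PyTorchAdapter(model, task="classification")` call from the module docstring). The two-feature network, feature names, and input values are hypothetical placeholders, not part of the release:

```python
import numpy as np
import torch.nn as nn

from explainiverse.adapters import PyTorchAdapter  # import path taken from the module docstring
from explainiverse.explainers.gradient import SaliencyExplainer

# Hypothetical two-feature, two-class network, for illustration only.
model = nn.Sequential(nn.Linear(2, 8), nn.ReLU(), nn.Linear(8, 2))
adapter = PyTorchAdapter(model, task="classification")

explainer = SaliencyExplainer(
    model=adapter,
    feature_names=["x0", "x1"],  # placeholder names
    class_names=["neg", "pos"],  # placeholder labels
)

x = np.array([0.5, -1.2], dtype=np.float32)

# Default: absolute saliency |∂f/∂x| for the predicted class.
explanation = explainer.explain(x)
print(explanation.explanation_data["feature_attributions"])

# All three variants — |∂f/∂x|, ∂f/∂x, and x ⊙ ∂f/∂x — from one backward pass.
variants = explainer.compute_all_variants(x)
print(variants["saliency_absolute"])
print(variants["input_times_gradient"])
```

Note that `compute_all_variants` derives all three attributions from a single gradient call (the "# Compute gradient (only once)" step above), so comparing variants costs no more than one forward and backward pass.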