explainiverse 0.5.0__tar.gz → 0.6.0__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {explainiverse-0.5.0 → explainiverse-0.6.0}/PKG-INFO +41 -6
- {explainiverse-0.5.0 → explainiverse-0.6.0}/README.md +40 -5
- {explainiverse-0.5.0 → explainiverse-0.6.0}/pyproject.toml +1 -1
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/__init__.py +1 -1
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/registry.py +18 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/__init__.py +2 -0
- explainiverse-0.6.0/src/explainiverse/explainers/gradient/saliency.py +293 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/LICENSE +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/base_adapter.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/pytorch_adapter.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/adapters/sklearn_adapter.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/explainer.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/explanation.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/engine/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/engine/suite.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/_utils.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/faithfulness.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/metrics.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/evaluation/stability.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/lime_wrapper.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/shap_wrapper.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/attribution/treeshap_wrapper.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/counterfactual/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/counterfactual/dice_wrapper.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/example_based/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/example_based/protodash.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/ale.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/partial_dependence.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/permutation_importance.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/global_explainers/sage.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/deeplift.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/gradcam.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/integrated_gradients.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/smoothgrad.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/rule_based/__init__.py +0 -0
- {explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/rule_based/anchors_wrapper.py +0 -0
{explainiverse-0.5.0 → explainiverse-0.6.0}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: explainiverse
-Version: 0.5.0
+Version: 0.6.0
 Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
 Home-page: https://github.com/jemsbhai/explainiverse
 License: MIT

@@ -35,7 +35,7 @@ Description-Content-Type: text/markdown
 [](https://www.python.org/downloads/)
 [](https://opensource.org/licenses/MIT)
 
-**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **
+**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **16 state-of-the-art explanation methods** across local, global, gradient-based, and example-based paradigms, along with **comprehensive evaluation metrics** for assessing explanation quality.
 
 ---
 
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
 
 | Feature | Description |
 |---------|-------------|
-| **
+| **16 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
 | **8 Evaluation Metrics** | Faithfulness (PGI, PGU, Comprehensiveness, Sufficiency, Correlation) and Stability (RIS, ROS, Lipschitz) |
 | **Unified API** | Consistent `BaseExplainer` interface with standardized `Explanation` output |
 | **Plugin Registry** | Filter explainers by scope, model type, data type; automatic recommendations |

@@ -64,6 +64,7 @@ Description-Content-Type: text/markdown
 | **DeepLIFT** | Gradient | [Shrikumar et al., 2017](https://arxiv.org/abs/1704.02685) |
 | **DeepSHAP** | Gradient + Shapley | [Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874) |
 | **SmoothGrad** | Gradient | [Smilkov et al., 2017](https://arxiv.org/abs/1706.03825) |
+| **Saliency Maps** | Gradient | [Simonyan et al., 2014](https://arxiv.org/abs/1312.6034) |
 | **GradCAM / GradCAM++** | Gradient (CNN) | [Selvaraju et al., 2017](https://arxiv.org/abs/1610.02391) |
 | **Anchors** | Rule-Based | [Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491) |
 | **Counterfactual** | Contrastive | [Mothilal et al., 2020](https://arxiv.org/abs/1905.07697) |

@@ -233,6 +234,41 @@ deepshap = DeepLIFTShapExplainer(
 explanation = deepshap.explain(X[0])
 ```
 
+### Saliency Maps
+
+```python
+from explainiverse.explainers.gradient import SaliencyExplainer
+
+# Saliency Maps - simplest and fastest gradient method
+explainer = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=True  # Default: absolute gradient magnitudes
+)
+
+# Standard saliency (absolute gradients)
+explanation = explainer.explain(X[0], method="saliency")
+
+# Input × Gradient (gradient scaled by input values)
+explanation = explainer.explain(X[0], method="input_times_gradient")
+
+# Signed saliency (keep gradient direction)
+explainer_signed = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=False
+)
+explanation = explainer_signed.explain(X[0])
+
+# Compare all variants
+variants = explainer.compute_all_variants(X[0])
+print(variants["saliency_absolute"])
+print(variants["saliency_signed"])
+print(variants["input_times_gradient"])
+```
+
 ### SmoothGrad
 
 ```python

@@ -552,7 +588,7 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 ### Completed ✅
 - [x] Core framework (BaseExplainer, Explanation, Registry)
 - [x] Perturbation methods: LIME, KernelSHAP, TreeSHAP
-- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, GradCAM/GradCAM++
+- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++
 - [x] Rule-based: Anchors
 - [x] Counterfactual: DiCE-style
 - [x] Global: Permutation Importance, PDP, ALE, SAGE

@@ -562,7 +598,6 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 - [x] PyTorch adapter with gradient support
 
 ### In Progress 🚧
-- [ ] Saliency Maps (vanilla gradients)
 - [ ] TCAV (Testing with Concept Activation Vectors)
 - [ ] Layer-wise Relevance Propagation (LRP)
 

@@ -585,7 +620,7 @@ If you use Explainiverse in your research, please cite:
   author = {Syed, Muntaser},
   year = {2025},
   url = {https://github.com/jemsbhai/explainiverse},
-  version = {0.5.0}
+  version = {0.6.0}
 }
 ```
 
{explainiverse-0.5.0 → explainiverse-0.6.0}/README.md
RENAMED

@@ -4,7 +4,7 @@
 [](https://www.python.org/downloads/)
 [](https://opensource.org/licenses/MIT)
 
-**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **
+**Explainiverse** is a unified, extensible Python framework for Explainable AI (XAI). It provides a standardized interface for **16 state-of-the-art explanation methods** across local, global, gradient-based, and example-based paradigms, along with **comprehensive evaluation metrics** for assessing explanation quality.
 
 ---
 
@@ -12,7 +12,7 @@
 
 | Feature | Description |
 |---------|-------------|
-| **
+| **16 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
 | **8 Evaluation Metrics** | Faithfulness (PGI, PGU, Comprehensiveness, Sufficiency, Correlation) and Stability (RIS, ROS, Lipschitz) |
 | **Unified API** | Consistent `BaseExplainer` interface with standardized `Explanation` output |
 | **Plugin Registry** | Filter explainers by scope, model type, data type; automatic recommendations |

@@ -33,6 +33,7 @@
 | **DeepLIFT** | Gradient | [Shrikumar et al., 2017](https://arxiv.org/abs/1704.02685) |
 | **DeepSHAP** | Gradient + Shapley | [Lundberg & Lee, 2017](https://arxiv.org/abs/1705.07874) |
 | **SmoothGrad** | Gradient | [Smilkov et al., 2017](https://arxiv.org/abs/1706.03825) |
+| **Saliency Maps** | Gradient | [Simonyan et al., 2014](https://arxiv.org/abs/1312.6034) |
 | **GradCAM / GradCAM++** | Gradient (CNN) | [Selvaraju et al., 2017](https://arxiv.org/abs/1610.02391) |
 | **Anchors** | Rule-Based | [Ribeiro et al., 2018](https://ojs.aaai.org/index.php/AAAI/article/view/11491) |
 | **Counterfactual** | Contrastive | [Mothilal et al., 2020](https://arxiv.org/abs/1905.07697) |

@@ -202,6 +203,41 @@ deepshap = DeepLIFTShapExplainer(
 explanation = deepshap.explain(X[0])
 ```
 
+### Saliency Maps
+
+```python
+from explainiverse.explainers.gradient import SaliencyExplainer
+
+# Saliency Maps - simplest and fastest gradient method
+explainer = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=True  # Default: absolute gradient magnitudes
+)
+
+# Standard saliency (absolute gradients)
+explanation = explainer.explain(X[0], method="saliency")
+
+# Input × Gradient (gradient scaled by input values)
+explanation = explainer.explain(X[0], method="input_times_gradient")
+
+# Signed saliency (keep gradient direction)
+explainer_signed = SaliencyExplainer(
+    model=adapter,
+    feature_names=feature_names,
+    class_names=class_names,
+    absolute_value=False
+)
+explanation = explainer_signed.explain(X[0])
+
+# Compare all variants
+variants = explainer.compute_all_variants(X[0])
+print(variants["saliency_absolute"])
+print(variants["saliency_signed"])
+print(variants["input_times_gradient"])
+```
+
 ### SmoothGrad
 
 ```python

@@ -521,7 +557,7 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 ### Completed ✅
 - [x] Core framework (BaseExplainer, Explanation, Registry)
 - [x] Perturbation methods: LIME, KernelSHAP, TreeSHAP
-- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, GradCAM/GradCAM++
+- [x] Gradient methods: Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++
 - [x] Rule-based: Anchors
 - [x] Counterfactual: DiCE-style
 - [x] Global: Permutation Importance, PDP, ALE, SAGE

@@ -531,7 +567,6 @@ poetry run pytest tests/test_smoothgrad.py::TestSmoothGradBasic -v
 - [x] PyTorch adapter with gradient support
 
 ### In Progress 🚧
-- [ ] Saliency Maps (vanilla gradients)
 - [ ] TCAV (Testing with Concept Activation Vectors)
 - [ ] Layer-wise Relevance Propagation (LRP)
 

@@ -554,7 +589,7 @@ If you use Explainiverse in your research, please cite:
   author = {Syed, Muntaser},
   year = {2025},
   url = {https://github.com/jemsbhai/explainiverse},
-  version = {0.5.0}
+  version = {0.6.0}
 }
 ```
 
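The Saliency Maps section added to the README above documents three variants that are all post-processings of a single gradient vector. As a self-contained numerical sketch of that arithmetic (the linear model, weights, and input below are hypothetical, chosen only so the gradient is known in closed form; this is not code from the package):

```python
import numpy as np

# Hypothetical linear model f(x) = w . x, whose gradient w.r.t. x is simply w.
w = np.array([0.5, -2.0, 0.0, 1.5])   # assumed weights
x = np.array([1.0, 0.2, 3.0, -1.0])   # assumed input instance

gradients = w                           # ∂f(x)/∂x for this model

saliency_absolute = np.abs(gradients)   # |∂f/∂x|   -> [0.5, 2.0, 0.0, 1.5]
saliency_signed = gradients             # ∂f/∂x     -> [0.5, -2.0, 0.0, 1.5]
input_times_gradient = x * gradients    # x ⊙ ∂f/∂x -> [0.5, -0.4, 0.0, -1.5]

print(saliency_absolute, saliency_signed, input_times_gradient)
```

For a linear model, input × gradient recovers each feature's additive contribution wᵢxᵢ, which is one reason that variant is useful when signed attributions matter.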
{explainiverse-0.5.0 → explainiverse-0.6.0}/pyproject.toml
RENAMED

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "explainiverse"
-version = "0.5.0"
+version = "0.6.0"
 description = "Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more"
 authors = ["Muntaser Syed <jemsbhai@gmail.com>"]
 license = "MIT"
{explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/core/registry.py
RENAMED

@@ -373,6 +373,7 @@ def _create_default_registry() -> ExplainerRegistry:
     from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
     from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
     from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer
+    from explainiverse.explainers.gradient.saliency import SaliencyExplainer
     from explainiverse.explainers.example_based.protodash import ProtoDashExplainer
 
     registry = ExplainerRegistry()

@@ -551,6 +552,23 @@ def _create_default_registry() -> ExplainerRegistry:
         )
     )
 
+    # Register Saliency Maps (for neural networks)
+    registry.register(
+        name="saliency",
+        explainer_class=SaliencyExplainer,
+        meta=ExplainerMeta(
+            scope="local",
+            model_types=["neural"],
+            data_types=["tabular", "image"],
+            task_types=["classification", "regression"],
+            description="Saliency Maps - gradient-based feature attribution (requires PyTorch)",
+            paper_reference="Simonyan et al., 2014 - 'Deep Inside Convolutional Networks' (ICLR Workshop)",
+            complexity="O(forward_pass + backward_pass)",
+            requires_training_data=False,
+            supports_batching=True
+        )
+    )
+
     # =========================================================================
     # Global Explainers (model-level)
     # =========================================================================
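The `ExplainerMeta` block above is what the plugin registry's filtering and recommendation features key on. A toy stand-in for that lookup logic — the `entries` dict, the second entry, and the `compatible()` helper are hypothetical illustrations of the semantics, not the package's actual registry API:

```python
# Illustrative stand-in for registry filtering. Only the "saliency" field names
# and values mirror the ExplainerMeta entry registered above; everything else
# here is an assumption made for the sketch.
entries = {
    "saliency": {
        "scope": "local",
        "model_types": ["neural"],
        "data_types": ["tabular", "image"],
        "task_types": ["classification", "regression"],
    },
    "hypothetical_tree_explainer": {  # contrast entry, invented for the sketch
        "scope": "local",
        "model_types": ["tree"],
        "data_types": ["tabular"],
        "task_types": ["classification", "regression"],
    },
}

def compatible(meta: dict, model_type: str, data_type: str) -> bool:
    # An explainer qualifies only if it supports both the model and data type.
    return model_type in meta["model_types"] and data_type in meta["data_types"]

print([name for name, meta in entries.items() if compatible(meta, "neural", "image")])
# -> ['saliency']
```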
{explainiverse-0.5.0 → explainiverse-0.6.0}/src/explainiverse/explainers/gradient/__init__.py
RENAMED

@@ -10,6 +10,7 @@ from explainiverse.explainers.gradient.integrated_gradients import IntegratedGra
 from explainiverse.explainers.gradient.gradcam import GradCAMExplainer
 from explainiverse.explainers.gradient.deeplift import DeepLIFTExplainer, DeepLIFTShapExplainer
 from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer
+from explainiverse.explainers.gradient.saliency import SaliencyExplainer
 
 __all__ = [
     "IntegratedGradientsExplainer",

@@ -17,4 +18,5 @@ __all__ = [
     "DeepLIFTExplainer",
     "DeepLIFTShapExplainer",
     "SmoothGradExplainer",
+    "SaliencyExplainer",
 ]
explainiverse-0.6.0/src/explainiverse/explainers/gradient/saliency.py
ADDED

@@ -0,0 +1,293 @@
+# src/explainiverse/explainers/gradient/saliency.py
+"""
+Saliency Maps - Gradient-Based Feature Attribution.
+
+Saliency Maps compute feature attributions using the gradient of the output
+with respect to the input. This is one of the simplest and fastest gradient-based
+attribution methods, requiring only a single forward and backward pass.
+
+Key Properties:
+    - Simple: Just compute the gradient of output w.r.t. input
+    - Fast: Single forward + backward pass
+    - Foundation: Base method that other gradient methods build upon
+    - Variants: Absolute saliency, signed saliency, input × gradient
+
+Variants:
+    - Saliency (absolute): |∂f(x)/∂x| - magnitude of sensitivity
+    - Saliency (signed): ∂f(x)/∂x - direction and magnitude
+    - Input × Gradient: x ⊙ ∂f(x)/∂x - scaled by input values
+
+Reference:
+    Simonyan, K., Vedaldi, A., & Zisserman, A. (2014).
+    Deep Inside Convolutional Networks: Visualising Image Classification
+    Models and Saliency Maps.
+    ICLR Workshop 2014.
+    https://arxiv.org/abs/1312.6034
+
+Example:
+    from explainiverse.explainers.gradient import SaliencyExplainer
+    from explainiverse.adapters import PyTorchAdapter
+
+    adapter = PyTorchAdapter(model, task="classification")
+
+    explainer = SaliencyExplainer(
+        model=adapter,
+        feature_names=feature_names
+    )
+
+    explanation = explainer.explain(instance)
+"""
+
+import numpy as np
+from typing import List, Optional
+
+from explainiverse.core.explainer import BaseExplainer
+from explainiverse.core.explanation import Explanation
+
+
+class SaliencyExplainer(BaseExplainer):
+    """
+    Saliency Maps explainer for neural networks.
+
+    Computes attributions using the gradient of the model output with respect
+    to the input features. This is the simplest gradient-based attribution
+    method and serves as the foundation for more sophisticated techniques.
+
+    Algorithm:
+        Saliency(x) = ∂f(x)/∂x (signed)
+        Saliency(x) = |∂f(x)/∂x| (absolute, default)
+        InputTimesGradient(x) = x ⊙ ∂f(x)/∂x
+
+    Attributes:
+        model: Model adapter with predict_with_gradients() method
+        feature_names: List of feature names
+        class_names: List of class names (for classification)
+        absolute_value: Whether to take absolute value of gradients
+
+    Example:
+        >>> explainer = SaliencyExplainer(adapter, feature_names)
+        >>> explanation = explainer.explain(instance)
+        >>> print(explanation.explanation_data["feature_attributions"])
+    """
+
+    def __init__(
+        self,
+        model,
+        feature_names: List[str],
+        class_names: Optional[List[str]] = None,
+        absolute_value: bool = True
+    ):
+        """
+        Initialize the Saliency explainer.
+
+        Args:
+            model: A model adapter with predict_with_gradients() method.
+                Use PyTorchAdapter for PyTorch models.
+            feature_names: List of input feature names.
+            class_names: List of class names (for classification tasks).
+            absolute_value: If True (default), return absolute value of
+                gradients. Set to False for signed saliency.
+
+        Raises:
+            TypeError: If model doesn't have predict_with_gradients method.
+        """
+        super().__init__(model)
+
+        # Validate model has gradient capability
+        if not hasattr(model, 'predict_with_gradients'):
+            raise TypeError(
+                "Model adapter must have predict_with_gradients() method. "
+                "Use PyTorchAdapter for PyTorch models."
+            )
+
+        self.feature_names = list(feature_names)
+        self.class_names = list(class_names) if class_names else None
+        self.absolute_value = absolute_value
+
+    def _compute_saliency(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        method: str = "saliency"
+    ) -> np.ndarray:
+        """
+        Compute saliency attributions for a single instance.
+
+        Args:
+            instance: Input instance (1D array).
+            target_class: Target class for gradient computation.
+            method: Attribution method:
+                - "saliency": Raw gradient (default)
+                - "input_times_gradient": Gradient multiplied by input
+
+        Returns:
+            Array of attribution scores for each input feature.
+        """
+        instance = instance.flatten().astype(np.float32)
+
+        # Compute gradient
+        _, gradients = self.model.predict_with_gradients(
+            instance.reshape(1, -1),
+            target_class=target_class
+        )
+        gradients = gradients.flatten()
+
+        # Apply method
+        if method == "saliency":
+            attributions = gradients
+        elif method == "input_times_gradient":
+            attributions = instance * gradients
+        else:
+            raise ValueError(
+                f"Unknown method: '{method}'. "
+                f"Use 'saliency' or 'input_times_gradient'."
+            )
+
+        # Apply absolute value if configured
+        if self.absolute_value and method == "saliency":
+            attributions = np.abs(attributions)
+
+        return attributions
+
+    def explain(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        method: str = "saliency"
+    ) -> Explanation:
+        """
+        Generate Saliency explanation for an instance.
+
+        Args:
+            instance: 1D numpy array of input features.
+            target_class: For classification, which class to explain.
+                If None, uses the predicted class.
+            method: Attribution method:
+                - "saliency": Gradient-based saliency (default)
+                - "input_times_gradient": Gradient × input
+
+        Returns:
+            Explanation object with feature attributions.
+
+        Example:
+            >>> explanation = explainer.explain(instance)
+            >>> print(explanation.explanation_data["feature_attributions"])
+        """
+        instance = np.array(instance).flatten().astype(np.float32)
+
+        # Determine target class if not specified
+        if target_class is None and self.class_names:
+            predictions = self.model.predict(instance.reshape(1, -1))
+            target_class = int(np.argmax(predictions))
+
+        # Compute saliency
+        attributions = self._compute_saliency(instance, target_class, method)
+
+        # Build attributions dict
+        attributions_dict = {
+            fname: float(attributions[i])
+            for i, fname in enumerate(self.feature_names)
+        }
+
+        # Determine explainer name based on method
+        if method == "saliency":
+            explainer_name = "Saliency"
+        elif method == "input_times_gradient":
+            explainer_name = "InputTimesGradient"
+        else:
+            explainer_name = f"Saliency_{method}"
+
+        # Determine class name
+        if self.class_names and target_class is not None:
+            label_name = self.class_names[target_class]
+        else:
+            label_name = f"class_{target_class}" if target_class is not None else "output"
+
+        explanation_data = {
+            "feature_attributions": attributions_dict,
+            "attributions_raw": attributions.tolist(),
+            "method": method,
+            "absolute_value": self.absolute_value if method == "saliency" else False
+        }
+
+        return Explanation(
+            explainer_name=explainer_name,
+            target_class=label_name,
+            explanation_data=explanation_data
+        )
+
+    def explain_batch(
+        self,
+        X: np.ndarray,
+        target_class: Optional[int] = None,
+        method: str = "saliency"
+    ) -> List[Explanation]:
+        """
+        Generate explanations for multiple instances.
+
+        Args:
+            X: 2D numpy array of instances (n_samples, n_features),
+                or 1D array for single instance.
+            target_class: Target class for all instances. If None,
+                uses predicted class for each instance.
+            method: Attribution method (see explain()).
+
+        Returns:
+            List of Explanation objects.
+
+        Example:
+            >>> explanations = explainer.explain_batch(X_test[:10])
+            >>> for exp in explanations:
+            ...     print(exp.target_class)
+        """
+        X = np.array(X)
+        if X.ndim == 1:
+            X = X.reshape(1, -1)
+
+        return [
+            self.explain(X[i], target_class=target_class, method=method)
+            for i in range(X.shape[0])
+        ]
+
+    def compute_all_variants(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None
+    ) -> dict:
+        """
+        Compute all saliency variants for comparison.
+
+        Useful for analyzing which variant provides the best explanation
+        for a given instance or model architecture.
+
+        Args:
+            instance: Input instance.
+            target_class: Target class for gradient computation.
+
+        Returns:
+            Dictionary containing:
+            - saliency_absolute: |∂f/∂x|
+            - saliency_signed: ∂f/∂x
+            - input_times_gradient: x ⊙ ∂f/∂x
+        """
+        instance = np.array(instance).flatten().astype(np.float32)
+
+        # Determine target class
+        if target_class is None and self.class_names:
+            predictions = self.model.predict(instance.reshape(1, -1))
+            target_class = int(np.argmax(predictions))
+
+        # Compute gradient (only once)
+        _, gradients = self.model.predict_with_gradients(
+            instance.reshape(1, -1),
+            target_class=target_class
+        )
+        gradients = gradients.flatten()
+
+        return {
+            "saliency_absolute": np.abs(gradients).tolist(),
+            "saliency_signed": gradients.tolist(),
+            "input_times_gradient": (instance * gradients).tolist(),
+            "feature_names": self.feature_names,
+            "target_class": target_class
+        }
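Reading the new module end to end, wiring it to a real network takes only an adapter. A minimal sketch, assuming (per the module docstring above) that `PyTorchAdapter(model, task=...)` exists and that the adapter accepts numpy inputs through `predict()`/`predict_with_gradients()`; the toy network, feature names, and class names are illustrative, not taken from the package:

```python
import numpy as np
import torch.nn as nn

from explainiverse.adapters import PyTorchAdapter
from explainiverse.explainers.gradient import SaliencyExplainer

# Toy 4-feature, 3-class network (hypothetical), wrapped as in the docstring.
model = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 3))
adapter = PyTorchAdapter(model, task="classification")

explainer = SaliencyExplainer(
    model=adapter,
    feature_names=["f0", "f1", "f2", "f3"],
    class_names=["a", "b", "c"],
)

x = np.random.rand(4).astype(np.float32)

explanation = explainer.explain(x)  # absolute saliency for the predicted class
print(explanation.target_class)
print(explanation.explanation_data["feature_attributions"])

# All three variants from a single gradient computation:
variants = explainer.compute_all_variants(x)
print(variants["input_times_gradient"])
```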