explainiverse 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
explainiverse/__init__.py CHANGED
@@ -34,7 +34,7 @@ from explainiverse.adapters.sklearn_adapter import SklearnAdapter
34
34
  from explainiverse.adapters import TORCH_AVAILABLE
35
35
  from explainiverse.engine.suite import ExplanationSuite
36
36
 
37
- __version__ = "0.8.2"
37
+ __version__ = "0.8.4"
38
38
 
39
39
  __all__ = [
40
40
  # Core
@@ -41,6 +41,10 @@ from explainiverse.evaluation.faithfulness_extended import (
41
41
  compute_batch_faithfulness_estimate,
42
42
  compute_monotonicity,
43
43
  compute_batch_monotonicity,
44
+ compute_monotonicity_nguyen,
45
+ compute_batch_monotonicity_nguyen,
46
+ compute_pixel_flipping,
47
+ compute_batch_pixel_flipping,
44
48
  )
45
49
 
46
50
  __all__ = [
@@ -70,4 +74,8 @@ __all__ = [
70
74
  "compute_batch_faithfulness_estimate",
71
75
  "compute_monotonicity",
72
76
  "compute_batch_monotonicity",
77
+ "compute_monotonicity_nguyen",
78
+ "compute_batch_monotonicity_nguyen",
79
+ "compute_pixel_flipping",
80
+ "compute_batch_pixel_flipping",
73
81
  ]
@@ -256,6 +256,348 @@ def compute_batch_faithfulness_estimate(
256
256
  }
257
257
 
258
258
 
259
# =============================================================================
# Metric 4: Pixel Flipping (Bach et al., 2015)
# =============================================================================

def compute_pixel_flipping(
    model,
    instance: np.ndarray,
    explanation: Explanation,
    baseline: Union[str, float, np.ndarray, Callable] = "mean",
    background_data: np.ndarray = None,
    target_class: int = None,
    use_absolute: bool = True,
    return_curve: bool = False,
) -> Union[float, Dict[str, Union[float, np.ndarray]]]:
    """
    Compute Pixel Flipping score (Bach et al., 2015).

    Sequentially removes features in order of attributed importance (most
    important first) and measures the cumulative prediction degradation.
    A faithful explanation should cause rapid prediction drop when the
    most important features are removed first.

    The score is the Area Under the perturbation Curve (AUC), normalized
    to [0, 1]. Lower AUC indicates better faithfulness (faster degradation).

    Args:
        model: Model adapter with predict/predict_proba method
        instance: Input instance (1D array)
        explanation: Explanation object with feature_attributions
        baseline: Baseline for feature removal ("mean", "median", scalar, array, callable)
        background_data: Reference data for computing baseline (required for "mean"/"median")
        target_class: Target class index for probability (default: predicted class)
        use_absolute: If True, sort features by absolute attribution value
        return_curve: If True, return full degradation curve and predictions

    Returns:
        If return_curve=False: AUC score (float, 0 to 1, lower is better)
        If return_curve=True: Dictionary with 'auc', 'curve', 'predictions', 'feature_order'

    References:
        Bach, S., et al. (2015). On Pixel-Wise Explanations for Non-Linear
        Classifier Decisions by Layer-Wise Relevance Propagation. PLOS ONE.
    """
    instance = np.asarray(instance).flatten()
    n_features = len(instance)

    # Get baseline values
    baseline_values = compute_baseline_values(
        baseline, background_data, n_features
    )

    # Extract attributions as array
    attr_array = _extract_attribution_array(explanation, n_features)

    # Sort features by attribution (descending - most important first)
    if use_absolute:
        sorted_indices = np.argsort(-np.abs(attr_array))
    else:
        sorted_indices = np.argsort(-attr_array)

    # One model call on the unperturbed instance supplies both the default
    # target class and the original prediction value (previously the model
    # was queried twice with the identical input).
    original_pred = get_prediction_value(model, instance.reshape(1, -1))

    # Determine target class
    if target_class is None:
        if isinstance(original_pred, np.ndarray) and original_pred.ndim > 0:
            target_class = int(np.argmax(original_pred))
        else:
            target_class = 0

    # Original prediction for the target class
    if isinstance(original_pred, np.ndarray) and original_pred.ndim > 0 and len(original_pred) > target_class:
        original_value = original_pred[target_class]
    else:
        original_value = float(original_pred)

    # Start with original instance
    current = instance.copy()

    # Track predictions as features are removed
    predictions = [original_value]

    # Remove features one by one (most important first)
    for idx in sorted_indices:
        # Remove this feature (replace with baseline)
        current[idx] = baseline_values[idx]

        # Get prediction
        pred = get_prediction_value(model, current.reshape(1, -1))
        if isinstance(pred, np.ndarray) and pred.ndim > 0 and len(pred) > target_class:
            predictions.append(pred[target_class])
        else:
            predictions.append(float(pred))

    predictions = np.array(predictions)

    # Normalize predictions to [0, 1] relative to original
    # curve[i] = prediction after removing i features / original prediction
    if abs(original_value) > 1e-10:
        curve = predictions / original_value
    else:
        # Handle zero original prediction
        curve = predictions

    # Compute AUC using trapezoidal rule
    # x-axis: fraction of features removed (0 to 1)
    # y-axis: relative prediction value
    x = np.linspace(0, 1, len(predictions))
    # np.trapz is deprecated since NumPy 2.0 in favor of np.trapezoid;
    # prefer the new name, fall back for older NumPy.
    _trapz = getattr(np, "trapezoid", None) or np.trapz
    auc = _trapz(curve, x)

    if return_curve:
        return {
            "auc": float(auc),
            "curve": curve,
            "predictions": predictions,
            "feature_order": sorted_indices,
            "n_features": n_features,
        }

    return float(auc)
378
+
379
+
380
def compute_batch_pixel_flipping(
    model,
    X: np.ndarray,
    explanations: List[Explanation],
    baseline: Union[str, float, np.ndarray, Callable] = "mean",
    max_samples: int = None,
    use_absolute: bool = True,
) -> Dict[str, float]:
    """
    Compute average Pixel Flipping score over a batch of instances.

    Args:
        model: Model adapter
        X: Input data (2D array)
        explanations: List of Explanation objects (one per instance)
        baseline: Baseline for feature removal
        max_samples: Maximum number of samples to evaluate
        use_absolute: If True, sort features by absolute attribution value

    Returns:
        Dictionary with mean, std, min, max, and count of valid scores
    """
    n_samples = len(explanations)
    # Explicit None check: a caller-supplied max_samples=0 is an explicit
    # cap, not "no limit" (truthiness would silently ignore it).
    if max_samples is not None:
        n_samples = min(n_samples, max_samples)

    scores = []

    for i in range(n_samples):
        try:
            score = compute_pixel_flipping(
                model, X[i], explanations[i],
                baseline=baseline, background_data=X,
                use_absolute=use_absolute
            )
            if not np.isnan(score):
                scores.append(score)
        except Exception:
            # Best-effort aggregation: skip instances that cannot be scored.
            continue

    if not scores:
        return {"mean": 0.0, "std": 0.0, "min": 0.0, "max": 0.0, "n_samples": 0}

    return {
        "mean": float(np.mean(scores)),
        "std": float(np.std(scores)),
        "min": float(np.min(scores)),
        "max": float(np.max(scores)),
        "n_samples": len(scores),
    }
430
+
431
+
432
# =============================================================================
# Metric 3: Monotonicity-Nguyen (Nguyen et al., 2020)
# =============================================================================

def compute_monotonicity_nguyen(
    model,
    instance: np.ndarray,
    explanation: Explanation,
    baseline: Union[str, float, np.ndarray, Callable] = "mean",
    background_data: np.ndarray = None,
    target_class: int = None,
    use_absolute: bool = True,
) -> float:
    """
    Compute Monotonicity Correlation (Nguyen et al., 2020).

    Measures the Spearman rank correlation between attribution magnitudes
    and the prediction changes when each feature is individually removed
    (replaced with baseline). A faithful explanation should show that
    features with higher attributions cause larger prediction changes
    when removed.

    Unlike Arya's Monotonicity (sequential feature addition), this metric
    evaluates each feature independently and uses rank correlation to
    measure agreement between attributed importance and actual impact.

    Args:
        model: Model adapter with predict/predict_proba method
        instance: Input instance (1D array)
        explanation: Explanation object with feature_attributions
        baseline: Baseline for feature removal ("mean", "median", scalar, array, callable)
        background_data: Reference data for computing baseline (required for "mean"/"median")
        target_class: Target class index for probability (default: predicted class)
        use_absolute: If True, use absolute attribution values (default: True)

    Returns:
        Monotonicity correlation score (Spearman rho, -1 to 1, higher is better)

    References:
        Nguyen, A. P., & Martinez, M. R. (2020). Quantitative Evaluation of
        Machine Learning Explanations: A Human-Grounded Benchmark.
        arXiv:2010.07455.
    """
    instance = np.asarray(instance).flatten()
    n_features = len(instance)

    # Get baseline values
    baseline_values = compute_baseline_values(
        baseline, background_data, n_features
    )

    # Extract attributions as array
    attr_array = _extract_attribution_array(explanation, n_features)

    # One model call on the unperturbed instance supplies both the default
    # target class and the original prediction value (previously the model
    # was queried twice with the identical input).
    original_pred = get_prediction_value(model, instance.reshape(1, -1))

    # Determine target class
    if target_class is None:
        if isinstance(original_pred, np.ndarray) and original_pred.ndim > 0:
            target_class = int(np.argmax(original_pred))
        else:
            target_class = 0

    # Original prediction for the target class
    if isinstance(original_pred, np.ndarray) and original_pred.ndim > 0 and len(original_pred) > target_class:
        original_value = original_pred[target_class]
    else:
        original_value = float(original_pred)

    # Compute prediction change for each feature when removed
    prediction_changes = []
    attribution_values = []

    for i in range(n_features):
        # Create perturbed instance with feature i replaced by baseline
        perturbed = instance.copy()
        perturbed[i] = baseline_values[i]

        # Get prediction for perturbed instance
        perturbed_pred = get_prediction_value(model, perturbed.reshape(1, -1))
        if isinstance(perturbed_pred, np.ndarray) and perturbed_pred.ndim > 0 and len(perturbed_pred) > target_class:
            perturbed_value = perturbed_pred[target_class]
        else:
            perturbed_value = float(perturbed_pred)

        # Prediction change (drop in confidence when feature is removed)
        # Positive change means removing the feature decreased prediction
        change = original_value - perturbed_value
        prediction_changes.append(abs(change))

        # Attribution value
        if use_absolute:
            attribution_values.append(abs(attr_array[i]))
        else:
            attribution_values.append(attr_array[i])

    prediction_changes = np.array(prediction_changes)
    attribution_values = np.array(attribution_values)

    # Handle edge cases: correlation is undefined for fewer than 2 points
    if len(prediction_changes) < 2:
        return 0.0

    # Check for constant arrays (would cause division by zero in correlation)
    if np.std(prediction_changes) < 1e-10 or np.std(attribution_values) < 1e-10:
        # If both are constant, consider it perfect correlation
        if np.std(prediction_changes) < 1e-10 and np.std(attribution_values) < 1e-10:
            return 1.0
        # If only one is constant, correlation is undefined
        return 0.0

    # Compute Spearman rank correlation
    corr, _ = stats.spearmanr(attribution_values, prediction_changes)

    return float(corr) if not np.isnan(corr) else 0.0
547
+
548
+
549
def compute_batch_monotonicity_nguyen(
    model,
    X: np.ndarray,
    explanations: List[Explanation],
    baseline: Union[str, float, np.ndarray, Callable] = "mean",
    max_samples: int = None,
    use_absolute: bool = True,
) -> Dict[str, float]:
    """
    Compute average Monotonicity-Nguyen over a batch of instances.

    Args:
        model: Model adapter
        X: Input data (2D array)
        explanations: List of Explanation objects (one per instance)
        baseline: Baseline for feature removal
        max_samples: Maximum number of samples to evaluate
        use_absolute: If True, use absolute attribution values

    Returns:
        Dictionary with mean, std, min, max, and count of valid scores
    """
    n_samples = len(explanations)
    # Explicit None check: a caller-supplied max_samples=0 is an explicit
    # cap, not "no limit" (truthiness would silently ignore it).
    if max_samples is not None:
        n_samples = min(n_samples, max_samples)

    scores = []

    for i in range(n_samples):
        try:
            score = compute_monotonicity_nguyen(
                model, X[i], explanations[i],
                baseline=baseline, background_data=X,
                use_absolute=use_absolute
            )
            if not np.isnan(score):
                scores.append(score)
        except Exception:
            # Best-effort aggregation: skip instances that cannot be scored.
            continue

    if not scores:
        return {"mean": 0.0, "std": 0.0, "min": 0.0, "max": 0.0, "n_samples": 0}

    return {
        "mean": float(np.mean(scores)),
        "std": float(np.std(scores)),
        "min": float(np.min(scores)),
        "max": float(np.max(scores)),
        "n_samples": len(scores),
    }
599
+
600
+
259
601
  # =============================================================================
260
602
  # Metric 2: Monotonicity (Arya et al., 2019)
261
603
  # =============================================================================
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: explainiverse
3
- Version: 0.8.2
3
+ Version: 0.8.4
4
4
  Summary: Unified, extensible explainability framework supporting 18 XAI methods including LIME, SHAP, LRP, TCAV, GradCAM, and more
5
5
  Home-page: https://github.com/jemsbhai/explainiverse
6
6
  License: MIT
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
44
44
  | Feature | Description |
45
45
  |---------|-------------|
46
46
  | **18 Explainers** | LIME, KernelSHAP, TreeSHAP, Integrated Gradients, DeepLIFT, DeepSHAP, SmoothGrad, Saliency Maps, GradCAM/GradCAM++, LRP, TCAV, Anchors, Counterfactual, Permutation Importance, PDP, ALE, SAGE, ProtoDash |
47
- | **8 Evaluation Metrics** | Faithfulness (PGI, PGU, Comprehensiveness, Sufficiency, Correlation) and Stability (RIS, ROS, Lipschitz) |
47
+ | **12 Evaluation Metrics** | Faithfulness (PGI, PGU, Comprehensiveness, Sufficiency, Correlation, Faithfulness Estimate, Monotonicity, Monotonicity-Nguyen, Pixel Flipping) and Stability (RIS, ROS, Lipschitz) |
48
48
  | **Unified API** | Consistent `BaseExplainer` interface with standardized `Explanation` output |
49
49
  | **Plugin Registry** | Filter explainers by scope, model type, data type; automatic recommendations |
50
50
  | **Framework Support** | Adapters for scikit-learn and PyTorch (with gradient computation) |
@@ -96,6 +96,10 @@ Explainiverse includes a comprehensive suite of evaluation metrics based on the
96
96
  | **Comprehensiveness** | Drop when removing top-k features | [DeYoung et al., 2020](https://arxiv.org/abs/1911.03429) |
97
97
  | **Sufficiency** | Prediction using only top-k features | [DeYoung et al., 2020](https://arxiv.org/abs/1911.03429) |
98
98
  | **Faithfulness Correlation** | Correlation between attribution and impact | [Bhatt et al., 2020](https://arxiv.org/abs/2005.00631) |
99
+ | **Faithfulness Estimate** | Correlation of attributions with single-feature perturbation impact | [Alvarez-Melis & Jaakkola, 2018](https://arxiv.org/abs/1806.08049) |
100
+ | **Monotonicity** | Sequential feature addition shows monotonic prediction increase | [Arya et al., 2019](https://arxiv.org/abs/1909.03012) |
101
+ | **Monotonicity-Nguyen** | Spearman correlation between attributions and feature removal impact | [Nguyen & Martinez, 2020](https://arxiv.org/abs/2010.07455) |
102
+ | **Pixel Flipping** | AUC of prediction degradation when removing features by importance | [Bach et al., 2015](https://doi.org/10.1371/journal.pone.0130140) |
99
103
 
100
104
  ### Stability Metrics
101
105
 
@@ -715,6 +719,16 @@ poetry run pytest tests/test_lrp.py::TestLRPConv2d -v
715
719
  - [x] Evaluation: Stability metrics (RIS, ROS, Lipschitz)
716
720
  - [x] PyTorch adapter with gradient support
717
721
 
722
+ ### In Progress 🔄
723
+ - [ ] **Evaluation metrics expansion** - Adding 41 more metrics across 7 categories to exceed Quantus (37 metrics)
724
+ - Phase 1: Faithfulness (+9 metrics) - 4/12 complete
725
+ - Phase 2: Robustness (+7 metrics)
726
+ - Phase 3: Localisation (+8 metrics)
727
+ - Phase 4: Complexity (+4 metrics)
728
+ - Phase 5: Randomisation (+5 metrics)
729
+ - Phase 6: Axiomatic (+4 metrics)
730
+ - Phase 7: Fairness (+4 metrics)
731
+
718
732
  ### Planned 📋
719
733
  - [ ] Attention-based explanations (for Transformers)
720
734
  - [ ] TensorFlow/Keras adapter
@@ -734,7 +748,7 @@ If you use Explainiverse in your research, please cite:
734
748
  author = {Syed, Muntaser},
735
749
  year = {2025},
736
750
  url = {https://github.com/jemsbhai/explainiverse},
737
- version = {0.8.0}
751
+ version = {0.8.4}
738
752
  }
739
753
  ```
740
754
 
@@ -1,4 +1,4 @@
1
- explainiverse/__init__.py,sha256=icvNmaSq0DAERqIrU60N60KCIspHbtEWTi3kt_YXTUI,1694
1
+ explainiverse/__init__.py,sha256=bi_M_46DTXxO2sTGol7RX7LrCajNZSw12CYg7I9WE90,1694
2
2
  explainiverse/adapters/__init__.py,sha256=HcQGISyp-YQ4jEj2IYveX_c9X5otLcTNWRnVRRhzRik,781
3
3
  explainiverse/adapters/base_adapter.py,sha256=Nqt0GeDn_-PjTyJcZsE8dRTulavqFQsv8sMYWS_ps-M,603
4
4
  explainiverse/adapters/pytorch_adapter.py,sha256=DLQKJ7gB0foPwAmcrru7QdZnPRnhqDKpFCT-EaD3420,15612
@@ -9,10 +9,10 @@ explainiverse/core/explanation.py,sha256=498BbRYrNR-BOql78sENOsyWxgqLsBVZXn14lh-
9
9
  explainiverse/core/registry.py,sha256=6HttL27Ty4jYtugRf-EDIKPy80M8BfvUppAKwwGDyQ8,27207
10
10
  explainiverse/engine/__init__.py,sha256=1sZO8nH1mmwK2e-KUavBQm7zYDWUe27nyWoFy9tgsiA,197
11
11
  explainiverse/engine/suite.py,sha256=G-7OjESisSTaQ1FQrlPl4YydX13uz8Bb70hJZNlcl2M,8918
12
- explainiverse/evaluation/__init__.py,sha256=XFVnmwrRtHHhtxI_yOw_nsR67pJvH-IBO_lEUVI-eDE,1957
12
+ explainiverse/evaluation/__init__.py,sha256=HicoR2_xVWQO6z7ckQj05jxa7djA7zpKozAwRyURYmA,2233
13
13
  explainiverse/evaluation/_utils.py,sha256=ej7YOPZ90gVHuuIMj45EXHq9Jx3QG7lhaj5sk26hRpg,10519
14
14
  explainiverse/evaluation/faithfulness.py,sha256=_40afOW6vJ3dQguHlJySlgWqiJF_xIvN-uVA3nPKRvI,14841
15
- explainiverse/evaluation/faithfulness_extended.py,sha256=0zHcmINNA88EJcKOY04Z384S3QhBMo7W2m3lGNkUiNQ,14690
15
+ explainiverse/evaluation/faithfulness_extended.py,sha256=uMcYO6FJmzDFPAr5Y7AGkU7gYbweaPnqEhRoC4URGm0,27264
16
16
  explainiverse/evaluation/metrics.py,sha256=snNK9Ua1VzHDT6DlrhYL4m2MmRF3X15vuuVXiHbeicU,9944
17
17
  explainiverse/evaluation/stability.py,sha256=q2d3rpxpp0X1s6ADST1iZA4tzksLJpR0mYBnA_U5FIs,12090
18
18
  explainiverse/explainers/__init__.py,sha256=-ncRXbFKahH3bR0oXM2UQM4LtTdTlvdeprL6cHeqNBs,2549
@@ -39,7 +39,7 @@ explainiverse/explainers/gradient/smoothgrad.py,sha256=COIKZSFcApmMkA62M0AForHiY
39
39
  explainiverse/explainers/gradient/tcav.py,sha256=zc-8wMsc2ZOhUeSZNBJ6H6BPXlVMJ9DRcAMiL25wU9I,32242
40
40
  explainiverse/explainers/rule_based/__init__.py,sha256=gKzlFCAzwurAMLJcuYgal4XhDj1thteBGcaHWmN7iWk,243
41
41
  explainiverse/explainers/rule_based/anchors_wrapper.py,sha256=ML7W6aam-eMGZHy5ilol8qupZvNBJpYAFatEEPnuMyo,13254
42
- explainiverse-0.8.2.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
43
- explainiverse-0.8.2.dist-info/METADATA,sha256=QSLwIr4RmoHpxqIfoarJX17alA-0esXfdNa1cemWu5s,23770
44
- explainiverse-0.8.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
45
- explainiverse-0.8.2.dist-info/RECORD,,
42
+ explainiverse-0.8.4.dist-info/LICENSE,sha256=28rbHe8rJgmUlRdxJACfq1Sj-MtCEhyHxkJedQd1ZYA,1070
43
+ explainiverse-0.8.4.dist-info/METADATA,sha256=-NAqFPbZ_fOqstOEIHUP8CQLplzFqzGGdeVAoP3l7Fg,24894
44
+ explainiverse-0.8.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
45
+ explainiverse-0.8.4.dist-info/RECORD,,