explainiverse 0.1.0a1__tar.gz → 0.1.1a0__tar.gz
This diff shows the changes between the two publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/PKG-INFO +2 -1
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/pyproject.toml +2 -1
- explainiverse-0.1.1a0/src/explainiverse/engine/suite.py +143 -0
- explainiverse-0.1.1a0/src/explainiverse/evaluation/metrics.py +233 -0
- explainiverse-0.1.1a0/src/explainiverse/explainers/__init__.py +0 -0
- explainiverse-0.1.1a0/src/explainiverse/explainers/attribution/__init__.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/LICENSE +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/README.md +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/__init__.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/adapters/__init__.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/adapters/base_adapter.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/adapters/sklearn_adapter.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/core/__init__.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/core/explainer.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/core/explanation.py +0 -0
- {explainiverse-0.1.0a1/src/explainiverse/explainers → explainiverse-0.1.1a0/src/explainiverse/engine}/__init__.py +0 -0
- {explainiverse-0.1.0a1/src/explainiverse/explainers/attribution → explainiverse-0.1.1a0/src/explainiverse/evaluation}/__init__.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/explainers/attribution/lime_wrapper.py +0 -0
- {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/explainers/attribution/shap_wrapper.py +0 -0
{explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: explainiverse
-Version: 0.1.0a1
+Version: 0.1.1a0
 Summary: Unified, extensible explainability framework supporting LIME, SHAP, and custom adapters
 Home-page: https://github.com/jemsbhai/explainiverse
 License: MIT
@@ -16,6 +16,7 @@ Requires-Dist: lime (>=0.2.0.1,<0.3.0.0)
 Requires-Dist: numpy (==1.24.4)
 Requires-Dist: scikit-learn (>=1.1,<1.4)
 Requires-Dist: shap (>=0.48.0,<0.49.0)
+Requires-Dist: xgboost (>=3.0.2,<4.0.0)
 Project-URL: Repository, https://github.com/jemsbhai/explainiverse
 Description-Content-Type: text/markdown
{explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "explainiverse"
-version = "0.1.0a1"
+version = "0.1.1a0"
 description = "Unified, extensible explainability framework supporting LIME, SHAP, and custom adapters"
 authors = ["Muntaser Syed <jemsbhai@gmail.com>"]
 license = "MIT"
@@ -15,6 +15,7 @@ numpy = "1.24.4"
 lime = "^0.2.0.1"
 scikit-learn = ">=1.1,<1.4"
 shap = "^0.48.0"
+xgboost = "^3.0.2"


 [build-system]
explainiverse-0.1.1a0/src/explainiverse/engine/suite.py (new file)
@@ -0,0 +1,143 @@
+# src/explainiverse/engine/suite.py
+
+from explainiverse.core.explanation import Explanation
+from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
+from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+from explainiverse.evaluation.metrics import compute_roar
+from sklearn.metrics import accuracy_score
+from sklearn.linear_model import LogisticRegression
+
+class ExplanationSuite:
+    """
+    Runs multiple explainers on a single instance and compares their outputs.
+    """
+
+    def __init__(self, model, explainer_configs, data_meta=None):
+        """
+        Args:
+            model: a model adapter (e.g., SklearnAdapter)
+            explainer_configs: list of (name, kwargs) tuples for explainers
+            data_meta: optional metadata about the task, scope, or preference
+        """
+        self.model = model
+        self.configs = explainer_configs
+        self.data_meta = data_meta or {}
+        self.explanations = {}
+
+    def run(self, instance):
+        """
+        Run all configured explainers on a single instance.
+        """
+        for name, params in self.configs:
+            explainer = self._load_explainer(name, **params)
+            explanation = explainer.explain(instance)
+            self.explanations[name] = explanation
+        return self.explanations
+
+    def compare(self):
+        """
+        Print attribution scores side-by-side.
+        """
+        keys = set()
+        for explanation in self.explanations.values():
+            keys.update(explanation.explanation_data.get("feature_attributions", {}).keys())
+
+        print("\nSide-by-Side Comparison:")
+        for key in sorted(keys):
+            row = [f"{key}"]
+            for name in self.explanations:
+                value = self.explanations[name].explanation_data.get("feature_attributions", {}).get(key, "—")
+                row.append(f"{name}: {value:.4f}" if isinstance(value, float) else f"{name}: {value}")
+            print(" | ".join(row))
+
+    def suggest_best(self):
+        """
+        Suggest the best explainer based on model type, output structure, and task metadata.
+        """
+        if "task" in self.data_meta:
+            task = self.data_meta["task"]
+        else:
+            task = "unknown"
+
+        model = self.model.model
+
+        # 1. Regression: SHAP preferred due to consistent output
+        if task == "regression":
+            return "shap"
+
+        # 2. Model with `predict_proba` → SHAP handles probabilistic outputs well
+        if hasattr(model, "predict_proba"):
+            try:
+                output = self.model.predict([[0] * model.n_features_in_])
+                if output.shape[1] > 2:
+                    return "shap"  # Multi-class, SHAP more stable
+                else:
+                    return "lime"  # Binary, both are okay
+            except Exception:
+                return "shap"
+
+        # 3. Tree-based models → prefer SHAP (TreeSHAP if available)
+        if "tree" in str(type(model)).lower():
+            return "shap"
+
+        # 4. Default fallback
+        return "lime"
+
+    def _load_explainer(self, name, **kwargs):
+        if name == "lime":
+            return LimeExplainer(model=self.model, **kwargs)
+        elif name == "shap":
+            return ShapExplainer(model=self.model, **kwargs)
+        else:
+            raise ValueError(f"Unknown explainer: {name}")
+
+
+
+    def evaluate_roar(
+        self,
+        X_train,
+        y_train,
+        X_test,
+        y_test,
+        top_k: int = 2,
+        model_class=None,
+        model_kwargs: dict = None
+    ):
+        """
+        Evaluate each explainer using ROAR (Remove And Retrain).
+
+        Args:
+            X_train, y_train: training data
+            X_test, y_test: test data
+            top_k: number of features to mask
+            model_class: model constructor with .fit() and .predict() (default: same as current model)
+            model_kwargs: optional keyword args for new model instance
+
+        Returns:
+            Dict of {explainer_name: accuracy drop (baseline - retrained)}
+        """
+        from explainiverse.evaluation.metrics import compute_roar
+
+        model_kwargs = model_kwargs or {}
+
+        # Default to type(self.model.model) if not provided
+        if model_class is None:
+            model_class = type(self.model.model)
+
+        roar_scores = {}
+
+        for name, explanation in self.explanations.items():
+            print(f"[ROAR] Evaluating explainer: {name}")
+            roar = compute_roar(
+                model_class=model_class,
+                X_train=X_train,
+                y_train=y_train,
+                X_test=X_test,
+                y_test=y_test,
+                explanations=[explanation],  # single-instance for now
+                top_k=top_k,
+                model_kwargs=model_kwargs
+            )
+            roar_scores[name] = roar
+
+        return roar_scores
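For orientation, a minimal usage sketch of the new ExplanationSuite follows. Only the suite API shown in the diff above is taken as given; the SklearnAdapter constructor call, the empty explainer kwargs, and the iris dataset are assumptions made for illustration.

    # Hypothetical usage sketch; adapter constructor and explainer kwargs are assumed.
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    from explainiverse.adapters.sklearn_adapter import SklearnAdapter
    from explainiverse.engine.suite import ExplanationSuite

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
    adapter = SklearnAdapter(model)  # assumed constructor: wraps a fitted estimator

    suite = ExplanationSuite(
        model=adapter,
        explainer_configs=[("lime", {}), ("shap", {})],  # (name, kwargs) tuples
        data_meta={"task": "classification"},
    )
    suite.run(X_test[0])           # explain one instance with every configured explainer
    suite.compare()                # print attributions side by side
    print(suite.suggest_best())    # heuristic choice between "lime" and "shap"

    # ROAR: retrain with the top-2 features masked and report the accuracy drop
    print(suite.evaluate_roar(X_train, y_train, X_test, y_test, top_k=2))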
explainiverse-0.1.1a0/src/explainiverse/evaluation/metrics.py (new file)
@@ -0,0 +1,233 @@
+import numpy as np
+from explainiverse.core.explanation import Explanation
+from sklearn.metrics import accuracy_score
+import copy
+
+
+def compute_aopc(
+    model,
+    instance: np.ndarray,
+    explanation: Explanation,
+    num_steps: int = 10,
+    baseline_value: float = 0.0
+) -> float:
+    """
+    Computes Area Over the Perturbation Curve (AOPC) by iteratively removing top features.
+
+    Args:
+        model: wrapped model with .predict() method
+        instance: input sample (1D array)
+        explanation: Explanation object
+        num_steps: number of top features to remove
+        baseline_value: value to replace removed features with (e.g., 0, mean)
+
+    Returns:
+        AOPC score (higher means explanation is more faithful)
+    """
+    base_pred = model.predict(instance.reshape(1, -1))[0]
+    attributions = explanation.explanation_data.get("feature_attributions", {})
+
+    if not attributions:
+        raise ValueError("No feature attributions found in explanation.")
+
+    # Sort features by abs importance
+    sorted_features = sorted(
+        attributions.items(),
+        key=lambda x: abs(x[1]),
+        reverse=True
+    )
+
+    # Try to map feature names to indices
+    feature_indices = []
+    for i, (fname, _) in enumerate(sorted_features):
+        try:
+            idx = explanation.feature_names.index(fname)
+        except Exception:
+            idx = i  # fallback: assume order
+        feature_indices.append(idx)
+
+    deltas = []
+    modified = instance.copy()
+
+    for i in range(min(num_steps, len(feature_indices))):
+        idx = feature_indices[i]
+        modified[idx] = baseline_value
+        new_pred = model.predict(modified.reshape(1, -1))[0]
+        delta = abs(base_pred - new_pred)
+        deltas.append(delta)
+
+    return np.mean(deltas)
+
+
+def compute_batch_aopc(
+    model,
+    X: np.ndarray,
+    explanations: dict,
+    num_steps: int = 10,
+    baseline_value: float = 0.0
+) -> dict:
+    """
+    Compute average AOPC for multiple explainers over a batch of instances.
+
+    Args:
+        model: wrapped model
+        X: 2D input array
+        explanations: dict of {explainer_name: list of Explanation objects}
+        num_steps: number of top features to remove
+        baseline_value: value to replace features with
+
+    Returns:
+        Dict of {explainer_name: mean AOPC score}
+    """
+    results = {}
+
+    for explainer_name, expl_list in explanations.items():
+        scores = []
+        for i, exp in enumerate(expl_list):
+            instance = X[i]
+            score = compute_aopc(model, instance, exp, num_steps, baseline_value)
+            scores.append(score)
+        results[explainer_name] = np.mean(scores)
+
+    return results
+
+
+def compute_roar(
+    model_class,
+    X_train: np.ndarray,
+    y_train: np.ndarray,
+    X_test: np.ndarray,
+    y_test: np.ndarray,
+    explanations: list,
+    top_k: int = 3,
+    baseline_value: float = 0.0,
+    model_kwargs: dict = None
+) -> float:
+    """
+    Compute ROAR (Remove And Retrain) using top-k important features from explanations.
+
+    Args:
+        model_class: uninstantiated model class (e.g. LogisticRegression)
+        X_train: full training data
+        y_train: training labels
+        X_test: test features
+        y_test: test labels
+        explanations: list of Explanation objects (one per train instance)
+        top_k: number of top features to remove
+        baseline_value: what to set removed features to
+        model_kwargs: optional kwargs to pass to model_class
+
+    Returns:
+        Accuracy drop (baseline_acc - retrained_acc)
+    """
+    model_kwargs = model_kwargs or {}
+
+    # Baseline model
+    baseline_model = model_class(**model_kwargs)
+    baseline_model.fit(X_train, y_train)
+    baseline_preds = baseline_model.predict(X_test)
+    baseline_acc = accuracy_score(y_test, baseline_preds)
+
+    # Compute top-k feature indices from attributions (use mode)
+    feature_counts = {}
+    for exp in explanations:
+        for fname, val in sorted(exp.explanation_data["feature_attributions"].items(), key=lambda x: abs(x[1]), reverse=True)[:top_k]:
+            try:
+                idx = exp.feature_names.index(fname)
+                feature_counts[idx] = feature_counts.get(idx, 0) + 1
+            except:
+                continue
+
+    top_features = sorted(feature_counts.items(), key=lambda x: x[1], reverse=True)[:top_k]
+    top_feature_indices = [idx for idx, _ in top_features]
+
+    # Remove top-k from training and test data
+    X_train_mod = copy.deepcopy(X_train)
+    X_test_mod = copy.deepcopy(X_test)
+
+    # Prepare feature-wise baselines
+    # Compute or assign feature-wise baseline values
+    if not isinstance(
+        baseline_value,
+        (str, float, int, np.number, np.ndarray)
+    ) and not callable(baseline_value):
+        raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
+
+    if isinstance(baseline_value, str):
+        if baseline_value == "mean":
+            feature_baseline = np.mean(X_train, axis=0)
+        elif baseline_value == "median":
+            feature_baseline = np.median(X_train, axis=0)
+        else:
+            raise ValueError(f"Unsupported string baseline: {baseline_value}")
+    elif callable(baseline_value):
+        feature_baseline = baseline_value(X_train)
+    elif isinstance(baseline_value, np.ndarray):
+        if baseline_value.shape != (X_train.shape[1],):
+            raise ValueError("baseline_value ndarray must match number of features")
+        feature_baseline = baseline_value
+    elif isinstance(baseline_value, (float, int, np.number)):
+        feature_baseline = np.full(X_train.shape[1], baseline_value)
+    else:
+        raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
+
+    for idx in top_feature_indices:
+        X_train_mod[:, idx] = feature_baseline[idx]
+        X_test_mod[:, idx] = feature_baseline[idx]
+        # X_train_mod[:, idx] = baseline_value
+        # X_test_mod[:, idx] = baseline_value
+
+    # Retrain and evaluate
+    retrained_model = model_class(**model_kwargs)
+    retrained_model.fit(X_train_mod, y_train)
+    retrained_preds = retrained_model.predict(X_test_mod)
+    retrained_acc = accuracy_score(y_test, retrained_preds)
+
+    return baseline_acc - retrained_acc
+
+
+def compute_roar_curve(
+    model_class,
+    X_train,
+    y_train,
+    X_test,
+    y_test,
+    explanations,
+    max_k=5,
+    baseline_value="mean",
+    model_kwargs=None
+) -> dict:
+    """
+    Compute ROAR accuracy drops across a range of top-k features removed.
+
+    Args:
+        model_class: model type (e.g. LogisticRegression)
+        X_train, y_train, X_test, y_test: full dataset
+        explanations: list of Explanation objects
+        max_k: maximum top-k to try
+        baseline_value: string, scalar, ndarray, or callable
+        model_kwargs: passed to model class
+
+    Returns:
+        Dict of {k: accuracy drop} for k in 1..max_k
+    """
+    from copy import deepcopy
+
+    model_kwargs = model_kwargs or {}
+    curve = {}
+
+    for k in range(1, max_k + 1):
+        acc_drop = compute_roar(
+            model_class=model_class,
+            X_train=deepcopy(X_train),
+            y_train=deepcopy(y_train),
+            X_test=deepcopy(X_test),
+            y_test=deepcopy(y_test),
+            explanations=deepcopy(explanations),
+            top_k=k,
+            baseline_value=baseline_value,
+            model_kwargs=deepcopy(model_kwargs)
+        )
+        curve[k] = acc_drop
+
+    return curve
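A minimal sketch of driving compute_roar_curve directly. The Explanation constructor is not part of this diff, so the sketch uses a hypothetical stand-in object exposing only the two attributes the metrics read (explanation_data and feature_names); the synthetic data, feature names, and attribution values are illustrative.

    # Sketch: calling compute_roar_curve with a stand-in for Explanation objects.
    import numpy as np
    from dataclasses import dataclass
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    from explainiverse.evaluation.metrics import compute_roar_curve

    @dataclass
    class FakeExplanation:  # stand-in, not the library's Explanation class
        explanation_data: dict
        feature_names: list

    X, y = make_classification(n_samples=400, n_features=6, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    names = [f"f{i}" for i in range(X.shape[1])]

    # Pretend every training instance attributes most importance to f0 and f3.
    explanations = [
        FakeExplanation({"feature_attributions": {"f0": 0.9, "f3": 0.7, "f1": 0.1}}, names)
        for _ in range(len(X_train))
    ]

    curve = compute_roar_curve(
        model_class=LogisticRegression,
        X_train=X_train, y_train=y_train,
        X_test=X_test, y_test=y_test,
        explanations=explanations,
        max_k=3,
        baseline_value="mean",
        model_kwargs={"max_iter": 1000},
    )
    print(curve)  # {1: drop, 2: drop, 3: drop}; larger drops suggest more faithful attributions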