explainiverse 0.1.0a1__tar.gz → 0.1.1a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/PKG-INFO +2 -1
  2. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/pyproject.toml +2 -1
  3. explainiverse-0.1.1a0/src/explainiverse/engine/suite.py +143 -0
  4. explainiverse-0.1.1a0/src/explainiverse/evaluation/metrics.py +233 -0
  5. explainiverse-0.1.1a0/src/explainiverse/explainers/__init__.py +0 -0
  6. explainiverse-0.1.1a0/src/explainiverse/explainers/attribution/__init__.py +0 -0
  7. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/LICENSE +0 -0
  8. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/README.md +0 -0
  9. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/__init__.py +0 -0
  10. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/adapters/__init__.py +0 -0
  11. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/adapters/base_adapter.py +0 -0
  12. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/adapters/sklearn_adapter.py +0 -0
  13. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/core/__init__.py +0 -0
  14. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/core/explainer.py +0 -0
  15. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/core/explanation.py +0 -0
  16. {explainiverse-0.1.0a1/src/explainiverse/explainers → explainiverse-0.1.1a0/src/explainiverse/engine}/__init__.py +0 -0
  17. {explainiverse-0.1.0a1/src/explainiverse/explainers/attribution → explainiverse-0.1.1a0/src/explainiverse/evaluation}/__init__.py +0 -0
  18. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/explainers/attribution/lime_wrapper.py +0 -0
  19. {explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/src/explainiverse/explainers/attribution/shap_wrapper.py +0 -0
{explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: explainiverse
- Version: 0.1.0a1
+ Version: 0.1.1a0
  Summary: Unified, extensible explainability framework supporting LIME, SHAP, and custom adapters
  Home-page: https://github.com/jemsbhai/explainiverse
  License: MIT
@@ -16,6 +16,7 @@ Requires-Dist: lime (>=0.2.0.1,<0.3.0.0)
  Requires-Dist: numpy (==1.24.4)
  Requires-Dist: scikit-learn (>=1.1,<1.4)
  Requires-Dist: shap (>=0.48.0,<0.49.0)
+ Requires-Dist: xgboost (>=3.0.2,<4.0.0)
  Project-URL: Repository, https://github.com/jemsbhai/explainiverse
  Description-Content-Type: text/markdown
 
{explainiverse-0.1.0a1 → explainiverse-0.1.1a0}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "explainiverse"
- version = "0.1.0a1"
+ version = "0.1.1a0"
  description = "Unified, extensible explainability framework supporting LIME, SHAP, and custom adapters"
  authors = ["Muntaser Syed <jemsbhai@gmail.com>"]
  license = "MIT"
@@ -15,6 +15,7 @@ numpy = "1.24.4"
  lime = "^0.2.0.1"
  scikit-learn = ">=1.1,<1.4"
  shap = "^0.48.0"
+ xgboost = "^3.0.2"
 
 
  [build-system]
explainiverse-0.1.1a0/src/explainiverse/engine/suite.py (new file)
@@ -0,0 +1,143 @@
+ # src/explainiverse/engine/suite.py
+
+ from explainiverse.core.explanation import Explanation
+ from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
+ from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+ from explainiverse.evaluation.metrics import compute_roar
+ from sklearn.metrics import accuracy_score
+ from sklearn.linear_model import LogisticRegression
+
+ class ExplanationSuite:
+     """
+     Runs multiple explainers on a single instance and compares their outputs.
+     """
+
+     def __init__(self, model, explainer_configs, data_meta=None):
+         """
+         Args:
+             model: a model adapter (e.g., SklearnAdapter)
+             explainer_configs: list of (name, kwargs) tuples for explainers
+             data_meta: optional metadata about the task, scope, or preference
+         """
+         self.model = model
+         self.configs = explainer_configs
+         self.data_meta = data_meta or {}
+         self.explanations = {}
+
+     def run(self, instance):
+         """
+         Run all configured explainers on a single instance.
+         """
+         for name, params in self.configs:
+             explainer = self._load_explainer(name, **params)
+             explanation = explainer.explain(instance)
+             self.explanations[name] = explanation
+         return self.explanations
+
+     def compare(self):
+         """
+         Print attribution scores side-by-side.
+         """
+         keys = set()
+         for explanation in self.explanations.values():
+             keys.update(explanation.explanation_data.get("feature_attributions", {}).keys())
+
+         print("\nSide-by-Side Comparison:")
+         for key in sorted(keys):
+             row = [f"{key}"]
+             for name in self.explanations:
+                 value = self.explanations[name].explanation_data.get("feature_attributions", {}).get(key, "—")
+                 row.append(f"{name}: {value:.4f}" if isinstance(value, float) else f"{name}: {value}")
+             print(" | ".join(row))
+
+     def suggest_best(self):
+         """
+         Suggest the best explainer based on model type, output structure, and task metadata.
+         """
+         if "task" in self.data_meta:
+             task = self.data_meta["task"]
+         else:
+             task = "unknown"
+
+         model = self.model.model
+
+         # 1. Regression: SHAP preferred due to consistent output
+         if task == "regression":
+             return "shap"
+
+         # 2. Model with `predict_proba` → SHAP handles probabilistic outputs well
+         if hasattr(model, "predict_proba"):
+             try:
+                 output = self.model.predict([[0] * model.n_features_in_])
+                 if output.shape[1] > 2:
+                     return "shap"  # Multi-class, SHAP more stable
+                 else:
+                     return "lime"  # Binary, both are okay
+             except Exception:
+                 return "shap"
+
+         # 3. Tree-based models → prefer SHAP (TreeSHAP if available)
+         if "tree" in str(type(model)).lower():
+             return "shap"
+
+         # 4. Default fallback
+         return "lime"
+
+     def _load_explainer(self, name, **kwargs):
+         if name == "lime":
+             return LimeExplainer(model=self.model, **kwargs)
+         elif name == "shap":
+             return ShapExplainer(model=self.model, **kwargs)
+         else:
+             raise ValueError(f"Unknown explainer: {name}")
+
+
+
+     def evaluate_roar(
+         self,
+         X_train,
+         y_train,
+         X_test,
+         y_test,
+         top_k: int = 2,
+         model_class=None,
+         model_kwargs: dict = None
+     ):
+         """
+         Evaluate each explainer using ROAR (Remove And Retrain).
+
+         Args:
+             X_train, y_train: training data
+             X_test, y_test: test data
+             top_k: number of features to mask
+             model_class: model constructor with .fit() and .predict() (default: same as current model)
+             model_kwargs: optional keyword args for new model instance
+
+         Returns:
+             Dict of {explainer_name: accuracy drop (baseline - retrained)}
+         """
+         from explainiverse.evaluation.metrics import compute_roar
+
+         model_kwargs = model_kwargs or {}
+
+         # Default to type(self.model.model) if not provided
+         if model_class is None:
+             model_class = type(self.model.model)
+
+         roar_scores = {}
+
+         for name, explanation in self.explanations.items():
+             print(f"[ROAR] Evaluating explainer: {name}")
+             roar = compute_roar(
+                 model_class=model_class,
+                 X_train=X_train,
+                 y_train=y_train,
+                 X_test=X_test,
+                 y_test=y_test,
+                 explanations=[explanation],  # single-instance for now
+                 top_k=top_k,
+                 model_kwargs=model_kwargs
+             )
+             roar_scores[name] = roar
+
+         return roar_scores
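The ExplanationSuite added above is the release's main new entry point. A minimal usage sketch follows; the SklearnAdapter constructor and the keyword arguments accepted by the LIME/SHAP wrappers are not shown in this diff, so both are assumed here, and the snippet should be read as illustrative rather than canonical.

# Hypothetical usage sketch: SklearnAdapter(clf) and the empty wrapper kwargs are
# assumptions, not confirmed by this diff.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from explainiverse.adapters.sklearn_adapter import SklearnAdapter
from explainiverse.engine.suite import ExplanationSuite

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = LogisticRegression(max_iter=5000).fit(X_train, y_train)
adapter = SklearnAdapter(clf)  # assumed: the adapter simply wraps the fitted estimator

suite = ExplanationSuite(
    model=adapter,
    explainer_configs=[("lime", {}), ("shap", {})],  # (name, kwargs) tuples consumed by run()
    data_meta={"task": "classification"},
)
suite.run(X_test[0])           # run every configured explainer on one instance
suite.compare()                # print attributions side by side
print(suite.suggest_best())    # heuristic recommendation: "lime" or "shap"

# ROAR: mask each explainer's top-k features, retrain, and report the accuracy drop
print(suite.evaluate_roar(X_train, y_train, X_test, y_test, top_k=2))

Note that suggest_best() falls back to "lime" unless the task metadata or the model type points to SHAP, and evaluate_roar() simply delegates to compute_roar from the new evaluation module below, one explanation at a time.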
explainiverse-0.1.1a0/src/explainiverse/evaluation/metrics.py (new file)
@@ -0,0 +1,233 @@
+ import numpy as np
+ from explainiverse.core.explanation import Explanation
+ from sklearn.metrics import accuracy_score
+ import copy
+
+
+ def compute_aopc(
+     model,
+     instance: np.ndarray,
+     explanation: Explanation,
+     num_steps: int = 10,
+     baseline_value: float = 0.0
+ ) -> float:
+     """
+     Computes Area Over the Perturbation Curve (AOPC) by iteratively removing top features.
+
+     Args:
+         model: wrapped model with .predict() method
+         instance: input sample (1D array)
+         explanation: Explanation object
+         num_steps: number of top features to remove
+         baseline_value: value to replace removed features with (e.g., 0, mean)
+
+     Returns:
+         AOPC score (higher means explanation is more faithful)
+     """
+     base_pred = model.predict(instance.reshape(1, -1))[0]
+     attributions = explanation.explanation_data.get("feature_attributions", {})
+
+     if not attributions:
+         raise ValueError("No feature attributions found in explanation.")
+
+     # Sort features by abs importance
+     sorted_features = sorted(
+         attributions.items(),
+         key=lambda x: abs(x[1]),
+         reverse=True
+     )
+
+     # Try to map feature names to indices
+     feature_indices = []
+     for i, (fname, _) in enumerate(sorted_features):
+         try:
+             idx = explanation.feature_names.index(fname)
+         except Exception:
+             idx = i  # fallback: assume order
+         feature_indices.append(idx)
+
+     deltas = []
+     modified = instance.copy()
+
+     for i in range(min(num_steps, len(feature_indices))):
+         idx = feature_indices[i]
+         modified[idx] = baseline_value
+         new_pred = model.predict(modified.reshape(1, -1))[0]
+         delta = abs(base_pred - new_pred)
+         deltas.append(delta)
+
+     return np.mean(deltas)
+
+
+ def compute_batch_aopc(
+     model,
+     X: np.ndarray,
+     explanations: dict,
+     num_steps: int = 10,
+     baseline_value: float = 0.0
+ ) -> dict:
+     """
+     Compute average AOPC for multiple explainers over a batch of instances.
+
+     Args:
+         model: wrapped model
+         X: 2D input array
+         explanations: dict of {explainer_name: list of Explanation objects}
+         num_steps: number of top features to remove
+         baseline_value: value to replace features with
+
+     Returns:
+         Dict of {explainer_name: mean AOPC score}
+     """
+     results = {}
+
+     for explainer_name, expl_list in explanations.items():
+         scores = []
+         for i, exp in enumerate(expl_list):
+             instance = X[i]
+             score = compute_aopc(model, instance, exp, num_steps, baseline_value)
+             scores.append(score)
+         results[explainer_name] = np.mean(scores)
+
+     return results
+
+
+ def compute_roar(
+     model_class,
+     X_train: np.ndarray,
+     y_train: np.ndarray,
+     X_test: np.ndarray,
+     y_test: np.ndarray,
+     explanations: list,
+     top_k: int = 3,
+     baseline_value: float = 0.0,
+     model_kwargs: dict = None
+ ) -> float:
+     """
+     Compute ROAR (Remove And Retrain) using top-k important features from explanations.
+
+     Args:
+         model_class: uninstantiated model class (e.g. LogisticRegression)
+         X_train: full training data
+         y_train: training labels
+         X_test: test features
+         y_test: test labels
+         explanations: list of Explanation objects (one per train instance)
+         top_k: number of top features to remove
+         baseline_value: what to set removed features to
+         model_kwargs: optional kwargs to pass to model_class
+
+     Returns:
+         Accuracy drop (baseline_acc - retrained_acc)
+     """
+     model_kwargs = model_kwargs or {}
+
+     # Baseline model
+     baseline_model = model_class(**model_kwargs)
+     baseline_model.fit(X_train, y_train)
+     baseline_preds = baseline_model.predict(X_test)
+     baseline_acc = accuracy_score(y_test, baseline_preds)
+
+     # Compute top-k feature indices from attributions (use mode)
+     feature_counts = {}
+     for exp in explanations:
+         for fname, val in sorted(exp.explanation_data["feature_attributions"].items(), key=lambda x: abs(x[1]), reverse=True)[:top_k]:
+             try:
+                 idx = exp.feature_names.index(fname)
+                 feature_counts[idx] = feature_counts.get(idx, 0) + 1
+             except:
+                 continue
+
+     top_features = sorted(feature_counts.items(), key=lambda x: x[1], reverse=True)[:top_k]
+     top_feature_indices = [idx for idx, _ in top_features]
+
+     # Remove top-k from training and test data
+     X_train_mod = copy.deepcopy(X_train)
+     X_test_mod = copy.deepcopy(X_test)
+
+     # Prepare feature-wise baselines
+     # Compute or assign feature-wise baseline values
+     if not isinstance(
+         baseline_value,
+         (str, float, int, np.number, np.ndarray)
+     ) and not callable(baseline_value):
+         raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
+
+     if isinstance(baseline_value, str):
+         if baseline_value == "mean":
+             feature_baseline = np.mean(X_train, axis=0)
+         elif baseline_value == "median":
+             feature_baseline = np.median(X_train, axis=0)
+         else:
+             raise ValueError(f"Unsupported string baseline: {baseline_value}")
+     elif callable(baseline_value):
+         feature_baseline = baseline_value(X_train)
+     elif isinstance(baseline_value, np.ndarray):
+         if baseline_value.shape != (X_train.shape[1],):
+             raise ValueError("baseline_value ndarray must match number of features")
+         feature_baseline = baseline_value
+     elif isinstance(baseline_value, (float, int, np.number)):
+         feature_baseline = np.full(X_train.shape[1], baseline_value)
+     else:
+         raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
+
+     for idx in top_feature_indices:
+         X_train_mod[:, idx] = feature_baseline[idx]
+         X_test_mod[:, idx] = feature_baseline[idx]
+         # X_train_mod[:, idx] = baseline_value
+         # X_test_mod[:, idx] = baseline_value
+
+     # Retrain and evaluate
+     retrained_model = model_class(**model_kwargs)
+     retrained_model.fit(X_train_mod, y_train)
+     retrained_preds = retrained_model.predict(X_test_mod)
+     retrained_acc = accuracy_score(y_test, retrained_preds)
+
+     return baseline_acc - retrained_acc
+
+
+ def compute_roar_curve(
+     model_class,
+     X_train,
+     y_train,
+     X_test,
+     y_test,
+     explanations,
+     max_k=5,
+     baseline_value="mean",
+     model_kwargs=None
+ ) -> dict:
+     """
+     Compute ROAR accuracy drops across a range of top-k features removed.
+
+     Args:
+         model_class: model type (e.g. LogisticRegression)
+         X_train, y_train, X_test, y_test: full dataset
+         explanations: list of Explanation objects
+         max_k: maximum top-k to try
+         baseline_value: string, scalar, ndarray, or callable
+         model_kwargs: passed to model class
+
+     Returns:
+         Dict of {k: accuracy drop} for k in 1..max_k
+     """
+     from copy import deepcopy
+
+     model_kwargs = model_kwargs or {}
+     curve = {}
+
+     for k in range(1, max_k + 1):
+         acc_drop = compute_roar(
+             model_class=model_class,
+             X_train=deepcopy(X_train),
+             y_train=deepcopy(y_train),
+             X_test=deepcopy(X_test),
+             y_test=deepcopy(y_test),
+             explanations=deepcopy(explanations),
+             top_k=k,
+             baseline_value=baseline_value,
+             model_kwargs=deepcopy(model_kwargs)
+         )
+         curve[k] = acc_drop
+
+     return curve
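compute_aopc only needs a model exposing .predict() and an explanation exposing feature_names and explanation_data["feature_attributions"], so it can be exercised without the rest of the pipeline. The sketch below is illustrative: a plain LogisticRegression is passed directly (the docstring anticipates a wrapped adapter), and types.SimpleNamespace stands in for the library's Explanation class with fabricated attribution values.

# Illustrative AOPC check with fabricated attributions; SimpleNamespace is a stand-in
# for explainiverse's Explanation class, exposing only the attributes compute_aopc reads.
from types import SimpleNamespace

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

from explainiverse.evaluation.metrics import compute_aopc

X, y = load_breast_cancer(return_X_y=True)
clf = LogisticRegression(max_iter=5000).fit(X, y)

feature_names = [f"f{i}" for i in range(X.shape[1])]
fake_explanation = SimpleNamespace(
    feature_names=feature_names,
    explanation_data={"feature_attributions": {n: 1.0 / (i + 1) for i, n in enumerate(feature_names)}},
)

# Replace the 5 "most important" features with 0.0 one at a time and average the prediction change
score = compute_aopc(clf, X[0], fake_explanation, num_steps=5, baseline_value=0.0)
print(score)

With a classifier, .predict() returns labels, so each delta is either 0 or the label change; a higher mean means the masked features flipped the prediction more often.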
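compute_roar and compute_roar_curve retrain model_class from scratch after masking the consensus top-k features, so they can be smoke-tested with the same kind of stand-in explanations. The snippet below is a sketch under that assumption; it fabricates a fixed attribution ranking and uses baseline_value="mean" to exercise the branch that replaces masked columns with their training means.

# Illustrative ROAR smoke test; SimpleNamespace again stands in for the Explanation class.
from types import SimpleNamespace

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from explainiverse.evaluation.metrics import compute_roar, compute_roar_curve

X, y = make_classification(n_samples=400, n_features=6, n_informative=3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

feature_names = [f"f{i}" for i in range(X.shape[1])]
attributions = {n: 1.0 / (i + 1) for i, n in enumerate(feature_names)}  # feature 0 ranked highest
explanations = [
    SimpleNamespace(feature_names=feature_names,
                    explanation_data={"feature_attributions": attributions})
    for _ in range(len(X_train))  # the docstring expects one explanation per training instance
]

# Accuracy drop after masking the top-2 consensus features with their training-set means
drop = compute_roar(LogisticRegression, X_train, y_train, X_test, y_test,
                    explanations, top_k=2, baseline_value="mean",
                    model_kwargs={"max_iter": 1000})

# Accuracy drop as a function of k = 1..4
curve = compute_roar_curve(LogisticRegression, X_train, y_train, X_test, y_test,
                           explanations, max_k=4, baseline_value="mean",
                           model_kwargs={"max_iter": 1000})
print(drop, curve)

Per the code above, baseline_value may also be "median", a scalar, a per-feature ndarray, or a callable applied to X_train, so the masking strategy can be swapped without changing the explanations.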