explainiverse 0.1.1a1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. explainiverse/__init__.py +45 -1
  2. explainiverse/adapters/__init__.py +9 -0
  3. explainiverse/adapters/base_adapter.py +25 -25
  4. explainiverse/adapters/sklearn_adapter.py +32 -32
  5. explainiverse/core/__init__.py +22 -0
  6. explainiverse/core/explainer.py +31 -31
  7. explainiverse/core/explanation.py +24 -24
  8. explainiverse/core/registry.py +545 -0
  9. explainiverse/engine/__init__.py +8 -0
  10. explainiverse/engine/suite.py +142 -142
  11. explainiverse/evaluation/__init__.py +8 -0
  12. explainiverse/evaluation/metrics.py +232 -232
  13. explainiverse/explainers/__init__.py +38 -0
  14. explainiverse/explainers/attribution/__init__.py +9 -0
  15. explainiverse/explainers/attribution/lime_wrapper.py +90 -63
  16. explainiverse/explainers/attribution/shap_wrapper.py +89 -66
  17. explainiverse/explainers/counterfactual/__init__.py +8 -0
  18. explainiverse/explainers/counterfactual/dice_wrapper.py +302 -0
  19. explainiverse/explainers/global_explainers/__init__.py +23 -0
  20. explainiverse/explainers/global_explainers/ale.py +191 -0
  21. explainiverse/explainers/global_explainers/partial_dependence.py +192 -0
  22. explainiverse/explainers/global_explainers/permutation_importance.py +123 -0
  23. explainiverse/explainers/global_explainers/sage.py +164 -0
  24. explainiverse/explainers/rule_based/__init__.py +8 -0
  25. explainiverse/explainers/rule_based/anchors_wrapper.py +350 -0
  26. explainiverse-0.2.0.dist-info/METADATA +264 -0
  27. explainiverse-0.2.0.dist-info/RECORD +29 -0
  28. explainiverse-0.1.1a1.dist-info/METADATA +0 -128
  29. explainiverse-0.1.1a1.dist-info/RECORD +0 -19
  30. {explainiverse-0.1.1a1.dist-info → explainiverse-0.2.0.dist-info}/LICENSE +0 -0
  31. {explainiverse-0.1.1a1.dist-info → explainiverse-0.2.0.dist-info}/WHEEL +0 -0
explainiverse/evaluation/metrics.py
@@ -1,233 +1,233 @@
- import numpy as np
- from explainiverse.core.explanation import Explanation
- from sklearn.metrics import accuracy_score
- import copy
-
-
- def compute_aopc(
-     model,
-     instance: np.ndarray,
-     explanation: Explanation,
-     num_steps: int = 10,
-     baseline_value: float = 0.0
- ) -> float:
-     """
-     Computes Area Over the Perturbation Curve (AOPC) by iteratively removing top features.
-
-     Args:
-         model: wrapped model with .predict() method
-         instance: input sample (1D array)
-         explanation: Explanation object
-         num_steps: number of top features to remove
-         baseline_value: value to replace removed features with (e.g., 0, mean)
-
-     Returns:
-         AOPC score (higher means explanation is more faithful)
-     """
-     base_pred = model.predict(instance.reshape(1, -1))[0]
-     attributions = explanation.explanation_data.get("feature_attributions", {})
-
-     if not attributions:
-         raise ValueError("No feature attributions found in explanation.")
-
-     # Sort features by abs importance
-     sorted_features = sorted(
-         attributions.items(),
-         key=lambda x: abs(x[1]),
-         reverse=True
-     )
-
-     # Try to map feature names to indices
-     feature_indices = []
-     for i, (fname, _) in enumerate(sorted_features):
-         try:
-             idx = explanation.feature_names.index(fname)
-         except Exception:
-             idx = i  # fallback: assume order
-         feature_indices.append(idx)
-
-     deltas = []
-     modified = instance.copy()
-
-     for i in range(min(num_steps, len(feature_indices))):
-         idx = feature_indices[i]
-         modified[idx] = baseline_value
-         new_pred = model.predict(modified.reshape(1, -1))[0]
-         delta = abs(base_pred - new_pred)
-         deltas.append(delta)
-
-     return np.mean(deltas)
-
-
- def compute_batch_aopc(
-     model,
-     X: np.ndarray,
-     explanations: dict,
-     num_steps: int = 10,
-     baseline_value: float = 0.0
- ) -> dict:
-     """
-     Compute average AOPC for multiple explainers over a batch of instances.
-
-     Args:
-         model: wrapped model
-         X: 2D input array
-         explanations: dict of {explainer_name: list of Explanation objects}
-         num_steps: number of top features to remove
-         baseline_value: value to replace features with
-
-     Returns:
-         Dict of {explainer_name: mean AOPC score}
-     """
-     results = {}
-
-     for explainer_name, expl_list in explanations.items():
-         scores = []
-         for i, exp in enumerate(expl_list):
-             instance = X[i]
-             score = compute_aopc(model, instance, exp, num_steps, baseline_value)
-             scores.append(score)
-         results[explainer_name] = np.mean(scores)
-
-     return results
-
-
- def compute_roar(
-     model_class,
-     X_train: np.ndarray,
-     y_train: np.ndarray,
-     X_test: np.ndarray,
-     y_test: np.ndarray,
-     explanations: list,
-     top_k: int = 3,
-     baseline_value: float = 0.0,
-     model_kwargs: dict = None
- ) -> float:
-     """
-     Compute ROAR (Remove And Retrain) using top-k important features from explanations.
-
-     Args:
-         model_class: uninstantiated model class (e.g. LogisticRegression)
-         X_train: full training data
-         y_train: training labels
-         X_test: test features
-         y_test: test labels
-         explanations: list of Explanation objects (one per train instance)
-         top_k: number of top features to remove
-         baseline_value: what to set removed features to
-         model_kwargs: optional kwargs to pass to model_class
-
-     Returns:
-         Accuracy drop (baseline_acc - retrained_acc)
-     """
-     model_kwargs = model_kwargs or {}
-
-     # Baseline model
-     baseline_model = model_class(**model_kwargs)
-     baseline_model.fit(X_train, y_train)
-     baseline_preds = baseline_model.predict(X_test)
-     baseline_acc = accuracy_score(y_test, baseline_preds)
-
-     # Compute top-k feature indices from attributions (use mode)
-     feature_counts = {}
-     for exp in explanations:
-         for fname, val in sorted(exp.explanation_data["feature_attributions"].items(), key=lambda x: abs(x[1]), reverse=True)[:top_k]:
-             try:
-                 idx = exp.feature_names.index(fname)
-                 feature_counts[idx] = feature_counts.get(idx, 0) + 1
-             except:
-                 continue
-
-     top_features = sorted(feature_counts.items(), key=lambda x: x[1], reverse=True)[:top_k]
-     top_feature_indices = [idx for idx, _ in top_features]
-
-     # Remove top-k from training and test data
-     X_train_mod = copy.deepcopy(X_train)
-     X_test_mod = copy.deepcopy(X_test)
-
-     # Prepare feature-wise baselines
-     # Compute or assign feature-wise baseline values
-     if not isinstance(
-         baseline_value,
-         (str, float, int, np.number, np.ndarray)
-     ) and not callable(baseline_value):
-         raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
-
-     if isinstance(baseline_value, str):
-         if baseline_value == "mean":
-             feature_baseline = np.mean(X_train, axis=0)
-         elif baseline_value == "median":
-             feature_baseline = np.median(X_train, axis=0)
-         else:
-             raise ValueError(f"Unsupported string baseline: {baseline_value}")
-     elif callable(baseline_value):
-         feature_baseline = baseline_value(X_train)
-     elif isinstance(baseline_value, np.ndarray):
-         if baseline_value.shape != (X_train.shape[1],):
-             raise ValueError("baseline_value ndarray must match number of features")
-         feature_baseline = baseline_value
-     elif isinstance(baseline_value, (float, int, np.number)):
-         feature_baseline = np.full(X_train.shape[1], baseline_value)
-     else:
-         raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
-
-     for idx in top_feature_indices:
-         X_train_mod[:, idx] = feature_baseline[idx]
-         X_test_mod[:, idx] = feature_baseline[idx]
-         # X_train_mod[:, idx] = baseline_value
-         # X_test_mod[:, idx] = baseline_value
-
-     # Retrain and evaluate
-     retrained_model = model_class(**model_kwargs)
-     retrained_model.fit(X_train_mod, y_train)
-     retrained_preds = retrained_model.predict(X_test_mod)
-     retrained_acc = accuracy_score(y_test, retrained_preds)
-
-     return baseline_acc - retrained_acc
-
-
- def compute_roar_curve(
-     model_class,
-     X_train,
-     y_train,
-     X_test,
-     y_test,
-     explanations,
-     max_k=5,
-     baseline_value="mean",
-     model_kwargs=None
- ) -> dict:
-     """
-     Compute ROAR accuracy drops across a range of top-k features removed.
-
-     Args:
-         model_class: model type (e.g. LogisticRegression)
-         X_train, y_train, X_test, y_test: full dataset
-         explanations: list of Explanation objects
-         max_k: maximum top-k to try
-         baseline_value: string, scalar, ndarray, or callable
-         model_kwargs: passed to model class
-
-     Returns:
-         Dict of {k: accuracy drop} for k in 1..max_k
-     """
-     from copy import deepcopy
-
-     model_kwargs = model_kwargs or {}
-     curve = {}
-
-     for k in range(1, max_k + 1):
-         acc_drop = compute_roar(
-             model_class=model_class,
-             X_train=deepcopy(X_train),
-             y_train=deepcopy(y_train),
-             X_test=deepcopy(X_test),
-             y_test=deepcopy(y_test),
-             explanations=deepcopy(explanations),
-             top_k=k,
-             baseline_value=baseline_value,
-             model_kwargs=deepcopy(model_kwargs)
-         )
-         curve[k] = acc_drop
-
+ import numpy as np
+ from explainiverse.core.explanation import Explanation
+ from sklearn.metrics import accuracy_score
+ import copy
+
+
+ def compute_aopc(
+     model,
+     instance: np.ndarray,
+     explanation: Explanation,
+     num_steps: int = 10,
+     baseline_value: float = 0.0
+ ) -> float:
+     """
+     Computes Area Over the Perturbation Curve (AOPC) by iteratively removing top features.
+
+     Args:
+         model: wrapped model with .predict() method
+         instance: input sample (1D array)
+         explanation: Explanation object
+         num_steps: number of top features to remove
+         baseline_value: value to replace removed features with (e.g., 0, mean)
+
+     Returns:
+         AOPC score (higher means explanation is more faithful)
+     """
+     base_pred = model.predict(instance.reshape(1, -1))[0]
+     attributions = explanation.explanation_data.get("feature_attributions", {})
+
+     if not attributions:
+         raise ValueError("No feature attributions found in explanation.")
+
+     # Sort features by abs importance
+     sorted_features = sorted(
+         attributions.items(),
+         key=lambda x: abs(x[1]),
+         reverse=True
+     )
+
+     # Try to map feature names to indices
+     feature_indices = []
+     for i, (fname, _) in enumerate(sorted_features):
+         try:
+             idx = explanation.feature_names.index(fname)
+         except Exception:
+             idx = i  # fallback: assume order
+         feature_indices.append(idx)
+
+     deltas = []
+     modified = instance.copy()
+
+     for i in range(min(num_steps, len(feature_indices))):
+         idx = feature_indices[i]
+         modified[idx] = baseline_value
+         new_pred = model.predict(modified.reshape(1, -1))[0]
+         delta = abs(base_pred - new_pred)
+         deltas.append(delta)
+
+     return np.mean(deltas)
+
+
+ def compute_batch_aopc(
+     model,
+     X: np.ndarray,
+     explanations: dict,
+     num_steps: int = 10,
+     baseline_value: float = 0.0
+ ) -> dict:
+     """
+     Compute average AOPC for multiple explainers over a batch of instances.
+
+     Args:
+         model: wrapped model
+         X: 2D input array
+         explanations: dict of {explainer_name: list of Explanation objects}
+         num_steps: number of top features to remove
+         baseline_value: value to replace features with
+
+     Returns:
+         Dict of {explainer_name: mean AOPC score}
+     """
+     results = {}
+
+     for explainer_name, expl_list in explanations.items():
+         scores = []
+         for i, exp in enumerate(expl_list):
+             instance = X[i]
+             score = compute_aopc(model, instance, exp, num_steps, baseline_value)
+             scores.append(score)
+         results[explainer_name] = np.mean(scores)
+
+     return results
+
+
+ def compute_roar(
+     model_class,
+     X_train: np.ndarray,
+     y_train: np.ndarray,
+     X_test: np.ndarray,
+     y_test: np.ndarray,
+     explanations: list,
+     top_k: int = 3,
+     baseline_value: float = 0.0,
+     model_kwargs: dict = None
+ ) -> float:
+     """
+     Compute ROAR (Remove And Retrain) using top-k important features from explanations.
+
+     Args:
+         model_class: uninstantiated model class (e.g. LogisticRegression)
+         X_train: full training data
+         y_train: training labels
+         X_test: test features
+         y_test: test labels
+         explanations: list of Explanation objects (one per train instance)
+         top_k: number of top features to remove
+         baseline_value: what to set removed features to
+         model_kwargs: optional kwargs to pass to model_class
+
+     Returns:
+         Accuracy drop (baseline_acc - retrained_acc)
+     """
+     model_kwargs = model_kwargs or {}
+
+     # Baseline model
+     baseline_model = model_class(**model_kwargs)
+     baseline_model.fit(X_train, y_train)
+     baseline_preds = baseline_model.predict(X_test)
+     baseline_acc = accuracy_score(y_test, baseline_preds)
+
+     # Compute top-k feature indices from attributions (use mode)
+     feature_counts = {}
+     for exp in explanations:
+         for fname, val in sorted(exp.explanation_data["feature_attributions"].items(), key=lambda x: abs(x[1]), reverse=True)[:top_k]:
+             try:
+                 idx = exp.feature_names.index(fname)
+                 feature_counts[idx] = feature_counts.get(idx, 0) + 1
+             except:
+                 continue
+
+     top_features = sorted(feature_counts.items(), key=lambda x: x[1], reverse=True)[:top_k]
+     top_feature_indices = [idx for idx, _ in top_features]
+
+     # Remove top-k from training and test data
+     X_train_mod = copy.deepcopy(X_train)
+     X_test_mod = copy.deepcopy(X_test)
+
+     # Prepare feature-wise baselines
+     # Compute or assign feature-wise baseline values
+     if not isinstance(
+         baseline_value,
+         (str, float, int, np.number, np.ndarray)
+     ) and not callable(baseline_value):
+         raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
+
+     if isinstance(baseline_value, str):
+         if baseline_value == "mean":
+             feature_baseline = np.mean(X_train, axis=0)
+         elif baseline_value == "median":
+             feature_baseline = np.median(X_train, axis=0)
+         else:
+             raise ValueError(f"Unsupported string baseline: {baseline_value}")
+     elif callable(baseline_value):
+         feature_baseline = baseline_value(X_train)
+     elif isinstance(baseline_value, np.ndarray):
+         if baseline_value.shape != (X_train.shape[1],):
+             raise ValueError("baseline_value ndarray must match number of features")
+         feature_baseline = baseline_value
+     elif isinstance(baseline_value, (float, int, np.number)):
+         feature_baseline = np.full(X_train.shape[1], baseline_value)
+     else:
+         raise ValueError(f"Invalid baseline_value type: {type(baseline_value)}")
+
+     for idx in top_feature_indices:
+         X_train_mod[:, idx] = feature_baseline[idx]
+         X_test_mod[:, idx] = feature_baseline[idx]
+         # X_train_mod[:, idx] = baseline_value
+         # X_test_mod[:, idx] = baseline_value
+
+     # Retrain and evaluate
+     retrained_model = model_class(**model_kwargs)
+     retrained_model.fit(X_train_mod, y_train)
+     retrained_preds = retrained_model.predict(X_test_mod)
+     retrained_acc = accuracy_score(y_test, retrained_preds)
+
+     return baseline_acc - retrained_acc
+
+
+ def compute_roar_curve(
+     model_class,
+     X_train,
+     y_train,
+     X_test,
+     y_test,
+     explanations,
+     max_k=5,
+     baseline_value="mean",
+     model_kwargs=None
+ ) -> dict:
+     """
+     Compute ROAR accuracy drops across a range of top-k features removed.
+
+     Args:
+         model_class: model type (e.g. LogisticRegression)
+         X_train, y_train, X_test, y_test: full dataset
+         explanations: list of Explanation objects
+         max_k: maximum top-k to try
+         baseline_value: string, scalar, ndarray, or callable
+         model_kwargs: passed to model class
+
+     Returns:
+         Dict of {k: accuracy drop} for k in 1..max_k
+     """
+     from copy import deepcopy
+
+     model_kwargs = model_kwargs or {}
+     curve = {}
+
+     for k in range(1, max_k + 1):
+         acc_drop = compute_roar(
+             model_class=model_class,
+             X_train=deepcopy(X_train),
+             y_train=deepcopy(y_train),
+             X_test=deepcopy(X_test),
+             y_test=deepcopy(y_test),
+             explanations=deepcopy(explanations),
+             top_k=k,
+             baseline_value=baseline_value,
+             model_kwargs=deepcopy(model_kwargs)
+         )
+         curve[k] = acc_drop
+
  return curve
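
A minimal usage sketch for the metrics above (not part of the wheel contents), assuming a plain scikit-learn classifier and hand-built Explanation objects: the coefficient-based attributions and the post-hoc exp.feature_names assignment are illustrative assumptions, while the compute_aopc / compute_roar_curve signatures and the Explanation constructor kwargs come from this diff.

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from explainiverse.core.explanation import Explanation
from explainiverse.evaluation.metrics import compute_aopc, compute_roar_curve

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)
model = LogisticRegression(max_iter=5000).fit(X_train, y_train)

# Stand-in attributions (coef * x per feature) instead of LIME/SHAP output.
explanations = []
for row in X_train:
    attributions = dict(zip(data.feature_names, model.coef_[0] * row))
    exp = Explanation(
        explainer_name="coef-sketch",
        target_class="malignant",
        explanation_data={"feature_attributions": attributions},
    )
    # Assumed attribute: compute_aopc/compute_roar read explanation.feature_names.
    exp.feature_names = list(data.feature_names)
    explanations.append(exp)

# AOPC on one instance: mean prediction change as top features are zeroed out.
print(compute_aopc(model, X_train[0], explanations[0], num_steps=10))

# ROAR curve: accuracy drop after removing top-k features and retraining, k = 1..5.
print(compute_roar_curve(
    LogisticRegression, X_train, y_train, X_test, y_test,
    explanations, max_k=5, baseline_value="mean",
    model_kwargs={"max_iter": 5000},
))

Because compute_roar retrains from scratch for every k, the ROAR curve is the costliest metric here; the deepcopy calls in compute_roar_curve keep each retraining run independent of the others.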
explainiverse/explainers/__init__.py
@@ -0,0 +1,38 @@
+ # src/explainiverse/explainers/__init__.py
+ """
+ Explainiverse Explainers - comprehensive XAI method implementations.
+
+ Local Explainers (instance-level):
+ - LIME: Local Interpretable Model-agnostic Explanations
+ - SHAP: SHapley Additive exPlanations
+ - Anchors: High-precision rule-based explanations
+ - Counterfactual: Diverse counterfactual explanations
+
+ Global Explainers (model-level):
+ - Permutation Importance: Feature importance via permutation
+ - Partial Dependence: Marginal feature effects (PDP)
+ - ALE: Accumulated Local Effects (unbiased for correlated features)
+ - SAGE: Shapley Additive Global importancE
+ """
+
+ from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
+ from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+ from explainiverse.explainers.rule_based.anchors_wrapper import AnchorsExplainer
+ from explainiverse.explainers.counterfactual.dice_wrapper import CounterfactualExplainer
+ from explainiverse.explainers.global_explainers.permutation_importance import PermutationImportanceExplainer
+ from explainiverse.explainers.global_explainers.partial_dependence import PartialDependenceExplainer
+ from explainiverse.explainers.global_explainers.ale import ALEExplainer
+ from explainiverse.explainers.global_explainers.sage import SAGEExplainer
+
+ __all__ = [
+     # Local explainers
+     "LimeExplainer",
+     "ShapExplainer",
+     "AnchorsExplainer",
+     "CounterfactualExplainer",
+     # Global explainers
+     "PermutationImportanceExplainer",
+     "PartialDependenceExplainer",
+     "ALEExplainer",
+     "SAGEExplainer",
+ ]
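
With these re-exports, both the flat package-level path and the deep module paths resolve to the same class objects; a quick sketch, relying only on imports shown in this diff and on Python's module caching:

from explainiverse.explainers import LimeExplainer, SAGEExplainer
from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer as DeepLime

# The package-level re-export and the deep module path name the same class.
assert LimeExplainer is DeepLime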
explainiverse/explainers/attribution/__init__.py
@@ -0,0 +1,9 @@
+ # src/explainiverse/explainers/attribution/__init__.py
+ """
+ Attribution-based explainers - feature importance explanations.
+ """
+
+ from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
+ from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+
+ __all__ = ["LimeExplainer", "ShapExplainer"]
explainiverse/explainers/attribution/lime_wrapper.py
@@ -1,63 +1,90 @@
- # src/explainiverse/explainers/attribution/lime_wrapper.py
-
- import numpy as np
- from lime.lime_tabular import LimeTabularExplainer
-
- from explainiverse.core.explainer import BaseExplainer
- from explainiverse.core.explanation import Explanation
-
-
- class LimeExplainer(BaseExplainer):
-     """
-     Wrapper for LIME that conforms to the BaseExplainer API.
-     """
-
-     def __init__(self, model, training_data, feature_names, class_names, mode="classification"):
-         """
-         Args:
-             model: A model adapter (implements .predict()).
-             training_data: The data used to initialize LIME (2D np.ndarray).
-             feature_names: List of feature names.
-             class_names: List of class names.
-             mode: 'classification' or 'regression'.
-         """
-         super().__init__(model)
-         self.feature_names = feature_names
-         self.class_names = class_names
-         self.mode = mode
-
-         self.explainer = LimeTabularExplainer(
-             training_data=training_data,
-             feature_names=feature_names,
-             class_names=class_names,
-             mode=mode
-         )
-
-     def explain(self, instance, num_features=5, top_labels=1):
-         """
-         Generate a local explanation for the given instance.
-
-         Args:
-             instance: 1D numpy array (single row)
-             num_features: Number of top features to include
-             top_labels: Number of top labels to explain
-
-         Returns:
-             Explanation object
-         """
-         lime_exp = self.explainer.explain_instance(
-             data_row=instance,
-             predict_fn=self.model.predict,
-             num_features=num_features,
-             top_labels=top_labels
-         )
-
-         label_index = lime_exp.top_labels[0]
-         label_name = self.class_names[label_index]
-         attributions = dict(lime_exp.as_list(label=label_index))
-
-         return Explanation(
-             explainer_name="LIME",
-             target_class=label_name,
-             explanation_data={"feature_attributions": attributions}
-         )
+ # src/explainiverse/explainers/attribution/lime_wrapper.py
+ """
+ LIME Explainer - Local Interpretable Model-agnostic Explanations.
+
+ LIME explains individual predictions by fitting a simple interpretable
+ model (linear regression) to perturbed samples around the instance.
+
+ Reference:
+     Ribeiro, M.T., Singh, S., & Guestrin, C. (2016). "Why Should I Trust You?":
+     Explaining the Predictions of Any Classifier. KDD 2016.
+ """
+
+ import numpy as np
+ from lime.lime_tabular import LimeTabularExplainer
+
+ from explainiverse.core.explainer import BaseExplainer
+ from explainiverse.core.explanation import Explanation
+
+
+ class LimeExplainer(BaseExplainer):
+     """
+     LIME explainer for local, model-agnostic explanations.
+
+     LIME (Local Interpretable Model-agnostic Explanations) explains individual
+     predictions by approximating the model locally with an interpretable model.
+     It generates perturbed samples around the instance and fits a weighted
+     linear model to understand feature contributions.
+
+     This implementation wraps the official LIME library for tabular data.
+
+     Attributes:
+         model: Model adapter with .predict() method
+         feature_names: List of feature names
+         class_names: List of class names
+         mode: 'classification' or 'regression'
+         explainer: The underlying LimeTabularExplainer
+     """
+
+     def __init__(self, model, training_data, feature_names, class_names, mode="classification"):
+         """
+         Initialize the LIME explainer.
+
+         Args:
+             model: A model adapter (implements .predict()).
+             training_data: The data used to initialize LIME (2D np.ndarray).
+                 Used to compute statistics for perturbation generation.
+             feature_names: List of feature names.
+             class_names: List of class names.
+             mode: 'classification' or 'regression'.
+         """
+         super().__init__(model)
+         self.feature_names = list(feature_names)
+         self.class_names = list(class_names)
+         self.mode = mode
+
+         self.explainer = LimeTabularExplainer(
+             training_data=training_data,
+             feature_names=feature_names,
+             class_names=class_names,
+             mode=mode
+         )
+
+     def explain(self, instance, num_features=5, top_labels=1):
+         """
+         Generate a local explanation for the given instance.
+
+         Args:
+             instance: 1D numpy array (single row) to explain
+             num_features: Number of top features to include in explanation
+             top_labels: Number of top predicted labels to explain
+
+         Returns:
+             Explanation object with feature attributions
+         """
+         lime_exp = self.explainer.explain_instance(
+             data_row=instance,
+             predict_fn=self.model.predict,
+             num_features=num_features,
+             top_labels=top_labels
+         )
+
+         label_index = lime_exp.top_labels[0]
+         label_name = self.class_names[label_index]
+         attributions = dict(lime_exp.as_list(label=label_index))
+
+         return Explanation(
+             explainer_name="LIME",
+             target_class=label_name,
+             explanation_data={"feature_attributions": attributions}
+         )
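
A minimal end-to-end sketch for the rewritten wrapper. ProbaAdapter is a hypothetical stand-in: the package ships its own adapter in explainiverse/adapters/sklearn_adapter.py (listed above), whose exact class name is not visible in this diff; the point is that in classification mode LIME's predict_fn must return class probabilities.

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from explainiverse.explainers import LimeExplainer

class ProbaAdapter:
    """Illustrative stand-in adapter: .predict() returns class probabilities."""
    def __init__(self, model):
        self._model = model

    def predict(self, X):
        return self._model.predict_proba(X)

data = load_iris()
rf = RandomForestClassifier(random_state=0).fit(data.data, data.target)

explainer = LimeExplainer(
    model=ProbaAdapter(rf),
    training_data=data.data,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    mode="classification",
)

# Explain the first training row; attributions are keyed by LIME's rule strings.
explanation = explainer.explain(data.data[0], num_features=4, top_labels=1)
print(explanation.explanation_data["feature_attributions"])

The list(...) calls above mirror the defensive copies the new __init__ now makes of feature_names and class_names.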