acfx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acfx-0.1.0/LICENSE +21 -0
- acfx-0.1.0/PKG-INFO +39 -0
- acfx-0.1.0/acfx/ACFX.py +174 -0
- acfx-0.1.0/acfx/AcfxCustom.py +81 -0
- acfx-0.1.0/acfx/AcfxEBM.py +32 -0
- acfx-0.1.0/acfx/AcfxLinear.py +32 -0
- acfx-0.1.0/acfx/__init__.py +6 -0
- acfx-0.1.0/acfx/abstract/ModelBasedCounterOptimizer.py +22 -0
- acfx-0.1.0/acfx/abstract/OptimizerType.py +6 -0
- acfx-0.1.0/acfx/abstract/__init__.py +3 -0
- acfx-0.1.0/acfx/app/__init__.py +0 -0
- acfx-0.1.0/acfx/evaluation/EBMCounterOptimizer.py +199 -0
- acfx-0.1.0/acfx/evaluation/LogisticRegressionCounterOptimizer.py +77 -0
- acfx-0.1.0/acfx/evaluation/__init__.py +5 -0
- acfx-0.1.0/acfx/evaluation/ccfs.py +269 -0
- acfx-0.1.0/acfx/evaluation/loss.py +124 -0
- acfx-0.1.0/acfx/evaluation/multi_dataset_evaluation.py +59 -0
- acfx-0.1.0/acfx/evaluation/utils.py +98 -0
- acfx-0.1.0/acfx.egg-info/PKG-INFO +39 -0
- acfx-0.1.0/acfx.egg-info/SOURCES.txt +23 -0
- acfx-0.1.0/acfx.egg-info/dependency_links.txt +1 -0
- acfx-0.1.0/acfx.egg-info/requires.txt +27 -0
- acfx-0.1.0/acfx.egg-info/top_level.txt +1 -0
- acfx-0.1.0/pyproject.toml +40 -0
- acfx-0.1.0/setup.cfg +4 -0
acfx-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Szymon Bobek
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
acfx-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: acfx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Actionable Counterfactual eXplanations
|
|
5
|
+
Author-email: Szymon Bobek <szymon.bobek@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/sbobek/acfx
|
|
8
|
+
Project-URL: Documentation, https://acfx.readthedocs.org
|
|
9
|
+
Project-URL: Issues, https://github.com/sbobek/acfx/issues
|
|
10
|
+
Keywords: xai,tabular data,explainability,model-agnostic,counterfactual,causal
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: numpy>=1.22.4
|
|
15
|
+
Requires-Dist: pandas>=1.4.3
|
|
16
|
+
Requires-Dist: scipy>=1.11.4
|
|
17
|
+
Requires-Dist: scikit-learn>=1.1.1
|
|
18
|
+
Requires-Dist: optuna>=4.2.0
|
|
19
|
+
Requires-Dist: interpret==0.6.9
|
|
20
|
+
Requires-Dist: interpret-core==0.6.9
|
|
21
|
+
Requires-Dist: overrides>=7.4.0
|
|
22
|
+
Provides-Extra: benchmark
|
|
23
|
+
Requires-Dist: tensorflow==2.14.0; extra == "benchmark"
|
|
24
|
+
Requires-Dist: lingam==1.9.1; extra == "benchmark"
|
|
25
|
+
Requires-Dist: openml==0.15.1; extra == "benchmark"
|
|
26
|
+
Requires-Dist: alibi==0.9.6; extra == "benchmark"
|
|
27
|
+
Requires-Dist: lux-explainer==1.3.2; extra == "benchmark"
|
|
28
|
+
Requires-Dist: networkx==3.4.2; extra == "benchmark"
|
|
29
|
+
Requires-Dist: cfnow==0.0.6; extra == "benchmark"
|
|
30
|
+
Requires-Dist: dice-ml==0.11; extra == "benchmark"
|
|
31
|
+
Requires-Dist: pydotplus==2.0.2; extra == "benchmark"
|
|
32
|
+
Requires-Dist: deap==1.4.2; extra == "benchmark"
|
|
33
|
+
Requires-Dist: pydot==3.0.4; extra == "benchmark"
|
|
34
|
+
Provides-Extra: streamlit-app
|
|
35
|
+
Requires-Dist: streamlit==1.48.0; extra == "streamlit-app"
|
|
36
|
+
Requires-Dist: streamlit-sortables==0.3.1; extra == "streamlit-app"
|
|
37
|
+
Requires-Dist: networkx==3.4.2; extra == "streamlit-app"
|
|
38
|
+
Requires-Dist: lingam==1.9.1; extra == "streamlit-app"
|
|
39
|
+
Dynamic: license-file
|
acfx-0.1.0/acfx/ACFX.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
from typing import Sequence, Tuple, Dict, Optional, List, Self
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
6
|
+
from sklearn.base import ClassifierMixin
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from .abstract import OptimizerType
|
|
9
|
+
from .evaluation import generate_cfs
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ACFX(ABC, BaseEstimator, TransformerMixin):
    """
    ACFX: A Counterfactual Explanation Model

    Abstract base class for actionable counterfactual explainers.  Concrete
    subclasses select an optimizer strategy (``self.optimizer_type``) in their
    ``fit`` override and then delegate here; ``counterfactual`` validates the
    fitted state and forwards everything to ``generate_cfs``.

    NOTE(review): the attribute/parameter name ``casual_order`` is kept as-is
    for interface compatibility, but it presumably means *causal* order.
    """
    def __init__(self, blackbox:ClassifierMixin):
        """

        Parameters
        ----------
        blackbox:
            Blackbox explainer (a fitted — or fittable — sklearn-style classifier).
        """
        self.blackbox = blackbox
        # All remaining state is populated by fit(); counterfactual() refuses
        # to run until optimizer_type has been set by a subclass fit().
        self.optimizer = None
        self.optimizer_type = None
        self.X = None
        self.categorical_indicator = None
        self.features_order = None
        self.pbounds = None
        self.adjacency_matrix = None
        self.casual_order = None
        self.masked_features = None


    @abstractmethod
    def fit(self, X:pd.DataFrame, adjacency_matrix:Optional[np.ndarray], casual_order:Optional[Sequence[int]],
            pbounds:Dict[str, Tuple[float, float]],y=None, masked_features:Optional[List[str]] = None,
            categorical_indicator:Optional[List[bool]] =None, features_order:Optional[List[str]] =None) -> Self:
        """
        Fits explainer to the sampled data and blackbox model provided in the constructor

        :return:
        self
            Fitted estimator.

        Parameters
        ----------
        X : {sparse matrix} of shape (n_samples, n_features)
            Used for counterfactuals generation

        adjacency_matrix:
            The adjacency matrix representing the causal structure.

        casual_order:
            The order of variables in the causal graph.

        pbounds:
            The bounds for each feature to search over (dict with feature names as keys and tuple (min, max) as values).

        y : array-like of shape (n_samples,).
            Target values used for blackbox model fitting only. You can provide fitted blackbox to constructor or fit it in this method by providing this parameter

        masked_features:
            List of interchangeable features

        categorical_indicator:
            True at the index where the variable should be treated as categorical

        features_order:
            order of features in query instance
        """
        # Only (re)fit the blackbox when labels are supplied; otherwise the
        # blackbox passed to the constructor is assumed to be already fitted.
        if y is not None:
            self.blackbox.fit(X, y)
        self.X = X
        self.categorical_indicator = categorical_indicator
        self.features_order = features_order
        self.adjacency_matrix = adjacency_matrix
        self.casual_order = casual_order
        self.pbounds = pbounds
        self.masked_features = masked_features
        return self

    def predict(self, X):
        """
        Predicts using blackbox model

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Used for counterfactuals generation

        Returns
        -------
        Prediction class labels for samples in X by blackbox model
        """
        # Thin pass-through: this explainer never alters blackbox predictions.
        return self.blackbox.predict(X)

    def counterfactual(self, query_instance: np.ndarray, desired_class:int, num_counterfactuals: int =1, proximity_weight : float =1,
                       sparsity_weight : float =1, plausibility_weight : float =0, diversity_weight : float =1, init_points : int =10,
                       n_iter : int =1000, sampling_from_model : bool=True) -> np.ndarray:
        """
        Generates counterfactuals

        Parameters
        ----------
        query_instance:
            The instance to generate counterfactuals for.
        desired_class:
            The target class for the counterfactuals.
        num_counterfactuals:
            The number of counterfactual instances to generate.
        proximity_weight:
            Weight for proximity loss component
        sparsity_weight:
            Weight for sparsity loss component
        plausibility_weight:
            Weight for plausibility loss component
        diversity_weight:
            Weight for diversity loss component
        init_points:
            Number of initial points for Bayesian Optimization.
        n_iter:
            Number of iterations for Bayesian Optimization.
        sampling_from_model:
            true if you want to generate samples from model after sampling from data and generating with relationship graph

        Returns
        -------
        np.ndarray:
            The generated counterfactuals that minimize the loss function.

        Raises
        ------
        ValueError
            If the causal inputs are missing/inconsistent while plausibility
            loss is enabled, if ``query_instance`` is None, or if ``fit()``
            was not called (no optimizer_type / missing custom optimizer).
        AttributeError
            If the LinearAdditive strategy is selected but the blackbox
            exposes no ``coef_`` attribute.
        """
        # The causal structure is only consulted by the plausibility loss,
        # so it is validated only when that loss component is active.
        if plausibility_weight > 0:
            if self.casual_order is None:
                raise ValueError("Casual order must be provided if plausibility loss is on")
            if self.adjacency_matrix is None:
                raise ValueError("adjacency_matrix must be provided")
            if self.adjacency_matrix.shape[0] != self.adjacency_matrix.shape[1]:
                raise ValueError("adjacency matrix must have same number of rows and columns")
            if self.adjacency_matrix.shape[0] != len(self.casual_order):
                raise ValueError("adjacency matrix must be of same length as casual order")

        if query_instance is None:
            raise ValueError("query_instance must not be None")
        if self.optimizer_type is None:
            raise ValueError("optimizer_type must be set via fit() before calling counterfactual()")
        # The Custom strategy is the only one that needs an externally
        # supplied optimizer object; the others are resolved by type alone.
        if self.optimizer is None and self.optimizer_type is OptimizerType.Custom:
            raise ValueError("optimizer must be set before calling counterfactual()")
        if self.optimizer_type is OptimizerType.LinearAdditive:
            if not hasattr(self.blackbox, 'coef_'):
                raise AttributeError('optimizer requires model.coef_ as linear coefficients to be set')
        # All heavy lifting happens in the evaluation layer.
        return generate_cfs(query_instance=query_instance,
                            desired_class=desired_class,
                            adjacency_matrix=self.adjacency_matrix,
                            casual_order=self.casual_order,
                            proximity_weight=proximity_weight,
                            sparsity_weight=sparsity_weight,
                            plausibility_weight=plausibility_weight,
                            diversity_weight=diversity_weight,
                            bounds=self.pbounds,
                            model=self.blackbox,
                            features_order=self.features_order,
                            masked_features= self.masked_features,
                            categorical_indicator= self.categorical_indicator,
                            X=self.X,
                            num_cfs=num_counterfactuals,
                            init_points=init_points,
                            n_iter=n_iter,
                            sampling_from_model=sampling_from_model,
                            optimizer_type=self.optimizer_type,
                            optimizer=self.optimizer)
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from overrides import overrides
|
|
4
|
+
from sklearn.base import ClassifierMixin
|
|
5
|
+
from typing import Sequence, Tuple, Dict, Optional, List, Self
|
|
6
|
+
from .ACFX import ACFX
|
|
7
|
+
from .abstract import OptimizerType, ModelBasedCounterOptimizer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AcfxCustom(ACFX):
    """
    AcfxCustom: A Counterfactual Explanation Model (using custom blackbox)

    Variant of :class:`ACFX` that relies on a user-supplied
    ``ModelBasedCounterOptimizer`` instead of a built-in strategy.
    """
    def __init__(self, blackbox: ClassifierMixin):
        """

        Parameters
        ----------
        blackbox:
            Custom blackbox explainer
        """
        super().__init__(blackbox)

    @overrides
    def counterfactual(self, query_instance: np.ndarray, desired_class: int, num_counterfactuals: int = 1, proximity_weight: float = 1,
                       sparsity_weight: float = 1, plausibility_weight: float = 0, diversity_weight: float = 1,
                       init_points: int = 10,
                       n_iter: int = 1000, sampling_from_model: bool = True) -> np.ndarray:
        """
        Generates counterfactuals; see :meth:`ACFX.counterfactual` for the
        full parameter description.

        Raises
        ------
        ValueError
            If no custom optimizer was supplied through ``fit()``.
        """
        # The custom optimizer is mandatory for this variant — fail fast with
        # a clear message before delegating to the base implementation.
        if self.optimizer is None:
            raise ValueError("Optimizer must be initialized in fit() before calling counterfactual().")
        return super().counterfactual(query_instance, desired_class, num_counterfactuals, proximity_weight, sparsity_weight,
                                      plausibility_weight, diversity_weight, init_points,
                                      n_iter, sampling_from_model)

    def fit(self, X:pd.DataFrame, adjacency_matrix:Optional[np.ndarray], casual_order:Optional[Sequence[int]],
            pbounds:Dict[str, Tuple[float, float]],
            optimizer : ModelBasedCounterOptimizer=None, y=None, masked_features:Optional[List[str]]=None,
            categorical_indicator:Optional[List[bool]]=None, features_order:Optional[List[str]] =None) -> Self:
        """
        Fits explainer to the sampled data and blackbox model provided in the constructor

        :return:
        self
            Fitted estimator.

        Parameters
        ----------
        X : {sparse matrix} of shape (n_samples, n_features)
            Used for counterfactuals generation

        adjacency_matrix:
            The adjacency matrix representing the causal structure.

        casual_order:
            The order of variables in the causal graph.

        pbounds:
            The bounds for each feature to search over (dict with feature names as keys and tuple (min, max) as values).

        optimizer:
            Custom optimizer compliant with blackbox predictor

        y : array-like of shape (n_samples,)
            Target values used for blackbox model fitting only. You can provide fitted blackbox to constructor or fit it in this method by providing this parameter

        masked_features:
            List of interchangeable features

        categorical_indicator:
            True at the index where the variable should be treated as categorical

        features_order:
            order of features in query instance

        Raises
        ------
        ValueError
            If ``optimizer`` is None — a custom optimizer is required here.
        """
        # Validate before mutating any state, so a failed fit() leaves the
        # explainer untouched (previously optimizer_type was assigned first,
        # leaving a half-configured object behind on error).
        if optimizer is None:
            raise ValueError("Optimizer must be given for AcfxCustom")
        self.optimizer_type = OptimizerType.Custom
        self.optimizer = optimizer
        return super().fit(X, adjacency_matrix, casual_order, pbounds,
                           y, masked_features,categorical_indicator, features_order)
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from typing import Sequence, Tuple, Dict, Optional, List, Self
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from interpret.glassbox import ExplainableBoostingClassifier
|
|
6
|
+
from overrides import overrides
|
|
7
|
+
|
|
8
|
+
from .ACFX import ACFX
|
|
9
|
+
from .abstract import OptimizerType
|
|
10
|
+
|
|
11
|
+
class AcfxEBM(ACFX):
    """
    AcfxEBM: A Counterfactual Explanation Model (using EBM as blackbox)
    """
    def __init__(self, blackbox: ExplainableBoostingClassifier):
        """

        Parameters
        ----------
        blackbox:
            EBM blackbox explainer
        """
        super().__init__(blackbox)

    @overrides
    def fit(self, X:pd.DataFrame, adjacency_matrix:Optional[np.ndarray], casual_order:Optional[Sequence[int]],
            pbounds:Dict[str, Tuple[float, float]],y=None, masked_features:Optional[List[str]] = None,
            categorical_indicator:Optional[List[bool]] =None, features_order:Optional[List[str]] =None) -> Self:
        """
        Fits the explainer; see :meth:`ACFX.fit` for parameter descriptions.

        Selects the EBM-specific optimizer strategy before delegating all
        state assignment to the base class.
        """
        self.optimizer_type = OptimizerType.EBM
        return super().fit(X, adjacency_matrix, casual_order, pbounds,
                           y, masked_features,categorical_indicator, features_order)
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from typing import Sequence, Tuple, Dict, Optional, List, Self
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from overrides import overrides
|
|
6
|
+
from sklearn.linear_model._base import LinearClassifierMixin
|
|
7
|
+
|
|
8
|
+
from .ACFX import ACFX
|
|
9
|
+
from .abstract import OptimizerType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AcfxLinear(ACFX):
    """
    AcfxLinear: A Counterfactual Explanation Model (using linear additive model as blackbox)
    """
    def __init__(self, blackbox: LinearClassifierMixin):
        """

        Parameters
        ----------
        blackbox:
            Linear blackbox explainer
        """
        super().__init__(blackbox)

    @overrides
    def fit(self, X:pd.DataFrame, adjacency_matrix:Optional[np.ndarray], casual_order:Optional[Sequence[int]],
            pbounds:Dict[str, Tuple[float, float]],y=None, masked_features:Optional[List[str]] = None,
            categorical_indicator:Optional[List[bool]] =None, features_order:Optional[List[str]] =None) -> Self:
        """
        Fits the explainer; see :meth:`ACFX.fit` for parameter descriptions.

        Selects the linear-additive optimizer strategy (which requires the
        blackbox to expose ``coef_``; this is checked in ``counterfactual``)
        before delegating state assignment to the base class.
        """
        self.optimizer_type = OptimizerType.LinearAdditive
        return super().fit(X, adjacency_matrix, casual_order, pbounds,
                           y, masked_features,categorical_indicator, features_order)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List, Dict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ModelBasedCounterOptimizer(ABC):
    """Interface for model-specific counterfactual optimizers.

    Implementations inspect a fitted model and propose new values for the
    interchangeable features so that the predicted probability of a chosen
    class increases.
    """

    @abstractmethod
    def optimize_proba(self, target_class: int, feature_masked: List[str]) -> Dict[str, float]:
        """Adjust feature values to raise the probability of *target_class*.

        Parameters
        ----------
        target_class:
            The desired class to optimize towards.
        feature_masked:
            List of interchangeable features.

        Returns
        -------
        Dictionary of feature names and their optimized values.
        """
        ...
|
File without changes
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
from typing import List, Dict
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from interpret.glassbox import ExplainableBoostingClassifier
|
|
5
|
+
from overrides import overrides
|
|
6
|
+
from sklearn.utils.extmath import softmax
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from ..abstract import ModelBasedCounterOptimizer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class EBMCounterOptimizer(ModelBasedCounterOptimizer):
    """Counterfactual optimizer for Explainable Boosting Machines.

    Walks the EBM's additive terms and, for every feature the caller marked
    as interchangeable, replaces the feature value with one drawn from the
    bin whose term score is maximal for the target class.
    """

    def __init__(self, model: ExplainableBoostingClassifier, X: pd.DataFrame):
        """
        Parameters
        ----------
        model:
            Trained EBM model.
        X:
            Dataset whose rows are scored/optimized.
        """
        self.model = model
        self.X = X
        # feature name -> value chosen (or kept) during the last optimize_proba() run
        self.updated_features = {}

    def _get_optimized_feature_value(self, feature_name, feature_idx, feature_val, features, feature_masked, term_idx,
                                     class_idx) -> Dict[str, float]:
        """
        Returns
        -------
        feature value with maximum score for given target class.

        @Todo Needs changes to return optimized value due to given strategy.
        """
        # if feature is modifiable and not yet optimized
        if feature_name in feature_masked and feature_name not in self.updated_features:
            # if multiclass classification take bins for term and class
            if len(self.model.term_scores_[term_idx].shape) > 1:
                class_term_scores = self.model.term_scores_[term_idx].T[class_idx]
            else:
                # else take score for class 1 or 1 - score for class 1
                # (argmax of 1 - s equals argmax of -s, so the selected bin is
                # the one minimizing the class-1 contribution)
                class_term_scores = self.model.term_scores_[term_idx] if class_idx == 1 else 1 - self.model.term_scores_[
                    term_idx]
            # take term that gives best score for target class
            class_max = np.max(class_term_scores)
            # search only the "real" bins; index 0 is 'missing' and -1 'unknown'
            matches = np.where(class_term_scores[1:-1] == class_max)[0]
            if matches.size == 0:
                # FIX: the original bare `except:` only printed here, leaving
                # feature_score_idx undefined and crashing with NameError on
                # the next use. When the best score sits in the missing/unknown
                # bins there is no usable value range — keep the current value.
                self.updated_features.update({feature_name: feature_val})
                return feature_val
            feature_score_idx = matches[0]  # this is a bin index into the score array
            # we bin differently for main effects and pairs, so first
            # get the list containing the bins for different resolutions
            bin_levels = self.model.bins_[feature_idx]
            # what resolution do we need for this term (main resolution, pair
            # resolution, etc.), but limit to the last resolution available
            bins = bin_levels[min(len(bin_levels), len(features)) - 1]

            if len(bins) == 0:
                # no binning information at all — fall back to a random sample
                feature_val = self.X[feature_name].sample(1).values[0]
            else:
                if isinstance(bins, dict):
                    # categorical feature
                    # 'unknown' category strings are in the last bin (-1)
                    feature_val = list(bins.values())[
                        feature_score_idx - 1]  # if maxscore was 0, or -1 just assign random value
                else:
                    # continuous feature
                    # Get the lower and upper bounds of the specified bin
                    lower_idx = feature_score_idx - 1
                    upper_idx = feature_score_idx

                    if lower_idx == -1:
                        lower = self.model.feature_bounds_[feature_idx][0]
                    else:
                        lower = bins[lower_idx]

                    if upper_idx == len(bins):
                        upper = self.model.feature_bounds_[feature_idx][1]
                    else:
                        upper = bins[upper_idx]

                    # Draw a random number from the range defined by the bin
                    feature_val = np.random.uniform(lower, upper)

            self.updated_features.update({feature_name: feature_val})
        elif feature_name in self.updated_features:
            # feature already optimized earlier in this run — reuse the value
            feature_val = self.updated_features.get(feature_name)
        else:
            # non-modifiable feature: record its original value unchanged
            self.updated_features.update({feature_name: feature_val})

        return feature_val

    @overrides
    def optimize_proba(self, target_class : int, feature_masked: List[str]) -> Dict[str, float]:
        """
        The method calculates probabilities taking into account the optimization of given parameters towards the target class.
        Method is based on a default ebm's predict_proba

        Parameters:
        -----------
        target_class:
            Target class from which we take the features
        feature_masked:
            List of interchangeable features

        Returns:
        --------
        Dictionary of feature names and their optimized values
        (``self.updated_features``).

        Raises:
        -------
        KeyError
            If ``target_class`` is not among the model's classes.
        """
        if target_class not in self.model.classes_:
            raise KeyError(f'Class "{target_class}" does not exists in given EBM model')

        class_idx = np.where(self.model.classes_ == target_class)[0][0]
        self.updated_features = {}
        sample_scores = []
        for index, sample in self.X.iterrows():
            # start from the intercept for each sample
            score = self.model.intercept_.copy()
            if isinstance(score, float) or len(score) == 1:
                # regression or binary classification
                score = float(score)

            # add the score contribution of every additive term
            for term_idx, features in enumerate(self.model.term_features_):
                # indexing into a tensor requires a multi-dimensional index
                tensor_index = []
                # main effects will have 1 feature, and pairs will have 2 features
                for feature_idx in features:
                    feature_name = self.model.feature_names_in_[feature_idx]  # Get the feature name by index
                    feature_val = sample[feature_name]  # Use the feature name to get the correct value from the sample
                    bin_idx = 0  # if missing value, use bin index 0

                    # FIX: `feature_val is not np.nan` was an identity test that
                    # misses NaNs produced by data (only the np.nan singleton
                    # compares `is`); pd.isna detects any missing scalar.
                    if feature_val is not None and not pd.isna(feature_val):
                        # we bin differently for main effects and pairs, so first
                        # get the list containing the bins for different resolutions
                        bin_levels = self.model.bins_[feature_idx]

                        # what resolution do we need for this term (main resolution, pair
                        # resolution, etc.), but limit to the last resolution available
                        bins = bin_levels[min(len(bin_levels), len(features)) - 1]

                        # here is where the magic is located: possibly swap the
                        # value for the one maximizing the target-class score
                        feature_val = self._get_optimized_feature_value(feature_name, feature_idx, feature_val,
                                                                        features, feature_masked, term_idx, class_idx)

                        if isinstance(bins, dict):
                            # categorical feature
                            # 'unknown' category strings are in the last bin (-1)
                            bin_idx = bins.get(feature_val, -1)
                            if bin_idx == -1:
                                # check value as string
                                bin_idx = bins.get(str(feature_val), -1)
                        else:
                            # continuous feature
                            try:
                                # try converting to a float, if that fails it's 'unknown'
                                feature_val = float(feature_val)
                                # add 1 because the 0th bin is reserved for 'missing'
                                bin_idx = np.digitize(feature_val, bins) + 1
                            except ValueError:
                                # non-floats are 'unknown', which is in the last bin (-1)
                                bin_idx = -1

                    tensor_index.append(bin_idx)

                # local_score is also the local feature importance
                local_score = self.model.term_scores_[term_idx][tuple(tensor_index)]

                score += local_score
            sample_scores.append(score)

        predictions = np.array(sample_scores)

        if hasattr(self.model, 'classes_'):
            # classification
            if len(self.model.classes_) == 2:
                # binary classification

                # softmax expects two logits for binary classification
                # the first logit is always equivalent to 0 for binary classification
                predictions = [[0, x] for x in predictions]
                predictions = softmax(predictions)

        # NOTE(review): `predictions` mirrors predict_proba and is retained for
        # the (currently disabled) probability-returning variant below.
        # return predictions, self.updated_features
        return self.updated_features