acfx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
acfx-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Szymon Bobek
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
acfx-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: acfx
3
+ Version: 0.1.0
4
+ Summary: Actionable Counterfactual eXplanations
5
+ Author-email: Szymon Bobek <szymon.bobek@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/sbobek/acfx
8
+ Project-URL: Documentation, https://acfx.readthedocs.org
9
+ Project-URL: Issues, https://github.com/sbobek/acfx/issues
10
+ Keywords: xai,tabular data,explainability,model-agnostic,counterfactual,causal
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy>=1.22.4
15
+ Requires-Dist: pandas>=1.4.3
16
+ Requires-Dist: scipy>=1.11.4
17
+ Requires-Dist: scikit-learn>=1.1.1
18
+ Requires-Dist: optuna>=4.2.0
19
+ Requires-Dist: interpret==0.6.9
20
+ Requires-Dist: interpret-core==0.6.9
21
+ Requires-Dist: overrides>=7.4.0
22
+ Provides-Extra: benchmark
23
+ Requires-Dist: tensorflow==2.14.0; extra == "benchmark"
24
+ Requires-Dist: lingam==1.9.1; extra == "benchmark"
25
+ Requires-Dist: openml==0.15.1; extra == "benchmark"
26
+ Requires-Dist: alibi==0.9.6; extra == "benchmark"
27
+ Requires-Dist: lux-explainer==1.3.2; extra == "benchmark"
28
+ Requires-Dist: networkx==3.4.2; extra == "benchmark"
29
+ Requires-Dist: cfnow==0.0.6; extra == "benchmark"
30
+ Requires-Dist: dice-ml==0.11; extra == "benchmark"
31
+ Requires-Dist: pydotplus==2.0.2; extra == "benchmark"
32
+ Requires-Dist: deap==1.4.2; extra == "benchmark"
33
+ Requires-Dist: pydot==3.0.4; extra == "benchmark"
34
+ Provides-Extra: streamlit-app
35
+ Requires-Dist: streamlit==1.48.0; extra == "streamlit-app"
36
+ Requires-Dist: streamlit-sortables==0.3.1; extra == "streamlit-app"
37
+ Requires-Dist: networkx==3.4.2; extra == "streamlit-app"
38
+ Requires-Dist: lingam==1.9.1; extra == "streamlit-app"
39
+ Dynamic: license-file
@@ -0,0 +1,174 @@
1
# NOTE: typing.Self (Python 3.11+) is deliberately avoided here; the package
# declares Requires-Python >= 3.8, so we use a forward-reference annotation.
from typing import Sequence, Tuple, Dict, Optional, List

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.base import ClassifierMixin
from abc import ABC, abstractmethod
from .abstract import OptimizerType
from .evaluation import generate_cfs


class ACFX(ABC, BaseEstimator, TransformerMixin):
    """
    ACFX: A Counterfactual Explanation Model.

    Abstract base class for actionable counterfactual explainers. Concrete
    subclasses implement :meth:`fit`, where they must set
    ``self.optimizer_type`` (and ``self.optimizer`` for custom optimizers)
    before delegating to ``super().fit(...)``.
    """

    def __init__(self, blackbox: ClassifierMixin):
        """
        Parameters
        ----------
        blackbox:
            Blackbox explainer
        """
        self.blackbox = blackbox
        # Set by subclasses in fit(); validated in counterfactual().
        self.optimizer = None
        self.optimizer_type = None
        # Populated by fit().
        self.X = None
        self.categorical_indicator = None
        self.features_order = None
        self.pbounds = None
        self.adjacency_matrix = None
        self.casual_order = None
        self.masked_features = None

    @abstractmethod
    def fit(self, X: pd.DataFrame, adjacency_matrix: Optional[np.ndarray], casual_order: Optional[Sequence[int]],
            pbounds: Dict[str, Tuple[float, float]], y=None, masked_features: Optional[List[str]] = None,
            categorical_indicator: Optional[List[bool]] = None, features_order: Optional[List[str]] = None) -> "ACFX":
        """
        Fits explainer to the sampled data and blackbox model provided in the constructor

        :return:
            self
            Fitted estimator.

        Parameters
        ----------
        X : {sparse matrix} of shape (n_samples, n_features)
            Used for counterfactuals generation

        adjacency_matrix:
            The adjacency matrix representing the causal structure.

        casual_order:
            The order of variables in the causal graph.

        pbounds:
            The bounds for each feature to search over (dict with feature names as keys and tuple (min, max) as values).

        y : array-like of shape (n_samples,).
            Target values used for blackbox model fitting only. You can provide fitted blackbox to constructor or fit it in this method by providing this parameter

        masked_features:
            List of interchangeable features

        categorical_indicator:
            True at the index where the variable should be treated as categorical

        features_order:
            order of features in query instance
        """
        # Only (re)train the blackbox when labels are supplied; otherwise the
        # model passed to the constructor is assumed to be fitted already.
        if y is not None:
            self.blackbox.fit(X, y)
        self.X = X
        self.categorical_indicator = categorical_indicator
        self.features_order = features_order
        self.adjacency_matrix = adjacency_matrix
        self.casual_order = casual_order
        self.pbounds = pbounds
        self.masked_features = masked_features
        return self

    def predict(self, X):
        """
        Predicts using blackbox model

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Used for counterfactuals generation

        Returns
        -------
        Prediction class labels for samples in X by blackbox model
        """
        return self.blackbox.predict(X)

    def counterfactual(self, query_instance: np.ndarray, desired_class: int, num_counterfactuals: int = 1,
                       proximity_weight: float = 1, sparsity_weight: float = 1, plausibility_weight: float = 0,
                       diversity_weight: float = 1, init_points: int = 10,
                       n_iter: int = 1000, sampling_from_model: bool = True) -> np.ndarray:
        """
        Generates counterfactuals

        Parameters
        ----------
        query_instance:
            The instance to generate counterfactuals for.
        desired_class:
            The target class for the counterfactuals.
        num_counterfactuals:
            The number of counterfactual instances to generate.
        proximity_weight:
            Weight for proximity loss component
        sparsity_weight:
            Weight for sparsity loss component
        plausibility_weight:
            Weight for plausibility loss component
        diversity_weight:
            Weight for diversity loss component
        init_points:
            Number of initial points for Bayesian Optimization.
        n_iter:
            Number of iterations for Bayesian Optimization.
        sampling_from_model:
            true if you want to generate samples from model after sampling from data and generating with relationship graph

        Returns
        -------
        np.ndarray:
            The generated counterfactuals that minimize the loss function.

        Raises
        ------
        ValueError
            If required state (causal structure, optimizer type/instance, query) is missing or inconsistent.
        AttributeError
            If a linear-additive optimizer is requested but the blackbox has no ``coef_``.
        """
        # Plausibility loss needs the causal structure; validate it eagerly so
        # the (expensive) optimization never starts with inconsistent inputs.
        if plausibility_weight > 0:
            if self.casual_order is None:
                raise ValueError("Casual order must be provided if plausibility loss is on")
            if self.adjacency_matrix is None:
                raise ValueError("adjacency_matrix must be provided")
            if self.adjacency_matrix.shape[0] != self.adjacency_matrix.shape[1]:
                raise ValueError("adjacency matrix must have same number of rows and columns")
            if self.adjacency_matrix.shape[0] != len(self.casual_order):
                raise ValueError("adjacency matrix must be of same length as casual order")

        if query_instance is None:
            raise ValueError("query_instance must not be None")
        if self.optimizer_type is None:
            raise ValueError("optimizer_type must be set via fit() before calling counterfactual()")
        if self.optimizer is None and self.optimizer_type is OptimizerType.Custom:
            raise ValueError("optimizer must be set before calling counterfactual()")
        if self.optimizer_type is OptimizerType.LinearAdditive:
            # The linear-additive optimizer reads the model's linear coefficients directly.
            if not hasattr(self.blackbox, 'coef_'):
                raise AttributeError('optimizer requires model.coef_ as linear coefficients to be set')
        return generate_cfs(query_instance=query_instance,
                            desired_class=desired_class,
                            adjacency_matrix=self.adjacency_matrix,
                            casual_order=self.casual_order,
                            proximity_weight=proximity_weight,
                            sparsity_weight=sparsity_weight,
                            plausibility_weight=plausibility_weight,
                            diversity_weight=diversity_weight,
                            bounds=self.pbounds,
                            model=self.blackbox,
                            features_order=self.features_order,
                            masked_features=self.masked_features,
                            categorical_indicator=self.categorical_indicator,
                            X=self.X,
                            num_cfs=num_counterfactuals,
                            init_points=init_points,
                            n_iter=n_iter,
                            sampling_from_model=sampling_from_model,
                            optimizer_type=self.optimizer_type,
                            optimizer=self.optimizer)
@@ -0,0 +1,81 @@
1
import numpy as np
import pandas as pd
from overrides import overrides
from sklearn.base import ClassifierMixin
# NOTE: typing.Self (Python 3.11+) is deliberately avoided; the package
# supports Python >= 3.8, so a forward-reference annotation is used instead.
from typing import Sequence, Tuple, Dict, Optional, List
from .ACFX import ACFX
from .abstract import OptimizerType, ModelBasedCounterOptimizer


class AcfxCustom(ACFX):
    """
    AcfxCustom: A Counterfactual Explanation Model (using custom blackbox)
    """

    def __init__(self, blackbox: ClassifierMixin):
        """
        Parameters
        ----------
        blackbox:
            Custom blackbox explainer
        """
        super().__init__(blackbox)

    @overrides
    def counterfactual(self, query_instance: np.ndarray, desired_class: int, num_counterfactuals: int = 1, proximity_weight: float = 1,
                       sparsity_weight: float = 1, plausibility_weight: float = 0, diversity_weight: float = 1,
                       init_points: int = 10,
                       n_iter: int = 1000, sampling_from_model: bool = True) -> np.ndarray:
        """Generate counterfactuals using the custom optimizer supplied in :meth:`fit`."""
        # A custom optimizer is mandatory for this subclass; fail early with a
        # clear message instead of deep inside the optimization loop.
        if self.optimizer is None:
            raise ValueError("Optimizer must be initialized in fit() before calling counterfactual().")
        return super().counterfactual(query_instance, desired_class, num_counterfactuals, proximity_weight, sparsity_weight,
                                      plausibility_weight, diversity_weight, init_points,
                                      n_iter, sampling_from_model)

    def fit(self, X: pd.DataFrame, adjacency_matrix: Optional[np.ndarray], casual_order: Optional[Sequence[int]],
            pbounds: Dict[str, Tuple[float, float]],
            optimizer: Optional[ModelBasedCounterOptimizer] = None, y=None, masked_features: Optional[List[str]] = None,
            categorical_indicator: Optional[List[bool]] = None, features_order: Optional[List[str]] = None) -> "AcfxCustom":
        """
        Fits explainer to the sampled data and blackbox model provided in the constructor

        :return:
            self
            Fitted estimator.

        Parameters
        ----------
        X : {sparse matrix} of shape (n_samples, n_features)
            Used for counterfactuals generation

        adjacency_matrix:
            The adjacency matrix representing the causal structure.

        casual_order:
            The order of variables in the causal graph.

        pbounds:
            The bounds for each feature to search over (dict with feature names as keys and tuple (min, max) as values).

        optimizer:
            Custom optimizer compliant with blackbox predictor

        y : array-like of shape (n_samples,)
            Target values used for blackbox model fitting only. You can provide fitted blackbox to constructor or fit it in this method by providing this parameter

        masked_features:
            List of interchangeable features

        categorical_indicator:
            True at the index where the variable should be treated as categorical

        features_order:
            order of features in query instance
        """
        self.optimizer_type = OptimizerType.Custom
        if optimizer is None:
            raise ValueError("Optimizer must be given for AcfxCustom")
        self.optimizer = optimizer
        return super().fit(X, adjacency_matrix, casual_order, pbounds,
                           y, masked_features, categorical_indicator, features_order)
@@ -0,0 +1,32 @@
1
# NOTE: typing.Self (Python 3.11+) is deliberately avoided; the package
# supports Python >= 3.8, so a forward-reference annotation is used instead.
from typing import Sequence, Tuple, Dict, Optional, List

import numpy as np
import pandas as pd
from interpret.glassbox import ExplainableBoostingClassifier
from overrides import overrides

from .ACFX import ACFX
from .abstract import OptimizerType


class AcfxEBM(ACFX):
    """
    AcfxEBM: A Counterfactual Explanation Model (using EBM as blackbox)
    """

    def __init__(self, blackbox: ExplainableBoostingClassifier):
        """
        Parameters
        ----------
        blackbox:
            EBM blackbox explainer
        """
        super().__init__(blackbox)

    @overrides
    def fit(self, X: pd.DataFrame, adjacency_matrix: Optional[np.ndarray], casual_order: Optional[Sequence[int]],
            pbounds: Dict[str, Tuple[float, float]], y=None, masked_features: Optional[List[str]] = None,
            categorical_indicator: Optional[List[bool]] = None, features_order: Optional[List[str]] = None) -> "AcfxEBM":
        """
        Fits explainer to the sampled data and the EBM blackbox model.

        Selects the EBM-specific optimizer type before delegating all state
        handling to :meth:`ACFX.fit`; see that method for parameter details.

        :return:
            self
            Fitted estimator.
        """
        self.optimizer_type = OptimizerType.EBM
        return super().fit(X, adjacency_matrix, casual_order, pbounds,
                           y, masked_features, categorical_indicator, features_order)
@@ -0,0 +1,32 @@
1
# NOTE: typing.Self (Python 3.11+) is deliberately avoided; the package
# supports Python >= 3.8, so a forward-reference annotation is used instead.
from typing import Sequence, Tuple, Dict, Optional, List

import numpy as np
import pandas as pd
from overrides import overrides
from sklearn.linear_model._base import LinearClassifierMixin

from .ACFX import ACFX
from .abstract import OptimizerType


class AcfxLinear(ACFX):
    """
    AcfxLinear: A Counterfactual Explanation Model (using linear additive model as blackbox)
    """

    def __init__(self, blackbox: LinearClassifierMixin):
        """
        Parameters
        ----------
        blackbox:
            Linear blackbox explainer
        """
        super().__init__(blackbox)

    @overrides
    def fit(self, X: pd.DataFrame, adjacency_matrix: Optional[np.ndarray], casual_order: Optional[Sequence[int]],
            pbounds: Dict[str, Tuple[float, float]], y=None, masked_features: Optional[List[str]] = None,
            categorical_indicator: Optional[List[bool]] = None, features_order: Optional[List[str]] = None) -> "AcfxLinear":
        """
        Fits explainer to the sampled data and the linear blackbox model.

        Selects the linear-additive optimizer type (which later requires the
        blackbox to expose ``coef_``) before delegating all state handling to
        :meth:`ACFX.fit`; see that method for parameter details.

        :return:
            self
            Fitted estimator.
        """
        self.optimizer_type = OptimizerType.LinearAdditive
        return super().fit(X, adjacency_matrix, casual_order, pbounds,
                           y, masked_features, categorical_indicator, features_order)
@@ -0,0 +1,6 @@
1
from .ACFX import ACFX
from .AcfxEBM import AcfxEBM
from .AcfxCustom import AcfxCustom
from .AcfxLinear import AcfxLinear

# __all__ must contain the *names* of the public symbols as strings; listing
# the class objects themselves makes `from acfx import *` raise
# "TypeError: attribute name must be string".
__all__ = ["ACFX", "AcfxLinear", "AcfxEBM", "AcfxCustom"]
@@ -0,0 +1,22 @@
1
from abc import ABC, abstractmethod
from typing import List, Dict


class ModelBasedCounterOptimizer(ABC):
    """Interface for model-specific counterfactual optimizers.

    Implementations adjust feature values of an instance so that the
    underlying model's probability of a chosen target class increases.
    """

    @abstractmethod
    def optimize_proba(self, target_class: int, feature_masked: List[str]) -> Dict[str, float]:
        """Modify the instance to increase the probability of ``target_class``.

        Parameters:
        -----------
        target_class:
            The desired class to optimize towards.
        feature_masked:
            List of interchangeable features

        Returns:
        -------
        Dictionary of feature names and their optimized values
        """
@@ -0,0 +1,6 @@
1
from enum import Enum


class OptimizerType(Enum):
    """Kinds of counterfactual optimizers supported by ACFX explainers."""

    EBM = 1
    LinearAdditive = 2
    Custom = 3
@@ -0,0 +1,3 @@
1
from .OptimizerType import OptimizerType
from .ModelBasedCounterOptimizer import ModelBasedCounterOptimizer

# __all__ must contain name strings, not the objects themselves; otherwise
# `from .abstract import *` raises "TypeError: attribute name must be string".
__all__ = ["OptimizerType", "ModelBasedCounterOptimizer"]
File without changes
@@ -0,0 +1,199 @@
1
from typing import List, Dict

import pandas as pd
from interpret.glassbox import ExplainableBoostingClassifier
from overrides import overrides
from sklearn.utils.extmath import softmax
import numpy as np

from ..abstract import ModelBasedCounterOptimizer


class EBMCounterOptimizer(ModelBasedCounterOptimizer):
    """Counterfactual optimizer that exploits EBM term scores directly.

    For every feature listed as interchangeable it picks the feature value
    whose EBM bin yields the highest score for the target class, re-scoring
    samples with a re-implementation of EBM's additive prediction.

    NOTE(review): this relies on interpret's internal attributes
    (``term_scores_``, ``bins_``, ``feature_bounds_``, ``term_features_``) —
    pinned to interpret==0.6.9 in the package metadata.
    """

    def __init__(self, model: ExplainableBoostingClassifier, X: pd.DataFrame):
        """
        Parameters
        ----------
        model:
            Trained EBM model.
        X:
            Dataset whose rows are re-scored during optimization.
        """
        self.model = model
        self.X = X
        # feature name -> value chosen so far; reset per optimize_proba() call.
        self.updated_features = {}

    def _get_optimized_feature_value(self, feature_name, feature_idx, feature_val, features, feature_masked, term_idx,
                                     class_idx):
        """Return the feature value whose bin maximizes the target-class score.

        Only features in ``feature_masked`` are changed; each feature is
        optimized at most once per call to :meth:`optimize_proba` and the
        chosen value is cached in ``self.updated_features``.

        @Todo Needs changes to return optimized value due to given strategy.
        """
        # if feature is modifiable and not yet optimized
        if feature_name in feature_masked and feature_name not in self.updated_features:
            # if multiclass classification take bins for term and class
            if len(self.model.term_scores_[term_idx].shape) > 1:
                class_term_scores = self.model.term_scores_[term_idx].T[class_idx]
            else:
                # Binary case: scores are stored for class 1 only. For class 0
                # we flip the ordering (1 - s), so the argmax below selects the
                # bin that is *worst* for class 1, i.e. best for class 0.
                class_term_scores = self.model.term_scores_[term_idx] if class_idx == 1 else 1 - self.model.term_scores_[
                    term_idx]
            # Pick the interior bin (bin 0 = 'missing', bin -1 = 'unknown')
            # with the best score for the target class.
            class_max = np.max(class_term_scores)
            interior_scores = class_term_scores[1:-1]
            matches = np.where(interior_scores == class_max)[0]
            if matches.size > 0:
                feature_score_idx = int(matches[0])
            else:
                # The global maximum lies in the 'missing'/'unknown' edge bins,
                # which cannot be mapped back to a concrete feature value.
                # Fall back to the best *interior* bin instead of crashing
                # (the original bare `except` left feature_score_idx unset,
                # causing a NameError on the next line).
                feature_score_idx = int(np.argmax(interior_scores))
            # we bin differently for main effects and pairs, so first
            # get the list containing the bins for different resolutions
            bin_levels = self.model.bins_[feature_idx]
            # what resolution do we need for this term (main resolution, pair
            # resolution, etc.), but limit to the last resolution available
            bins = bin_levels[min(len(bin_levels), len(features)) - 1]

            if len(bins) == 0:
                # No binning information: draw a plausible value from the data.
                feature_val = self.X[feature_name].sample(1).values[0]
            else:
                if isinstance(bins, dict):
                    # categorical feature
                    # 'unknown' category strings are in the last bin (-1)
                    feature_val = list(bins.values())[
                        feature_score_idx - 1]  # if maxscore was 0, or -1 just assign random value
                else:
                    # continuous feature
                    # Get the lower and upper bounds of the specified bin
                    lower_idx = feature_score_idx - 1
                    upper_idx = feature_score_idx

                    if lower_idx == -1:
                        lower = self.model.feature_bounds_[feature_idx][0]
                    else:
                        lower = bins[lower_idx]

                    if upper_idx == len(bins):
                        upper = self.model.feature_bounds_[feature_idx][1]
                    else:
                        upper = bins[upper_idx]

                    # Draw a random number from the range defined by the bin
                    feature_val = np.random.uniform(lower, upper)

            self.updated_features.update({feature_name: feature_val})
        elif feature_name in self.updated_features:
            # Feature already optimized earlier in this pass: reuse its value
            # so all terms see a consistent instance.
            feature_val = self.updated_features.get(feature_name)
        else:
            # Non-modifiable feature: record its original value unchanged.
            self.updated_features.update({feature_name: feature_val})

        return feature_val

    @overrides
    def optimize_proba(self, target_class: int, feature_masked: List[str]) -> Dict[str, float]:
        """
        Calculates probabilities while optimizing the masked features towards
        the target class. Based on EBM's default predict_proba scoring.

        Parameters:
        -----------
        target_class:
            Target class from which we take the features
        feature_masked:
            List of interchangeable features

        Returns:
        -------
        Dictionary of feature names and their optimized values

        Raises:
        -------
        KeyError
            If ``target_class`` is not among the model's classes.
        """
        if target_class not in self.model.classes_:
            raise KeyError(f'Class "{target_class}" does not exists in given EBM model')

        class_idx = np.where(self.model.classes_ == target_class)[0][0]
        self.updated_features = {}
        sample_scores = []
        for index, sample in self.X.iterrows():
            # start from the intercept for each sample
            score = self.model.intercept_.copy()
            if isinstance(score, float) or len(score) == 1:
                # regression or binary classification
                score = float(score)

            # add the score contribution of every term
            for term_idx, features in enumerate(self.model.term_features_):
                # indexing into a tensor requires a multi-dimensional index
                tensor_index = []
                # main effects will have 1 feature, and pairs will have 2 features
                for feature_idx in features:
                    feature_name = self.model.feature_names_in_[feature_idx]  # Get the feature name by index
                    feature_val = sample[feature_name]  # Use the feature name to get the correct value from the sample
                    bin_idx = 0  # if missing value, use bin index 0

                    if feature_val is not None and feature_val is not np.nan:
                        # we bin differently for main effects and pairs, so first
                        # get the list containing the bins for different resolutions
                        bin_levels = self.model.bins_[feature_idx]

                        # what resolution do we need for this term (main resolution, pair
                        # resolution, etc.), but limit to the last resolution available
                        bins = bin_levels[min(len(bin_levels), len(features)) - 1]

                        # here is where the magic is located: masked features are
                        # replaced by their class-optimal values
                        feature_val = self._get_optimized_feature_value(feature_name, feature_idx, feature_val,
                                                                        features, feature_masked, term_idx, class_idx)

                        if isinstance(bins, dict):
                            # categorical feature
                            # 'unknown' category strings are in the last bin (-1)
                            bin_idx = bins.get(feature_val, -1)
                            if bin_idx == -1:
                                # check value as string
                                bin_idx = bins.get(str(feature_val), -1)
                        else:
                            # continuous feature
                            try:
                                # try converting to a float, if that fails it's 'unknown'
                                feature_val = float(feature_val)
                                # add 1 because the 0th bin is reserved for 'missing'
                                bin_idx = np.digitize(feature_val, bins) + 1
                            except ValueError:
                                # non-floats are 'unknown', which is in the last bin (-1)
                                bin_idx = -1

                    tensor_index.append(bin_idx)

                # local_score is also the local feature importance
                local_score = self.model.term_scores_[term_idx][tuple(tensor_index)]

                score += local_score
            sample_scores.append(score)

        predictions = np.array(sample_scores)

        if hasattr(self.model, 'classes_'):
            # classification
            if len(self.model.classes_) == 2:
                # binary classification

                # softmax expects two logits for binary classification
                # the first logit is always equivalent to 0 for binary classification
                predictions = [[0, x] for x in predictions]
                predictions = softmax(predictions)

        # NOTE(review): `predictions` is computed but not returned — only the
        # optimized feature assignment is; kept for future sample checking.
        return self.updated_features