psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- psyke/__init__.py +231 -85
- psyke/clustering/__init__.py +9 -4
- psyke/clustering/cream/__init__.py +6 -10
- psyke/clustering/exact/__init__.py +17 -11
- psyke/clustering/utils.py +0 -1
- psyke/extraction/__init__.py +25 -0
- psyke/extraction/cart/CartPredictor.py +128 -0
- psyke/extraction/cart/FairTree.py +205 -0
- psyke/extraction/cart/FairTreePredictor.py +56 -0
- psyke/extraction/cart/__init__.py +48 -62
- psyke/extraction/hypercubic/__init__.py +187 -47
- psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
- psyke/extraction/hypercubic/creepy/__init__.py +24 -29
- psyke/extraction/hypercubic/divine/__init__.py +86 -0
- psyke/extraction/hypercubic/ginger/__init__.py +100 -0
- psyke/extraction/hypercubic/gridex/__init__.py +45 -84
- psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
- psyke/extraction/hypercubic/hex/__init__.py +104 -0
- psyke/extraction/hypercubic/hypercube.py +275 -72
- psyke/extraction/hypercubic/iter/__init__.py +45 -46
- psyke/extraction/hypercubic/strategy.py +13 -9
- psyke/extraction/real/__init__.py +24 -29
- psyke/extraction/real/utils.py +2 -2
- psyke/extraction/trepan/__init__.py +24 -19
- psyke/genetic/__init__.py +0 -0
- psyke/genetic/fgin/__init__.py +74 -0
- psyke/genetic/gin/__init__.py +144 -0
- psyke/hypercubepredictor.py +102 -0
- psyke/schema/__init__.py +230 -36
- psyke/tuning/__init__.py +40 -28
- psyke/tuning/crash/__init__.py +33 -64
- psyke/tuning/orchid/__init__.py +21 -23
- psyke/tuning/pedro/__init__.py +70 -56
- psyke/utils/logic.py +8 -8
- psyke/utils/plot.py +79 -3
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
- psyke-1.0.4.dev10.dist-info/RECORD +46 -0
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
- psyke/extraction/cart/predictor.py +0 -73
- psyke-0.4.9.dev6.dist-info/RECORD +0 -36
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/extraction/cart/CartPredictor.py (new file)
@@ -0,0 +1,128 @@
+from collections.abc import Iterable
+from typing import Union, Any
+import numpy as np
+import pandas as pd
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from tuprolog.core import clause, Var, Struct
+from tuprolog.theory import Theory, mutable_theory
+
+from psyke.extraction.cart import LeafConstraints, LeafSequence
+from psyke.schema import LessThan, GreaterThan, SchemaException, DiscreteFeature
+from psyke.utils.logic import create_variable_list, create_head, create_term
+
+
+class CartPredictor:
+    """
+    A wrapper for decision and regression trees of sklearn.
+    """
+
+    def __init__(self, predictor: Union[DecisionTreeClassifier, DecisionTreeRegressor] = DecisionTreeClassifier(),
+                 discretization=None, normalization=None):
+        self._predictor = predictor
+        self.discretization = discretization
+        self.normalization = normalization
+
+    def __get_constraints(self, nodes: Iterable[tuple[int, bool]]) -> LeafConstraints:
+        thresholds = [self._predictor.tree_.threshold[i[0]] for i in nodes]
+        features = [self._predictor.feature_names_in_[self._predictor.tree_.feature[node[0]]] for node in nodes]
+        conditions = [node[1] for node in nodes]
+        if self.normalization is not None:
+            thresholds = [threshold * self.normalization[feature][1] + self.normalization[feature][0]
+                          for feature, threshold in zip(features, thresholds)]
+        cond_dict = {}
+        for feature, condition, threshold in zip(features, conditions, thresholds):
+            cond = LessThan(threshold) if condition else GreaterThan(threshold)
+            if feature in cond_dict:
+                try:
+                    cond_dict[feature][-1] *= cond
+                except SchemaException:
+                    cond_dict[feature].append(cond)
+            else:
+                cond_dict[feature] = [cond]
+        return cond_dict
+
+    def __get_leaves(self) -> Iterable[int]:
+        return [i for i, (left_child, right_child) in enumerate(zip(
+            self._left_children, self._right_children
+        )) if left_child == -1 and right_child == -1]
+
+    def __get_prediction(self, node: int) -> Any:
+        if hasattr(self._predictor, 'classes_'):
+            return self._predictor.classes_[np.argmax(self._predictor.tree_.value[node])]
+        else:
+            return self._predictor.tree_.value[node]
+
+    def __path(self, node: int, path=None) -> Iterable[tuple[int, bool]]:
+        path = [] if path is None else path
+        if node == 0:
+            return path
+        father = list(self._left_children if node in self._left_children else self._right_children).index(node)
+        return self.__path(father, [(father, node in self._left_children)] + path)
+
+    def __iter__(self) -> LeafSequence:
+        leaves = self.__get_leaves()
+        return ((self.__get_constraints(self.__path(i)), self.__get_prediction(i)) for i in leaves)
+
+    def predict(self, data) -> Iterable:
+        return self._predictor.predict(data)
+
+    @staticmethod
+    def _simplify_nodes(nodes: list) -> Iterable:
+        simplified = [nodes.pop(0)]
+        while len(nodes) > 0:
+            first_node = nodes[0][0]
+            for k, conditions in first_node.items():
+                for condition in conditions:
+                    if all(k in node[0] and condition in node[0][k] for node in nodes):
+                        [node[0][k].remove(condition) for node in nodes]
+            simplified.append(nodes.pop(0))
+        return [({k: v for k, v in rule.items() if v != []}, prediction) for rule, prediction in simplified]
+
+    def _create_body(self, variables: dict[str, Var], conditions: LeafConstraints) -> Iterable[Struct]:
+        results = []
+        for feature_name, cond_list in conditions.items():
+            for condition in cond_list:
+                feature: DiscreteFeature = [d for d in self.discretization if feature_name in d.admissible_values][0] \
+                    if self.discretization else None
+                results.append(create_term(variables[feature_name], condition) if feature is None else
+                               create_term(variables[feature.name],
+                                           feature.admissible_values[feature_name],
+                                           isinstance(condition, GreaterThan)))
+        return results
+
+    def create_theory(self, data: pd.DataFrame, simplify: bool = True) -> Theory:
+        new_theory = mutable_theory()
+        nodes = [node for node in self]
+        nodes = self._simplify_nodes(nodes) if simplify else nodes
+        for (constraints, prediction) in nodes:
+            if self.normalization is not None and data.columns[-1] in self.normalization:
+                m, s = self.normalization[data.columns[-1]]
+                prediction = prediction * s + m
+            variables = create_variable_list(self.discretization, data)
+            new_theory.assertZ(
+                clause(
+                    create_head(data.columns[-1], list(variables.values()), prediction),
+                    self._create_body(variables, constraints)
+                )
+            )
+        return new_theory
+
+    @property
+    def predictor(self) -> Union[DecisionTreeClassifier, DecisionTreeRegressor]:
+        return self._predictor
+
+    @property
+    def n_leaves(self) -> int:
+        return len(list(self.__get_leaves()))
+
+    @property
+    def _left_children(self) -> list[int]:
+        return self._predictor.tree_.children_left
+
+    @property
+    def _right_children(self) -> list[int]:
+        return self._predictor.tree_.children_right
+
+    @predictor.setter
+    def predictor(self, predictor: Union[DecisionTreeClassifier, DecisionTreeRegressor]):
+        self._predictor = predictor
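For orientation, the new `CartPredictor` is iterable — each iteration yields a `(constraints, prediction)` pair for one leaf — and `create_theory` turns those pairs into Prolog clauses. A minimal usage sketch follows; the dataset preparation and column names are illustrative assumptions, not part of the package:

import pandas as pd
from sklearn.datasets import load_iris
from psyke.extraction.cart.CartPredictor import CartPredictor

# Assumed setup: a DataFrame whose last column is the (string) target.
iris = load_iris(as_frame=True).frame
iris.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'iris']
iris['iris'] = iris['iris'].astype(str)

wrapper = CartPredictor()                    # wraps a DecisionTreeClassifier by default
wrapper.predictor.fit(iris.iloc[:, :-1], iris.iloc[:, -1])

theory = wrapper.create_theory(iris)         # one Prolog clause per (simplified) leaf
for constraints, prediction in wrapper:      # leaves as (constraints, prediction) pairs
    print(constraints, '->', prediction)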
psyke/extraction/cart/FairTree.py (new file)
@@ -0,0 +1,205 @@
+import numpy as np
+from collections import Counter
+
+from sklearn.metrics import accuracy_score, r2_score
+
+
+class Node:
+    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
+        self.feature = feature
+        self.threshold = threshold
+        self.left = left
+        self.right = right
+        self.value = value
+
+    def is_leaf_node(self):
+        return self.value is not None
+
+
+class FairTree:
+    def __init__(self, max_depth=3, max_leaves=None, criterion=None, min_samples_split=2, lambda_penalty=0.0,
+                 protected_attr=None):
+        self.max_depth = max_depth
+        self.max_leaves = max_leaves
+        self.min_samples_split = min_samples_split
+        self.lambda_penalty = lambda_penalty
+        self.protected_attr = protected_attr
+        self.criterion = criterion
+        self.root = None
+        self.n_leaves = 0
+        self.quality_function = None
+
+    def fit(self, X, y):
+        self.n_leaves = 0
+        self.root = self._grow_tree(X, y, depth=0)
+        while self.n_leaves > self.max_leaves:
+            self.prune_least_important_leaf(X, y)
+            self.n_leaves -= 1
+        return self
+
+    @staticmethod
+    def _estimate_output(y):
+        raise NotImplementedError
+
+    def score(self, X, y):
+        raise NotImplementedError
+
+    def predict(self, X):
+        return np.array([self._traverse_tree(x, self.root) for _, x in X.iterrows()])
+
+    def _traverse_tree(self, x, node):
+        if node.is_leaf_node():
+            return node.value
+        if x[node.feature] <= node.threshold:
+            return self._traverse_tree(x, node.left)
+        return self._traverse_tree(x, node.right)
+
+    def _grow_tree(self, X, y, depth):
+        if depth >= self.max_depth or X.shape[0] < self.min_samples_split or len(set(y.values.flatten())) == 1 or \
+                (self.max_leaves is not None and self.n_leaves >= self.max_leaves):
+            self.n_leaves += 1
+            return Node(value=self._estimate_output(y))
+
+        best_feature, best_threshold = self._best_split(X, y)
+        if best_feature is None:
+            self.n_leaves += 1
+            return Node(value=self._estimate_output(y))
+
+        left_idxs = X[best_feature] <= best_threshold
+        right_idxs = X[best_feature] > best_threshold
+
+        left = self._grow_tree(X[left_idxs], y[left_idxs], depth + 1)
+        right = self._grow_tree(X[right_idxs], y[right_idxs], depth + 1)
+        return Node(best_feature, best_threshold, left, right)
+
+    @staticmethod
+    def generate_thresholds(X, y):
+        sorted_indices = np.argsort(X)
+        X = np.array(X)[sorted_indices]
+        y = np.array(y)[sorted_indices]
+        # X = np.array(np.unique(np.unique(list(zip(X, y)), axis=0)[:, 0]), dtype=float)
+        return np.array([(X[:-1][i] + X[1:][i]) / 2.0 for i in range(len(X) - 1) if y[i] != y[i + 1]])
+
+    def _best_split(self, X, y):
+        best_gain = -float('inf')
+        split_idx, split_threshold = None, None
+
+        for feature in [feature for feature in X.columns if feature not in self.protected_attr]:
+            # for threshold in self.generate_thresholds(X[feature], y):
+            for threshold in np.unique(np.quantile(X[feature], np.linspace(0, 1, num=25))):
+                left_idxs = X[feature] <= threshold
+                right_idxs = X[feature] > threshold
+
+                if left_idxs.sum() == 0 or right_idxs.sum() == 0:
+                    continue
+
+                gain = self._fair_gain(y, left_idxs, right_idxs, X[self.protected_attr])
+
+                if gain > best_gain:
+                    best_gain = gain
+                    split_idx = feature
+                    split_threshold = threshold
+        return split_idx, split_threshold
+
+    @staticmethod
+    def _disparity(group):
+        counts = Counter(group)
+        if len(counts) <= 1:
+            return 0.0
+        values = np.array(list(counts.values())) / len(group)
+        return np.abs(values[0] - values[1])
+
+    def _fair_gain(self, y, left_idx, right_idx, protected):
+        child = len(y[left_idx]) / len(y) * self.quality_function(y[left_idx]) + \
+                len(y[right_idx]) / len(y) * self.quality_function(y[right_idx])
+        info_gain = self.quality_function(y) - child
+        penalty = self._disparity(protected[left_idx]) + self._disparity(protected[right_idx])
+        return info_gain - self.lambda_penalty * penalty
+
+    @staticmethod
+    def _match_path(x, path):
+        for node, left in path:
+            if left and x[node.feature] > node.threshold:
+                return False
+            if not left and x[node.feature] <= node.threshold:
+                return False
+        return True
+
+    @staticmethod
+    def candidates(node, parent=None, is_left=None, path=[]):
+        if node is None or node.is_leaf_node():
+            return []
+        leaves = []
+        if node.left.is_leaf_node() and node.right.is_leaf_node():
+            leaves.append((node, parent, is_left, path))
+        leaves += FairTreeClassifier.candidates(node.left, node, True, path + [(node, True)])
+        leaves += FairTreeClassifier.candidates(node.right, node, False, path + [(node, False)])
+        return leaves
+
+    def prune_least_important_leaf(self, X, y):
+        best_score = -np.inf
+        best_prune = None
+
+        for node, parent, is_left, path in self.candidates(self.root):
+            original_left = node.left
+            original_right = node.right
+
+            merged_y = y[(X.apply(lambda x: self._match_path(x, path), axis=1))]
+            if len(merged_y) == 0:
+                continue
+            new_value = self._estimate_output(merged_y)
+            node.left = node.right = None
+            node.value = new_value
+
+            score = self.score(X, y)
+            if score >= best_score:
+                best_score = score
+                best_prune = (node, new_value)
+
+            node.left, node.right, node.value = original_left, original_right, None
+
+        if best_prune:
+            best_prune[0].left = best_prune[0].right = None
+            best_prune[0].value = best_prune[1]
+
+
+class FairTreeClassifier(FairTree):
+    def __init__(self, max_depth=3, max_leaves=None, criterion='entropy', min_samples_split=2, lambda_penalty=0.0,
+                 protected_attr=None):
+        super().__init__(max_depth, max_leaves, criterion, min_samples_split, lambda_penalty, protected_attr)
+        self.quality_function = self._gini if self.criterion == 'gini' else self._entropy
+
+    @staticmethod
+    def _estimate_output(y):
+        return Counter(y.values.flatten()).most_common(1)[0][0]
+
+    def score(self, X, y):
+        return accuracy_score(y.values.flatten(), self.predict(X))
+
+    @staticmethod
+    def _entropy(y):
+        ps = np.unique(y, return_counts=True)[1] / len(y)
+        return -np.sum([p * np.log2(p) for p in ps if p > 0])
+
+    @staticmethod
+    def _gini(y):
+        return 1.0 - np.sum(np.unique(y, return_counts=True)[1] / len(y)**2)
+
+
+class FairTreeRegressor(FairTree):
+    def __init__(self, max_depth=3, max_leaves=None, criterion='mse', min_samples_split=2, lambda_penalty=0.0,
+                 protected_attr=None):
+        super().__init__(max_depth, max_leaves, criterion, min_samples_split, lambda_penalty, protected_attr)
+        self.quality_function = self._mse
+
+    @staticmethod
+    def _estimate_output(y):
+        return np.mean(y.values.flatten())
+
+    def score(self, X, y):
+        return r2_score(y.values.flatten(), self.predict(X))
+
+    @staticmethod
+    def _mse(y):
+        y = y.values.flatten().astype(float)
+        return np.mean((y - np.mean(y))**2)
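The fairness mechanism here is twofold: protected attributes are excluded from the candidate split features in `_best_split`, and `_fair_gain` subtracts `lambda_penalty` times the demographic disparity of the two child partitions from the ordinary information gain. A hedged sketch on synthetic data (all column names and values below are assumptions for illustration, not part of the package):

import numpy as np
import pandas as pd
from psyke.extraction.cart.FairTree import FairTreeClassifier

# Synthetic data: 'sex' is the protected attribute.
rng = np.random.default_rng(0)
X = pd.DataFrame({'income': rng.normal(size=200),
                  'age': rng.normal(size=200),
                  'sex': rng.integers(0, 2, size=200)})
y = pd.DataFrame({'approved': (X['income'] + 0.1 * X['sex'] > 0).astype(str)})

# 'sex' is never used as a split feature, and splits whose children are
# demographically skewed on 'sex' are meant to be penalised via lambda_penalty.
tree = FairTreeClassifier(max_depth=3, max_leaves=8, lambda_penalty=0.5,
                          protected_attr=['sex'])
tree.fit(X, y)
print(tree.score(X, y))   # accuracy of the fairness-constrained tree

With `lambda_penalty=0.0` this reduces to a plain greedy tree restricted to non-protected features; larger values trade predictive gain for demographically balanced children.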
psyke/extraction/cart/FairTreePredictor.py (new file)
@@ -0,0 +1,56 @@
+import copy
+from typing import Union, Any
+
+from psyke.extraction.cart import FairTreeClassifier, FairTreeRegressor, LeafSequence, LeafConstraints
+from psyke.extraction.cart.CartPredictor import CartPredictor
+from psyke.schema import LessThan, GreaterThan, SchemaException, Value
+
+
+class FairTreePredictor(CartPredictor):
+    """
+    A wrapper for fair decision and regression trees of psyke.
+    """
+
+    def __init__(self, predictor: Union[FairTreeClassifier, FairTreeRegressor] = FairTreeClassifier(),
+                 discretization=None, normalization=None):
+        super().__init__(predictor, discretization, normalization)
+
+    def __iter__(self) -> LeafSequence:
+        leaves = [node for node in self.recurse(self._predictor.root, {})]
+        return (leaf for leaf in leaves)
+
+    @staticmethod
+    def merge_constraints(constraints: LeafConstraints, constraint: Value, feature: str):
+        if feature in constraints:
+            try:
+                constraints[feature][-1] *= constraint
+            except SchemaException:
+                constraints[feature].append(constraint)
+        else:
+            constraints[feature] = [constraint]
+        return constraints
+
+    def recurse(self, node, constraints) -> Union[LeafSequence, tuple[LeafConstraints, Any]]:
+        if node.is_leaf_node():
+            return constraints, node.value
+
+        feature = node.feature
+        threshold = node.threshold if self.normalization is None else \
+            (node.threshold * self.normalization[feature][1] + self.normalization[feature][0])
+
+        left = self.recurse(node.left, self.merge_constraints(copy.deepcopy(constraints), LessThan(threshold), feature))
+        right = self.recurse(node.right, self.merge_constraints(copy.deepcopy(constraints),
+                                                                GreaterThan(threshold), feature))
+        return (left if isinstance(left, list) else [left]) + (right if isinstance(right, list) else [right])
+
+    @property
+    def predictor(self) -> Union[FairTreeClassifier, FairTreeRegressor]:
+        return self._predictor
+
+    @property
+    def n_leaves(self) -> int:
+        return self._predictor.n_leaves
+
+    @predictor.setter
+    def predictor(self, predictor: Union[FairTreeClassifier, FairTreeRegressor]):
+        self._predictor = predictor
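`FairTreePredictor` replaces the sklearn-specific leaf walk of `CartPredictor` with an explicit recursion over `Node` objects, so a fitted `FairTree` yields the same `(constraints, prediction)` leaf sequence and, through the inherited `create_theory`, a Prolog theory. A sketch continuing the previous example (`tree`, `X`, and `y` are the assumed variables from that snippet):

from psyke.extraction.cart.FairTreePredictor import FairTreePredictor

# Assumed setup: target appended as the last column, as create_theory expects.
data = X.copy()
data['approved'] = y['approved'].values

wrapper = FairTreePredictor(tree)
for constraints, prediction in wrapper:   # recurse() accumulates LessThan/GreaterThan per path
    print(constraints, '->', prediction)
theory = wrapper.create_theory(data)      # inherited from CartPredictor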
psyke/extraction/cart/__init__.py (the deleted lines shown truncated below are cut off in the source diff)
@@ -1,84 +1,70 @@
+from abc import ABC
+
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-
-from psyke import
-from psyke
-from psyke.
-from
-from tuprolog.theory import Theory
-from typing import Iterable
+
+from psyke.extraction import PedagogicalExtractor
+from psyke import get_default_random_seed
+from psyke.extraction.cart.FairTree import FairTreeClassifier, FairTreeRegressor
+from psyke.schema import DiscreteFeature, Value
+from tuprolog.theory import Theory
+from typing import Iterable, Any
 import pandas as pd


 TREE_SEED = get_default_random_seed()

+LeafConstraints = dict[str, list[Value]]
+LeafSequence = Iterable[tuple[LeafConstraints, Any]]
+

-class Cart(PedagogicalExtractor):
+class Cart(PedagogicalExtractor, ABC):

-    def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None,
+    def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None, max_features=None,
                  discretization: Iterable[DiscreteFeature] = None,
                  normalization=None, simplify: bool = True):
+        from psyke.extraction.cart.CartPredictor import CartPredictor
+
         super().__init__(predictor, discretization, normalization)
-        self.
+        self.is_fair = None
+        self._cart_predictor = CartPredictor(discretization=discretization, normalization=normalization)
         self.depth = max_depth
         self.leaves = max_leaves
+        self.max_features = max_features
         self._simplify = simplify

-    def
-
-        for feature_name, constraint, value in constraints:
-            features = [d for d in self.discretization if feature_name in d.admissible_values]
-            feature: DiscreteFeature = features[0] if len(features) > 0 else None
-            results.append(create_term(variables[feature_name], constraint) if feature is None else
-                           create_term(variables[feature.name],
-                                       feature.admissible_values[feature_name],
-                                       isinstance(constraint, GreaterThan)))
-        return results
+    def _extract(self, data: pd.DataFrame) -> Theory:
+        from psyke.extraction.cart.FairTreePredictor import FairTreePredictor

-
-
-
-
-
-
-
-
-
-
+        if self.is_fair:
+            self._cart_predictor = FairTreePredictor(discretization=self.discretization,
+                                                     normalization=self.normalization)
+            fair_tree = FairTreeClassifier if isinstance(data.iloc[0, -1], str) else FairTreeRegressor
+            self._cart_predictor.predictor = fair_tree(max_depth=self.depth, max_leaves=self.leaves,
+                                                       protected_attr=self.is_fair)
+        else:
+            tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
+            self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth,
+                                                  max_leaf_nodes=self.leaves, max_features=self.max_features)
+        self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
+        return self._cart_predictor.create_theory(data, self._simplify)

-    def
-
-        nodes = [node for node in self._cart_predictor]
-        nodes = Cart._simplify_nodes(nodes) if self._simplify else nodes
-        for (constraints, prediction) in nodes:
-            if self.normalization is not None:
-                m, s = self.normalization[data.columns[-1]]
-                prediction = prediction * s + m
-            if mapping is not None and prediction in mapping.values():
-                for k, v in mapping.items():
-                    if v == prediction:
-                        prediction = k
-                        break
-            variables = create_variable_list(self.discretization, data, sort)
-            new_theory.assertZ(
-                clause(
-                    create_head(data.columns[-1], list(variables.values()), prediction),
-                    self._create_body(variables, constraints)
-                )
-            )
-        return new_theory
+    def make_fair(self, features: Iterable[str]):
+        self.is_fair = features

-    def
-        self._cart_predictor.
-        if isinstance(data.iloc[0, -1], str) or mapping is not None else DecisionTreeRegressor(random_state=TREE_SEED)
-        if mapping is not None:
-            data.iloc[:, -1] = data.iloc[:, -1].apply(lambda x: mapping[x] if x in mapping.keys() else x)
-        self._cart_predictor.predictor.max_depth = self.depth
-        self._cart_predictor.predictor.max_leaf_nodes = self.leaves
-        self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
-        return self._create_theory(data, mapping, sort)
+    def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+        return self._cart_predictor.predict(dataframe)

-    def
-
+    def predict_why(self, data: dict[str, float], verbose=True):
+        prediction = None
+        conditions = {}
+        if self.normalization is not None:
+            data = {k: v * self.normalization[k][1] + self.normalization[k][0] if k in self.normalization else v
+                    for k, v in data.items()}
+        for conditions, prediction in self._cart_predictor:
+            if all(all(interval.is_in(data[variable]) for interval in intervals)
+                   for variable, intervals in conditions.items()):
+                break
+        return prediction, conditions

     @property
     def n_rules(self) -> int: