psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. psyke/__init__.py +231 -85
  2. psyke/clustering/__init__.py +9 -4
  3. psyke/clustering/cream/__init__.py +6 -10
  4. psyke/clustering/exact/__init__.py +17 -11
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +25 -0
  7. psyke/extraction/cart/CartPredictor.py +128 -0
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +48 -62
  11. psyke/extraction/hypercubic/__init__.py +187 -47
  12. psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
  13. psyke/extraction/hypercubic/creepy/__init__.py +24 -29
  14. psyke/extraction/hypercubic/divine/__init__.py +86 -0
  15. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  16. psyke/extraction/hypercubic/gridex/__init__.py +45 -84
  17. psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
  18. psyke/extraction/hypercubic/hex/__init__.py +104 -0
  19. psyke/extraction/hypercubic/hypercube.py +275 -72
  20. psyke/extraction/hypercubic/iter/__init__.py +45 -46
  21. psyke/extraction/hypercubic/strategy.py +13 -9
  22. psyke/extraction/real/__init__.py +24 -29
  23. psyke/extraction/real/utils.py +2 -2
  24. psyke/extraction/trepan/__init__.py +24 -19
  25. psyke/genetic/__init__.py +0 -0
  26. psyke/genetic/fgin/__init__.py +74 -0
  27. psyke/genetic/gin/__init__.py +144 -0
  28. psyke/hypercubepredictor.py +102 -0
  29. psyke/schema/__init__.py +230 -36
  30. psyke/tuning/__init__.py +40 -28
  31. psyke/tuning/crash/__init__.py +33 -64
  32. psyke/tuning/orchid/__init__.py +21 -23
  33. psyke/tuning/pedro/__init__.py +70 -56
  34. psyke/utils/logic.py +8 -8
  35. psyke/utils/plot.py +79 -3
  36. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
  37. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  38. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  39. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  40. psyke/extraction/cart/predictor.py +0 -73
  41. psyke-0.4.9.dev6.dist-info/RECORD +0 -36
  42. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0

psyke/extraction/cart/CartPredictor.py
@@ -0,0 +1,128 @@
+ from collections.abc import Iterable
+ from typing import Union, Any
+ import numpy as np
+ import pandas as pd
+ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+ from tuprolog.core import clause, Var, Struct
+ from tuprolog.theory import Theory, mutable_theory
+
+ from psyke.extraction.cart import LeafConstraints, LeafSequence
+ from psyke.schema import LessThan, GreaterThan, SchemaException, DiscreteFeature
+ from psyke.utils.logic import create_variable_list, create_head, create_term
+
+
+ class CartPredictor:
+     """
+     A wrapper for decision and regression trees of sklearn.
+     """
+
+     def __init__(self, predictor: Union[DecisionTreeClassifier, DecisionTreeRegressor] = DecisionTreeClassifier(),
+                  discretization=None, normalization=None):
+         self._predictor = predictor
+         self.discretization = discretization
+         self.normalization = normalization
+
+     def __get_constraints(self, nodes: Iterable[tuple[int, bool]]) -> LeafConstraints:
+         thresholds = [self._predictor.tree_.threshold[i[0]] for i in nodes]
+         features = [self._predictor.feature_names_in_[self._predictor.tree_.feature[node[0]]] for node in nodes]
+         conditions = [node[1] for node in nodes]
+         if self.normalization is not None:
+             thresholds = [threshold * self.normalization[feature][1] + self.normalization[feature][0]
+                           for feature, threshold in zip(features, thresholds)]
+         cond_dict = {}
+         for feature, condition, threshold in zip(features, conditions, thresholds):
+             cond = LessThan(threshold) if condition else GreaterThan(threshold)
+             if feature in cond_dict:
+                 try:
+                     cond_dict[feature][-1] *= cond
+                 except SchemaException:
+                     cond_dict[feature].append(cond)
+             else:
+                 cond_dict[feature] = [cond]
+         return cond_dict
+
+     def __get_leaves(self) -> Iterable[int]:
+         return [i for i, (left_child, right_child) in enumerate(zip(
+             self._left_children, self._right_children
+         )) if left_child == -1 and right_child == -1]
+
+     def __get_prediction(self, node: int) -> Any:
+         if hasattr(self._predictor, 'classes_'):
+             return self._predictor.classes_[np.argmax(self._predictor.tree_.value[node])]
+         else:
+             return self._predictor.tree_.value[node]
+
+     def __path(self, node: int, path=None) -> Iterable[tuple[int, bool]]:
+         path = [] if path is None else path
+         if node == 0:
+             return path
+         father = list(self._left_children if node in self._left_children else self._right_children).index(node)
+         return self.__path(father, [(father, node in self._left_children)] + path)
+
+     def __iter__(self) -> LeafSequence:
+         leaves = self.__get_leaves()
+         return ((self.__get_constraints(self.__path(i)), self.__get_prediction(i)) for i in leaves)
+
+     def predict(self, data) -> Iterable:
+         return self._predictor.predict(data)
+
+     @staticmethod
+     def _simplify_nodes(nodes: list) -> Iterable:
+         simplified = [nodes.pop(0)]
+         while len(nodes) > 0:
+             first_node = nodes[0][0]
+             for k, conditions in first_node.items():
+                 for condition in conditions:
+                     if all(k in node[0] and condition in node[0][k] for node in nodes):
+                         [node[0][k].remove(condition) for node in nodes]
+             simplified.append(nodes.pop(0))
+         return [({k: v for k, v in rule.items() if v != []}, prediction) for rule, prediction in simplified]
+
+     def _create_body(self, variables: dict[str, Var], conditions: LeafConstraints) -> Iterable[Struct]:
+         results = []
+         for feature_name, cond_list in conditions.items():
+             for condition in cond_list:
+                 feature: DiscreteFeature = [d for d in self.discretization if feature_name in d.admissible_values][0] \
+                     if self.discretization else None
+                 results.append(create_term(variables[feature_name], condition) if feature is None else
+                                create_term(variables[feature.name],
+                                            feature.admissible_values[feature_name],
+                                            isinstance(condition, GreaterThan)))
+         return results
+
+     def create_theory(self, data: pd.DataFrame, simplify: bool = True) -> Theory:
+         new_theory = mutable_theory()
+         nodes = [node for node in self]
+         nodes = self._simplify_nodes(nodes) if simplify else nodes
+         for (constraints, prediction) in nodes:
+             if self.normalization is not None and data.columns[-1] in self.normalization:
+                 m, s = self.normalization[data.columns[-1]]
+                 prediction = prediction * s + m
+             variables = create_variable_list(self.discretization, data)
+             new_theory.assertZ(
+                 clause(
+                     create_head(data.columns[-1], list(variables.values()), prediction),
+                     self._create_body(variables, constraints)
+                 )
+             )
+         return new_theory
+
+     @property
+     def predictor(self) -> Union[DecisionTreeClassifier, DecisionTreeRegressor]:
+         return self._predictor
+
+     @property
+     def n_leaves(self) -> int:
+         return len(list(self.__get_leaves()))
+
+     @property
+     def _left_children(self) -> list[int]:
+         return self._predictor.tree_.children_left
+
+     @property
+     def _right_children(self) -> list[int]:
+         return self._predictor.tree_.children_right
+
+     @predictor.setter
+     def predictor(self, predictor: Union[DecisionTreeClassifier, DecisionTreeRegressor]):
+         self._predictor = predictor
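
For orientation: CartPredictor walks a fitted sklearn tree leaf by leaf, turns each root-to-leaf path into per-feature interval constraints (merging consecutive thresholds on the same feature where possible), and create_theory asserts one clause per leaf into a tuprolog theory. A minimal usage sketch of the code above; the toy DataFrame is a hypothetical example, and scikit-learn >= 1.0 is assumed so that feature_names_in_ is populated when fitting on a DataFrame:

    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier
    from psyke.extraction.cart.CartPredictor import CartPredictor

    # Hypothetical toy data: feature columns first, target last, as create_theory expects.
    data = pd.DataFrame({
        'petal_length': [1.4, 1.3, 4.7, 4.5, 6.0, 5.9],
        'petal_width': [0.2, 0.2, 1.4, 1.5, 2.5, 2.1],
        'species': ['setosa', 'setosa', 'versicolor', 'versicolor', 'virginica', 'virginica'],
    })
    tree = DecisionTreeClassifier(max_depth=2).fit(data.iloc[:, :-1], data.iloc[:, -1])

    predictor = CartPredictor(tree)
    for constraints, prediction in predictor:   # (path constraints, leaf output) per leaf
        print(constraints, prediction)
    theory = predictor.create_theory(data)      # one clause per leaf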

psyke/extraction/cart/FairTree.py
@@ -0,0 +1,205 @@
+ import numpy as np
+ from collections import Counter
+
+ from sklearn.metrics import accuracy_score, r2_score
+
+
+ class Node:
+     def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
+         self.feature = feature
+         self.threshold = threshold
+         self.left = left
+         self.right = right
+         self.value = value
+
+     def is_leaf_node(self):
+         return self.value is not None
+
+
+ class FairTree:
+     def __init__(self, max_depth=3, max_leaves=None, criterion=None, min_samples_split=2, lambda_penalty=0.0,
+                  protected_attr=None):
+         self.max_depth = max_depth
+         self.max_leaves = max_leaves
+         self.min_samples_split = min_samples_split
+         self.lambda_penalty = lambda_penalty
+         self.protected_attr = protected_attr
+         self.criterion = criterion
+         self.root = None
+         self.n_leaves = 0
+         self.quality_function = None
+
+     def fit(self, X, y):
+         self.n_leaves = 0
+         self.root = self._grow_tree(X, y, depth=0)
+         while self.n_leaves > self.max_leaves:
+             self.prune_least_important_leaf(X, y)
+             self.n_leaves -= 1
+         return self
+
+     @staticmethod
+     def _estimate_output(y):
+         raise NotImplementedError
+
+     def score(self, X, y):
+         raise NotImplementedError
+
+     def predict(self, X):
+         return np.array([self._traverse_tree(x, self.root) for _, x in X.iterrows()])
+
+     def _traverse_tree(self, x, node):
+         if node.is_leaf_node():
+             return node.value
+         if x[node.feature] <= node.threshold:
+             return self._traverse_tree(x, node.left)
+         return self._traverse_tree(x, node.right)
+
+     def _grow_tree(self, X, y, depth):
+         if depth >= self.max_depth or X.shape[0] < self.min_samples_split or len(set(y.values.flatten())) == 1 or \
+                 (self.max_leaves is not None and self.n_leaves >= self.max_leaves):
+             self.n_leaves += 1
+             return Node(value=self._estimate_output(y))
+
+         best_feature, best_threshold = self._best_split(X, y)
+         if best_feature is None:
+             self.n_leaves += 1
+             return Node(value=self._estimate_output(y))
+
+         left_idxs = X[best_feature] <= best_threshold
+         right_idxs = X[best_feature] > best_threshold
+
+         left = self._grow_tree(X[left_idxs], y[left_idxs], depth + 1)
+         right = self._grow_tree(X[right_idxs], y[right_idxs], depth + 1)
+         return Node(best_feature, best_threshold, left, right)
+
+     @staticmethod
+     def generate_thresholds(X, y):
+         sorted_indices = np.argsort(X)
+         X = np.array(X)[sorted_indices]
+         y = np.array(y)[sorted_indices]
+         # X = np.array(np.unique(np.unique(list(zip(X, y)), axis=0)[:, 0]), dtype=float)
+         return np.array([(X[:-1][i] + X[1:][i]) / 2.0 for i in range(len(X) - 1) if y[i] != y[i + 1]])
+
+     def _best_split(self, X, y):
+         best_gain = -float('inf')
+         split_idx, split_threshold = None, None
+
+         for feature in [feature for feature in X.columns if feature not in self.protected_attr]:
+             # for threshold in self.generate_thresholds(X[feature], y):
+             for threshold in np.unique(np.quantile(X[feature], np.linspace(0, 1, num=25))):
+                 left_idxs = X[feature] <= threshold
+                 right_idxs = X[feature] > threshold
+
+                 if left_idxs.sum() == 0 or right_idxs.sum() == 0:
+                     continue
+
+                 gain = self._fair_gain(y, left_idxs, right_idxs, X[self.protected_attr])
+
+                 if gain > best_gain:
+                     best_gain = gain
+                     split_idx = feature
+                     split_threshold = threshold
+         return split_idx, split_threshold
+
+     @staticmethod
+     def _disparity(group):
+         counts = Counter(group)
+         if len(counts) <= 1:
+             return 0.0
+         values = np.array(list(counts.values())) / len(group)
+         return np.abs(values[0] - values[1])
+
+     def _fair_gain(self, y, left_idx, right_idx, protected):
+         child = len(y[left_idx]) / len(y) * self.quality_function(y[left_idx]) + \
+                 len(y[right_idx]) / len(y) * self.quality_function(y[right_idx])
+         info_gain = self.quality_function(y) - child
+         penalty = self._disparity(protected[left_idx]) + self._disparity(protected[right_idx])
+         return info_gain - self.lambda_penalty * penalty
+
+     @staticmethod
+     def _match_path(x, path):
+         for node, left in path:
+             if left and x[node.feature] > node.threshold:
+                 return False
+             if not left and x[node.feature] <= node.threshold:
+                 return False
+         return True
+
+     @staticmethod
+     def candidates(node, parent=None, is_left=None, path=[]):
+         if node is None or node.is_leaf_node():
+             return []
+         leaves = []
+         if node.left.is_leaf_node() and node.right.is_leaf_node():
+             leaves.append((node, parent, is_left, path))
+         leaves += FairTreeClassifier.candidates(node.left, node, True, path + [(node, True)])
+         leaves += FairTreeClassifier.candidates(node.right, node, False, path + [(node, False)])
+         return leaves
+
+     def prune_least_important_leaf(self, X, y):
+         best_score = -np.inf
+         best_prune = None
+
+         for node, parent, is_left, path in self.candidates(self.root):
+             original_left = node.left
+             original_right = node.right
+
+             merged_y = y[(X.apply(lambda x: self._match_path(x, path), axis=1))]
+             if len(merged_y) == 0:
+                 continue
+             new_value = self._estimate_output(merged_y)
+             node.left = node.right = None
+             node.value = new_value
+
+             score = self.score(X, y)
+             if score >= best_score:
+                 best_score = score
+                 best_prune = (node, new_value)
+
+             node.left, node.right, node.value = original_left, original_right, None
+
+         if best_prune:
+             best_prune[0].left = best_prune[0].right = None
+             best_prune[0].value = best_prune[1]
+
+
+ class FairTreeClassifier(FairTree):
+     def __init__(self, max_depth=3, max_leaves=None, criterion='entropy', min_samples_split=2, lambda_penalty=0.0,
+                  protected_attr=None):
+         super().__init__(max_depth, max_leaves, criterion, min_samples_split, lambda_penalty, protected_attr)
+         self.quality_function = self._gini if self.criterion == 'gini' else self._entropy
+
+     @staticmethod
+     def _estimate_output(y):
+         return Counter(y.values.flatten()).most_common(1)[0][0]
+
+     def score(self, X, y):
+         return accuracy_score(y.values.flatten(), self.predict(X))
+
+     @staticmethod
+     def _entropy(y):
+         ps = np.unique(y, return_counts=True)[1] / len(y)
+         return -np.sum([p * np.log2(p) for p in ps if p > 0])
+
+     @staticmethod
+     def _gini(y):
+         return 1.0 - np.sum(np.unique(y, return_counts=True)[1] / len(y)**2)
+
+
+ class FairTreeRegressor(FairTree):
+     def __init__(self, max_depth=3, max_leaves=None, criterion='mse', min_samples_split=2, lambda_penalty=0.0,
+                  protected_attr=None):
+         super().__init__(max_depth, max_leaves, criterion, min_samples_split, lambda_penalty, protected_attr)
+         self.quality_function = self._mse
+
+     @staticmethod
+     def _estimate_output(y):
+         return np.mean(y.values.flatten())
+
+     def score(self, X, y):
+         return r2_score(y.values.flatten(), self.predict(X))
+
+     @staticmethod
+     def _mse(y):
+         y = y.values.flatten().astype(float)
+         return np.mean((y - np.mean(y))**2)
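
For orientation: FairTree grows a CART-style tree but scores each candidate split with _fair_gain, i.e. the usual quality gain (entropy or Gini for classification, MSE for regression) minus lambda_penalty times the disparity of the protected groups in the two children, and the protected attributes themselves are never used as split features. A minimal sketch with hypothetical toy data (column names and values are assumptions, not from this diff); note that fit() compares n_leaves against max_leaves, so max_leaves should be set explicitly:

    import pandas as pd
    from psyke.extraction.cart.FairTree import FairTreeClassifier

    # Hypothetical data: 'sex' is protected, so it is excluded from split
    # candidates but stays in X to feed the disparity penalty.
    X = pd.DataFrame({
        'income': [20, 35, 50, 65, 80, 95],
        'age': [22, 30, 41, 52, 38, 60],
        'sex': [0, 1, 0, 1, 0, 1],
    })
    y = pd.Series([0, 0, 0, 1, 1, 1], name='granted')

    clf = FairTreeClassifier(max_depth=3, max_leaves=4, lambda_penalty=0.5,
                             protected_attr=['sex']).fit(X, y)
    print(clf.predict(X))   # leaf outputs, one per row of X
    print(clf.score(X, y))  # accuracy_score under the hood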

psyke/extraction/cart/FairTreePredictor.py
@@ -0,0 +1,56 @@
+ import copy
+ from typing import Union, Any
+
+ from psyke.extraction.cart import FairTreeClassifier, FairTreeRegressor, LeafSequence, LeafConstraints
+ from psyke.extraction.cart.CartPredictor import CartPredictor
+ from psyke.schema import LessThan, GreaterThan, SchemaException, Value
+
+
+ class FairTreePredictor(CartPredictor):
+     """
+     A wrapper for fair decision and regression trees of psyke.
+     """
+
+     def __init__(self, predictor: Union[FairTreeClassifier, FairTreeRegressor] = FairTreeClassifier(),
+                  discretization=None, normalization=None):
+         super().__init__(predictor, discretization, normalization)
+
+     def __iter__(self) -> LeafSequence:
+         leaves = [node for node in self.recurse(self._predictor.root, {})]
+         return (leaf for leaf in leaves)
+
+     @staticmethod
+     def merge_constraints(constraints: LeafConstraints, constraint: Value, feature: str):
+         if feature in constraints:
+             try:
+                 constraints[feature][-1] *= constraint
+             except SchemaException:
+                 constraints[feature].append(constraint)
+         else:
+             constraints[feature] = [constraint]
+         return constraints
+
+     def recurse(self, node, constraints) -> Union[LeafSequence, tuple[LeafConstraints, Any]]:
+         if node.is_leaf_node():
+             return constraints, node.value
+
+         feature = node.feature
+         threshold = node.threshold if self.normalization is None else \
+             (node.threshold * self.normalization[feature][1] + self.normalization[feature][0])
+
+         left = self.recurse(node.left, self.merge_constraints(copy.deepcopy(constraints), LessThan(threshold), feature))
+         right = self.recurse(node.right, self.merge_constraints(copy.deepcopy(constraints),
+                                                                 GreaterThan(threshold), feature))
+         return (left if isinstance(left, list) else [left]) + (right if isinstance(right, list) else [right])
+
+     @property
+     def predictor(self) -> Union[FairTreeClassifier, FairTreeRegressor]:
+         return self._predictor
+
+     @property
+     def n_leaves(self) -> int:
+         return self._predictor.n_leaves
+
+     @predictor.setter
+     def predictor(self, predictor: Union[FairTreeClassifier, FairTreeRegressor]):
+         self._predictor = predictor
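
For orientation: FairTreePredictor overrides only leaf enumeration. recurse descends the FairTree's Node objects, accumulating LessThan/GreaterThan constraints per feature (de-normalizing thresholds when a normalization is set), and the create_theory inherited from CartPredictor consumes the resulting (constraints, prediction) pairs unchanged. A sketch with assumed toy data:

    import pandas as pd
    from psyke.extraction.cart.FairTree import FairTreeClassifier
    from psyke.extraction.cart.FairTreePredictor import FairTreePredictor

    # Hypothetical data; 'sex' is the protected attribute.
    X = pd.DataFrame({'x': [0.1, 0.4, 0.6, 0.9], 'sex': [0, 1, 0, 1]})
    y = pd.Series(['no', 'no', 'yes', 'yes'], name='target')
    tree = FairTreeClassifier(max_depth=2, max_leaves=2, protected_attr=['sex']).fit(X, y)

    wrapped = FairTreePredictor(tree)
    for constraints, prediction in wrapped:   # {feature: [intervals]} per leaf
        print(constraints, prediction)
    print(wrapped.n_leaves)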

psyke/extraction/cart/__init__.py
@@ -1,84 +1,70 @@
+ from abc import ABC
+
  from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
- from psyke.extraction.cart.predictor import CartPredictor, LeafConstraints, LeafSequence
- from psyke import get_default_random_seed, PedagogicalExtractor
- from psyke.schema import GreaterThan, DiscreteFeature
- from psyke.utils.logic import create_variable_list, create_head, create_term
- from tuprolog.core import clause, Var, Struct
- from tuprolog.theory import Theory, mutable_theory
- from typing import Iterable
+
+ from psyke.extraction import PedagogicalExtractor
+ from psyke import get_default_random_seed
+ from psyke.extraction.cart.FairTree import FairTreeClassifier, FairTreeRegressor
+ from psyke.schema import DiscreteFeature, Value
+ from tuprolog.theory import Theory
+ from typing import Iterable, Any
  import pandas as pd


  TREE_SEED = get_default_random_seed()

+ LeafConstraints = dict[str, list[Value]]
+ LeafSequence = Iterable[tuple[LeafConstraints, Any]]
+

- class Cart(PedagogicalExtractor):
+ class Cart(PedagogicalExtractor, ABC):

-     def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None,
+     def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None, max_features=None,
                   discretization: Iterable[DiscreteFeature] = None,
                   normalization=None, simplify: bool = True):
+         from psyke.extraction.cart.CartPredictor import CartPredictor
+
          super().__init__(predictor, discretization, normalization)
-         self._cart_predictor = CartPredictor(normalization=normalization)
+         self.is_fair = None
+         self._cart_predictor = CartPredictor(discretization=discretization, normalization=normalization)
          self.depth = max_depth
          self.leaves = max_leaves
+         self.max_features = max_features
          self._simplify = simplify

-     def _create_body(self, variables: dict[str, Var], constraints: LeafConstraints) -> Iterable[Struct]:
-         results = []
-         for feature_name, constraint, value in constraints:
-             features = [d for d in self.discretization if feature_name in d.admissible_values]
-             feature: DiscreteFeature = features[0] if len(features) > 0 else None
-             results.append(create_term(variables[feature_name], constraint) if feature is None else
-                            create_term(variables[feature.name],
-                                        feature.admissible_values[feature_name],
-                                        isinstance(constraint, GreaterThan)))
-         return results
+     def _extract(self, data: pd.DataFrame) -> Theory:
+         from psyke.extraction.cart.FairTreePredictor import FairTreePredictor

-     @staticmethod
-     def _simplify_nodes(nodes: list) -> Iterable:
-         simplified = [nodes.pop(0)]
-         while len(nodes) > 0:
-             first_node = nodes[0][0]
-             for condition in first_node:
-                 if all([condition in [node[0] for node in nodes][i] for i in range(len(nodes))]):
-                     [node[0].remove(condition) for node in nodes]
-             simplified.append(nodes.pop(0))
-         return simplified
+         if self.is_fair:
+             self._cart_predictor = FairTreePredictor(discretization=self.discretization,
+                                                      normalization=self.normalization)
+             fair_tree = FairTreeClassifier if isinstance(data.iloc[0, -1], str) else FairTreeRegressor
+             self._cart_predictor.predictor = fair_tree(max_depth=self.depth, max_leaves=self.leaves,
+                                                        protected_attr=self.is_fair)
+         else:
+             tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
+             self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth,
+                                                   max_leaf_nodes=self.leaves, max_features=self.max_features)
+         self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
+         return self._cart_predictor.create_theory(data, self._simplify)

-     def _create_theory(self, data: pd.DataFrame, mapping: dict[str: int], sort: bool = True) -> Theory:
-         new_theory = mutable_theory()
-         nodes = [node for node in self._cart_predictor]
-         nodes = Cart._simplify_nodes(nodes) if self._simplify else nodes
-         for (constraints, prediction) in nodes:
-             if self.normalization is not None:
-                 m, s = self.normalization[data.columns[-1]]
-                 prediction = prediction * s + m
-             if mapping is not None and prediction in mapping.values():
-                 for k, v in mapping.items():
-                     if v == prediction:
-                         prediction = k
-                         break
-             variables = create_variable_list(self.discretization, data, sort)
-             new_theory.assertZ(
-                 clause(
-                     create_head(data.columns[-1], list(variables.values()), prediction),
-                     self._create_body(variables, constraints)
-                 )
-             )
-         return new_theory
+     def make_fair(self, features: Iterable[str]):
+         self.is_fair = features

-     def _extract(self, data: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-         self._cart_predictor.predictor = DecisionTreeClassifier(random_state=TREE_SEED) \
-             if isinstance(data.iloc[0, -1], str) or mapping is not None else DecisionTreeRegressor(random_state=TREE_SEED)
-         if mapping is not None:
-             data.iloc[:, -1] = data.iloc[:, -1].apply(lambda x: mapping[x] if x in mapping.keys() else x)
-         self._cart_predictor.predictor.max_depth = self.depth
-         self._cart_predictor.predictor.max_leaf_nodes = self.leaves
-         self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
-         return self._create_theory(data, mapping, sort)
+     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+         return self._cart_predictor.predict(dataframe)

-     def _predict(self, data) -> Iterable:
-         return self._cart_predictor.predict(data)
+     def predict_why(self, data: dict[str, float], verbose=True):
+         prediction = None
+         conditions = {}
+         if self.normalization is not None:
+             data = {k: v * self.normalization[k][1] + self.normalization[k][0] if k in self.normalization else v
+                     for k, v in data.items()}
+         for conditions, prediction in self._cart_predictor:
+             if all(all(interval.is_in(data[variable]) for interval in intervals)
+                    for variable, intervals in conditions.items()):
+                 break
+         return prediction, conditions

      @property
      def n_rules(self) -> int: