psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psyke/__init__.py +231 -85
- psyke/clustering/__init__.py +9 -4
- psyke/clustering/cream/__init__.py +6 -10
- psyke/clustering/exact/__init__.py +17 -11
- psyke/clustering/utils.py +0 -1
- psyke/extraction/__init__.py +25 -0
- psyke/extraction/cart/CartPredictor.py +128 -0
- psyke/extraction/cart/FairTree.py +205 -0
- psyke/extraction/cart/FairTreePredictor.py +56 -0
- psyke/extraction/cart/__init__.py +48 -62
- psyke/extraction/hypercubic/__init__.py +187 -47
- psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
- psyke/extraction/hypercubic/creepy/__init__.py +24 -29
- psyke/extraction/hypercubic/divine/__init__.py +86 -0
- psyke/extraction/hypercubic/ginger/__init__.py +100 -0
- psyke/extraction/hypercubic/gridex/__init__.py +45 -84
- psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
- psyke/extraction/hypercubic/hex/__init__.py +104 -0
- psyke/extraction/hypercubic/hypercube.py +275 -72
- psyke/extraction/hypercubic/iter/__init__.py +45 -46
- psyke/extraction/hypercubic/strategy.py +13 -9
- psyke/extraction/real/__init__.py +24 -29
- psyke/extraction/real/utils.py +2 -2
- psyke/extraction/trepan/__init__.py +24 -19
- psyke/genetic/__init__.py +0 -0
- psyke/genetic/fgin/__init__.py +74 -0
- psyke/genetic/gin/__init__.py +144 -0
- psyke/hypercubepredictor.py +102 -0
- psyke/schema/__init__.py +230 -36
- psyke/tuning/__init__.py +40 -28
- psyke/tuning/crash/__init__.py +33 -64
- psyke/tuning/orchid/__init__.py +21 -23
- psyke/tuning/pedro/__init__.py +70 -56
- psyke/utils/logic.py +8 -8
- psyke/utils/plot.py +79 -3
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
- psyke-1.0.4.dev10.dist-info/RECORD +46 -0
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
- psyke/extraction/cart/predictor.py +0 -73
- psyke-0.4.9.dev6.dist-info/RECORD +0 -36
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from abc import ABC
|
|
4
|
-
from
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from itertools import combinations
|
|
6
|
+
|
|
5
7
|
import numpy as np
|
|
6
8
|
import pandas as pd
|
|
7
9
|
from sklearn.base import ClassifierMixin
|
|
@@ -9,57 +11,180 @@ from sklearn.feature_selection import SelectKBest, f_regression, f_classif
|
|
|
9
11
|
from sklearn.linear_model import LinearRegression
|
|
10
12
|
from tuprolog.core import Var, Struct, clause
|
|
11
13
|
from tuprolog.theory import Theory, mutable_theory
|
|
12
|
-
from psyke import
|
|
13
|
-
from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube
|
|
14
|
+
from psyke.extraction import PedagogicalExtractor
|
|
15
|
+
from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
|
|
16
|
+
GenericCube
|
|
17
|
+
from psyke.hypercubepredictor import HyperCubePredictor
|
|
18
|
+
from psyke.schema import Value
|
|
14
19
|
from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
|
|
15
|
-
from psyke.utils import Target
|
|
20
|
+
from psyke.utils import Target
|
|
16
21
|
from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
|
|
17
22
|
|
|
18
23
|
|
|
19
|
-
class HyperCubePredictor:
|
|
20
|
-
def __init__(self,
|
|
21
|
-
self
|
|
22
|
-
self
|
|
23
|
-
self.
|
|
24
|
+
class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
|
|
25
|
+
def __init__(self, predictor, output, discretization=None, normalization=None):
|
|
26
|
+
HyperCubePredictor.__init__(self, output=output, normalization=normalization)
|
|
27
|
+
PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
|
|
28
|
+
self._default_surrounding_cube = False
|
|
29
|
+
self.threshold = None
|
|
24
30
|
|
|
25
|
-
def
|
|
26
|
-
|
|
31
|
+
def _default_cube(self, dimensions=None) -> HyperCube | RegressionCube | ClassificationCube:
|
|
32
|
+
if self._output == Target.CONSTANT:
|
|
33
|
+
return HyperCube(dimensions)
|
|
34
|
+
if self._output == Target.REGRESSION:
|
|
35
|
+
return RegressionCube(dimensions)
|
|
36
|
+
return ClassificationCube(dimensions)
|
|
27
37
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if self._output == Target.CLASSIFICATION:
|
|
33
|
-
return HyperCubePredictor._get_cube_output(cube, data)
|
|
34
|
-
else:
|
|
35
|
-
return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
|
|
36
|
-
return None
|
|
38
|
+
@staticmethod
|
|
39
|
+
def _find_couples(to_split: Iterable[HyperCube], not_in_cache: set[HyperCube],
|
|
40
|
+
adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
|
|
41
|
+
Iterable[tuple[HyperCube, HyperCube, str]]:
|
|
37
42
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
return len(list(self._hypercubes))
|
|
43
|
+
for cube1, cube2 in combinations(to_split, 2):
|
|
44
|
+
key = (cube1, cube2) if id(cube1) < id(cube2) else (cube2, cube1)
|
|
41
45
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
46
|
+
if (cube1 in not_in_cache) or (cube2 in not_in_cache):
|
|
47
|
+
adjacent_cache[key] = cube1.is_adjacent(cube2)
|
|
48
|
+
feature = adjacent_cache.get(key)
|
|
49
|
+
if feature is not None:
|
|
50
|
+
yield cube1, cube2, feature
|
|
51
|
+
|
|
52
|
+
def _evaluate_merge(self, not_in_cache: Iterable[HyperCube], dataframe: pd.DataFrame, feature: str,
|
|
53
|
+
cube: HyperCube, other_cube: HyperCube,
|
|
54
|
+
merge_cache: dict[tuple[HyperCube, HyperCube], HyperCube | None]) -> bool:
|
|
55
|
+
if (cube in not_in_cache) or (other_cube in not_in_cache):
|
|
56
|
+
merged_cube = cube.merge_along_dimension(other_cube, feature)
|
|
57
|
+
merged_cube.update(dataframe, self.predictor)
|
|
58
|
+
merge_cache[(cube, other_cube)] = merged_cube
|
|
59
|
+
return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
|
|
60
|
+
merge_cache[(cube, other_cube)].diversity < self.threshold
|
|
61
|
+
|
|
62
|
+
def _sort_cubes(self):
|
|
63
|
+
cubes = [(cube.diversity, i, cube) for i, cube in enumerate(self._hypercubes)]
|
|
64
|
+
cubes.sort()
|
|
65
|
+
self._hypercubes = [cube[2] for cube in cubes]
|
|
66
|
+
|
|
67
|
+
def _merge(self, to_split: list[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
|
|
68
|
+
not_in_cache = set(to_split)
|
|
69
|
+
adjacent_cache = {}
|
|
70
|
+
merge_cache = {}
|
|
71
|
+
while True:
|
|
72
|
+
to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
|
|
73
|
+
HyperCubeExtractor._find_couples(to_split, not_in_cache, adjacent_cache) if
|
|
74
|
+
self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
|
|
75
|
+
|
|
76
|
+
if len(to_merge) == 0:
|
|
77
|
+
break
|
|
78
|
+
best = min(to_merge, key=lambda c: c[1].diversity)
|
|
79
|
+
for cube in best[0]:
|
|
80
|
+
to_split.remove(cube)
|
|
81
|
+
to_split.append(best[1])
|
|
82
|
+
not_in_cache = [best[1]]
|
|
83
|
+
return to_split
|
|
84
|
+
|
|
85
|
+
def extract(self, dataframe: pd.DataFrame) -> Theory:
|
|
86
|
+
theory = PedagogicalExtractor.extract(self, dataframe)
|
|
87
|
+
self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
|
|
88
|
+
self._surrounding.update(dataframe, self.predictor)
|
|
89
|
+
return theory
|
|
90
|
+
|
|
91
|
+
def pairwise_fairness(self, data: dict[str, float], neighbor: dict[str, float]):
|
|
92
|
+
cube1 = self._find_cube(data)
|
|
93
|
+
cube2 = self._find_cube(neighbor)
|
|
94
|
+
different_prediction_reasons = []
|
|
95
|
+
|
|
96
|
+
if cube1.output == cube2.output:
|
|
97
|
+
print("Prediction", cube1.output, "is FAIR")
|
|
98
|
+
else:
|
|
99
|
+
print("Prediction", cube1.output, "may be UNFAIR")
|
|
100
|
+
print("It could be", cube2.output, "if:")
|
|
101
|
+
for d in data:
|
|
102
|
+
a, b = cube2.dimensions[d]
|
|
103
|
+
if data[d] < a:
|
|
104
|
+
print(' ', d, 'increases above', round(a, 1))
|
|
105
|
+
different_prediction_reasons.append(d)
|
|
106
|
+
elif data[d] > b:
|
|
107
|
+
print(' ', d, 'decreases below', round(b, 1))
|
|
108
|
+
different_prediction_reasons.append(d)
|
|
109
|
+
return different_prediction_reasons
|
|
110
|
+
|
|
111
|
+
def predict_counter(self, data: dict[str, float], verbose=True, only_first=True):
|
|
112
|
+
output = ""
|
|
113
|
+
prediction = None
|
|
114
|
+
cube = self._find_cube(data)
|
|
115
|
+
if cube is None:
|
|
116
|
+
output += "The extracted knowledge is not exhaustive; impossible to predict this instance"
|
|
117
|
+
else:
|
|
118
|
+
prediction = self._predict_from_cubes(data)
|
|
119
|
+
output += f"The output is {prediction}\n"
|
|
120
|
+
|
|
121
|
+
point = Point(list(data.keys()), list(data.values()))
|
|
122
|
+
cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
|
|
123
|
+
cubes = sorted([(cube.surface_distance(point), cube.volume(), i, cube) for i, cube in enumerate(cubes)])
|
|
124
|
+
|
|
125
|
+
counter_conditions = []
|
|
126
|
+
|
|
127
|
+
for _, _, _, c in cubes:
|
|
128
|
+
if not only_first or c.output not in [o for o, _ in counter_conditions]:
|
|
129
|
+
counter_conditions.append((c.output, {c: [val for val in v if val is not None and not val.is_in(
|
|
130
|
+
self.unscale(data[c], c))] for c, v in self.__get_conditions(data, c).items()}))
|
|
131
|
+
|
|
132
|
+
if verbose:
|
|
133
|
+
for o, conditions in counter_conditions:
|
|
134
|
+
output += f"The output may be {o} if\n" + HyperCubeExtractor.__conditions_to_string(conditions)
|
|
135
|
+
print(output)
|
|
136
|
+
|
|
137
|
+
return prediction, counter_conditions
|
|
45
138
|
|
|
46
139
|
@staticmethod
|
|
47
|
-
def
|
|
48
|
-
|
|
49
|
-
|
|
140
|
+
def __conditions_to_string(conditions: dict[str, list[Value]]) -> str:
|
|
141
|
+
output = ""
|
|
142
|
+
for d in conditions:
|
|
143
|
+
for i, condition in enumerate(conditions[d]):
|
|
144
|
+
if i == 0:
|
|
145
|
+
output += f' {d} is '
|
|
146
|
+
else:
|
|
147
|
+
output += ' and '
|
|
148
|
+
output += condition.print()
|
|
149
|
+
if i + 1 == len(conditions[d]):
|
|
150
|
+
output += '\n'
|
|
151
|
+
return output
|
|
152
|
+
|
|
153
|
+
def __get_conditions(self, data: dict[str, float], cube: GenericCube) -> dict[str, list[Value]]:
|
|
154
|
+
conditions = {d: [cube.interval_to_value(d, self.unscale)] for d in data.keys()
|
|
155
|
+
if d not in self._dimensions_to_ignore}
|
|
156
|
+
for c in cube.subcubes(self._hypercubes):
|
|
157
|
+
for d in conditions:
|
|
158
|
+
condition = c.interval_to_value(d, self.unscale)
|
|
159
|
+
if condition is None:
|
|
160
|
+
continue
|
|
161
|
+
elif conditions[d][-1] is None:
|
|
162
|
+
conditions[d][-1] = -condition
|
|
163
|
+
else:
|
|
164
|
+
try:
|
|
165
|
+
conditions[d][-1] *= -condition
|
|
166
|
+
except Exception:
|
|
167
|
+
conditions[d].append(-condition)
|
|
168
|
+
return conditions
|
|
50
169
|
|
|
170
|
+
def predict_why(self, data: dict[str, float], verbose=True):
|
|
171
|
+
cube = self._find_cube(data)
|
|
172
|
+
output = ""
|
|
173
|
+
if cube is None:
|
|
174
|
+
output += "The extracted knowledge is not exhaustive; impossible to predict this instance\n"
|
|
175
|
+
if verbose:
|
|
176
|
+
print(output)
|
|
177
|
+
return None, {}
|
|
178
|
+
prediction = self._predict_from_cubes(data)
|
|
179
|
+
output += f"The output is {prediction} because\n"
|
|
180
|
+
conditions = {c: [val for val in v if val is not None and val.is_in(self.unscale(data[c], c))]
|
|
181
|
+
for c, v in self.__get_conditions(data, cube).items()}
|
|
51
182
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
HyperCubePredictor.__init__(self, output=output, normalization=normalization)
|
|
183
|
+
if verbose:
|
|
184
|
+
output += HyperCubeExtractor.__conditions_to_string(conditions)
|
|
185
|
+
print(output)
|
|
56
186
|
|
|
57
|
-
|
|
58
|
-
if self._output == Target.CONSTANT:
|
|
59
|
-
return HyperCube()
|
|
60
|
-
if self._output == Target.REGRESSION:
|
|
61
|
-
return RegressionCube()
|
|
62
|
-
return ClassificationCube()
|
|
187
|
+
return prediction, conditions
|
|
63
188
|
|
|
64
189
|
@staticmethod
|
|
65
190
|
def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
|
|
@@ -67,19 +192,28 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
|
|
|
67
192
|
if not isinstance(output, LinearRegression) else \
|
|
68
193
|
create_head(dataframe.columns[-1], variables[:-1], variables[-1])
|
|
69
194
|
|
|
70
|
-
def
|
|
71
|
-
|
|
195
|
+
def __drop(self, dataframe: pd.DataFrame):
|
|
196
|
+
self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
|
|
197
|
+
|
|
198
|
+
def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
|
|
199
|
+
# self.__drop(dataframe)
|
|
200
|
+
for cube in self._hypercubes:
|
|
201
|
+
for dimension in cube.dimensions:
|
|
202
|
+
if abs(cube[dimension][0] - self._surrounding[dimension][0]) < HyperCube.EPSILON * 2:
|
|
203
|
+
cube.set_infinite(dimension, '-')
|
|
204
|
+
if abs(cube[dimension][1] - self._surrounding[dimension][1]) < HyperCube.EPSILON * 2:
|
|
205
|
+
cube.set_infinite(dimension, '+')
|
|
206
|
+
|
|
207
|
+
if self._default_surrounding_cube:
|
|
208
|
+
self._hypercubes[-1].set_default()
|
|
72
209
|
|
|
73
|
-
def _create_theory(self, dataframe: pd.DataFrame, sort: bool = True) -> Theory:
|
|
74
210
|
new_theory = mutable_theory()
|
|
75
211
|
for cube in self._hypercubes:
|
|
76
|
-
|
|
77
|
-
logger.info(cube.dimensions)
|
|
78
|
-
variables = create_variable_list([], dataframe, sort)
|
|
212
|
+
variables = create_variable_list([], dataframe)
|
|
79
213
|
variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
|
|
80
214
|
head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
|
|
81
215
|
self.unscale(cube.output, dataframe.columns[-1]))
|
|
82
|
-
body = cube.body(variables, self.
|
|
216
|
+
body = cube.body(variables, self._dimensions_to_ignore, self.unscale, self.normalization)
|
|
83
217
|
new_theory.assertZ(clause(head, body))
|
|
84
218
|
return HyperCubeExtractor._prettify_theory(new_theory)
|
|
85
219
|
|
|
@@ -120,10 +254,16 @@ class FeatureRanker:
|
|
|
120
254
|
|
|
121
255
|
|
|
122
256
|
class Grid:
|
|
123
|
-
def __init__(self, iterations: int = 1, strategy: Strategy |
|
|
257
|
+
def __init__(self, iterations: int = 1, strategy: Strategy | Iterable[Strategy] = FixedStrategy()):
|
|
124
258
|
self.iterations = iterations
|
|
125
259
|
self.strategy = strategy
|
|
126
260
|
|
|
261
|
+
def make_fair(self, features: Iterable[str]):
|
|
262
|
+
if isinstance(self.strategy, Strategy):
|
|
263
|
+
self.strategy.make_fair(features)
|
|
264
|
+
elif isinstance(self.strategy, Iterable):
|
|
265
|
+
[strategy.make_fair(features) for strategy in self.strategy]
|
|
266
|
+
|
|
127
267
|
def get(self, feature: str, depth: int) -> int:
|
|
128
268
|
if isinstance(self.strategy, list):
|
|
129
269
|
return self.strategy[depth].get(feature)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from sklearn.mixture import GaussianMixture
|
|
4
|
+
from tuprolog.theory import Theory
|
|
5
|
+
|
|
6
|
+
from psyke import Target, Extractor, get_default_random_seed
|
|
7
|
+
from psyke.clustering.utils import select_gaussian_mixture
|
|
8
|
+
from psyke.extraction.hypercubic import HyperCube, HyperCubeExtractor, RegressionCube
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class COSMiK(HyperCubeExtractor):
|
|
12
|
+
"""
|
|
13
|
+
Explanator implementing COSMiK algorithm.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
def __init__(self, predictor, max_components: int = 4, k: int = 5, patience: int = 15, close_to_center: bool = True,
|
|
17
|
+
output: Target = Target.CONSTANT, discretization=None, normalization=None,
|
|
18
|
+
seed: int = get_default_random_seed()):
|
|
19
|
+
super().__init__(predictor, Target.REGRESSION, discretization, normalization)
|
|
20
|
+
self.max = max_components
|
|
21
|
+
self.k = k
|
|
22
|
+
self.patience = patience
|
|
23
|
+
self.output = output
|
|
24
|
+
self.close_to_center = close_to_center
|
|
25
|
+
self.seed = seed
|
|
26
|
+
|
|
27
|
+
def _extract(self, dataframe: pd.DataFrame) -> Theory:
|
|
28
|
+
np.random.seed(self.seed)
|
|
29
|
+
X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]
|
|
30
|
+
|
|
31
|
+
_, n, _ = select_gaussian_mixture(dataframe, self.max)
|
|
32
|
+
gmm = GaussianMixture(n)
|
|
33
|
+
gmm.fit(X, y)
|
|
34
|
+
|
|
35
|
+
divine = Extractor.divine(gmm, self.k, self.patience, self.close_to_center,
|
|
36
|
+
self.discretization, self.normalization)
|
|
37
|
+
df = X.join(pd.DataFrame(gmm.predict(X)))
|
|
38
|
+
df.columns = dataframe.columns
|
|
39
|
+
divine.extract(df)
|
|
40
|
+
|
|
41
|
+
self._hypercubes = [HyperCube(cube.dimensions.copy()) if self.output == Target.CONSTANT else
|
|
42
|
+
RegressionCube(cube.dimensions.copy()) for cube in divine._hypercubes]
|
|
43
|
+
for cube in self._hypercubes:
|
|
44
|
+
cube.update(dataframe, self.predictor)
|
|
45
|
+
|
|
46
|
+
self._sort_cubes()
|
|
47
|
+
return self._create_theory(dataframe)
|
|
@@ -1,50 +1,45 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import
|
|
4
|
-
from
|
|
5
|
-
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from typing import Callable, Any
|
|
5
|
+
|
|
6
6
|
import pandas as pd
|
|
7
7
|
from sklearn.base import ClassifierMixin
|
|
8
|
-
from tuprolog.core import clause
|
|
9
8
|
from tuprolog.theory import Theory
|
|
10
9
|
from psyke import Clustering
|
|
11
10
|
from psyke.clustering import HyperCubeClustering
|
|
12
11
|
from psyke.extraction.hypercubic import HyperCubeExtractor
|
|
13
|
-
from psyke.utils import Target
|
|
12
|
+
from psyke.utils import Target, get_default_random_seed
|
|
14
13
|
|
|
15
14
|
|
|
16
|
-
class CReEPy(HyperCubeExtractor
|
|
15
|
+
class CReEPy(HyperCubeExtractor):
|
|
17
16
|
"""
|
|
18
17
|
Explanator implementing CReEPy algorithm.
|
|
19
18
|
"""
|
|
20
19
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
ClusteringType = Callable[[int, float, Target, int, Any, Any, int], HyperCubeClustering]
|
|
21
|
+
|
|
22
|
+
def __init__(self, predictor, clustering: ClusteringType = Clustering.exact, depth: int = 3,
|
|
23
|
+
error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 5,
|
|
24
|
+
ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0, discretization=None,
|
|
25
|
+
normalization=None, seed: int = get_default_random_seed()):
|
|
24
26
|
super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
|
|
25
|
-
normalization)
|
|
26
|
-
self.clustering = clustering(depth, error_threshold, self._output, gauss_components
|
|
27
|
-
|
|
28
|
-
self.
|
|
27
|
+
discretization, normalization)
|
|
28
|
+
self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
|
|
29
|
+
normalization, seed)
|
|
30
|
+
self._default_surrounding_cube = True
|
|
31
|
+
self._dimensions_to_ignore = set([dimension for dimension, relevance in ranks if relevance < ignore_threshold])
|
|
32
|
+
self._protected_features = []
|
|
33
|
+
|
|
34
|
+
def make_fair(self, features: Iterable[str]):
|
|
35
|
+
self.clustering.make_fair(features)
|
|
36
|
+
self._dimensions_to_ignore.update(features)
|
|
29
37
|
|
|
30
|
-
def _extract(self, dataframe: pd.DataFrame
|
|
38
|
+
def _extract(self, dataframe: pd.DataFrame) -> Theory:
|
|
31
39
|
if not isinstance(self.clustering, HyperCubeClustering):
|
|
32
40
|
raise TypeError("clustering must be a HyperCubeClustering")
|
|
33
41
|
|
|
34
42
|
self.clustering.fit(dataframe)
|
|
35
43
|
self._hypercubes = self.clustering.get_hypercubes()
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
cube[dimension] = [-np.inf, np.inf]
|
|
39
|
-
theory = self._create_theory(dataframe)
|
|
40
|
-
last_clause = list(theory.clauses)[-1]
|
|
41
|
-
theory.retract(last_clause)
|
|
42
|
-
theory.assertZ(clause(
|
|
43
|
-
last_clause.head, [list(last_clause.body)[-1]] if self._output is Target.REGRESSION else []))
|
|
44
|
-
last_cube = self._hypercubes[-1]
|
|
45
|
-
for dimension in last_cube.dimensions.keys():
|
|
46
|
-
last_cube[dimension] = [-np.inf, np.inf]
|
|
47
|
-
return theory
|
|
48
|
-
|
|
49
|
-
def _ignore_dimensions(self) -> Iterable[str]:
|
|
50
|
-
return [dimension for dimension, relevance in self.ranks if relevance < self.ignore_threshold]
|
|
44
|
+
self._surrounding = self._hypercubes[-1]
|
|
45
|
+
return self._create_theory(dataframe)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from tuprolog.theory import Theory
|
|
4
|
+
|
|
5
|
+
from psyke import Target, get_default_random_seed
|
|
6
|
+
from psyke.extraction.hypercubic import HyperCubeExtractor
|
|
7
|
+
from psyke.extraction.hypercubic.hypercube import Point, GenericCube, HyperCube
|
|
8
|
+
|
|
9
|
+
from sklearn.neighbors import BallTree
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DiViNE(HyperCubeExtractor):
|
|
13
|
+
"""
|
|
14
|
+
Explanator implementing DiViNE algorithm.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
def __init__(self, predictor, k: int = 5, patience: int = 15, close_to_center: bool = True,
|
|
18
|
+
discretization=None, normalization=None, seed: int = get_default_random_seed()):
|
|
19
|
+
super().__init__(predictor, Target.CLASSIFICATION, discretization, normalization)
|
|
20
|
+
self.k = k
|
|
21
|
+
self.patience = patience
|
|
22
|
+
self.vicinity_function = DiViNE.closest_to_center if close_to_center else DiViNE.closest_to_corners
|
|
23
|
+
self.seed = seed
|
|
24
|
+
|
|
25
|
+
@staticmethod
|
|
26
|
+
def __pop(data: pd.DataFrame, idx: int = None) -> (Point, pd.DataFrame):
|
|
27
|
+
if idx is None:
|
|
28
|
+
idx = data.sample(1).index.values[0]
|
|
29
|
+
t = data.T
|
|
30
|
+
return DiViNE.__to_point(t.pop(idx)), t.T.reset_index(drop=True)
|
|
31
|
+
|
|
32
|
+
@staticmethod
|
|
33
|
+
def __to_point(instance) -> Point:
|
|
34
|
+
point = Point(instance.index.values, instance.values)
|
|
35
|
+
return point
|
|
36
|
+
|
|
37
|
+
def __to_cube(self, point: Point) -> GenericCube:
|
|
38
|
+
cube = HyperCube.cube_from_point(point.dimensions, self._output)
|
|
39
|
+
cube._output = list(point.dimensions.values())[-1]
|
|
40
|
+
return cube
|
|
41
|
+
|
|
42
|
+
def __clean(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
43
|
+
_, idx = BallTree(data.iloc[:, :-1]).query(data.iloc[:, :-1], k=self.k)
|
|
44
|
+
# how many output classes are associated with the k neighbors
|
|
45
|
+
count = np.array(list(map(lambda indices: len(data.iloc[indices].iloc[:, -1].unique()), idx)))
|
|
46
|
+
# instances with neighbors of different classes are discarded
|
|
47
|
+
return data[count == 1]
|
|
48
|
+
|
|
49
|
+
def __closest(self, data: pd.DataFrame, cube: GenericCube) -> (Point, pd.DataFrame):
|
|
50
|
+
return DiViNE.__pop(data, self.vicinity_function(BallTree(data.iloc[:, :-1]), cube))
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def closest_to_center(tree: BallTree, cube: GenericCube):
|
|
54
|
+
return tree.query([list(cube.center.dimensions.values())], k=1)[1][0][-1]
|
|
55
|
+
|
|
56
|
+
@staticmethod
|
|
57
|
+
def closest_to_corners(tree: BallTree, cube: GenericCube):
|
|
58
|
+
distance, idx = tree.query([list(point.dimensions.values()) for point in cube.corners()], k=1)
|
|
59
|
+
return idx[np.argmin(distance)][-1]
|
|
60
|
+
|
|
61
|
+
def _extract(self, dataframe: pd.DataFrame) -> Theory:
|
|
62
|
+
self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=Target.CLASSIFICATION)
|
|
63
|
+
np.random.seed(self.seed)
|
|
64
|
+
data = self.__clean(dataframe)
|
|
65
|
+
|
|
66
|
+
while len(data) > 0:
|
|
67
|
+
discarded = []
|
|
68
|
+
patience = self.patience
|
|
69
|
+
point, data = self.__pop(data)
|
|
70
|
+
cube = self.__to_cube(point)
|
|
71
|
+
|
|
72
|
+
while patience > 0 and len(data) > 0:
|
|
73
|
+
other, data = self.__closest(data, cube)
|
|
74
|
+
if cube.output == list(other.dimensions.values())[-1]:
|
|
75
|
+
cube = cube.merge_with_point(other)
|
|
76
|
+
data = data[~(cube.filter_indices(data.iloc[:, :-1]))].reset_index(drop=True)
|
|
77
|
+
else:
|
|
78
|
+
patience -= 1
|
|
79
|
+
discarded.append(other)
|
|
80
|
+
if cube.volume() > 0:
|
|
81
|
+
cube.update(dataframe, self.predictor)
|
|
82
|
+
self._hypercubes.append(cube)
|
|
83
|
+
if len(discarded) > 0:
|
|
84
|
+
data = pd.concat([data] + [d.to_dataframe() for d in discarded]).reset_index(drop=True)
|
|
85
|
+
self._sort_cubes()
|
|
86
|
+
return self._create_theory(dataframe)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from typing import Iterable
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from sklearn.base import ClassifierMixin
|
|
7
|
+
from sklearn.preprocessing import PolynomialFeatures
|
|
8
|
+
from tuprolog.theory import Theory
|
|
9
|
+
|
|
10
|
+
from psyke import get_default_random_seed, Target
|
|
11
|
+
from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube, RegressionCube
|
|
12
|
+
|
|
13
|
+
from deap import base, creator
|
|
14
|
+
|
|
15
|
+
from psyke.genetic.gin import GIn
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GInGER(HyperCubeExtractor):
|
|
19
|
+
"""
|
|
20
|
+
Explanator implementing GInGER algorithm.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
|
|
24
|
+
tournsize=3, metric='R2', n_gen=50, n_pop=50, threshold=None, valid=None,
|
|
25
|
+
output: Target = Target.REGRESSION, normalization=None, seed: int = get_default_random_seed()):
|
|
26
|
+
super().__init__(predictor, output=Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
|
|
27
|
+
normalization=normalization)
|
|
28
|
+
self.threshold = threshold
|
|
29
|
+
np.random.seed(seed)
|
|
30
|
+
|
|
31
|
+
self.features = features
|
|
32
|
+
self.max_features = len(features)
|
|
33
|
+
self.sigmas = sigmas
|
|
34
|
+
self.max_slices = max_slices
|
|
35
|
+
self.min_rules = min_rules
|
|
36
|
+
self.poly = max_poly
|
|
37
|
+
self.trained_poly = None
|
|
38
|
+
|
|
39
|
+
self.alpha = alpha
|
|
40
|
+
self.indpb = indpb
|
|
41
|
+
self.tournsize = tournsize
|
|
42
|
+
self.metric = metric
|
|
43
|
+
|
|
44
|
+
self.n_gen = n_gen
|
|
45
|
+
self.n_pop = n_pop
|
|
46
|
+
self.valid = valid
|
|
47
|
+
|
|
48
|
+
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
|
|
49
|
+
creator.create("Individual", list, fitness=creator.FitnessMax)
|
|
50
|
+
|
|
51
|
+
def __poly_names(self):
|
|
52
|
+
return [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
|
|
53
|
+
for pp, n in zip(p, self.trained_poly.feature_names_in_)])[:-3]
|
|
54
|
+
for p in self.trained_poly.powers_]
|
|
55
|
+
|
|
56
|
+
def _predict(self, dataframe: pd.DataFrame) -> Iterable:
|
|
57
|
+
dataframe = pd.DataFrame(self.trained_poly.fit_transform(dataframe), columns=self.__poly_names())
|
|
58
|
+
return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
|
|
59
|
+
|
|
60
|
+
def _extract(self, dataframe: pd.DataFrame) -> Theory:
|
|
61
|
+
best = {}
|
|
62
|
+
for poly in range(self.poly):
|
|
63
|
+
for slices in list(itertools.product(range(1, self.max_slices + 1), repeat=self.max_features)):
|
|
64
|
+
gr = GIn((dataframe.iloc[:, :-1], dataframe.iloc[:, -1]), self.valid, self.features, self.sigmas,
|
|
65
|
+
slices, min_rules=self.min_rules, poly=poly + 1, alpha=self.alpha, indpb=self.indpb,
|
|
66
|
+
tournsize=self.tournsize, metric=self.metric, output=self._output, warm=True)
|
|
67
|
+
|
|
68
|
+
b, score, _, _ = gr.run(n_gen=self.n_gen, n_pop=self.n_pop)
|
|
69
|
+
best[(score, poly + 1, slices)] = b
|
|
70
|
+
m = min(best)
|
|
71
|
+
poly, slices, best = m[1], m[2], best[m]
|
|
72
|
+
self.trained_poly = PolynomialFeatures(degree=poly, include_bias=False)
|
|
73
|
+
transformed = pd.DataFrame(self.trained_poly.fit_transform(dataframe.iloc[:, :-1]), columns=self.__poly_names())
|
|
74
|
+
transformed[dataframe.columns[-1]] = dataframe.iloc[:, -1].values
|
|
75
|
+
|
|
76
|
+
self._surrounding = HyperCube.create_surrounding_cube(transformed, output=self._output)
|
|
77
|
+
|
|
78
|
+
cuts = [sorted(best[sum(slices[:i]):sum(slices[:i + 1])]) for i in range(len(slices))]
|
|
79
|
+
|
|
80
|
+
intervals = [[(transformed[self.features[i]].min(), cut[0])] +
|
|
81
|
+
[(cut[i], cut[i + 1]) for i in range(len(cut) - 1)] +
|
|
82
|
+
[(cut[-1], transformed[self.features[i]].max())] for i, cut in enumerate(cuts)]
|
|
83
|
+
|
|
84
|
+
hypercubes = [{f: iv for f, iv in zip(self.features, combo)} for combo in itertools.product(*intervals)]
|
|
85
|
+
mi_ma = {f: (transformed[f].min(), transformed[f].max()) for f in transformed.columns if f not in self.features}
|
|
86
|
+
self._hypercubes = [self._default_cube({feat: h[feat] if feat in self.features else mi_ma[feat]
|
|
87
|
+
for feat in transformed.columns[:-1]}) for h in hypercubes]
|
|
88
|
+
self._hypercubes = [c for c in self._hypercubes if c.count(transformed) >= 2]
|
|
89
|
+
for c in self._hypercubes:
|
|
90
|
+
for feature in transformed.columns:
|
|
91
|
+
if feature not in self.features:
|
|
92
|
+
for direction in ['+', '-']:
|
|
93
|
+
c.set_infinite(feature, direction)
|
|
94
|
+
c.update(transformed)
|
|
95
|
+
if self.threshold is not None:
|
|
96
|
+
self._hypercubes = self._merge(self._hypercubes, transformed)
|
|
97
|
+
return self._create_theory(transformed)
|
|
98
|
+
|
|
99
|
+
def make_fair(self, features: Iterable[str]):
|
|
100
|
+
self._dimensions_to_ignore.update(features)
|