psyke-0.4.9.dev6-py3-none-any.whl → psyke-1.0.4.dev10-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (42)
  1. psyke/__init__.py +231 -85
  2. psyke/clustering/__init__.py +9 -4
  3. psyke/clustering/cream/__init__.py +6 -10
  4. psyke/clustering/exact/__init__.py +17 -11
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +25 -0
  7. psyke/extraction/cart/CartPredictor.py +128 -0
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +48 -62
  11. psyke/extraction/hypercubic/__init__.py +187 -47
  12. psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
  13. psyke/extraction/hypercubic/creepy/__init__.py +24 -29
  14. psyke/extraction/hypercubic/divine/__init__.py +86 -0
  15. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  16. psyke/extraction/hypercubic/gridex/__init__.py +45 -84
  17. psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
  18. psyke/extraction/hypercubic/hex/__init__.py +104 -0
  19. psyke/extraction/hypercubic/hypercube.py +275 -72
  20. psyke/extraction/hypercubic/iter/__init__.py +45 -46
  21. psyke/extraction/hypercubic/strategy.py +13 -9
  22. psyke/extraction/real/__init__.py +24 -29
  23. psyke/extraction/real/utils.py +2 -2
  24. psyke/extraction/trepan/__init__.py +24 -19
  25. psyke/genetic/__init__.py +0 -0
  26. psyke/genetic/fgin/__init__.py +74 -0
  27. psyke/genetic/gin/__init__.py +144 -0
  28. psyke/hypercubepredictor.py +102 -0
  29. psyke/schema/__init__.py +230 -36
  30. psyke/tuning/__init__.py +40 -28
  31. psyke/tuning/crash/__init__.py +33 -64
  32. psyke/tuning/orchid/__init__.py +21 -23
  33. psyke/tuning/pedro/__init__.py +70 -56
  34. psyke/utils/logic.py +8 -8
  35. psyke/utils/plot.py +79 -3
  36. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
  37. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  38. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  39. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  40. psyke/extraction/cart/predictor.py +0 -73
  41. psyke-0.4.9.dev6.dist-info/RECORD +0 -36
  42. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from abc import ABC
4
- from typing import Iterable
4
+ from collections.abc import Iterable
5
+ from itertools import combinations
6
+
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
  from sklearn.base import ClassifierMixin
@@ -9,57 +11,180 @@ from sklearn.feature_selection import SelectKBest, f_regression, f_classif
9
11
  from sklearn.linear_model import LinearRegression
10
12
  from tuprolog.core import Var, Struct, clause
11
13
  from tuprolog.theory import Theory, mutable_theory
12
- from psyke import logger, PedagogicalExtractor
13
- from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube
14
+ from psyke.extraction import PedagogicalExtractor
15
+ from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
16
+ GenericCube
17
+ from psyke.hypercubepredictor import HyperCubePredictor
18
+ from psyke.schema import Value
14
19
  from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
15
- from psyke.utils import Target, get_int_precision
20
+ from psyke.utils import Target
16
21
  from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
17
22
 
18
23
 
19
- class HyperCubePredictor:
20
- def __init__(self, cubes=[], output=Target.CONSTANT, normalization=None):
21
- self._hypercubes = cubes
22
- self._output = output
23
- self.normalization = normalization
24
+ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
25
+ def __init__(self, predictor, output, discretization=None, normalization=None):
26
+ HyperCubePredictor.__init__(self, output=output, normalization=normalization)
27
+ PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
28
+ self._default_surrounding_cube = False
29
+ self.threshold = None
24
30
 
25
- def _predict(self, dataframe: pd.DataFrame) -> Iterable:
26
- return np.array([self._predict_from_cubes(dict(row.to_dict())) for _, row in dataframe.iterrows()])
31
+ def _default_cube(self, dimensions=None) -> HyperCube | RegressionCube | ClassificationCube:
32
+ if self._output == Target.CONSTANT:
33
+ return HyperCube(dimensions)
34
+ if self._output == Target.REGRESSION:
35
+ return RegressionCube(dimensions)
36
+ return ClassificationCube(dimensions)
27
37
 
28
- def _predict_from_cubes(self, data: dict[str, float]) -> float | None:
29
- data = {k: v for k, v in data.items()}
30
- for cube in self._hypercubes:
31
- if cube.__contains__(data):
32
- if self._output == Target.CLASSIFICATION:
33
- return HyperCubePredictor._get_cube_output(cube, data)
34
- else:
35
- return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
36
- return None
38
+ @staticmethod
39
+ def _find_couples(to_split: Iterable[HyperCube], not_in_cache: set[HyperCube],
40
+ adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
41
+ Iterable[tuple[HyperCube, HyperCube, str]]:
37
42
 
38
- @property
39
- def n_rules(self):
40
- return len(list(self._hypercubes))
43
+ for cube1, cube2 in combinations(to_split, 2):
44
+ key = (cube1, cube2) if id(cube1) < id(cube2) else (cube2, cube1)
41
45
 
42
- @property
43
- def volume(self):
44
- return sum([cube.volume() for cube in self._hypercubes])
46
+ if (cube1 in not_in_cache) or (cube2 in not_in_cache):
47
+ adjacent_cache[key] = cube1.is_adjacent(cube2)
48
+ feature = adjacent_cache.get(key)
49
+ if feature is not None:
50
+ yield cube1, cube2, feature
51
+
52
+ def _evaluate_merge(self, not_in_cache: Iterable[HyperCube], dataframe: pd.DataFrame, feature: str,
53
+ cube: HyperCube, other_cube: HyperCube,
54
+ merge_cache: dict[tuple[HyperCube, HyperCube], HyperCube | None]) -> bool:
55
+ if (cube in not_in_cache) or (other_cube in not_in_cache):
56
+ merged_cube = cube.merge_along_dimension(other_cube, feature)
57
+ merged_cube.update(dataframe, self.predictor)
58
+ merge_cache[(cube, other_cube)] = merged_cube
59
+ return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
60
+ merge_cache[(cube, other_cube)].diversity < self.threshold
61
+
62
+ def _sort_cubes(self):
63
+ cubes = [(cube.diversity, i, cube) for i, cube in enumerate(self._hypercubes)]
64
+ cubes.sort()
65
+ self._hypercubes = [cube[2] for cube in cubes]
66
+
67
+ def _merge(self, to_split: list[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
68
+ not_in_cache = set(to_split)
69
+ adjacent_cache = {}
70
+ merge_cache = {}
71
+ while True:
72
+ to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
73
+ HyperCubeExtractor._find_couples(to_split, not_in_cache, adjacent_cache) if
74
+ self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
75
+
76
+ if len(to_merge) == 0:
77
+ break
78
+ best = min(to_merge, key=lambda c: c[1].diversity)
79
+ for cube in best[0]:
80
+ to_split.remove(cube)
81
+ to_split.append(best[1])
82
+ not_in_cache = [best[1]]
83
+ return to_split
84
+
85
+ def extract(self, dataframe: pd.DataFrame) -> Theory:
86
+ theory = PedagogicalExtractor.extract(self, dataframe)
87
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
88
+ self._surrounding.update(dataframe, self.predictor)
89
+ return theory
90
+
91
+ def pairwise_fairness(self, data: dict[str, float], neighbor: dict[str, float]):
92
+ cube1 = self._find_cube(data)
93
+ cube2 = self._find_cube(neighbor)
94
+ different_prediction_reasons = []
95
+
96
+ if cube1.output == cube2.output:
97
+ print("Prediction", cube1.output, "is FAIR")
98
+ else:
99
+ print("Prediction", cube1.output, "may be UNFAIR")
100
+ print("It could be", cube2.output, "if:")
101
+ for d in data:
102
+ a, b = cube2.dimensions[d]
103
+ if data[d] < a:
104
+ print(' ', d, 'increases above', round(a, 1))
105
+ different_prediction_reasons.append(d)
106
+ elif data[d] > b:
107
+ print(' ', d, 'decreases below', round(b, 1))
108
+ different_prediction_reasons.append(d)
109
+ return different_prediction_reasons
110
+
111
+ def predict_counter(self, data: dict[str, float], verbose=True, only_first=True):
112
+ output = ""
113
+ prediction = None
114
+ cube = self._find_cube(data)
115
+ if cube is None:
116
+ output += "The extracted knowledge is not exhaustive; impossible to predict this instance"
117
+ else:
118
+ prediction = self._predict_from_cubes(data)
119
+ output += f"The output is {prediction}\n"
120
+
121
+ point = Point(list(data.keys()), list(data.values()))
122
+ cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
123
+ cubes = sorted([(cube.surface_distance(point), cube.volume(), i, cube) for i, cube in enumerate(cubes)])
124
+
125
+ counter_conditions = []
126
+
127
+ for _, _, _, c in cubes:
128
+ if not only_first or c.output not in [o for o, _ in counter_conditions]:
129
+ counter_conditions.append((c.output, {c: [val for val in v if val is not None and not val.is_in(
130
+ self.unscale(data[c], c))] for c, v in self.__get_conditions(data, c).items()}))
131
+
132
+ if verbose:
133
+ for o, conditions in counter_conditions:
134
+ output += f"The output may be {o} if\n" + HyperCubeExtractor.__conditions_to_string(conditions)
135
+ print(output)
136
+
137
+ return prediction, counter_conditions
45
138
 
46
139
  @staticmethod
47
- def _get_cube_output(cube, data: dict[str, float]) -> float:
48
- return cube.output.predict(pd.DataFrame([data])).flatten()[0] if \
49
- isinstance(cube, RegressionCube) else cube.output
140
+ def __conditions_to_string(conditions: dict[str, list[Value]]) -> str:
141
+ output = ""
142
+ for d in conditions:
143
+ for i, condition in enumerate(conditions[d]):
144
+ if i == 0:
145
+ output += f' {d} is '
146
+ else:
147
+ output += ' and '
148
+ output += condition.print()
149
+ if i + 1 == len(conditions[d]):
150
+ output += '\n'
151
+ return output
152
+
153
+ def __get_conditions(self, data: dict[str, float], cube: GenericCube) -> dict[str, list[Value]]:
154
+ conditions = {d: [cube.interval_to_value(d, self.unscale)] for d in data.keys()
155
+ if d not in self._dimensions_to_ignore}
156
+ for c in cube.subcubes(self._hypercubes):
157
+ for d in conditions:
158
+ condition = c.interval_to_value(d, self.unscale)
159
+ if condition is None:
160
+ continue
161
+ elif conditions[d][-1] is None:
162
+ conditions[d][-1] = -condition
163
+ else:
164
+ try:
165
+ conditions[d][-1] *= -condition
166
+ except Exception:
167
+ conditions[d].append(-condition)
168
+ return conditions
50
169
 
170
+ def predict_why(self, data: dict[str, float], verbose=True):
171
+ cube = self._find_cube(data)
172
+ output = ""
173
+ if cube is None:
174
+ output += "The extracted knowledge is not exhaustive; impossible to predict this instance\n"
175
+ if verbose:
176
+ print(output)
177
+ return None, {}
178
+ prediction = self._predict_from_cubes(data)
179
+ output += f"The output is {prediction} because\n"
180
+ conditions = {c: [val for val in v if val is not None and val.is_in(self.unscale(data[c], c))]
181
+ for c, v in self.__get_conditions(data, cube).items()}
51
182
 
52
- class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
53
- def __init__(self, predictor, output, normalization):
54
- PedagogicalExtractor.__init__(self, predictor, normalization=normalization)
55
- HyperCubePredictor.__init__(self, output=output, normalization=normalization)
183
+ if verbose:
184
+ output += HyperCubeExtractor.__conditions_to_string(conditions)
185
+ print(output)
56
186
 
57
- def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
58
- if self._output == Target.CONSTANT:
59
- return HyperCube()
60
- if self._output == Target.REGRESSION:
61
- return RegressionCube()
62
- return ClassificationCube()
187
+ return prediction, conditions
63
188
 
64
189
  @staticmethod
65
190
  def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
@@ -67,19 +192,28 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
67
192
  if not isinstance(output, LinearRegression) else \
68
193
  create_head(dataframe.columns[-1], variables[:-1], variables[-1])
69
194
 
70
- def _ignore_dimensions(self) -> Iterable[str]:
71
- return []
195
+ def __drop(self, dataframe: pd.DataFrame):
196
+ self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
197
+
198
+ def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
199
+ # self.__drop(dataframe)
200
+ for cube in self._hypercubes:
201
+ for dimension in cube.dimensions:
202
+ if abs(cube[dimension][0] - self._surrounding[dimension][0]) < HyperCube.EPSILON * 2:
203
+ cube.set_infinite(dimension, '-')
204
+ if abs(cube[dimension][1] - self._surrounding[dimension][1]) < HyperCube.EPSILON * 2:
205
+ cube.set_infinite(dimension, '+')
206
+
207
+ if self._default_surrounding_cube:
208
+ self._hypercubes[-1].set_default()
72
209
 
73
- def _create_theory(self, dataframe: pd.DataFrame, sort: bool = True) -> Theory:
74
210
  new_theory = mutable_theory()
75
211
  for cube in self._hypercubes:
76
- logger.info(cube.output)
77
- logger.info(cube.dimensions)
78
- variables = create_variable_list([], dataframe, sort)
212
+ variables = create_variable_list([], dataframe)
79
213
  variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
80
214
  head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
81
215
  self.unscale(cube.output, dataframe.columns[-1]))
82
- body = cube.body(variables, self._ignore_dimensions(), self.unscale, self.normalization)
216
+ body = cube.body(variables, self._dimensions_to_ignore, self.unscale, self.normalization)
83
217
  new_theory.assertZ(clause(head, body))
84
218
  return HyperCubeExtractor._prettify_theory(new_theory)
85
219
 
@@ -120,10 +254,16 @@ class FeatureRanker:
120
254
 
121
255
 
122
256
  class Grid:
123
- def __init__(self, iterations: int = 1, strategy: Strategy | list[Strategy] = FixedStrategy()):
257
+ def __init__(self, iterations: int = 1, strategy: Strategy | Iterable[Strategy] = FixedStrategy()):
124
258
  self.iterations = iterations
125
259
  self.strategy = strategy
126
260
 
261
+ def make_fair(self, features: Iterable[str]):
262
+ if isinstance(self.strategy, Strategy):
263
+ self.strategy.make_fair(features)
264
+ elif isinstance(self.strategy, Iterable):
265
+ [strategy.make_fair(features) for strategy in self.strategy]
266
+
127
267
  def get(self, feature: str, depth: int) -> int:
128
268
  if isinstance(self.strategy, list):
129
269
  return self.strategy[depth].get(feature)
@@ -0,0 +1,47 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.mixture import GaussianMixture
4
+ from tuprolog.theory import Theory
5
+
6
+ from psyke import Target, Extractor, get_default_random_seed
7
+ from psyke.clustering.utils import select_gaussian_mixture
8
+ from psyke.extraction.hypercubic import HyperCube, HyperCubeExtractor, RegressionCube
9
+
10
+
11
+ class COSMiK(HyperCubeExtractor):
12
+ """
13
+ Explanator implementing COSMiK algorithm.
14
+ """
15
+
16
+ def __init__(self, predictor, max_components: int = 4, k: int = 5, patience: int = 15, close_to_center: bool = True,
17
+ output: Target = Target.CONSTANT, discretization=None, normalization=None,
18
+ seed: int = get_default_random_seed()):
19
+ super().__init__(predictor, Target.REGRESSION, discretization, normalization)
20
+ self.max = max_components
21
+ self.k = k
22
+ self.patience = patience
23
+ self.output = output
24
+ self.close_to_center = close_to_center
25
+ self.seed = seed
26
+
27
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
28
+ np.random.seed(self.seed)
29
+ X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]
30
+
31
+ _, n, _ = select_gaussian_mixture(dataframe, self.max)
32
+ gmm = GaussianMixture(n)
33
+ gmm.fit(X, y)
34
+
35
+ divine = Extractor.divine(gmm, self.k, self.patience, self.close_to_center,
36
+ self.discretization, self.normalization)
37
+ df = X.join(pd.DataFrame(gmm.predict(X)))
38
+ df.columns = dataframe.columns
39
+ divine.extract(df)
40
+
41
+ self._hypercubes = [HyperCube(cube.dimensions.copy()) if self.output == Target.CONSTANT else
42
+ RegressionCube(cube.dimensions.copy()) for cube in divine._hypercubes]
43
+ for cube in self._hypercubes:
44
+ cube.update(dataframe, self.predictor)
45
+
46
+ self._sort_cubes()
47
+ return self._create_theory(dataframe)
@@ -1,50 +1,45 @@
1
1
  from __future__ import annotations
2
2
 
3
- from abc import ABC
4
- from collections import Iterable
5
- import numpy as np
3
+ from collections.abc import Iterable
4
+ from typing import Callable, Any
5
+
6
6
  import pandas as pd
7
7
  from sklearn.base import ClassifierMixin
8
- from tuprolog.core import clause
9
8
  from tuprolog.theory import Theory
10
9
  from psyke import Clustering
11
10
  from psyke.clustering import HyperCubeClustering
12
11
  from psyke.extraction.hypercubic import HyperCubeExtractor
13
- from psyke.utils import Target
12
+ from psyke.utils import Target, get_default_random_seed
14
13
 
15
14
 
16
- class CReEPy(HyperCubeExtractor, ABC):
15
+ class CReEPy(HyperCubeExtractor):
17
16
  """
18
17
  Explanator implementing CReEPy algorithm.
19
18
  """
20
19
 
21
- def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
22
- gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
23
- normalization=None, clustering=Clustering.exact):
20
+ ClusteringType = Callable[[int, float, Target, int, Any, Any, int], HyperCubeClustering]
21
+
22
+ def __init__(self, predictor, clustering: ClusteringType = Clustering.exact, depth: int = 3,
23
+ error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 5,
24
+ ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0, discretization=None,
25
+ normalization=None, seed: int = get_default_random_seed()):
24
26
  super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
25
- normalization)
26
- self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
27
- self.ranks = ranks
28
- self.ignore_threshold = ignore_threshold
27
+ discretization, normalization)
28
+ self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
29
+ normalization, seed)
30
+ self._default_surrounding_cube = True
31
+ self._dimensions_to_ignore = set([dimension for dimension, relevance in ranks if relevance < ignore_threshold])
32
+ self._protected_features = []
33
+
34
+ def make_fair(self, features: Iterable[str]):
35
+ self.clustering.make_fair(features)
36
+ self._dimensions_to_ignore.update(features)
29
37
 
30
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
38
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
31
39
  if not isinstance(self.clustering, HyperCubeClustering):
32
40
  raise TypeError("clustering must be a HyperCubeClustering")
33
41
 
34
42
  self.clustering.fit(dataframe)
35
43
  self._hypercubes = self.clustering.get_hypercubes()
36
- for cube in self._hypercubes:
37
- for dimension in self._ignore_dimensions():
38
- cube[dimension] = [-np.inf, np.inf]
39
- theory = self._create_theory(dataframe)
40
- last_clause = list(theory.clauses)[-1]
41
- theory.retract(last_clause)
42
- theory.assertZ(clause(
43
- last_clause.head, [list(last_clause.body)[-1]] if self._output is Target.REGRESSION else []))
44
- last_cube = self._hypercubes[-1]
45
- for dimension in last_cube.dimensions.keys():
46
- last_cube[dimension] = [-np.inf, np.inf]
47
- return theory
48
-
49
- def _ignore_dimensions(self) -> Iterable[str]:
50
- return [dimension for dimension, relevance in self.ranks if relevance < self.ignore_threshold]
44
+ self._surrounding = self._hypercubes[-1]
45
+ return self._create_theory(dataframe)
@@ -0,0 +1,86 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from tuprolog.theory import Theory
4
+
5
+ from psyke import Target, get_default_random_seed
6
+ from psyke.extraction.hypercubic import HyperCubeExtractor
7
+ from psyke.extraction.hypercubic.hypercube import Point, GenericCube, HyperCube
8
+
9
+ from sklearn.neighbors import BallTree
10
+
11
+
12
+ class DiViNE(HyperCubeExtractor):
13
+ """
14
+ Explanator implementing DiViNE algorithm.
15
+ """
16
+
17
+ def __init__(self, predictor, k: int = 5, patience: int = 15, close_to_center: bool = True,
18
+ discretization=None, normalization=None, seed: int = get_default_random_seed()):
19
+ super().__init__(predictor, Target.CLASSIFICATION, discretization, normalization)
20
+ self.k = k
21
+ self.patience = patience
22
+ self.vicinity_function = DiViNE.closest_to_center if close_to_center else DiViNE.closest_to_corners
23
+ self.seed = seed
24
+
25
+ @staticmethod
26
+ def __pop(data: pd.DataFrame, idx: int = None) -> (Point, pd.DataFrame):
27
+ if idx is None:
28
+ idx = data.sample(1).index.values[0]
29
+ t = data.T
30
+ return DiViNE.__to_point(t.pop(idx)), t.T.reset_index(drop=True)
31
+
32
+ @staticmethod
33
+ def __to_point(instance) -> Point:
34
+ point = Point(instance.index.values, instance.values)
35
+ return point
36
+
37
+ def __to_cube(self, point: Point) -> GenericCube:
38
+ cube = HyperCube.cube_from_point(point.dimensions, self._output)
39
+ cube._output = list(point.dimensions.values())[-1]
40
+ return cube
41
+
42
+ def __clean(self, data: pd.DataFrame) -> pd.DataFrame:
43
+ _, idx = BallTree(data.iloc[:, :-1]).query(data.iloc[:, :-1], k=self.k)
44
+ # how many output classes are associated with the k neighbors
45
+ count = np.array(list(map(lambda indices: len(data.iloc[indices].iloc[:, -1].unique()), idx)))
46
+ # instances with neighbors of different classes are discarded
47
+ return data[count == 1]
48
+
49
+ def __closest(self, data: pd.DataFrame, cube: GenericCube) -> (Point, pd.DataFrame):
50
+ return DiViNE.__pop(data, self.vicinity_function(BallTree(data.iloc[:, :-1]), cube))
51
+
52
+ @staticmethod
53
+ def closest_to_center(tree: BallTree, cube: GenericCube):
54
+ return tree.query([list(cube.center.dimensions.values())], k=1)[1][0][-1]
55
+
56
+ @staticmethod
57
+ def closest_to_corners(tree: BallTree, cube: GenericCube):
58
+ distance, idx = tree.query([list(point.dimensions.values()) for point in cube.corners()], k=1)
59
+ return idx[np.argmin(distance)][-1]
60
+
61
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
62
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=Target.CLASSIFICATION)
63
+ np.random.seed(self.seed)
64
+ data = self.__clean(dataframe)
65
+
66
+ while len(data) > 0:
67
+ discarded = []
68
+ patience = self.patience
69
+ point, data = self.__pop(data)
70
+ cube = self.__to_cube(point)
71
+
72
+ while patience > 0 and len(data) > 0:
73
+ other, data = self.__closest(data, cube)
74
+ if cube.output == list(other.dimensions.values())[-1]:
75
+ cube = cube.merge_with_point(other)
76
+ data = data[~(cube.filter_indices(data.iloc[:, :-1]))].reset_index(drop=True)
77
+ else:
78
+ patience -= 1
79
+ discarded.append(other)
80
+ if cube.volume() > 0:
81
+ cube.update(dataframe, self.predictor)
82
+ self._hypercubes.append(cube)
83
+ if len(discarded) > 0:
84
+ data = pd.concat([data] + [d.to_dataframe() for d in discarded]).reset_index(drop=True)
85
+ self._sort_cubes()
86
+ return self._create_theory(dataframe)
@@ -0,0 +1,100 @@
1
+ import itertools
2
+ from typing import Iterable
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.base import ClassifierMixin
7
+ from sklearn.preprocessing import PolynomialFeatures
8
+ from tuprolog.theory import Theory
9
+
10
+ from psyke import get_default_random_seed, Target
11
+ from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube, RegressionCube
12
+
13
+ from deap import base, creator
14
+
15
+ from psyke.genetic.gin import GIn
16
+
17
+
18
+ class GInGER(HyperCubeExtractor):
19
+ """
20
+ Explanator implementing GInGER algorithm.
21
+ """
22
+
23
+ def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
24
+ tournsize=3, metric='R2', n_gen=50, n_pop=50, threshold=None, valid=None,
25
+ output: Target = Target.REGRESSION, normalization=None, seed: int = get_default_random_seed()):
26
+ super().__init__(predictor, output=Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
27
+ normalization=normalization)
28
+ self.threshold = threshold
29
+ np.random.seed(seed)
30
+
31
+ self.features = features
32
+ self.max_features = len(features)
33
+ self.sigmas = sigmas
34
+ self.max_slices = max_slices
35
+ self.min_rules = min_rules
36
+ self.poly = max_poly
37
+ self.trained_poly = None
38
+
39
+ self.alpha = alpha
40
+ self.indpb = indpb
41
+ self.tournsize = tournsize
42
+ self.metric = metric
43
+
44
+ self.n_gen = n_gen
45
+ self.n_pop = n_pop
46
+ self.valid = valid
47
+
48
+ creator.create("FitnessMax", base.Fitness, weights=(1.0,))
49
+ creator.create("Individual", list, fitness=creator.FitnessMax)
50
+
51
+ def __poly_names(self):
52
+ return [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
53
+ for pp, n in zip(p, self.trained_poly.feature_names_in_)])[:-3]
54
+ for p in self.trained_poly.powers_]
55
+
56
+ def _predict(self, dataframe: pd.DataFrame) -> Iterable:
57
+ dataframe = pd.DataFrame(self.trained_poly.fit_transform(dataframe), columns=self.__poly_names())
58
+ return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
59
+
60
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
61
+ best = {}
62
+ for poly in range(self.poly):
63
+ for slices in list(itertools.product(range(1, self.max_slices + 1), repeat=self.max_features)):
64
+ gr = GIn((dataframe.iloc[:, :-1], dataframe.iloc[:, -1]), self.valid, self.features, self.sigmas,
65
+ slices, min_rules=self.min_rules, poly=poly + 1, alpha=self.alpha, indpb=self.indpb,
66
+ tournsize=self.tournsize, metric=self.metric, output=self._output, warm=True)
67
+
68
+ b, score, _, _ = gr.run(n_gen=self.n_gen, n_pop=self.n_pop)
69
+ best[(score, poly + 1, slices)] = b
70
+ m = min(best)
71
+ poly, slices, best = m[1], m[2], best[m]
72
+ self.trained_poly = PolynomialFeatures(degree=poly, include_bias=False)
73
+ transformed = pd.DataFrame(self.trained_poly.fit_transform(dataframe.iloc[:, :-1]), columns=self.__poly_names())
74
+ transformed[dataframe.columns[-1]] = dataframe.iloc[:, -1].values
75
+
76
+ self._surrounding = HyperCube.create_surrounding_cube(transformed, output=self._output)
77
+
78
+ cuts = [sorted(best[sum(slices[:i]):sum(slices[:i + 1])]) for i in range(len(slices))]
79
+
80
+ intervals = [[(transformed[self.features[i]].min(), cut[0])] +
81
+ [(cut[i], cut[i + 1]) for i in range(len(cut) - 1)] +
82
+ [(cut[-1], transformed[self.features[i]].max())] for i, cut in enumerate(cuts)]
83
+
84
+ hypercubes = [{f: iv for f, iv in zip(self.features, combo)} for combo in itertools.product(*intervals)]
85
+ mi_ma = {f: (transformed[f].min(), transformed[f].max()) for f in transformed.columns if f not in self.features}
86
+ self._hypercubes = [self._default_cube({feat: h[feat] if feat in self.features else mi_ma[feat]
87
+ for feat in transformed.columns[:-1]}) for h in hypercubes]
88
+ self._hypercubes = [c for c in self._hypercubes if c.count(transformed) >= 2]
89
+ for c in self._hypercubes:
90
+ for feature in transformed.columns:
91
+ if feature not in self.features:
92
+ for direction in ['+', '-']:
93
+ c.set_infinite(feature, direction)
94
+ c.update(transformed)
95
+ if self.threshold is not None:
96
+ self._hypercubes = self._merge(self._hypercubes, transformed)
97
+ return self._create_theory(transformed)
98
+
99
+ def make_fair(self, features: Iterable[str]):
100
+ self._dimensions_to_ignore.update(features)