psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (42)
  1. psyke/__init__.py +231 -85
  2. psyke/clustering/__init__.py +9 -4
  3. psyke/clustering/cream/__init__.py +6 -10
  4. psyke/clustering/exact/__init__.py +17 -11
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +25 -0
  7. psyke/extraction/cart/CartPredictor.py +128 -0
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +48 -62
  11. psyke/extraction/hypercubic/__init__.py +187 -47
  12. psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
  13. psyke/extraction/hypercubic/creepy/__init__.py +24 -29
  14. psyke/extraction/hypercubic/divine/__init__.py +86 -0
  15. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  16. psyke/extraction/hypercubic/gridex/__init__.py +45 -84
  17. psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
  18. psyke/extraction/hypercubic/hex/__init__.py +104 -0
  19. psyke/extraction/hypercubic/hypercube.py +275 -72
  20. psyke/extraction/hypercubic/iter/__init__.py +45 -46
  21. psyke/extraction/hypercubic/strategy.py +13 -9
  22. psyke/extraction/real/__init__.py +24 -29
  23. psyke/extraction/real/utils.py +2 -2
  24. psyke/extraction/trepan/__init__.py +24 -19
  25. psyke/genetic/__init__.py +0 -0
  26. psyke/genetic/fgin/__init__.py +74 -0
  27. psyke/genetic/gin/__init__.py +144 -0
  28. psyke/hypercubepredictor.py +102 -0
  29. psyke/schema/__init__.py +230 -36
  30. psyke/tuning/__init__.py +40 -28
  31. psyke/tuning/crash/__init__.py +33 -64
  32. psyke/tuning/orchid/__init__.py +21 -23
  33. psyke/tuning/pedro/__init__.py +70 -56
  34. psyke/utils/logic.py +8 -8
  35. psyke/utils/plot.py +79 -3
  36. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
  37. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  38. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  39. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  40. psyke/extraction/cart/predictor.py +0 -73
  41. psyke-0.4.9.dev6.dist-info/RECORD +0 -36
  42. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/extraction/hypercubic/gridex/__init__.py

@@ -1,11 +1,11 @@
  from __future__ import annotations
- import random as rnd
  from itertools import product
  from typing import Iterable
  import numpy as np
  import pandas as pd
+ from sklearn.base import ClassifierMixin
  from tuprolog.theory import Theory
- from psyke import get_default_random_seed, PedagogicalExtractor
+ from psyke import get_default_random_seed
  from psyke.utils import Target
  from psyke.extraction.hypercubic import HyperCubeExtractor, Grid, HyperCube

@@ -15,102 +15,63 @@ class GridEx(HyperCubeExtractor):
      Explanator implementing GridEx algorithm, doi:10.1007/978-3-030-82017-6_2.
      """

-     def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization=None,
-                  seed=get_default_random_seed()):
-         super().__init__(predictor, Target.CONSTANT, normalization)
+     def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+                  discretization=None, normalization=None, seed: int = get_default_random_seed()):
+         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
+                          discretization, normalization)
          self.grid = grid
          self.min_examples = min_examples
          self.threshold = threshold
-         self.__generator = rnd.Random(seed)
+         np.random.seed(seed)

-     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+     def _extract(self, dataframe: pd.DataFrame) -> Theory:
          self._hypercubes = []
-         if isinstance(np.array(self.predictor.predict(dataframe.iloc[0:1, :-1])).flatten()[0], str):
-             self._output = Target.CLASSIFICATION
-         surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
-         surrounding.init_diversity(2 * self.threshold)
-         self._iterate(surrounding, dataframe)
-         return self._create_theory(dataframe, sort)
+         self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+         self._surrounding.init_diversity(2 * self.threshold)
+         self._iterate(dataframe)
+         return self._create_theory(dataframe)

-     def _ignore_dimensions(self) -> Iterable[str]:
-         cube = self._hypercubes[0]
-         return [d for d in cube.dimensions if all(c[d] == cube[d] for c in self._hypercubes)]
+     def _create_ranges(self, cube, iteration):
+         ranges = {}
+         for (feature, (a, b)) in cube.dimensions.items():
+             n_bins = self.grid.get(feature, iteration)
+             if n_bins == 1:
+                 ranges[feature] = [(a, b)]
+                 self._dimensions_to_ignore.add(feature)
+             else:
+                 size = (b - a) / n_bins
+                 ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
+         return ranges

-     def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
+     def _cubes_to_split(self, cube, iteration, dataframe, fake, keep_empty=False):
+         to_split = []
+         for p in product(*self._create_ranges(cube, iteration).values()):
+             cube = self._default_cube()
+             for i, f in enumerate(dataframe.columns[:-1]):
+                 cube.update_dimension(f, p[i])
+             n = cube.count(dataframe)
+             if n > 0 or keep_empty:
+                 fake = pd.concat([fake, cube.create_samples(self.min_examples - n)])
+                 cube.update(fake, self.predictor)
+                 to_split.append(cube)
+         return to_split, fake
+
+     def _iterate(self, dataframe: pd.DataFrame):
          fake = dataframe.copy()
-         prev = [surrounding]
-         next_iteration = []
+         prev = [self._surrounding]

          for iteration in self.grid.iterate():
              next_iteration = []
              for cube in prev:
-                 to_split = []
                  if cube.count(dataframe) == 0:
                      continue
                  if cube.diversity < self.threshold:
-                     self._hypercubes += [cube]
+                     self._hypercubes.append(cube)
                      continue
-                 ranges = {}
-                 for (feature, (a, b)) in cube.dimensions.items():
-                     bins = []
-                     n_bins = self.grid.get(feature, iteration)
-                     size = (b - a) / n_bins
-                     for i in range(n_bins):
-                         bins.append((a + size * i, a + size * (i + 1)))
-                     ranges[feature] = bins
-                 for (pn, p) in enumerate(list(product(*ranges.values()))):
-                     cube = self._default_cube()
-                     for i, f in enumerate(dataframe.columns[:-1]):
-                         cube.update_dimension(f, p[i])
-                     n = cube.count(dataframe)
-                     if n > 0:
-                         fake = pd.concat([fake, cube.create_samples(self.min_examples - n, self.__generator)])
-                         cube.update(fake, self.predictor)
-                         to_split += [cube]
-                 to_split = self._merge(to_split, fake)
-                 next_iteration += [cube for cube in to_split]
-             prev = next_iteration.copy()
-         self._hypercubes += [cube for cube in next_iteration]
-
-     @staticmethod
-     def _find_couples(to_split: Iterable[HyperCube], not_in_cache: Iterable[HyperCube],
-                       adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
-             Iterable[tuple[HyperCube, HyperCube, str]]:
-         checked = []
-         eligible = []
-         for cube in to_split:
-             checked.append(cube)
-             for other_cube in [c for c in to_split if c not in checked]:
-                 if (cube in not_in_cache) or (other_cube in not_in_cache):
-                     adjacent_cache[(cube, other_cube)] = cube.is_adjacent(other_cube)
-                 adjacent_feature = adjacent_cache[(cube, other_cube)]
-                 eligible.append((cube, other_cube, adjacent_feature))
-         return [couple for couple in eligible if couple[2] is not None]
-
-     def _evaluate_merge(self, not_in_cache: Iterable[HyperCube],
-                         dataframe: pd.DataFrame, feature: str,
-                         cube: HyperCube, other_cube: HyperCube,
-                         merge_cache: dict[(HyperCube, HyperCube), HyperCube | None]) -> bool:
-         if (cube in not_in_cache) or (other_cube in not_in_cache):
-             merged_cube = cube.merge_along_dimension(other_cube, feature)
-             merged_cube.update(dataframe, self.predictor)
-             merge_cache[(cube, other_cube)] = merged_cube
-         return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
-             merge_cache[(cube, other_cube)].diversity < self.threshold
+                 to_split, fake = self._cubes_to_split(cube, iteration, dataframe, fake)
+                 next_iteration.extend(self._merge(to_split, fake))
+             prev = next_iteration
+         self._hypercubes.extend(prev)

-     def _merge(self, to_split: Iterable[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
-         not_in_cache = [cube for cube in to_split]
-         adjacent_cache = {}
-         merge_cache = {}
-         # TODO: refactor this. A while true with a break is as ugly as hunger.
-         while True:
-             to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
-                         GridEx._find_couples(to_split, not_in_cache, adjacent_cache) if
-                         self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
-             if len(to_merge) == 0:
-                 break
-             sorted(to_merge, key=lambda c: c[1].diversity)
-             best = to_merge[0]
-             to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
-             not_in_cache = [best[1]]
-         return to_split
+     def make_fair(self, features: Iterable[str]):
+         self.grid.make_fair(features)
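
Reviewer note: this hunk moves target handling into the constructor (classifiers are now detected via ClassifierMixin instead of probing one prediction for str, which also covers integer-labeled classifiers), replaces the per-instance random.Random with a global np.random.seed(seed) call, and factors grid splitting into the reusable _create_ranges/_cubes_to_split helpers. Below is a minimal usage sketch of the new signature; it assumes PSyKE's documented Grid(iterations, strategy) and FixedStrategy API and the public extract() wrapper inherited from the Extractor base, with purely illustrative parameter values.

    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from psyke.extraction.hypercubic import Grid
    from psyke.extraction.hypercubic.strategy import FixedStrategy
    from psyke.extraction.hypercubic.gridex import GridEx

    # PSyKE convention: the last dataframe column is the target.
    train = load_iris(as_frame=True).frame
    predictor = RandomForestClassifier().fit(train.iloc[:, :-1], train.iloc[:, -1])

    # No output= needed for classifiers: the new constructor forces
    # Target.CLASSIFICATION for any ClassifierMixin predictor, even though
    # the iris labels are integers rather than strings.
    gridex = GridEx(predictor, grid=Grid(1, FixedStrategy(2)),
                    min_examples=100, threshold=0.1, seed=123)
    theory = gridex.extract(train)

Note that np.random.seed(seed) seeds NumPy's global generator, so two extractors built in the same process now share RNG state, unlike the old per-instance rnd.Random(seed).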
psyke/extraction/hypercubic/gridrex/__init__.py

@@ -1,16 +1,16 @@
- from psyke import get_default_random_seed
+ from psyke import get_default_random_seed, Target
  from psyke.extraction.hypercubic import Grid, RegressionCube
  from psyke.extraction.hypercubic.gridex import GridEx


  class GridREx(GridEx):
      """
-     Explanator implementing GridREx algorithm.
+     Explanator implementing GridREx algorithm, doi:10.24963/kr.2022/57.
      """

      def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization,
                   seed=get_default_random_seed()):
-         super().__init__(predictor, grid, min_examples, threshold, normalization, seed)
+         super().__init__(predictor, grid, min_examples, threshold, Target.REGRESSION, None, normalization, seed)

-     def _default_cube(self) -> RegressionCube:
+     def _default_cube(self, dimensions=None) -> RegressionCube:
          return RegressionCube()
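
Reviewer note: the updated super() call pins Target.REGRESSION and passes None for discretization, keeping the positional arguments aligned with GridEx's widened signature; without it, normalization would have been bound to the new output parameter and seed to discretization. A sketch of the updated usage, under the same Grid/FixedStrategy assumptions as the GridEx example above:

    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor
    from psyke.extraction.hypercubic import Grid
    from psyke.extraction.hypercubic.strategy import FixedStrategy
    from psyke.extraction.hypercubic.gridrex import GridREx

    data = load_diabetes(as_frame=True).frame  # target is the last column
    regressor = RandomForestRegressor().fit(data.iloc[:, :-1], data.iloc[:, -1])

    # normalization remains a required parameter of GridREx; None disables it.
    # The superclass receives Target.REGRESSION, so no output inference is done.
    gridrex = GridREx(regressor, Grid(1, FixedStrategy(2)),
                      min_examples=100, threshold=1.0, normalization=None)
    theory = gridrex.extract(data)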
psyke/extraction/hypercubic/hex/__init__.py

@@ -0,0 +1,104 @@
+ from __future__ import annotations
+
+ from typing import Iterable
+
+ import numpy as np
+ import pandas as pd
+
+ from psyke import get_default_random_seed, Target
+ from psyke.extraction.hypercubic import Grid, HyperCube, GenericCube, ClassificationCube
+ from psyke.extraction.hypercubic.gridex import GridEx
+
+
+ class HEx(GridEx):
+     """
+     Explanator implementing HEx algorithm.
+     """
+
+     class Node:
+         def __init__(self, cube: GenericCube, parent: HEx.Node = None, threshold: float = None):
+             self.cube = cube
+             self.parent = parent
+             self.children: Iterable[HEx.Node] = []
+             self.threshold = threshold
+             self.gain = True if parent is None else self.check()
+
+         def check(self) -> bool:
+             other = self.parent
+             try:
+                 while not other.gain:
+                     other = other.parent
+             except AttributeError:
+                 return True
+             if isinstance(other.cube, ClassificationCube):
+                 return other.cube.output != self.cube.output
+             return other.cube.error - self.cube.error > self.threshold * .6
+
+         def indices(self, dataframe: pd.DataFrame):
+             return self.cube.filter_indices(dataframe.iloc[:, :-1])
+
+         def eligible_children(self, dataframe) -> Iterable[HEx.Node]:
+             return [c for c in self.children if c.cube.count(dataframe) > 0]
+
+         def permanent_children(self, dataframe) -> Iterable[HEx.Node]:
+             return [c for c in self.eligible_children(dataframe) if c.gain]
+
+         def permanent_indices(self, dataframe):
+             return np.any([c.cube.filter_indices(dataframe.iloc[:, :-1])
+                            for c in self.eligible_children(dataframe) if c.gain], axis=0)
+
+         def update(self, dataframe: pd.DataFrame, predictor, recursive=False):
+             if recursive:
+                 for node in self.children:
+                     node.update(dataframe, predictor, recursive)
+             cleaned = [(c.cube, c.gain) for c in self.eligible_children(dataframe)]
+             idx = self.permanent_indices(dataframe)
+
+             if sum(g for _, g in cleaned) > 0 and sum(self.indices(dataframe)) > sum(idx) and self.gain:
+                 self.cube.update(dataframe[self.indices(dataframe) & ~idx], predictor)
+             return cleaned
+
+         def linearize(self, dataframe, depth=1):
+             children = [c.linearize(dataframe, depth + 1) for c in self.permanent_children(dataframe)]
+             return [(cc, dd) for c in children for cc, dd in c if c != []] + \
+                 [(c, depth) for c in self.permanent_children(dataframe)]
+
+     def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+                  discretization=None, normalization=None, seed: int = get_default_random_seed()):
+         super().__init__(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+         self._default_surrounding_cube = True
+
+     def _gain(self, parent_cube: GenericCube, new_cube: GenericCube) -> float:
+         if isinstance(parent_cube, ClassificationCube):
+             return parent_cube.output != new_cube.output
+         return parent_cube.error - new_cube.error > self.threshold * .6
+
+     def _iterate(self, dataframe: pd.DataFrame):
+         fake = dataframe.copy()
+         self._surrounding.update(dataframe, self.predictor)
+         root = HEx.Node(self._surrounding, threshold=self.threshold)
+         current = [root]
+
+         for iteration in self.grid.iterate():
+             next_iteration = []
+             for node in current:
+                 if node.cube.diversity < self.threshold:
+                     continue
+                 children, fake = self._cubes_to_split(node.cube, iteration, dataframe, fake, True)
+                 node.children = [HEx.Node(c, node, threshold=self.threshold) for c in children]
+                 cleaned = node.update(fake, self.predictor, False)
+                 node.children = [HEx.Node(c, node, threshold=self.threshold) for c in self._merge(
+                     [c for c, _ in cleaned], fake)]
+                 next_iteration += [n for n in node.children]
+
+             current = next_iteration.copy()
+         _ = root.update(fake, self.predictor, True)
+         self._hypercubes = []
+         linearized = root.linearize(fake)
+         for depth in sorted(np.unique([d for (_, d) in linearized]), reverse=True):
+             self._hypercubes += self._merge([c.cube for (c, d) in linearized if d == depth], fake)
+
+         if len(self._hypercubes) == 0:
+             self._hypercubes = [self._surrounding]
+         elif not min(np.any([c.filter_indices(dataframe.iloc[:, :-1]) for c in self._hypercubes], axis=0)):
+             self._hypercubes = self._hypercubes + [self._surrounding]
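
Reviewer note: the new HEx extractor builds directly on the refactored GridEx hooks (_cubes_to_split with keep_empty=True, _merge). Each grid refinement becomes a Node in a tree, and a child cube is kept only when it improves on its nearest kept ancestor: a different predicted class, or an error reduction above 0.6 * threshold. Kept leaves are then linearized and re-merged depth by depth, with the surrounding cube used as a fallback when nothing survives or coverage is incomplete. A minimal usage sketch under the same Grid/FixedStrategy assumptions as above; the constructor signature mirrors the new GridEx one shown in this diff:

    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from psyke.extraction.hypercubic import Grid
    from psyke.extraction.hypercubic.strategy import FixedStrategy
    from psyke.extraction.hypercubic.hex import HEx

    train = load_iris(as_frame=True).frame  # target is the last column
    predictor = RandomForestClassifier().fit(train.iloc[:, :-1], train.iloc[:, -1])

    # Two grid iterations: HEx only keeps refinements whose cubes beat their
    # nearest kept ancestor, then merges the survivors depth by depth.
    hex_extractor = HEx(predictor, Grid(2, FixedStrategy(2)),
                        min_examples=100, threshold=0.1)
    theory = hex_extractor.extract(train)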