psyke 0.8.9.dev48__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. psyke/__init__.py +112 -24
  2. psyke/clustering/__init__.py +4 -0
  3. psyke/clustering/cream/__init__.py +2 -6
  4. psyke/clustering/exact/__init__.py +10 -7
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +6 -2
  7. psyke/extraction/cart/{predictor.py → CartPredictor.py} +52 -7
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +27 -52
  11. psyke/extraction/hypercubic/__init__.py +58 -7
  12. psyke/extraction/hypercubic/creepy/__init__.py +14 -6
  13. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  14. psyke/extraction/hypercubic/gridex/__init__.py +6 -48
  15. psyke/extraction/hypercubic/gridrex/__init__.py +2 -2
  16. psyke/extraction/hypercubic/hypercube.py +33 -26
  17. psyke/extraction/hypercubic/iter/__init__.py +5 -0
  18. psyke/extraction/hypercubic/strategy.py +13 -9
  19. psyke/extraction/real/__init__.py +21 -22
  20. psyke/extraction/real/utils.py +2 -2
  21. psyke/extraction/trepan/__init__.py +19 -15
  22. psyke/genetic/__init__.py +0 -0
  23. psyke/genetic/fgin/__init__.py +74 -0
  24. psyke/genetic/gin/__init__.py +144 -0
  25. psyke/hypercubepredictor.py +4 -2
  26. psyke/tuning/pedro/__init__.py +4 -2
  27. psyke/utils/logic.py +4 -8
  28. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +39 -19
  29. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  30. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  31. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  32. psyke-0.8.9.dev48.dist-info/RECORD +0 -40
  33. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/extraction/cart/FairTreePredictor.py (new file)
@@ -0,0 +1,56 @@
+ import copy
+ from typing import Union, Any
+
+ from psyke.extraction.cart import FairTreeClassifier, FairTreeRegressor, LeafSequence, LeafConstraints
+ from psyke.extraction.cart.CartPredictor import CartPredictor
+ from psyke.schema import LessThan, GreaterThan, SchemaException, Value
+
+
+ class FairTreePredictor(CartPredictor):
+     """
+     A wrapper for fair decision and regression trees of psyke.
+     """
+
+     def __init__(self, predictor: Union[FairTreeClassifier, FairTreeRegressor] = FairTreeClassifier(),
+                  discretization=None, normalization=None):
+         super().__init__(predictor, discretization, normalization)
+
+     def __iter__(self) -> LeafSequence:
+         leaves = [node for node in self.recurse(self._predictor.root, {})]
+         return (leaf for leaf in leaves)
+
+     @staticmethod
+     def merge_constraints(constraints: LeafConstraints, constraint: Value, feature: str):
+         if feature in constraints:
+             try:
+                 constraints[feature][-1] *= constraint
+             except SchemaException:
+                 constraints[feature].append(constraint)
+         else:
+             constraints[feature] = [constraint]
+         return constraints
+
+     def recurse(self, node, constraints) -> Union[LeafSequence, tuple[LeafConstraints, Any]]:
+         if node.is_leaf_node():
+             return constraints, node.value
+
+         feature = node.feature
+         threshold = node.threshold if self.normalization is None else \
+             (node.threshold * self.normalization[feature][1] + self.normalization[feature][0])
+
+         left = self.recurse(node.left, self.merge_constraints(copy.deepcopy(constraints), LessThan(threshold), feature))
+         right = self.recurse(node.right, self.merge_constraints(copy.deepcopy(constraints),
+                                                                 GreaterThan(threshold), feature))
+         return (left if isinstance(left, list) else [left]) + (right if isinstance(right, list) else [right])
+
+     @property
+     def predictor(self) -> Union[FairTreeClassifier, FairTreeRegressor]:
+         return self._predictor
+
+     @property
+     def n_leaves(self) -> int:
+         return self._predictor.n_leaves
+
+     @predictor.setter
+     def predictor(self, predictor: Union[FairTreeClassifier, FairTreeRegressor]):
+         self._predictor = predictor
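The new FairTreePredictor walks the fair tree and emits one (constraints, prediction) pair per leaf; merge_constraints tries to intersect successive splits on the same feature into a single interval (the `*=` on the schema value) and falls back to appending a second constraint when the intersection raises SchemaException. A minimal, self-contained sketch of that merging idea, with a hypothetical Interval class standing in for psyke's Value types:

    # Sketch only: Interval/SchemaError are stand-ins, not psyke's actual schema API.
    class SchemaError(Exception):
        pass

    class Interval:
        def __init__(self, lo=float('-inf'), hi=float('inf')):
            if lo >= hi:
                raise SchemaError('empty interval')
            self.lo, self.hi = lo, hi

        def __imul__(self, other):  # '*' as intersection, mirroring the Value semantics above
            return Interval(max(self.lo, other.lo), min(self.hi, other.hi))

    def merge(constraints, constraint, feature):
        if feature in constraints:
            try:
                constraints[feature][-1] *= constraint   # intersect with the last constraint...
            except SchemaError:
                constraints[feature].append(constraint)  # ...or keep both if incompatible
        else:
            constraints[feature] = [constraint]
        return constraints

    c = merge({}, Interval(hi=5.0), 'age')   # age < 5
    c = merge(c, Interval(lo=2.0), 'age')    # intersects to 2 < age < 5, still one constraint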
psyke/extraction/cart/__init__.py
@@ -3,78 +3,53 @@ from abc import ABC
  from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

  from psyke.extraction import PedagogicalExtractor
- from psyke.extraction.cart.predictor import CartPredictor, LeafConstraints, LeafSequence
  from psyke import get_default_random_seed
- from psyke.schema import GreaterThan, DiscreteFeature
- from psyke.utils.logic import create_variable_list, create_head, create_term
- from tuprolog.core import clause, Var, Struct
- from tuprolog.theory import Theory, mutable_theory
- from typing import Iterable
+ from psyke.extraction.cart.FairTree import FairTreeClassifier, FairTreeRegressor
+ from psyke.schema import DiscreteFeature, Value
+ from tuprolog.theory import Theory
+ from typing import Iterable, Any
  import pandas as pd


  TREE_SEED = get_default_random_seed()

+ LeafConstraints = dict[str, list[Value]]
+ LeafSequence = Iterable[tuple[LeafConstraints, Any]]
+

  class Cart(PedagogicalExtractor, ABC):

      def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None, max_features=None,
                   discretization: Iterable[DiscreteFeature] = None,
                   normalization=None, simplify: bool = True):
+         from psyke.extraction.cart.CartPredictor import CartPredictor
+
          super().__init__(predictor, discretization, normalization)
-         self._cart_predictor = CartPredictor(normalization=normalization)
+         self.is_fair = None
+         self._cart_predictor = CartPredictor(discretization=discretization, normalization=normalization)
          self.depth = max_depth
          self.leaves = max_leaves
          self.max_features = max_features
          self._simplify = simplify

-     def _create_body(self, variables: dict[str, Var], conditions: LeafConstraints) -> Iterable[Struct]:
-         results = []
-         for feature_name, cond_list in conditions.items():
-             for condition in cond_list:
-                 features = [d for d in self.discretization if feature_name in d.admissible_values]
-                 feature: DiscreteFeature = features[0] if len(features) > 0 else None
-                 results.append(create_term(variables[feature_name], condition) if feature is None else
-                                create_term(variables[feature.name],
-                                            feature.admissible_values[feature_name],
-                                            isinstance(condition, GreaterThan)))
-         return results
-
-     @staticmethod
-     def _simplify_nodes(nodes: list) -> Iterable:
-         simplified = [nodes.pop(0)]
-         while len(nodes) > 0:
-             first_node = nodes[0][0]
-             for k, conditions in first_node.items():
-                 for condition in conditions:
-                     if all(k in node[0] and condition in node[0][k] for node in nodes):
-                         [node[0][k].remove(condition) for node in nodes]
-             simplified.append(nodes.pop(0))
-         return [({k: v for k, v in rule.items() if v != []}, prediction) for rule, prediction in simplified]
-
-     def _create_theory(self, data: pd.DataFrame) -> Theory:
-         new_theory = mutable_theory()
-         nodes = [node for node in self._cart_predictor]
-         nodes = Cart._simplify_nodes(nodes) if self._simplify else nodes
-         for (constraints, prediction) in nodes:
-             if self.normalization is not None and data.columns[-1] in self.normalization:
-                 m, s = self.normalization[data.columns[-1]]
-                 prediction = prediction * s + m
-             variables = create_variable_list(self.discretization, data)
-             new_theory.assertZ(
-                 clause(
-                     create_head(data.columns[-1], list(variables.values()), prediction),
-                     self._create_body(variables, constraints)
-                 )
-             )
-         return new_theory
-
      def _extract(self, data: pd.DataFrame) -> Theory:
-         tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
-         self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth,
-                                               max_leaf_nodes=self.leaves, max_features=self.max_features)
+         from psyke.extraction.cart.FairTreePredictor import FairTreePredictor
+
+         if self.is_fair:
+             self._cart_predictor = FairTreePredictor(discretization=self.discretization,
+                                                      normalization=self.normalization)
+             fair_tree = FairTreeClassifier if isinstance(data.iloc[0, -1], str) else FairTreeRegressor
+             self._cart_predictor.predictor = fair_tree(max_depth=self.depth, max_leaves=self.leaves,
+                                                        protected_attr=self.is_fair)
+         else:
+             tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
+             self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth,
+                                                   max_leaf_nodes=self.leaves, max_features=self.max_features)
          self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
-         return self._create_theory(data)
+         return self._cart_predictor.create_theory(data, self._simplify)
+
+     def make_fair(self, features: Iterable[str]):
+         self.is_fair = features

      def _predict(self, dataframe: pd.DataFrame) -> Iterable:
          return self._cart_predictor.predict(dataframe)
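In both branches, `_extract` picks the estimator by sniffing the first value of the target column: a string target selects the classifier, anything else the regressor. A runnable sketch of that dispatch on invented toy data:

    # Toy data invented for illustration; the dispatch mirrors Cart._extract above.
    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

    data = pd.DataFrame({'x': [0.0, 1.0, 2.0, 3.0], 'y': ['a', 'a', 'b', 'b']})
    tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
    model = tree(max_depth=3).fit(data.iloc[:, :-1], data.iloc[:, -1])
    print(model.predict(data.iloc[:, :-1]))  # -> ['a' 'a' 'b' 'b']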
psyke/extraction/hypercubic/__init__.py
@@ -1,7 +1,9 @@
  from __future__ import annotations

- import math
  from abc import ABC
+ from collections.abc import Iterable
+ from itertools import combinations
+
  import numpy as np
  import pandas as pd
  from sklearn.base import ClassifierMixin
@@ -13,7 +15,7 @@ from psyke.extraction import PedagogicalExtractor
  from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
      GenericCube
  from psyke.hypercubepredictor import HyperCubePredictor
- from psyke.schema import Between, Outside, Value
+ from psyke.schema import Value
  from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
  from psyke.utils import Target
  from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
@@ -24,19 +26,62 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
          HyperCubePredictor.__init__(self, output=output, normalization=normalization)
          PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
          self._default_surrounding_cube = False
+         self.threshold = None

-     def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
+     def _default_cube(self, dimensions=None) -> HyperCube | RegressionCube | ClassificationCube:
          if self._output == Target.CONSTANT:
-             return HyperCube()
+             return HyperCube(dimensions)
          if self._output == Target.REGRESSION:
-             return RegressionCube()
-         return ClassificationCube()
+             return RegressionCube(dimensions)
+         return ClassificationCube(dimensions)
+
+     @staticmethod
+     def _find_couples(to_split: Iterable[HyperCube], not_in_cache: set[HyperCube],
+                       adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
+             Iterable[tuple[HyperCube, HyperCube, str]]:
+
+         for cube1, cube2 in combinations(to_split, 2):
+             key = (cube1, cube2) if id(cube1) < id(cube2) else (cube2, cube1)
+
+             if (cube1 in not_in_cache) or (cube2 in not_in_cache):
+                 adjacent_cache[key] = cube1.is_adjacent(cube2)
+             feature = adjacent_cache.get(key)
+             if feature is not None:
+                 yield cube1, cube2, feature
+
+     def _evaluate_merge(self, not_in_cache: Iterable[HyperCube], dataframe: pd.DataFrame, feature: str,
+                         cube: HyperCube, other_cube: HyperCube,
+                         merge_cache: dict[tuple[HyperCube, HyperCube], HyperCube | None]) -> bool:
+         if (cube in not_in_cache) or (other_cube in not_in_cache):
+             merged_cube = cube.merge_along_dimension(other_cube, feature)
+             merged_cube.update(dataframe, self.predictor)
+             merge_cache[(cube, other_cube)] = merged_cube
+         return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
+             merge_cache[(cube, other_cube)].diversity < self.threshold

      def _sort_cubes(self):
          cubes = [(cube.diversity, i, cube) for i, cube in enumerate(self._hypercubes)]
          cubes.sort()
          self._hypercubes = [cube[2] for cube in cubes]

+     def _merge(self, to_split: list[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
+         not_in_cache = set(to_split)
+         adjacent_cache = {}
+         merge_cache = {}
+         while True:
+             to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
+                         HyperCubeExtractor._find_couples(to_split, not_in_cache, adjacent_cache) if
+                         self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
+
+             if len(to_merge) == 0:
+                 break
+             best = min(to_merge, key=lambda c: c[1].diversity)
+             for cube in best[0]:
+                 to_split.remove(cube)
+             to_split.append(best[1])
+             not_in_cache = [best[1]]
+         return to_split
+
      def extract(self, dataframe: pd.DataFrame) -> Theory:
          theory = PedagogicalExtractor.extract(self, dataframe)
          self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
@@ -209,10 +254,16 @@ class FeatureRanker:


  class Grid:
-     def __init__(self, iterations: int = 1, strategy: Strategy | list[Strategy] = FixedStrategy()):
+     def __init__(self, iterations: int = 1, strategy: Strategy | Iterable[Strategy] = FixedStrategy()):
          self.iterations = iterations
          self.strategy = strategy

+     def make_fair(self, features: Iterable[str]):
+         if isinstance(self.strategy, Strategy):
+             self.strategy.make_fair(features)
+         elif isinstance(self.strategy, Iterable):
+             [strategy.make_fair(features) for strategy in self.strategy]
+
      def get(self, feature: str, depth: int) -> int:
          if isinstance(self.strategy, list):
              return self.strategy[depth].get(feature)
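Worth noting in the rewritten `_find_couples`: cube pairs are canonicalized by `id()` order before touching `adjacent_cache`, so each unordered pair gets exactly one cache entry regardless of iteration order. A self-contained illustration of that caching pattern (the `Box` class and its one-dimensional `is_adjacent` are invented for the example):

    # Toy illustration of the id()-ordered pair cache used by _find_couples.
    from itertools import combinations

    class Box:
        def __init__(self, lo, hi):
            self.lo, self.hi = lo, hi

        def is_adjacent(self, other):
            # Returns the feature along which the boxes touch, or None (1-D toy: 'x').
            return 'x' if self.hi == other.lo or other.hi == self.lo else None

    boxes = [Box(0, 1), Box(1, 2), Box(3, 4)]
    cache = {}
    for a, b in combinations(boxes, 2):
        key = (a, b) if id(a) < id(b) else (b, a)  # one canonical key per unordered pair
        if key not in cache:
            cache[key] = a.is_adjacent(b)
    print(sum(f is not None for f in cache.values()))  # -> 1: only the first two boxes touch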
psyke/extraction/hypercubic/creepy/__init__.py
@@ -1,7 +1,8 @@
  from __future__ import annotations

- from collections import Iterable
- import numpy as np
+ from collections.abc import Iterable
+ from typing import Callable, Any
+
  import pandas as pd
  from sklearn.base import ClassifierMixin
  from tuprolog.theory import Theory
@@ -16,16 +17,23 @@ class CReEPy(HyperCubeExtractor):
      Explanator implementing CReEPy algorithm.
      """

-     def __init__(self, predictor, clustering=Clustering.exact, depth: int = 3, error_threshold: float = 0.1,
-                  output: Target = Target.CONSTANT, gauss_components: int = 5, ranks: list[(str, float)] = [],
-                  ignore_threshold: float = 0.0, discretization=None, normalization=None,
-                  seed: int = get_default_random_seed()):
+     ClusteringType = Callable[[int, float, Target, int, Any, Any, int], HyperCubeClustering]
+
+     def __init__(self, predictor, clustering: ClusteringType = Clustering.exact, depth: int = 3,
+                  error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                  ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0, discretization=None,
+                  normalization=None, seed: int = get_default_random_seed()):
          super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
                           discretization, normalization)
          self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
                                       normalization, seed)
          self._default_surrounding_cube = True
          self._dimensions_to_ignore = set([dimension for dimension, relevance in ranks if relevance < ignore_threshold])
+         self._protected_features = []
+
+     def make_fair(self, features: Iterable[str]):
+         self.clustering.make_fair(features)
+         self._dimensions_to_ignore.update(features)

      def _extract(self, dataframe: pd.DataFrame) -> Theory:
          if not isinstance(self.clustering, HyperCubeClustering):
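A small but real fix in the new signature: the default `ranks: list[(str, float)] = []` was replaced by `tuple()`, avoiding Python's shared mutable default. A two-function illustration of the hazard the change removes:

    # Default argument objects are created once per function, not once per call.
    def bad(acc=[]):
        acc.append(1)
        return acc

    def good(acc=tuple()):
        return list(acc) + [1]

    print(bad(), bad())    # [1, 1] [1, 1] -- the same list leaks across calls
    print(good(), good())  # [1] [1]       -- the immutable default stays clean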
psyke/extraction/hypercubic/ginger/__init__.py (new file)
@@ -0,0 +1,100 @@
+ import itertools
+ from typing import Iterable
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.base import ClassifierMixin
+ from sklearn.preprocessing import PolynomialFeatures
+ from tuprolog.theory import Theory
+
+ from psyke import get_default_random_seed, Target
+ from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube, RegressionCube
+
+ from deap import base, creator
+
+ from psyke.genetic.gin import GIn
+
+
+ class GInGER(HyperCubeExtractor):
+     """
+     Explanator implementing GInGER algorithm.
+     """
+
+     def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
+                  tournsize=3, metric='R2', n_gen=50, n_pop=50, threshold=None, valid=None,
+                  output: Target = Target.REGRESSION, normalization=None, seed: int = get_default_random_seed()):
+         super().__init__(predictor, output=Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
+                          normalization=normalization)
+         self.threshold = threshold
+         np.random.seed(seed)
+
+         self.features = features
+         self.max_features = len(features)
+         self.sigmas = sigmas
+         self.max_slices = max_slices
+         self.min_rules = min_rules
+         self.poly = max_poly
+         self.trained_poly = None
+
+         self.alpha = alpha
+         self.indpb = indpb
+         self.tournsize = tournsize
+         self.metric = metric
+
+         self.n_gen = n_gen
+         self.n_pop = n_pop
+         self.valid = valid
+
+         creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+         creator.create("Individual", list, fitness=creator.FitnessMax)
+
+     def __poly_names(self):
+         return [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
+                          for pp, n in zip(p, self.trained_poly.feature_names_in_)])[:-3]
+                 for p in self.trained_poly.powers_]
+
+     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+         dataframe = pd.DataFrame(self.trained_poly.fit_transform(dataframe), columns=self.__poly_names())
+         return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
+
+     def _extract(self, dataframe: pd.DataFrame) -> Theory:
+         best = {}
+         for poly in range(self.poly):
+             for slices in list(itertools.product(range(1, self.max_slices + 1), repeat=self.max_features)):
+                 gr = GIn((dataframe.iloc[:, :-1], dataframe.iloc[:, -1]), self.valid, self.features, self.sigmas,
+                          slices, min_rules=self.min_rules, poly=poly + 1, alpha=self.alpha, indpb=self.indpb,
+                          tournsize=self.tournsize, metric=self.metric, output=self._output, warm=True)
+
+                 b, score, _, _ = gr.run(n_gen=self.n_gen, n_pop=self.n_pop)
+                 best[(score, poly + 1, slices)] = b
+         m = min(best)
+         poly, slices, best = m[1], m[2], best[m]
+         self.trained_poly = PolynomialFeatures(degree=poly, include_bias=False)
+         transformed = pd.DataFrame(self.trained_poly.fit_transform(dataframe.iloc[:, :-1]), columns=self.__poly_names())
+         transformed[dataframe.columns[-1]] = dataframe.iloc[:, -1].values
+
+         self._surrounding = HyperCube.create_surrounding_cube(transformed, output=self._output)
+
+         cuts = [sorted(best[sum(slices[:i]):sum(slices[:i + 1])]) for i in range(len(slices))]
+
+         intervals = [[(transformed[self.features[i]].min(), cut[0])] +
+                      [(cut[i], cut[i + 1]) for i in range(len(cut) - 1)] +
+                      [(cut[-1], transformed[self.features[i]].max())] for i, cut in enumerate(cuts)]
+
+         hypercubes = [{f: iv for f, iv in zip(self.features, combo)} for combo in itertools.product(*intervals)]
+         mi_ma = {f: (transformed[f].min(), transformed[f].max()) for f in transformed.columns if f not in self.features}
+         self._hypercubes = [self._default_cube({feat: h[feat] if feat in self.features else mi_ma[feat]
+                                                 for feat in transformed.columns[:-1]}) for h in hypercubes]
+         self._hypercubes = [c for c in self._hypercubes if c.count(transformed) >= 2]
+         for c in self._hypercubes:
+             for feature in transformed.columns:
+                 if feature not in self.features:
+                     for direction in ['+', '-']:
+                         c.set_infinite(feature, direction)
+             c.update(transformed)
+         if self.threshold is not None:
+             self._hypercubes = self._merge(self._hypercubes, transformed)
+         return self._create_theory(transformed)
+
+     def make_fair(self, features: Iterable[str]):
+         self._dimensions_to_ignore.update(features)
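`__poly_names` rebuilds human-readable monomial names from `PolynomialFeatures.powers_`, one exponent vector per generated column. A runnable standalone sketch of that mapping (the two-feature frame is invented):

    # Standalone sketch: turning PolynomialFeatures.powers_ into readable monomial names,
    # mirroring GInGER.__poly_names above.
    import pandas as pd
    from sklearn.preprocessing import PolynomialFeatures

    df = pd.DataFrame({'a': [1.0, 2.0], 'b': [3.0, 4.0]})
    poly = PolynomialFeatures(degree=2, include_bias=False).fit(df)

    names = [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
                      for pp, n in zip(p, poly.feature_names_in_)])[:-3]
             for p in poly.powers_]
    print(names)  # -> ['a', 'b', 'a**2', 'a * b', 'b**2']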
psyke/extraction/hypercubic/gridex/__init__.py
@@ -59,7 +59,6 @@ class GridEx(HyperCubeExtractor):
      def _iterate(self, dataframe: pd.DataFrame):
          fake = dataframe.copy()
          prev = [self._surrounding]
-         next_iteration = []

          for iteration in self.grid.iterate():
              next_iteration = []
@@ -67,53 +66,12 @@ class GridEx(HyperCubeExtractor):
                  if cube.count(dataframe) == 0:
                      continue
                  if cube.diversity < self.threshold:
-                     self._hypercubes += [cube]
+                     self._hypercubes.append(cube)
                      continue
                  to_split, fake = self._cubes_to_split(cube, iteration, dataframe, fake)
-                 next_iteration += [c for c in self._merge(to_split, fake)]
-             prev = next_iteration.copy()
-         self._hypercubes += [cube for cube in next_iteration]
+                 next_iteration.extend(self._merge(to_split, fake))
+             prev = next_iteration
+         self._hypercubes.extend(prev)

-     @staticmethod
-     def _find_couples(to_split: Iterable[HyperCube], not_in_cache: Iterable[HyperCube],
-                       adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
-             Iterable[tuple[HyperCube, HyperCube, str]]:
-         checked = []
-         eligible = []
-         for cube in to_split:
-             checked.append(cube)
-             for other_cube in [c for c in to_split if c not in checked]:
-                 if (cube in not_in_cache) or (other_cube in not_in_cache):
-                     adjacent_cache[(cube, other_cube)] = cube.is_adjacent(other_cube)
-                 adjacent_feature = adjacent_cache[(cube, other_cube)]
-                 eligible.append((cube, other_cube, adjacent_feature))
-         return [couple for couple in eligible if couple[2] is not None]
-
-     def _evaluate_merge(self, not_in_cache: Iterable[HyperCube],
-                         dataframe: pd.DataFrame, feature: str,
-                         cube: HyperCube, other_cube: HyperCube,
-                         merge_cache: dict[(HyperCube, HyperCube), HyperCube | None]) -> bool:
-         if (cube in not_in_cache) or (other_cube in not_in_cache):
-             merged_cube = cube.merge_along_dimension(other_cube, feature)
-             merged_cube.update(dataframe, self.predictor)
-             merge_cache[(cube, other_cube)] = merged_cube
-         return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
-             merge_cache[(cube, other_cube)].diversity < self.threshold
-
-     def _merge(self, to_split: Iterable[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
-         not_in_cache = [cube for cube in to_split]
-         adjacent_cache = {}
-         merge_cache = {}
-         cont = True
-         while cont:
-             to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
-                         GridEx._find_couples(to_split, not_in_cache, adjacent_cache) if
-                         self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
-             if len(to_merge) == 0:
-                 cont = False
-             else:
-                 sorted(to_merge, key=lambda c: c[1].diversity)
-                 best = to_merge[0]
-                 to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
-                 not_in_cache = [best[1]]
-         return to_split
+     def make_fair(self, features: Iterable[str]):
+         self.grid.make_fair(features)
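The deleted `_merge` had a subtle bug that the shared base-class version (see `psyke/extraction/hypercubic/__init__.py` above) fixes: `sorted(to_merge, key=...)` returns a new list and its result was discarded, so `best = to_merge[0]` picked an arbitrary candidate rather than the lowest-diversity one. A three-line demonstration:

    pairs = [('b', 3), ('a', 1)]
    sorted(pairs, key=lambda p: p[1])         # result discarded; 'pairs' is unchanged
    print(pairs[0])                           # -> ('b', 3), not the minimum
    print(min(pairs, key=lambda p: p[1]))     # -> ('a', 1), what the new code selects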
psyke/extraction/hypercubic/gridrex/__init__.py
@@ -5,12 +5,12 @@ from psyke.extraction.hypercubic.gridex import GridEx

  class GridREx(GridEx):
      """
-     Explanator implementing GridREx algorithm.
+     Explanator implementing GridREx algorithm, doi:10.24963/kr.2022/57.
      """

      def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization,
                   seed=get_default_random_seed()):
          super().__init__(predictor, grid, min_examples, threshold, Target.REGRESSION, None, normalization, seed)

-     def _default_cube(self) -> RegressionCube:
+     def _default_cube(self, dimensions=None) -> RegressionCube:
          return RegressionCube()
psyke/extraction/hypercubic/hypercube.py
@@ -143,10 +143,9 @@ class HyperCube:
          self._default = True

      def set_infinite(self, dimension: str, direction: str):
-         if dimension in self._infinite_dimensions:
-             self._infinite_dimensions[dimension].append(direction)
-         else:
-             self._infinite_dimensions[dimension] = [direction]
+         if dimension not in self._infinite_dimensions:
+             self._infinite_dimensions[dimension] = set()
+         self._infinite_dimensions[dimension].add(direction)

      def copy_infinite_dimensions(self, dimensions: dict[str, str]):
          self._infinite_dimensions = dimensions.copy()
@@ -260,13 +259,15 @@
                  if not self.is_default and value is not None]

      @staticmethod
-     def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False,
-                                 output=None) -> GenericCube:
+     def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False, output=None,
+                                 features_to_ignore: Iterable[str] = []) -> GenericCube:
          output = Target.CONSTANT if output is None else output
          dimensions = {
              column: (min(dataset[column]) - HyperCube.EPSILON * 2, max(dataset[column]) + HyperCube.EPSILON * 2)
              for column in dataset.columns[:-1]
          }
+         for column in features_to_ignore:
+             dimensions[column] = (-np.inf, np.inf)
          if closed:
              if output == Target.CONSTANT:
                  return ClosedCube(dimensions)
@@ -432,14 +433,16 @@
          else:
              self.update_dimension(feature, (lower, upper))

-     def update(self, dataset: pd.DataFrame, predictor) -> None:
-         filtered = self.filter_dataframe(dataset.iloc[:, :-1])
-         predictions = predictor.predict(filtered)
-         self._output = np.mean(predictions)
-         self._diversity = np.std(predictions)
-         self._error = (abs(predictions - self._output)).mean()
-         means = filtered.describe().loc['mean']
-         self._barycenter = Point(means.index.values, means.values)
+     def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+         idx = self.filter_indices(dataset.iloc[:, :-1])
+         filtered = dataset.iloc[idx, :-1]
+         if len(filtered > 0):
+             predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
+             self._output = np.mean(predictions)
+             self._diversity = np.std(predictions)
+             self._error = (abs(predictions - self._output)).mean()
+             means = filtered.describe().loc['mean']
+             self._barycenter = Point(means.index.values, means.values)

      # TODO: why this is not a property?
      def init_diversity(self, std: float) -> None:
@@ -450,10 +453,11 @@ class RegressionCube(HyperCube):
      def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output=None):
          super().__init__(dimension=dimension, limits=limits, output=LinearRegression() if output is None else output)

-     def update(self, dataset: pd.DataFrame, predictor) -> None:
-         filtered = self.filter_dataframe(dataset.iloc[:, :-1])
+     def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+         idx = self.filter_indices(dataset.iloc[:, :-1])
+         filtered = dataset.iloc[idx, :-1]
          if len(filtered > 0):
-             predictions = predictor.predict(filtered)
+             predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
              self._output.fit(filtered, predictions)
              self._diversity = self._error = (abs(self._output.predict(filtered) - predictions)).mean()
              means = filtered.describe().loc['mean']
@@ -471,12 +475,14 @@ class RegressionCube(HyperCube):
          return new_cube

      def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
-         intercept = self.output.intercept_ if normalization is None else unscale(sum(
-             [-self.output.coef_[i] * normalization[name][0] / normalization[name][1] for i, name in
-              enumerate(self.dimensions.keys())], self.output.intercept_), list(normalization.keys())[-1])
-         coefs = self.output.coef_ if normalization is None else [
-             self.output.coef_[i] / normalization[name][1] * normalization[list(normalization.keys())[-1]][1] for
-             i, name in enumerate(self.dimensions.keys())
+         intercept = self.output.intercept_
+         intercept = np.array(intercept).flatten()[0] if isinstance(intercept, Iterable) else intercept
+         intercept = intercept if normalization is None else unscale(sum(
+             [-self.output.coef_.flatten()[i] * normalization[name][0] / normalization[name][1] for i, name in
+              enumerate(self.dimensions.keys())], intercept), list(normalization.keys())[-1])
+         coefs = self.output.coef_.flatten() if normalization is None else [
+             self.output.coef_.flatten()[i] / normalization[name][1] * normalization[list(normalization.keys())[-1]][1]
+             for i, name in enumerate(self.dimensions.keys())
          ]
          return list(super().body(variables, ignore, unscale, normalization)) + [linear_function_creator(
              list(variables.values()), [to_rounded_real(v) for v in coefs], to_rounded_real(intercept)
@@ -487,10 +493,11 @@ class ClassificationCube(HyperCube):
      def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output: str = ""):
          super().__init__(dimension=dimension, limits=limits, output=output)

-     def update(self, dataset: pd.DataFrame, predictor) -> None:
-         filtered = self.filter_dataframe(dataset.iloc[:, :-1])
+     def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+         idx = self.filter_indices(dataset.iloc[:, :-1])
+         filtered = dataset.iloc[idx, :-1]
          if len(filtered > 0):
-             predictions = predictor.predict(filtered)
+             predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
              self._output = mode(predictions)
              self._diversity = self._error = 1 - sum(p == self.output for p in predictions) / len(predictions)
              means = filtered.describe().loc['mean']
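All three `update` overloads now accept `predictor=None`, in which case they read the ground-truth target of the filtered rows instead of querying a model (as GInGER does when updating cubes on the transformed frame). A condensed standalone sketch of the fallback, with a toy frame and a trivial stand-in predictor:

    # Toy data and the Const predictor are invented; the fallback mirrors HyperCube.update.
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'x': [0.1, 0.4, 0.8], 'y': [1.0, 1.2, 3.0]})
    idx = [0, 1]                               # stand-in for self.filter_indices(...)
    filtered = df.iloc[idx, :-1]

    def stats(predictor=None):
        predictions = df.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
        return np.mean(predictions), np.std(predictions)

    class Const:
        def predict(self, X):
            return np.ones(len(X))

    print(stats())         # ground-truth labels -> approx (1.1, 0.1)
    print(stats(Const()))  # model output        -> (1.0, 0.0)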
psyke/extraction/hypercubic/iter/__init__.py
@@ -23,6 +23,7 @@ class ITER(HyperCubeExtractor):
              raise NotImplementedError
          self.predictor = predictor
          self.min_update = min_update
+         self._init_points = n_points
          self.n_points = n_points
          self.max_iterations = max_iterations
          self.min_examples = min_examples
@@ -33,6 +34,10 @@ class ITER(HyperCubeExtractor):
          self.seed = seed
          self.ignore_dimensions = ignore_dimensions if ignore_dimensions is not None else []

+     def make_fair(self, features: Iterable[str]):
+         self.n_points = self._init_points
+         self.ignore_dimensions += list(features)
+
      def _best_cube(self, dataframe: pd.DataFrame, cube: GenericCube, cubes: Iterable[Expansion]) -> Expansion | None:
          expansions = []
          for limit in cubes:
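Taken together, `make_fair(features)` is the common hook this release adds across Cart, CReEPy, GridEx, GInGER, ITER, and Grid: each class routes the protected features into whatever mechanism it already uses to exclude dimensions. A minimal sketch of that duck-typed protocol (the Protocol class below is illustrative, not part of psyke):

    # Illustrative only: the shape of the make_fair hook shared by the extractors above.
    from typing import Iterable, Protocol

    class FairAware(Protocol):
        def make_fair(self, features: Iterable[str]) -> None: ...

    def make_all_fair(extractors: Iterable[FairAware], protected: Iterable[str]) -> None:
        # Cart swaps in a FairTree, CReEPy ignores the dimensions, GridEx delegates to its Grid.
        protected = list(protected)            # reusable across extractors
        for extractor in extractors:
            extractor.make_fair(protected)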