psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. psyke/__init__.py +231 -85
  2. psyke/clustering/__init__.py +9 -4
  3. psyke/clustering/cream/__init__.py +6 -10
  4. psyke/clustering/exact/__init__.py +17 -11
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +25 -0
  7. psyke/extraction/cart/CartPredictor.py +128 -0
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +48 -62
  11. psyke/extraction/hypercubic/__init__.py +187 -47
  12. psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
  13. psyke/extraction/hypercubic/creepy/__init__.py +24 -29
  14. psyke/extraction/hypercubic/divine/__init__.py +86 -0
  15. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  16. psyke/extraction/hypercubic/gridex/__init__.py +45 -84
  17. psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
  18. psyke/extraction/hypercubic/hex/__init__.py +104 -0
  19. psyke/extraction/hypercubic/hypercube.py +275 -72
  20. psyke/extraction/hypercubic/iter/__init__.py +45 -46
  21. psyke/extraction/hypercubic/strategy.py +13 -9
  22. psyke/extraction/real/__init__.py +24 -29
  23. psyke/extraction/real/utils.py +2 -2
  24. psyke/extraction/trepan/__init__.py +24 -19
  25. psyke/genetic/__init__.py +0 -0
  26. psyke/genetic/fgin/__init__.py +74 -0
  27. psyke/genetic/gin/__init__.py +144 -0
  28. psyke/hypercubepredictor.py +102 -0
  29. psyke/schema/__init__.py +230 -36
  30. psyke/tuning/__init__.py +40 -28
  31. psyke/tuning/crash/__init__.py +33 -64
  32. psyke/tuning/orchid/__init__.py +21 -23
  33. psyke/tuning/pedro/__init__.py +70 -56
  34. psyke/utils/logic.py +8 -8
  35. psyke/utils/plot.py +79 -3
  36. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
  37. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  38. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  39. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  40. psyke/extraction/cart/predictor.py +0 -73
  41. psyke-0.4.9.dev6.dist-info/RECORD +0 -36
  42. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/extraction/hypercubic/iter/__init__.py
@@ -1,18 +1,14 @@
  from __future__ import annotations
- from random import Random
  from typing import Iterable
  import numpy as np
  import pandas as pd
  from sklearn.base import ClassifierMixin
  from tuprolog.theory import Theory
- from psyke import PedagogicalExtractor
  from psyke.extraction.hypercubic import HyperCube, HyperCubeExtractor
  from psyke.extraction.hypercubic.hypercube import GenericCube
  from psyke.extraction.hypercubic.utils import MinUpdate, Expansion
  from psyke.utils import get_default_random_seed, Target

- DomainProperties = (Iterable[MinUpdate], GenericCube)
-

  class ITER(HyperCubeExtractor):
      """
@@ -20,12 +16,14 @@ class ITER(HyperCubeExtractor):
      """

      def __init__(self, predictor, min_update, n_points, max_iterations, min_examples, threshold, fill_gaps,
-                  normalization, output: Target = Target.CONSTANT, seed=get_default_random_seed()):
-         super().__init__(predictor, output, normalization)
+                  ignore_dimensions: Iterable, normalization, output: Target = Target.CONSTANT,
+                  seed=get_default_random_seed()):
+         super().__init__(predictor, output, normalization=normalization)
          if output is Target.REGRESSION:
              raise NotImplementedError
          self.predictor = predictor
          self.min_update = min_update
+         self._init_points = n_points
          self.n_points = n_points
          self.max_iterations = max_iterations
          self.min_examples = min_examples
@@ -33,13 +31,18 @@
          self.fill_gaps = fill_gaps
          self._output = Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else \
              output if output is not None else Target.CONSTANT
-         self.__generator = Random(seed)
+         self.seed = seed
+         self.ignore_dimensions = ignore_dimensions if ignore_dimensions is not None else []
+
+     def make_fair(self, features: Iterable[str]):
+         self.n_points = self._init_points
+         self.ignore_dimensions += list(features)

      def _best_cube(self, dataframe: pd.DataFrame, cube: GenericCube, cubes: Iterable[Expansion]) -> Expansion | None:
          expansions = []
          for limit in cubes:
              count = limit.cube.count(dataframe)
-             dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count, self.__generator)])
+             dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count)])
              limit.cube.update(dataframe, self.predictor)
              expansions.append(Expansion(
                  limit.cube, limit.feature, limit.direction,
@@ -50,24 +53,21 @@
              return sorted(expansions, key=lambda e: e.distance)[0]
          return None

-     def _calculate_min_updates(self, surrounding: GenericCube) -> Iterable[MinUpdate]:
+     def _calculate_min_updates(self) -> Iterable[MinUpdate]:
          return [MinUpdate(name, (interval[1] - interval[0]) * self.min_update) for (name, interval) in
-                 surrounding.dimensions.items()]
+                 self._surrounding.dimensions.items()]

-     @staticmethod
-     def _create_range(cube: GenericCube, domain: DomainProperties, feature: str, direction: str)\
+     def _create_range(self, cube: GenericCube, min_updates: Iterable[MinUpdate], feature: str, direction: str)\
              -> tuple[GenericCube, tuple[float, float]]:
-         min_updates, surrounding = domain
          a, b = cube[feature]
          size = [min_update for min_update in min_updates if min_update.name == feature][0].value
-         return (cube.copy(), (max(a - size, surrounding.get_first(feature)), a)
-                 if direction == '-' else (b, min(b + size, surrounding.get_second(feature))))
+         return (cube.copy(), (max(a - size, self._surrounding.get_first(feature)), a)
+                 if direction == '-' else (b, min(b + size, self._surrounding.get_second(feature))))

-     @staticmethod
-     def _create_temp_cube(cube: GenericCube, domain: DomainProperties,
+     def _create_temp_cube(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
                            hypercubes: Iterable[GenericCube], feature: str,
                            direction: str) -> Iterable[Expansion]:
-         temp_cube, values = ITER._create_range(cube, domain, feature, direction)
+         temp_cube, values = self._create_range(cube, min_updates, feature, direction)
          temp_cube.update_dimension(feature, values)
          overlap = temp_cube.overlap(hypercubes)
          while (overlap is not None) & (temp_cube.has_volume()):
@@ -77,23 +77,24 @@
          else:
              cube.add_limit(feature, direction)

-     @staticmethod
-     def _create_temp_cubes(cube: GenericCube, domain: DomainProperties,
+     def _create_temp_cubes(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
                             hypercubes: Iterable[GenericCube]) -> Iterable[Expansion]:
          tmp_cubes = []
-         for feature in domain[1].dimensions.keys():
+         for feature in self._surrounding.dimensions.keys():
+             if feature in self.ignore_dimensions:
+                 continue
              limit = cube.check_limits(feature)
              if limit == '*':
                  continue
              for x in {'-', '+'} - {limit}:
-                 tmp_cubes += ITER._create_temp_cube(cube, domain, hypercubes, feature, x)
+                 tmp_cubes += self._create_temp_cube(cube, min_updates, hypercubes, feature, x)
          return tmp_cubes

      def _cubes_to_update(self, dataframe: pd.DataFrame, to_expand: Iterable[GenericCube],
-                          hypercubes: Iterable[GenericCube], domain: DomainProperties) \
+                          hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate]) \
              -> Iterable[tuple[GenericCube, Expansion]]:
          results = [(hypercube, self._best_cube(dataframe, hypercube, self._create_temp_cubes(
-             hypercube, domain, hypercubes))) for hypercube in to_expand]
+             hypercube, min_updates, hypercubes))) for hypercube in to_expand]
          return sorted([result for result in results if result[1] is not None], key=lambda x: x[1].distance)

      def _expand_or_create(self, cube: GenericCube, expansion: Expansion, hypercubes: Iterable[GenericCube]) -> None:
@@ -103,7 +104,7 @@
          cube.expand(expansion, hypercubes)

      @staticmethod
-     def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, tuple]:
+     def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, float]:
          if isinstance(output, str):
              close_sample = dataframe[dataframe.iloc[:, -1] == output].iloc[0].to_dict()
          else:
@@ -126,36 +127,34 @@
          return [HyperCube.cube_from_point(ITER._find_closer_sample(dataframe, point), output=self._output)
                  for point in points]

-     def _initialize(self, dataframe: pd.DataFrame) -> tuple[Iterable[GenericCube], DomainProperties]:
+     def _initialize(self, dataframe: pd.DataFrame) -> Iterable[MinUpdate]:
          self._fake_dataframe = dataframe.copy()
-         surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
-         min_updates = self._calculate_min_updates(surrounding)
-         self._hypercubes = self._init_hypercubes(dataframe, min_updates, surrounding)
+         self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+         min_updates = self._calculate_min_updates()
+         self._init_hypercubes(dataframe, min_updates)
          for hypercube in self._hypercubes:
              hypercube.update(dataframe, self.predictor)
-         return self._hypercubes, (min_updates, surrounding)
-
-     def _init_hypercubes(
-             self,
-             dataframe: pd.DataFrame,
-             min_updates: Iterable[MinUpdate],
-             surrounding: GenericCube
-     ) -> Iterable[GenericCube]:
+         return min_updates
+
+     def _init_hypercubes(self, dataframe: pd.DataFrame, min_updates: Iterable[MinUpdate]):
          while True:
              hypercubes = self._generate_starting_points(dataframe)
              for hypercube in hypercubes:
-                 hypercube.expand_all(min_updates, surrounding)
+                 hypercube.expand_all(min_updates, self._surrounding)
+                 for d in self.ignore_dimensions:
+                     hypercube[d] = self._surrounding[d]
              self.n_points = self.n_points - 1
              if not HyperCube.check_overlap(hypercubes, hypercubes):
                  break
-         return hypercubes
+         self._hypercubes = hypercubes

-     def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], domain: DomainProperties,
+     def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate],
                   left_iteration: int) -> int:
+         np.random.seed(self.seed)
          iterations = 0
          to_expand = [cube for cube in hypercubes if cube.limit_count < (len(dataframe.columns) - 1) * 2]
          while (len(to_expand) > 0) and (iterations < left_iteration):
-             updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, domain))
+             updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, min_updates))
              if len(updates) > 0:
                  self._expand_or_create(updates[0][0], updates[0][1], hypercubes)
                  iterations += 1
@@ -170,13 +169,13 @@
                  min(overlapping_cube.get_first(feature), b) if direction == '+' else b)
          return cube.overlap(hypercubes)

-     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-         self._hypercubes, domain = self._initialize(dataframe)
+     def _extract(self, dataframe: pd.DataFrame) -> Theory:
+         min_updates = self._initialize(dataframe)
          temp_train = dataframe.copy()
          fake = dataframe.copy()
          iterations = 0
          while temp_train.shape[0] > 0:
-             iterations += self._iterate(fake, self._hypercubes, domain, self.max_iterations - iterations)
+             iterations += self._iterate(fake, self._hypercubes, min_updates, self.max_iterations - iterations)
              if (iterations >= self.max_iterations) or (not self.fill_gaps):
                  break
              temp_train = temp_train.iloc[[p is None for p in self.predict(temp_train.iloc[:, :-1])]]
@@ -188,9 +187,9 @@
              if not new_cube.has_volume():
                  break
              new_cube = HyperCube.cube_from_point(point, self._output)
-             new_cube.expand_all(domain[0], domain[1], ratio)
+             new_cube.expand_all(min_updates, self._surrounding, ratio)
              overlap = new_cube.overlap(self._hypercubes)
              ratio *= 2
          if new_cube.has_volume():
              self._hypercubes += [new_cube]
-         return self._create_theory(dataframe, sort)
+         return self._create_theory(dataframe)
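
The new make_fair hook drives the fairness changes in this file: features passed to it are skipped when candidate expansions are generated, and every starting cube is stretched over the full range of each ignored feature, so extracted rules never condition on them. Since _init_hypercubes decrements n_points on every retry, make_fair also restores it from _init_points so the extractor can be refit. A minimal usage sketch, assuming predictor is a fitted scikit-learn model, train is its training DataFrame, and extract is the usual public entry point of psyke extractors (constructor values here are illustrative, not taken from this diff):

    from psyke.extraction.hypercubic.iter import ITER
    from psyke.utils import Target

    extractor = ITER(predictor, min_update=0.1, n_points=1, max_iterations=600,
                     min_examples=250, threshold=0.1, fill_gaps=True,
                     ignore_dimensions=None, normalization=None, output=Target.CONSTANT)
    theory = extractor.extract(train)       # may split on any input feature
    extractor.make_fair(['age'])            # 'age' becomes an ignored dimension...
    fair_theory = extractor.extract(train)  # ...so no cube is ever split along it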
psyke/extraction/hypercubic/strategy.py
@@ -1,16 +1,20 @@
  from __future__ import annotations

  from functools import reduce
- from typing import Iterable
+ from collections.abc import Iterable


  class Strategy:
-     def __init__(self):
-         self._partitions = None
+     def __init__(self, partitions=None):
+         self._partitions = partitions
+         self._no_features = []

      def get(self, feature: str) -> int:
          raise NotImplementedError

+     def make_fair(self, features: Iterable[str]):
+         self._no_features = features
+
      def partition_number(self, features: Iterable[str]) -> int:
          return reduce(lambda x, y: x * y, map(self.get, features), 1)

@@ -29,23 +33,23 @@ class Strategy:

  class FixedStrategy(Strategy):
      def __init__(self, partitions: int = 2):
-         super().__init__()
-         self._partitions = partitions
+         super().__init__(partitions)

      def get(self, feature: str) -> int:
-         return self._partitions
+         return 1 if feature in self._no_features else self._partitions

      def __str__(self):
          return "Fixed ({})".format(super().__str__())


  class AdaptiveStrategy(Strategy):
-     def __init__(self, features: Iterable[str], partitions: Iterable[tuple[float, float]] | None = None):
-         super().__init__()
+     def __init__(self, features: Iterable[(str, float)], partitions: Iterable[tuple[float, float]] | None = None):
+         super().__init__(partitions if partitions is not None else [(0.33, 2), (0.67, 3)])
          self.features = features
-         self._partitions = partitions if partitions is not None else [(0.33, 2), (0.67, 3)]

      def get(self, feature: str) -> int:
+         if feature in self._no_features:
+             return 1
          importance = next(filter(lambda t: t[0] == feature, self.features))[1]
          n = 1
          for (imp, part) in self._partitions:
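
Strategy.make_fair threads the same idea through the grid-based extractors: any feature in _no_features always yields a single partition, so partition_number shrinks accordingly and the grid never cuts along that axis. A sketch using only the classes shown above:

    from psyke.extraction.hypercubic.strategy import FixedStrategy

    strategy = FixedStrategy(2)
    print(strategy.partition_number(['x', 'y', 'z']))  # 2 * 2 * 2 = 8 grid cells
    strategy.make_fair(['y'])
    print(strategy.partition_number(['x', 'y', 'z']))  # 2 * 1 * 2 = 4: 'y' is never cut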
psyke/extraction/real/__init__.py
@@ -1,6 +1,6 @@
  from functools import lru_cache
+ from psyke.extraction import PedagogicalExtractor
  from psyke.extraction.real.utils import Rule, IndexedRuleSet
- from psyke import PedagogicalExtractor
  from psyke.schema import DiscreteFeature
  from psyke.utils.dataframe import HashableDataFrame
  from psyke.utils.logic import create_term, create_head, create_variable_list
@@ -15,13 +15,12 @@ class REAL(PedagogicalExtractor):
      """
      Explanator implementing Rule Extraction As Learning (REAL) algorithm, doi:10.1016/B978-1-55860-335-6.50013-1.
      The algorithm is sensible to features' order in the provided dataset during extraction.
-     To make it reproducible the features are internally sorted (alphabetically).
      """

      def __init__(self, predictor, discretization: Iterable[DiscreteFeature]):
          super().__init__(predictor, discretization)
+         self._ignore_feature = []
          self._ruleset: IndexedRuleSet = IndexedRuleSet()
-         self._output_mapping = {}

      @property
      def n_rules(self):
@@ -31,7 +30,7 @@ class REAL(PedagogicalExtractor):
          new_rule = self._rule_from_example(sample)
          return any([new_rule in rule for rule in rules])

-     def _create_body(self, variables: dict[str, Var], rule: Rule) -> list[Struct]:
+     def _body(self, variables: dict[str, Var], rule: Rule) -> list[Struct]:
          result = []
          for predicates, truth_value in zip(rule.to_lists(), [True, False]):
              for predicate in predicates:
@@ -40,28 +39,25 @@
          return result

      def _create_clause(self, dataset: pd.DataFrame, variables: dict[str, Var], key: int, rule: Rule) -> Clause:
-         head = create_head(dataset.columns[-1],
-                            sorted(list(variables.values())),
-                            str(sorted(list(set(dataset.iloc[:, -1])))[key]))
-         return clause(head, self._create_body(variables, rule))
+         return clause(create_head(dataset.columns[-1], list(variables.values()), key), self._body(variables, rule))

      def _create_new_rule(self, sample: pd.Series) -> Rule:
          rule = self._rule_from_example(sample)
          return self._generalise(rule, sample)

      def _create_ruleset(self, dataset: pd.DataFrame) -> IndexedRuleSet:
-         ruleset = IndexedRuleSet.create_indexed_ruleset(dataset)
-         for index, sample in dataset.iloc[:, :-1].iterrows():
+         ruleset = IndexedRuleSet.create_indexed_ruleset(sorted(set(dataset.iloc[:, -1])))
+         for _, sample in dataset.iloc[:, :-1].iterrows():
              prediction = list(self.predictor.predict(sample.to_frame().transpose()))[0]
-             rules = ruleset.get(self._output_mapping[prediction])
+             rules = ruleset.get(prediction)
              if not self._covers(sample, rules):
                  rules.append(self._create_new_rule(sample))
          return ruleset.optimize()

-     def _create_theory(self, dataset: pd.DataFrame, ruleset: IndexedRuleSet, sort: bool = True) -> MutableTheory:
+     def _create_theory(self, dataset: pd.DataFrame) -> MutableTheory:
          theory = mutable_theory()
-         for key, rule in ruleset.flatten():
-             variables = create_variable_list(self.discretization, sort=sort)
+         for key, rule in self._ruleset.flatten():
+             variables = create_variable_list(self.discretization)
              theory.assertZ(self._create_clause(dataset, variables, key, rule))
          return theory

@@ -92,16 +88,22 @@
          return self._create_ruleset(dataset)

      def _internal_predict(self, sample: pd.Series):
-         x = [index for index, rule in self._ruleset.flatten() if REAL._rule_from_example(sample) in rule]
-         reverse_mapping = dict((v, k) for k, v in self._output_mapping.items())
-         return reverse_mapping[x[0]] if len(x) > 0 else None
+         x = [index for index, rule in self._ruleset.flatten() if self._rule_from_example(sample) in rule]
+         return x[0] if x else None

-     @staticmethod
-     def _rule_from_example(sample: pd.Series) -> Rule:
+     def make_fair(self, features: Iterable[str]):
+         self._ignore_feature = [list(i.admissible_values.keys()) for i in self.discretization if i.name in features] \
+             if self.discretization else [features]
+         self._ignore_feature = [feature for features in self._ignore_feature for feature in features]
+         self._get_or_set.cache_clear()
+
+     def _rule_from_example(self, sample: pd.Series) -> Rule:
          true_predicates, false_predicates = [], []
          for feature, value in sample.items():
+             if feature in self._ignore_feature:
+                 continue
              true_predicates.append(str(feature)) if value == 1 else false_predicates.append(str(feature))
-         return Rule(sorted(true_predicates), sorted(false_predicates))
+         return Rule(true_predicates, false_predicates)

      def _subset(self, samples: pd.DataFrame, predicate: str) -> (pd.DataFrame, bool):
          samples_0 = samples.copy()
@@ -111,16 +113,9 @@ class REAL(PedagogicalExtractor):
          samples_all = samples_0.append(samples_1)
          return samples_all, len(set(self.predictor.predict(samples_all))) == 1

-     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-         # Order the dataset by column to preserve reproducibility.
-         dataframe = dataframe.sort_values(by=list(dataframe.columns.values), ascending=False)
-         # Always perform output mapping in the same (sorted) way to preserve reproducibility.
-         if mapping is None:
-             self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
-         else:
-             self._output_mapping = {value: index for index, value in enumerate(sorted(set(mapping[dataframe.iloc[:, -1]])))}
+     def _extract(self, dataframe: pd.DataFrame) -> Theory:
          self._ruleset = self._get_or_set(HashableDataFrame(dataframe))
-         return self._create_theory(dataframe, self._ruleset, sort)
+         return self._create_theory(dataframe)

      def _predict(self, dataframe) -> Iterable:
          return np.array([self._internal_predict(data.transpose()) for _, data in dataframe.iterrows()])
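
REAL's make_fair (and Trepan's below, which is identical) must first translate each protected source feature into the one-hot columns its discretization produced, then flatten the result so _rule_from_example can skip them column by column. The expand-then-flatten step is equivalent to this sketch, where FakeFeature is a hypothetical stand-in for psyke.schema.DiscreteFeature:

    class FakeFeature:  # stand-in: a source-feature name plus its one-hot admissible_values
        def __init__(self, name, admissible_values):
            self.name = name
            self.admissible_values = admissible_values

    discretization = [FakeFeature('color', {'color_red': None, 'color_blue': None})]
    protected = ['color']
    ignore = [list(f.admissible_values.keys()) for f in discretization if f.name in protected]
    ignore = [feature for features in ignore for feature in features]
    print(ignore)  # ['color_red', 'color_blue']; with no discretization it would be ['color']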
psyke/extraction/real/utils.py
@@ -49,5 +49,5 @@ class IndexedRuleSet(dict[int, list[Rule]]):
          ]

      @staticmethod
-     def create_indexed_ruleset(dataset: pd.DataFrame) -> IndexedRuleSet:
-         return IndexedRuleSet({index: [] for index, _ in enumerate(set(dataset.iloc[:, -1]))})
+     def create_indexed_ruleset(indices: Iterable) -> IndexedRuleSet:
+         return IndexedRuleSet({i: [] for i in indices})
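
With the new signature the ruleset is keyed directly by the (sorted) class labels rather than by positional indices, which is what lets _create_ruleset above call ruleset.get(prediction) without the removed _output_mapping. A sketch with illustrative labels:

    from psyke.extraction.real.utils import IndexedRuleSet

    ruleset = IndexedRuleSet.create_indexed_ruleset(sorted({'versicolor', 'setosa'}))
    print(ruleset)  # {'setosa': [], 'versicolor': []}: one empty rule list per label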
psyke/extraction/trepan/__init__.py
@@ -1,6 +1,7 @@
  import numpy as np
+ from psyke.extraction import PedagogicalExtractor
  from psyke.extraction.trepan.utils import Node, Split, SplitLogic
- from psyke import DiscreteFeature, PedagogicalExtractor
+ from psyke import DiscreteFeature
  from psyke.utils.logic import create_term, create_variable_list, create_head
  from psyke.utils.sorted import SortedList
  from tuprolog.core import Var, Struct, clause
@@ -14,11 +15,17 @@ class Trepan(PedagogicalExtractor):
      def __init__(self, predictor, discretization: Iterable[DiscreteFeature], min_examples: int = 0, max_depth: int = 3,
                   split_logic: SplitLogic = SplitLogic.DEFAULT):
          super().__init__(predictor, discretization)
+         self._ignore_feature = []
          self.min_examples = min_examples
          self.max_depth = max_depth
          self.split_logic = split_logic
          self._root: Node

+     def make_fair(self, features: Iterable[str]):
+         self._ignore_feature = [list(i.admissible_values.keys()) for i in self.discretization if i.name in features] \
+             if self.discretization else [features]
+         self._ignore_feature = [feature for features in self._ignore_feature for feature in features]
+
      @property
      def n_rules(self):
          return sum(1 for _ in self._root)
@@ -28,7 +35,7 @@ class Trepan(PedagogicalExtractor):
              raise NotImplementedError()
          if node.n_classes == 1:
              return None
-         splits = Trepan._create_splits(node, names)
+         splits = self._create_splits(node, names)
          return None if len(splits) == 0 or splits[0].children[0].depth > self.max_depth else splits[0].children

      def _compact(self):
@@ -54,28 +61,26 @@ class Trepan(PedagogicalExtractor):
      def _create_split(node: Node, column: str) -> Union[Split, None]:
          true_examples = Trepan._create_samples(node, column, 1.0)
          false_examples = Trepan._create_samples(node, column, 0.0)
-         true_constrains = list(node.constraints) + [(column, 1.0)]
-         false_constrains = list(node.constraints) + [(column, 0.0)]
-         true_node = Node(true_examples, node.n_examples, true_constrains, depth=node.depth + 1)\
+         true_constraints = list(node.constraints) + [(column, 1.0)]
+         false_constraints = list(node.constraints) + [(column, 0.0)]
+         true_node = Node(true_examples, node.n_examples, true_constraints, depth=node.depth + 1) \
              if true_examples.shape[0] > 0 else None
-         false_node = Node(false_examples, node.n_examples, false_constrains, depth=node.depth + 1)\
+         false_node = Node(false_examples, node.n_examples, false_constraints, depth=node.depth + 1) \
              if false_examples.shape[0] > 0 else None
          return None if true_node is None or false_node is None else Split(node, (true_node, false_node))

-     @staticmethod
-     def _create_splits(node: Node, names: Iterable[str]) -> SortedList[Split]:
-         splits, constrains = Trepan._init_splits(node)
-         for column in names:
-             if column not in constrains:
-                 split = Trepan._create_split(node, column)
-                 if split is not None:
-                     splits.add(split)
+     def _create_splits(self, node: Node, names: Iterable[str]) -> SortedList[Split]:
+         splits, constraints = Trepan._init_splits(node)
+         for column in [column for column in names if column not in list(constraints) + self._ignore_feature]:
+             split = Trepan._create_split(node, column)
+             if split is not None:
+                 splits.add(split)
          return splits

-     def _create_theory(self, name: str, sort: bool = True) -> MutableTheory:
+     def _create_theory(self, name: str) -> MutableTheory:
          theory = mutable_theory()
          for node in self._root:
-             variables = create_variable_list(self.discretization, sort=sort)
+             variables = create_variable_list(self.discretization)
              theory.assertZ(
                  clause(
                      create_head(name, list(variables.values()), str(node.dominant)),
@@ -116,7 +121,7 @@ class Trepan(PedagogicalExtractor):
                  continue
              if not skip:
                  return Trepan._internal_predict(x, child, categories)
-         return node.dominant  # Alternatively node.dominant index in categories
+         return node.dominant

      def _optimize(self) -> None:
          n, nodes = 0, [self._root]
@@ -135,7 +140,7 @@ class Trepan(PedagogicalExtractor):
              nodes.append(child)
          return len(to_remove)

-     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+     def _extract(self, dataframe: pd.DataFrame) -> Theory:
          queue = self._init(dataframe)
          while len(queue) > 0:
              node = queue.pop()
@@ -148,7 +153,7 @@ class Trepan(PedagogicalExtractor):
              queue.add_all(best)
              node.children += list(best)
          self._optimize()
-         return self._create_theory(dataframe.columns[-1], sort)
+         return self._create_theory(dataframe.columns[-1])

      def _predict(self, dataframe: pd.DataFrame) -> Iterable:
          return np.array(
psyke/genetic/__init__.py (file without changes)
psyke/genetic/fgin/__init__.py (new file)
@@ -0,0 +1,74 @@
+ import numpy as np
+ import pandas as pd
+
+ from psyke import Target
+ from psyke.genetic.gin import GIn
+
+ import skfuzzy as skf
+
+
+ class FGIn(GIn):
+
+     def __init__(self, train, valid, features, sigmas, slices, min_rules=1, poly=1, alpha=0.5, indpb=0.5, tournsize=3,
+                  metric='R2', output=Target.REGRESSION, warm=False):
+         super().__init__(train, valid, features, sigmas, slices, min_rules, poly, alpha, indpb, tournsize,
+                          metric, output, warm)
+         self.feature_to_idx = {f: i for i, f in enumerate(self.X.columns)}
+
+     def _evaluate(self, individual=None):
+         y_pred, valid_regions = self.__predict(individual or self.best, self.X if self.valid is None else self.valid[0])
+         if valid_regions < self.min_rules:
+             return -9999,
+         return self._score(self.y if self.valid is None else self.valid[1], y_pred),
+
+     @staticmethod
+     def __generate_membership(var, domain, thresholds, shape='tri'):
+         th = [var.min()] + [min(max(t, var.min()), var.max()) for t in thresholds] + [var.max()]
+
+         if shape == 'tri':
+             mid = [(x1 + x2) / 2 for x1, x2 in zip(th[:-1], th[1:])]
+             return [skf.trapmf(domain, [domain.min()] * 2 + mid[:2])] + \
+                    [skf.trimf(domain, [x1, x2, x3]) for x1, x2, x3 in zip(mid[:-2], mid[1:-1], mid[2:])] + \
+                    [skf.trapmf(domain, mid[-2:] + [domain.max()] * 2)]
+         if shape == 'trap':
+             beg = [None, domain.min()] + [(3 * x1 + x2) / 4 for x1, x2 in zip(th[1:-1], th[2:])] + [domain.max()]
+             end = [domain.min()] + [(x1 + 3 * x2) / 4 for x1, x2 in zip(th[:-2], th[1:-1])] + [domain.max()]
+             return [skf.trapmf(domain, [end[i - 1], beg[i], end[i], beg[i + 1]]) for i in range(1, len(th))]
+         raise ValueError('Supported shape values are only \'tri\' and \'trap\'')
+
+     @staticmethod
+     def __extend_domain(x, q_low=0.05, q_high=0.95, p=0.05, k_sigma=2.0, abs_min_margin=0.0):
+         ql, qh = np.quantile(x, [q_low, q_high])
+         margin = max(p * (qh - ql), k_sigma * np.std(x), abs_min_margin)
+         return np.array([ql - margin, qh + margin])
+
+     def __get_activations(self, x, functions_domains, valid_masks):
+         levels = [np.array([skf.interp_membership(domain, mf, x[index]) for mf in mfs])
+                   for mfs, domain, index in functions_domains.values()]
+         return np.prod(np.meshgrid(*levels, indexing='ij'), axis=0).ravel()[valid_masks]
+
+     def __fuzzify(self, cuts):
+         cuts = dict(zip(self.features, cuts))
+         doms = {c: FGIn.__extend_domain(self.X[c]) for c in self.features}
+         return {c: (FGIn.__generate_membership(self.X[c], doms[c], cuts[c], 'trap'), doms[c],
+                     self.feature_to_idx[c]) for c in self.features}
+
+     def __predict(self, individual=None, to_pred=None):
+         cuts = self._get_cuts(individual or self.best)
+         masks = np.array([self._region(to_pred, cuts) == r for r in range(np.prod([s + 1 for s in self.slices]))])
+         valid_masks = masks.sum(axis=1) >= 3
+
+         masks = [mask for mask in masks if mask.sum() >= 3]
+         functions_domains = self.__fuzzify(cuts)
+
+         pred = np.array([self._output_estimation(mask, to_pred) for mask in masks]).T
+         activations = np.array([self.__get_activations(x, functions_domains, valid_masks) for x in to_pred.values])
+
+         if self.output == Target.CLASSIFICATION:
+             classes, idx = np.unique(pred, return_inverse=True)
+             pred = classes[np.argmax(np.vstack([activations[:, idx == i].sum(axis=1) for i, c in enumerate(classes)]),
+                                      axis=0)]
+         else:
+             pred = (pred * activations).sum(axis=1)
+
+         return pd.DataFrame(pred, index=to_pred.index), len(masks)
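
FGIn swaps GIn's crisp region membership for fuzzy memberships: each feature gets a bank of trapezoidal (or triangular) sets over an extended domain, per-feature membership degrees are combined into one activation per region via np.meshgrid and a product, and regional predictions are blended by those activations. A minimal sketch of the underlying skfuzzy calls for a single feature (domain and breakpoints are illustrative):

    import numpy as np
    import skfuzzy as skf

    domain = np.linspace(0.0, 10.0, 101)               # discretized feature domain
    low = skf.trapmf(domain, [0.0, 0.0, 2.0, 4.0])     # plateau at the lower end
    mid = skf.trimf(domain, [2.0, 5.0, 8.0])           # triangular set in the middle
    high = skf.trapmf(domain, [6.0, 8.0, 10.0, 10.0])  # plateau at the upper end

    x = 4.5  # one feature value of a sample
    levels = [skf.interp_membership(domain, mf, x) for mf in (low, mid, high)]
    # FGIn multiplies such per-feature levels across features to obtain one
    # activation per fuzzy region, then weights each region's prediction by it.
    print(levels)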