psyke 0.8.9.dev48__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. psyke/__init__.py +112 -24
  2. psyke/clustering/__init__.py +4 -0
  3. psyke/clustering/cream/__init__.py +2 -6
  4. psyke/clustering/exact/__init__.py +10 -7
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +6 -2
  7. psyke/extraction/cart/{predictor.py → CartPredictor.py} +52 -7
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +27 -52
  11. psyke/extraction/hypercubic/__init__.py +58 -7
  12. psyke/extraction/hypercubic/creepy/__init__.py +14 -6
  13. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  14. psyke/extraction/hypercubic/gridex/__init__.py +6 -48
  15. psyke/extraction/hypercubic/gridrex/__init__.py +2 -2
  16. psyke/extraction/hypercubic/hypercube.py +33 -26
  17. psyke/extraction/hypercubic/iter/__init__.py +5 -0
  18. psyke/extraction/hypercubic/strategy.py +13 -9
  19. psyke/extraction/real/__init__.py +21 -22
  20. psyke/extraction/real/utils.py +2 -2
  21. psyke/extraction/trepan/__init__.py +19 -15
  22. psyke/genetic/__init__.py +0 -0
  23. psyke/genetic/fgin/__init__.py +74 -0
  24. psyke/genetic/gin/__init__.py +144 -0
  25. psyke/hypercubepredictor.py +4 -2
  26. psyke/tuning/pedro/__init__.py +4 -2
  27. psyke/utils/logic.py +4 -8
  28. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +39 -19
  29. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  30. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  31. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  32. psyke-0.8.9.dev48.dist-info/RECORD +0 -40
  33. {psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/extraction/hypercubic/strategy.py CHANGED
@@ -1,16 +1,20 @@
  from __future__ import annotations
 
  from functools import reduce
- from typing import Iterable
+ from collections.abc import Iterable
 
 
  class Strategy:
-     def __init__(self):
-         self._partitions = None
+     def __init__(self, partitions = None):
+         self._partitions = partitions
+         self._no_features = []
 
      def get(self, feature: str) -> int:
          raise NotImplementedError
 
+     def make_fair(self, features: Iterable[str]):
+         self._no_features = features
+
      def partition_number(self, features: Iterable[str]) -> int:
          return reduce(lambda x, y: x * y, map(self.get, features), 1)
 
@@ -29,23 +33,23 @@ class Strategy:
 
  class FixedStrategy(Strategy):
      def __init__(self, partitions: int = 2):
-         super().__init__()
-         self._partitions = partitions
+         super().__init__(partitions)
 
      def get(self, feature: str) -> int:
-         return self._partitions
+         return 1 if feature in self._no_features else self._partitions
 
      def __str__(self):
          return "Fixed ({})".format(super().__str__())
 
 
  class AdaptiveStrategy(Strategy):
-     def __init__(self, features: Iterable[str], partitions: Iterable[tuple[float, float]] | None = None):
-         super().__init__()
+     def __init__(self, features: Iterable[(str, float)], partitions: Iterable[tuple[float, float]] | None = None):
+         super().__init__(partitions if partitions is not None else [(0.33, 2), (0.67, 3)])
          self.features = features
-         self._partitions = partitions if partitions is not None else [(0.33, 2), (0.67, 3)]
 
      def get(self, feature: str) -> int:
+         if feature in self._no_features:
+             return 1
          importance = next(filter(lambda t: t[0] == feature, self.features))[1]
          n = 1
          for (imp, part) in self._partitions:
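
A minimal usage sketch of the new `make_fair` hook on partitioning strategies (illustrative, not part of the diff; the feature names are hypothetical). It follows directly from the `get` implementations above: a protected feature collapses to a single partition, so the grid is never split along it.

```python
from psyke.extraction.hypercubic.strategy import FixedStrategy

strategy = FixedStrategy(partitions=3)
strategy.make_fair(['gender'])        # mark 'gender' as protected
assert strategy.get('gender') == 1    # protected: one partition, i.e. no split
assert strategy.get('age') == 3       # regular features keep their partitioning
# partition_number multiplies per-feature partitions: 1 * 3 = 3
assert strategy.partition_number(['gender', 'age']) == 3
```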
psyke/extraction/real/__init__.py CHANGED
@@ -15,13 +15,12 @@ class REAL(PedagogicalExtractor):
      """
      Explanator implementing Rule Extraction As Learning (REAL) algorithm, doi:10.1016/B978-1-55860-335-6.50013-1.
      The algorithm is sensible to features' order in the provided dataset during extraction.
-     To make it reproducible the features are internally sorted (alphabetically).
      """
 
      def __init__(self, predictor, discretization: Iterable[DiscreteFeature]):
          super().__init__(predictor, discretization)
+         self._ignore_feature = []
          self._ruleset: IndexedRuleSet = IndexedRuleSet()
-         self._output_mapping = {}
 
      @property
      def n_rules(self):
@@ -31,7 +30,7 @@ class REAL(PedagogicalExtractor):
          new_rule = self._rule_from_example(sample)
          return any([new_rule in rule for rule in rules])
 
-     def _create_body(self, variables: dict[str, Var], rule: Rule) -> list[Struct]:
+     def _body(self, variables: dict[str, Var], rule: Rule) -> list[Struct]:
          result = []
          for predicates, truth_value in zip(rule.to_lists(), [True, False]):
              for predicate in predicates:
@@ -40,27 +39,24 @@ class REAL(PedagogicalExtractor):
          return result
 
      def _create_clause(self, dataset: pd.DataFrame, variables: dict[str, Var], key: int, rule: Rule) -> Clause:
-         head = create_head(dataset.columns[-1],
-                            sorted(list(variables.values())),
-                            str(sorted(list(set(dataset.iloc[:, -1])))[key]))
-         return clause(head, self._create_body(variables, rule))
+         return clause(create_head(dataset.columns[-1], list(variables.values()), key), self._body(variables, rule))
 
      def _create_new_rule(self, sample: pd.Series) -> Rule:
          rule = self._rule_from_example(sample)
          return self._generalise(rule, sample)
 
      def _create_ruleset(self, dataset: pd.DataFrame) -> IndexedRuleSet:
-         ruleset = IndexedRuleSet.create_indexed_ruleset(dataset)
-         for index, sample in dataset.iloc[:, :-1].iterrows():
+         ruleset = IndexedRuleSet.create_indexed_ruleset(sorted(set(dataset.iloc[:, -1])))
+         for _, sample in dataset.iloc[:, :-1].iterrows():
              prediction = list(self.predictor.predict(sample.to_frame().transpose()))[0]
-             rules = ruleset.get(self._output_mapping[prediction])
+             rules = ruleset.get(prediction)
              if not self._covers(sample, rules):
                  rules.append(self._create_new_rule(sample))
          return ruleset.optimize()
 
-     def _create_theory(self, dataset: pd.DataFrame, ruleset: IndexedRuleSet) -> MutableTheory:
+     def _create_theory(self, dataset: pd.DataFrame) -> MutableTheory:
          theory = mutable_theory()
-         for key, rule in ruleset.flatten():
+         for key, rule in self._ruleset.flatten():
              variables = create_variable_list(self.discretization)
              theory.assertZ(self._create_clause(dataset, variables, key, rule))
          return theory
@@ -92,16 +88,22 @@ class REAL(PedagogicalExtractor):
          return self._create_ruleset(dataset)
 
      def _internal_predict(self, sample: pd.Series):
-         x = [index for index, rule in self._ruleset.flatten() if REAL._rule_from_example(sample) in rule]
-         reverse_mapping = dict((v, k) for k, v in self._output_mapping.items())
-         return reverse_mapping[x[0]] if len(x) > 0 else None
+         x = [index for index, rule in self._ruleset.flatten() if self._rule_from_example(sample) in rule]
+         return x[0] if x else None
 
-     @staticmethod
-     def _rule_from_example(sample: pd.Series) -> Rule:
+     def make_fair(self, features: Iterable[str]):
+         self._ignore_feature = [list(i.admissible_values.keys()) for i in self.discretization if i.name in features] \
+             if self.discretization else [features]
+         self._ignore_feature = [feature for features in self._ignore_feature for feature in features]
+         self._get_or_set.cache_clear()
+
+     def _rule_from_example(self, sample: pd.Series) -> Rule:
          true_predicates, false_predicates = [], []
          for feature, value in sample.items():
+             if feature in self._ignore_feature:
+                 continue
              true_predicates.append(str(feature)) if value == 1 else false_predicates.append(str(feature))
-         return Rule(sorted(true_predicates), sorted(false_predicates))
+         return Rule(true_predicates, false_predicates)
 
      def _subset(self, samples: pd.DataFrame, predicate: str) -> (pd.DataFrame, bool):
          samples_0 = samples.copy()
@@ -112,11 +114,8 @@ class REAL(PedagogicalExtractor):
          return samples_all, len(set(self.predictor.predict(samples_all))) == 1
 
      def _extract(self, dataframe: pd.DataFrame) -> Theory:
-         # Order the dataset by column to preserve reproducibility.
-         dataframe = dataframe.sort_values(by=list(dataframe.columns.values), ascending=False)
-         self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
          self._ruleset = self._get_or_set(HashableDataFrame(dataframe))
-         return self._create_theory(dataframe, self._ruleset)
+         return self._create_theory(dataframe)
 
      def _predict(self, dataframe) -> Iterable:
          return np.array([self._internal_predict(data.transpose()) for _, data in dataframe.iterrows()])
psyke/extraction/real/utils.py CHANGED
@@ -49,5 +49,5 @@ class IndexedRuleSet(dict[int, list[Rule]]):
          ]
 
      @staticmethod
-     def create_indexed_ruleset(dataset: pd.DataFrame) -> IndexedRuleSet:
-         return IndexedRuleSet({index: [] for index, _ in enumerate(set(dataset.iloc[:, -1]))})
+     def create_indexed_ruleset(indices: Iterable) -> IndexedRuleSet:
+         return IndexedRuleSet({i: [] for i in indices})
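
`create_indexed_ruleset` now receives the output values directly (REAL passes `sorted(set(dataset.iloc[:, -1]))` above), so rules are keyed by the predictions themselves instead of positional indices, and the old `_output_mapping` round-trip disappears. A hypothetical call:

```python
from psyke.extraction.real.utils import IndexedRuleSet

labels = ['setosa', 'versicolor', 'virginica']   # e.g. sorted(set(dataset.iloc[:, -1]))
ruleset = IndexedRuleSet.create_indexed_ruleset(labels)
assert ruleset == {'setosa': [], 'versicolor': [], 'virginica': []}
```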
psyke/extraction/trepan/__init__.py CHANGED
@@ -15,11 +15,17 @@ class Trepan(PedagogicalExtractor):
      def __init__(self, predictor, discretization: Iterable[DiscreteFeature], min_examples: int = 0, max_depth: int = 3,
                   split_logic: SplitLogic = SplitLogic.DEFAULT):
          super().__init__(predictor, discretization)
+         self._ignore_feature = []
          self.min_examples = min_examples
          self.max_depth = max_depth
          self.split_logic = split_logic
          self._root: Node
 
+     def make_fair(self, features: Iterable[str]):
+         self._ignore_feature = [list(i.admissible_values.keys()) for i in self.discretization if i.name in features] \
+             if self.discretization else [features]
+         self._ignore_feature = [feature for features in self._ignore_feature for feature in features]
+
      @property
      def n_rules(self):
          return sum(1 for _ in self._root)
@@ -29,7 +35,7 @@ class Trepan(PedagogicalExtractor):
              raise NotImplementedError()
          if node.n_classes == 1:
              return None
-         splits = Trepan._create_splits(node, names)
+         splits = self._create_splits(node, names)
          return None if len(splits) == 0 or splits[0].children[0].depth > self.max_depth else splits[0].children
 
      def _compact(self):
@@ -55,28 +61,26 @@ class Trepan(PedagogicalExtractor):
      def _create_split(node: Node, column: str) -> Union[Split, None]:
          true_examples = Trepan._create_samples(node, column, 1.0)
          false_examples = Trepan._create_samples(node, column, 0.0)
-         true_constrains = list(node.constraints) + [(column, 1.0)]
-         false_constrains = list(node.constraints) + [(column, 0.0)]
-         true_node = Node(true_examples, node.n_examples, true_constrains, depth=node.depth + 1)\
+         true_constraints = list(node.constraints) + [(column, 1.0)]
+         false_constraints = list(node.constraints) + [(column, 0.0)]
+         true_node = Node(true_examples, node.n_examples, true_constraints, depth=node.depth + 1) \
              if true_examples.shape[0] > 0 else None
-         false_node = Node(false_examples, node.n_examples, false_constrains, depth=node.depth + 1)\
+         false_node = Node(false_examples, node.n_examples, false_constraints, depth=node.depth + 1) \
              if false_examples.shape[0] > 0 else None
          return None if true_node is None or false_node is None else Split(node, (true_node, false_node))
 
-     @staticmethod
-     def _create_splits(node: Node, names: Iterable[str]) -> SortedList[Split]:
-         splits, constrains = Trepan._init_splits(node)
-         for column in names:
-             if column not in constrains:
-                 split = Trepan._create_split(node, column)
-                 if split is not None:
-                     splits.add(split)
+     def _create_splits(self, node: Node, names: Iterable[str]) -> SortedList[Split]:
+         splits, constraints = Trepan._init_splits(node)
+         for column in [column for column in names if column not in list(constraints) + self._ignore_feature]:
+             split = Trepan._create_split(node, column)
+             if split is not None:
+                 splits.add(split)
          return splits
 
-     def _create_theory(self, name: str, sort: bool = True) -> MutableTheory:
+     def _create_theory(self, name: str) -> MutableTheory:
          theory = mutable_theory()
          for node in self._root:
-             variables = create_variable_list(self.discretization, sort=sort)
+             variables = create_variable_list(self.discretization)
              theory.assertZ(
                  clause(
                      create_head(name, list(variables.values()), str(node.dominant)),
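
REAL and Trepan share the same `make_fair` logic: with a discretization, each protected feature first expands to the list of its one-hot columns, then the nested list is flattened. A standalone re-implementation of those two lines, with hypothetical column names, to make the comprehension explicit:

```python
# admissible_values of a DiscreteFeature maps one-hot column names to intervals;
# here only the keys matter, and the names are made up for illustration.
admissible = {'PetalLength': ['PetalLength_0', 'PetalLength_1', 'PetalLength_2']}
protected = ['PetalLength']

ignore = [columns for name, columns in admissible.items() if name in protected]
ignore = [column for columns in ignore for column in columns]   # flatten one level
print(ignore)   # ['PetalLength_0', 'PetalLength_1', 'PetalLength_2']
```

Trepan's `_create_splits` then skips every column in this list, so no split (and hence no rule) ever mentions a protected feature.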
psyke/genetic/__init__.py ADDED
  File without changes (new empty module)
psyke/genetic/fgin/__init__.py ADDED
@@ -0,0 +1,74 @@
+ import numpy as np
+ import pandas as pd
+
+ from psyke import Target
+ from psyke.genetic.gin import GIn
+
+ import skfuzzy as skf
+
+
+ class FGIn(GIn):
+
+     def __init__(self, train, valid, features, sigmas, slices, min_rules=1, poly=1, alpha=0.5, indpb=0.5, tournsize=3,
+                  metric='R2', output=Target.REGRESSION, warm=False):
+         super().__init__(train, valid, features, sigmas, slices, min_rules, poly, alpha, indpb, tournsize,
+                          metric, output, warm)
+         self.feature_to_idx = {f: i for i, f in enumerate(self.X.columns)}
+
+     def _evaluate(self, individual=None):
+         y_pred, valid_regions = self.__predict(individual or self.best, self.X if self.valid is None else self.valid[0])
+         if valid_regions < self.min_rules:
+             return -9999,
+         return self._score(self.y if self.valid is None else self.valid[1], y_pred),
+
+     @staticmethod
+     def __generate_membership(var, domain, thresholds, shape='tri'):
+         th = [var.min()] + [min(max(t, var.min()), var.max()) for t in thresholds] + [var.max()]
+
+         if shape == 'tri':
+             mid = [(x1 + x2) / 2 for x1, x2 in zip(th[:-1], th[1:])]
+             return [skf.trapmf(domain, [domain.min()] * 2 + mid[:2])] + \
+                    [skf.trimf(domain, [x1, x2, x3]) for x1, x2, x3 in zip(mid[:-2], mid[1:-1], mid[2:])] + \
+                    [skf.trapmf(domain, mid[-2:] + [domain.max()] * 2)]
+         if shape == 'trap':
+             beg = [None, domain.min()] + [(3 * x1 + x2) / 4 for x1, x2 in zip(th[1:-1], th[2:])] + [domain.max()]
+             end = [domain.min()] + [(x1 + 3 * x2) / 4 for x1, x2 in zip(th[:-2], th[1:-1])] + [domain.max()]
+             return [skf.trapmf(domain, [end[i - 1], beg[i], end[i], beg[i + 1]]) for i in range(1, len(th))]
+         raise ValueError('Supported shape values are only \'tri\' and \'trap\'')
+
+     @staticmethod
+     def __extend_domain(x, q_low=0.05, q_high=0.95, p=0.05, k_sigma=2.0, abs_min_margin=0.0):
+         ql, qh = np.quantile(x, [q_low, q_high])
+         margin = max(p * (qh - ql), k_sigma * np.std(x), abs_min_margin)
+         return np.array([ql - margin, qh + margin])
+
+     def __get_activations(self, x, functions_domains, valid_masks):
+         levels = [np.array([skf.interp_membership(domain, mf, x[index]) for mf in mfs])
+                   for mfs, domain, index in functions_domains.values()]
+         return np.prod(np.meshgrid(*levels, indexing='ij'), axis=0).ravel()[valid_masks]
+
+     def __fuzzify(self, cuts):
+         cuts = dict(zip(self.features, cuts))
+         doms = {c: FGIn.__extend_domain(self.X[c]) for c in self.features}
+         return {c: (FGIn.__generate_membership(self.X[c], doms[c], cuts[c], 'trap'), doms[c],
+                     self.feature_to_idx[c]) for c in self.features}
+
+     def __predict(self, individual=None, to_pred=None):
+         cuts = self._get_cuts(individual or self.best)
+         masks = np.array([self._region(to_pred, cuts) == r for r in range(np.prod([s + 1 for s in self.slices]))])
+         valid_masks = masks.sum(axis=1) >= 3
+
+         masks = [mask for mask in masks if mask.sum() >= 3]
+         functions_domains = self.__fuzzify(cuts)
+
+         pred = np.array([self._output_estimation(mask, to_pred) for mask in masks]).T
+         activations = np.array([self.__get_activations(x, functions_domains, valid_masks) for x in to_pred.values])
+
+         if self.output == Target.CLASSIFICATION:
+             classes, idx = np.unique(pred, return_inverse=True)
+             pred = classes[np.argmax(np.vstack([activations[:, idx == i].sum(axis=1) for i, c in enumerate(classes)]),
+                                      axis=0)]
+         else:
+             pred = (pred * activations).sum(axis=1)
+
+         return pd.DataFrame(pred, index=to_pred.index), len(masks)
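
FGIn replaces GIn's crisp region lookup with fuzzy weighting: each feature's evolved cut points become triangular or trapezoidal membership functions, and every region's estimate is weighted by the product of per-feature membership degrees. A self-contained sketch of the two `skfuzzy` primitives used above, with illustrative values:

```python
import numpy as np
import skfuzzy as skf

domain = np.linspace(0.0, 10.0, 101)
mf = skf.trimf(domain, [2.0, 5.0, 8.0])          # triangular membership peaking at 5
degree = skf.interp_membership(domain, mf, 6.5)  # membership degree of x = 6.5
print(degree)                                    # 0.5: halfway down the right slope
```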
psyke/genetic/gin/__init__.py ADDED
@@ -0,0 +1,144 @@
+ from statistics import mode
+
+ import numpy as np
+ from deap import base, creator, tools, algorithms
+ import random
+ from sklearn.linear_model import LinearRegression
+ from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error, f1_score, accuracy_score
+ from sklearn.preprocessing import PolynomialFeatures
+
+ from psyke import Target
+
+
+ class GIn:
+
+     def __init__(self, train, valid, features, sigmas, slices, min_rules=1, poly=1, alpha=0.5, indpb=0.5, tournsize=3,
+                  metric='R2', output=Target.REGRESSION, warm=False):
+         self.X, self.y = train
+         self.valid = valid
+         self.output = output
+
+         self.features = features
+         self.sigmas = sigmas
+         self.slices = slices
+         self.min_rules = min_rules
+         self.poly = PolynomialFeatures(degree=poly, include_bias=False)
+
+         self.alpha = alpha
+         self.indpb = indpb
+         self.tournsize = tournsize
+         self.metric = metric
+
+         self.toolbox = None
+         self.stats = None
+         self.hof = None
+         self.best = None
+
+         self.__setup(warm)
+
+     def _region(self, x, cuts):
+         indices = [np.searchsorted(np.array(cut), x[f].to_numpy(), side='right')
+                    for cut, f in zip(cuts, self.features)]
+
+         regions = np.zeros(len(x), dtype=int)
+         multiplier = 1
+         for idx, n in zip(reversed(indices), reversed([len(cut) + 1 for cut in cuts])):
+             regions += idx * multiplier
+             multiplier *= n
+
+         return regions
+
+     def _output_estimation(self, mask, to_pred):
+         if self.output == Target.REGRESSION:
+             return LinearRegression().fit(self.poly.fit_transform(self.X)[mask], self.y[mask]).predict(
+                 self.poly.fit_transform(to_pred))
+         if self.output == Target.CONSTANT:
+             return np.mean(self.y[mask])
+         if self.output == Target.CLASSIFICATION:
+             return mode(self.y[mask])
+         raise ValueError('Supported outputs are Target.{REGRESSION, CONSTANT, CLASSIFICATION}')
+
+     def _score(self, true, pred):
+         if self.metric == 'R2':
+             return r2_score(true, pred)
+         if self.metric == 'MAE':
+             return -mean_absolute_error(true, pred)
+         if self.metric == 'MSE':
+             return -mean_squared_error(true, pred)
+         if self.metric == 'F1':
+             return f1_score(true, pred, average='weighted')
+         if self.metric == 'ACC':
+             return accuracy_score(true, pred)
+         raise ValueError('Supported metrics are R2, MAE, MSE, F1, ACC')
+
+     def predict(self, to_pred):
+         return self.__predict(to_pred=to_pred)[0]
+
+     def _get_cuts(self, individual):
+         boundaries = np.cumsum([0] + list(self.slices))
+         return [sorted(individual[boundaries[i]:boundaries[i + 1]]) for i in range(len(self.slices))]
+
+     def __predict(self, individual=None, to_pred=None):
+         cuts = self._get_cuts(individual or self.best)
+
+         regions = self._region(to_pred, cuts)
+         regionsT = self._region(self.X, cuts)
+
+         pred = np.empty(len(to_pred), dtype=f'U{self.y.str.len().max()}') if self.output == Target.CLASSIFICATION \
+             else np.zeros(len(to_pred))
+         valid_regions = 0
+
+         for r in range(np.prod([s + 1 for s in self.slices])):
+             mask = regions == r
+             maskT = regionsT == r
+             if min(mask.sum(), maskT.sum()) < 3:
+                 if self.output != Target.CLASSIFICATION:
+                     pred[mask] = np.mean(self.y)
+                 continue
+             pred[mask] = self._output_estimation(maskT, to_pred[mask])
+             valid_regions += 1
+
+         return pred, valid_regions
+
+     def _evaluate(self, individual=None):
+         y_pred, valid_regions = self.__predict(individual or self.best, self.X if self.valid is None else self.valid[0])
+         if valid_regions < self.min_rules:
+             return -9999,
+         return self._score(self.y if self.valid is None else self.valid[1], y_pred),
+
+     def __setup(self, warm=False):
+         if not warm:
+             creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+             creator.create("Individual", list, fitness=creator.FitnessMax)
+
+         self.toolbox = base.Toolbox()
+         for f in self.features:
+             self.toolbox.register(f, random.uniform, self.X[f].min(), self.X[f].max())
+
+         self.toolbox.register("individual", tools.initCycle, creator.Individual,
+                               (sum([[getattr(self.toolbox, f) for i in range(s)]
+                                     for f, s in zip(self.features, self.slices)], [])), n=1)
+
+         self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
+
+         self.toolbox.register("mate", tools.cxBlend, alpha=self.alpha)
+         self.toolbox.register("mutate", tools.mutGaussian, indpb=self.indpb, mu=0,
+                               sigma=sum([[sig] * s for sig, s in zip(self.sigmas, self.slices)], []))
+         self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize)
+         self.toolbox.register("evaluate", self._evaluate)
+
+         self.stats = tools.Statistics(lambda ind: ind.fitness.values[0])
+         self.stats.register("avg", np.mean)
+         # self.stats.register("min", np.min)
+         self.stats.register("max", np.max)
+         # self.stats.register("std", np.std)
+
+         self.hof = tools.HallOfFame(1)
+
+     def run(self, n_pop=30, cxpb=0.8, mutpb=0.5, n_gen=50, seed=123):
+         random.seed(seed)
+         pop = self.toolbox.population(n=n_pop)
+         result, log = algorithms.eaSimple(pop, self.toolbox, cxpb=cxpb, mutpb=mutpb, ngen=n_gen,
+                                           stats=self.stats, halloffame=self.hof, verbose=False)
+         self.best = tools.selBest(pop, 1)[0]
+         return self.best, self._evaluate()[0], result, log
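
An end-to-end usage sketch of `GIn`, inferred from the constructor and `run` signature above; the dataset and hyper-parameter values are placeholders, not taken from the package:

```python
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

from psyke import Target
from psyke.genetic.gin import GIn

x, y = load_diabetes(return_X_y=True, as_frame=True)
x_train, x_valid, y_train, y_valid = train_test_split(x, y, random_state=0)

features = list(x.columns[:3])             # evolve cut points for three features only
gin = GIn((x_train, y_train), (x_valid, y_valid), features,
          sigmas=[0.1] * len(features),    # per-feature Gaussian mutation widths
          slices=[2] * len(features),      # two cut points each -> 3**3 = 27 regions
          output=Target.REGRESSION, metric='R2')
best, score, population, log = gin.run(n_pop=30, n_gen=50, seed=123)
y_pred = gin.predict(x_valid)              # per-region linear models, stitched together
```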
psyke/hypercubepredictor.py CHANGED
@@ -45,9 +45,9 @@ class HyperCubePredictor(EvaluableModel):
          idx = tree.query([list(row.values())], k=1)[1][0][0]
          return HyperCubePredictor._get_cube_output(cubes[idx], row)
 
-     def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
+     def _brute_predict_surface(self, row: pd.Series) -> GenericCube:
          return min([(
-             cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
+             cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
          ) for cube in self._hypercubes])[-1]
 
      def _create_brute_tree(self, criterion: str = 'center', n: int = 2) -> (BallTree, list[GenericCube]):
@@ -76,6 +76,8 @@ class HyperCubePredictor(EvaluableModel):
          return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
 
      def _find_cube(self, data: dict[str, float]) -> GenericCube | None:
+         if not self._hypercubes:
+             return None
          data = data.copy()
          for dimension in self._dimensions_to_ignore:
              if dimension in data:
psyke/tuning/pedro/__init__.py CHANGED
@@ -55,8 +55,10 @@ class PEDRO(SKEOptimizer, IterativeOptimizer):
          patience = self.patience
          while patience > 0:
              print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm_name, grid, threshold), end="")
-             extractor = self.algorithm(self.predictor, grid, min_examples=25, output=self.output,
-                                        threshold=threshold, normalization=self.normalization)
+             param_dict = dict(min_examples=25, threshold=threshold, normalization=self.normalization)
+             if self.algorithm != Extractor.gridrex:
+                 param_dict['output'] = self.output
+             extractor = self.algorithm(self.predictor, grid, **param_dict)
              _ = extractor.extract(self.dataframe)
              error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
                  else extractor.mae
psyke/utils/logic.py CHANGED
@@ -123,14 +123,10 @@ def to_var(name: str) -> Var:
      return var(name[0].upper() + name[1:])
 
 
- def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None, sort: bool = True) -> dict[str, Var]:
-     if sort:
-         features = sorted(features, key=lambda x: x.name)
-         dataset = sorted(dataset.columns[:-1]) if dataset is not None else None
-     else:
-         dataset = dataset.columns[:-1] if dataset is not None else None
+ def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None) -> dict[str, Var]:
+     dataset = dataset.columns[:-1] if dataset is not None else None
      values = {feature.name: to_var(feature.name) for feature in features} \
-         if len(features) > 0 else {name: to_var(name) for name in dataset}
+         if features else {name: to_var(name) for name in dataset}
      return values
 
 
@@ -325,4 +321,4 @@ def get_not_in_rule(min_included: bool = False, max_included: bool = True) -> Cl
      parser = DEFAULT_CLAUSES_PARSER
      theory = parser.parse_clauses(not_in_textual_rule(LE if min_included else L, GE if max_included else G),
                                    operators=None)
-     return theory[0]
+     return theory[0]
{psyke-0.8.9.dev48.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: psyke
- Version: 0.8.9.dev48
+ Version: 1.0.4.dev10
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
  Home-page: https://github.com/psykei/psyke-python
  Author: Matteo Magnini
@@ -16,33 +16,55 @@ Classifier: Topic :: Software Development :: Libraries
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3 :: Only
  Classifier: Programming Language :: Prolog
- Requires-Python: >=3.9.0, <3.10
+ Requires-Python: ==3.11
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: numpy~=1.26.0
- Requires-Dist: pandas~=2.2.0
- Requires-Dist: scikit-learn~=1.5.0
+ Requires-Dist: numpy~=2.3.4
+ Requires-Dist: pandas~=2.3.0
+ Requires-Dist: scikit-learn~=1.8.0
  Requires-Dist: 2ppy~=0.4.0
  Requires-Dist: kneed~=0.8.1
  Requires-Dist: sympy~=1.11
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: license
+ Dynamic: license-file
+ Dynamic: platform
+ Dynamic: project-url
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
 
  # PSyKE
 
  ![PSyKE Logo](.img/logo-wide.png)
 
- Some quick links:
+ Quick links:
  * [Home Page](https://apice.unibo.it/xwiki/bin/view/PSyKE/)
  * [GitHub Repository](https://github.com/psykei/psyke-python)
  * [PyPi Repository](https://pypi.org/project/psyke/)
  * [Issues](https://github.com/psykei/psyke-python/issues)
 
+ ## Latest Releases
+
+ * PSyKE 1.0: Compatibility with Python 3.11.x
+ * PSyKE 0.10: New genetic algorithms for knowledge extraction
+ * PSyKE 0.9: Fairness mitigation support for knowledge extractors
+ * PSyKE 0.8: New features: local explainability and counterfactual support
+ * PSyKE 0.7: New SKE algorithms implemented
+
  ## Intro
 
  [PSyKE](https://apice.unibo.it/xwiki/bin/view/PSyKE/) (Platform for Symbolic Knowledge Extraction)
- is intended as a library for extracting symbolic knowledge (in the form of logic rules) out of sub-symbolic predictors.
+ is intended as a library for extracting symbolic knowledge (in the form of logic rule lists) out of sub-symbolic predictors.
 
  More precisely, PSyKE offers a general purpose API for knowledge extraction, and a number of different algorithms implementing it,
  supporting both classification and regression problems.
@@ -91,16 +113,14 @@ We are working on PSyKE to extend its features to encompass explainable clusteri
 
  ### End users
 
- PSyKE is deployed as a library on Pypi, and it can therefore be installed as Python package by running:
+ PSyKE is deployed as a library on Pypi. It can be installed as Python package by running:
  ```bash
  pip install psyke
  ```
 
  #### Requirements
- * `numpy`
- * `pandas`
- * `scikit-learn`
- * `2ppy`
+
+ Please refer to the [requirements file](https://github.com/psykei/psyke-python/blob/master/requirements.txt)
 
  ##### Test requirements
  * `skl2onnx`
@@ -108,15 +128,15 @@ pip install psyke
  * `parameterized`
 
  Once installed, it is possible to create an extractor from a predictor
- (e.g. Neural Network, Support Vector Machine, K-Nearest Neighbor, Random Forest, etc.)
- and from the dataset used to train the predictor.
+ (e.g. Neural Network, Support Vector Machine, K-Nearest Neighbours, Random Forest, etc.)
+ and from the data set used to train the predictor.
 
  > **Note:** the predictor must expose a method named `predict` to be properly used as an oracle.
 
  #### End users
 
  A brief example is presented in `demo.py` script in the `demo/` folder.
- Using `sklearn`'s Iris dataset we train a K-Nearest Neighbor to predict the correct output class.
+ Using `sklearn`'s Iris data set we train a K-Nearest Neighbours to predict the correct output class.
  Before training, we make the dataset discrete.
  After that we create two different extractors: REAL and Trepan.
  We output the extracted theory for both extractors.
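
Condensed, the demo flow looks roughly like this (a hedged sketch: the factory methods follow the `Extractor` API, while the discretization helpers are assumed from the demo script and may differ in name):

```python
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

from psyke import Extractor
from psyke.utils.dataframe import get_discrete_dataset, get_discrete_features_supervised

x, y = load_iris(return_X_y=True, as_frame=True)
dataset = x.join(y.map({0: 'setosa', 1: 'versicolor', 2: 'virginica'}).rename('iris'))

discretization = get_discrete_features_supervised(dataset)  # assumed helper
train = get_discrete_dataset(dataset.iloc[:, :-1], discretization).join(dataset.iloc[:, -1])

predictor = KNeighborsClassifier().fit(train.iloc[:, :-1], train.iloc[:, -1])
for extractor in (Extractor.real(predictor, discretization),
                  Extractor.trepan(predictor, discretization)):
    print(extractor.extract(train))
```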
@@ -142,8 +162,8 @@ iris(PetalLength8, PetalWidth8, SepalLength8, SepalWidth8, setosa) :- true.
  ## Developers
 
  Working with PSyKE codebase requires a number of tools to be installed:
- * Python 3.9
-   + Python version greater than `3.9.x` are currently __not__ supported
+ * Python 3.11
+   + Python versions >= `3.12.x` are currently __not__ supported
 
  * JDK 11+ (please ensure the `JAVA_HOME` environment variable is properly configured)
  * Git 2.20+