psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry; it is provided for informational purposes only.
Files changed (42)
  1. psyke/__init__.py +231 -85
  2. psyke/clustering/__init__.py +9 -4
  3. psyke/clustering/cream/__init__.py +6 -10
  4. psyke/clustering/exact/__init__.py +17 -11
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +25 -0
  7. psyke/extraction/cart/CartPredictor.py +128 -0
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +48 -62
  11. psyke/extraction/hypercubic/__init__.py +187 -47
  12. psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
  13. psyke/extraction/hypercubic/creepy/__init__.py +24 -29
  14. psyke/extraction/hypercubic/divine/__init__.py +86 -0
  15. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  16. psyke/extraction/hypercubic/gridex/__init__.py +45 -84
  17. psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
  18. psyke/extraction/hypercubic/hex/__init__.py +104 -0
  19. psyke/extraction/hypercubic/hypercube.py +275 -72
  20. psyke/extraction/hypercubic/iter/__init__.py +45 -46
  21. psyke/extraction/hypercubic/strategy.py +13 -9
  22. psyke/extraction/real/__init__.py +24 -29
  23. psyke/extraction/real/utils.py +2 -2
  24. psyke/extraction/trepan/__init__.py +24 -19
  25. psyke/genetic/__init__.py +0 -0
  26. psyke/genetic/fgin/__init__.py +74 -0
  27. psyke/genetic/gin/__init__.py +144 -0
  28. psyke/hypercubepredictor.py +102 -0
  29. psyke/schema/__init__.py +230 -36
  30. psyke/tuning/__init__.py +40 -28
  31. psyke/tuning/crash/__init__.py +33 -64
  32. psyke/tuning/orchid/__init__.py +21 -23
  33. psyke/tuning/pedro/__init__.py +70 -56
  34. psyke/utils/logic.py +8 -8
  35. psyke/utils/plot.py +79 -3
  36. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
  37. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  38. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  39. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  40. psyke/extraction/cart/predictor.py +0 -73
  41. psyke-0.4.9.dev6.dist-info/RECORD +0 -36
  42. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/tuning/__init__.py CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
 import numpy as np
 import pandas as pd
 
+from psyke.extraction.hypercubic import Grid
 from psyke.utils import Target
 
 
@@ -12,14 +13,12 @@ class Objective(Enum):
 
 
 class Optimizer:
-    def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9,
-                 readability_tradeoff: float = 0.1, patience: int = 5,
+    def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
                  normalization=None, discretization=None):
         self.dataframe = dataframe
-        self.algorithm = algorithm
         self.output = output
-        self.max_mae_increase = max_mae_increase
+        self.max_error_increase = max_error_increase
         self.min_rule_decrease = min_rule_decrease
         self.readability_tradeoff = readability_tradeoff
         self.patience = patience
@@ -30,23 +29,13 @@ class Optimizer:
     def search(self):
         raise NotImplementedError
 
-    def _depth_improvement(self, best, other):
-        if other[0] == best[0]:
-            return (best[1] - other[1]) * 2
-        return 1 / (
-            (1 - other[0] / best[0]) ** self.readability_tradeoff *
-            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
-        )
-
-    @staticmethod
-    def _best(params):
-        param_dict = {Optimizer.__score(t): t for t in params}
+    def _best(self, params):
+        param_dict = {self._score(t): t for t in params}
         min_param = min(param_dict)
         return min_param, param_dict[min_param]
 
-    @staticmethod
-    def __score(param):
-        return param[0] * np.ceil(param[1] / 5)
+    def _score(self, param):
+        return param[0] * np.ceil(param[1] * self.readability_tradeoff)
 
     def _best_param(self, param):
         param_dict = {t[param]: t for t in self.params}
@@ -54,24 +43,47 @@ class Optimizer:
         return min_param, param_dict[min_param]
 
     def get_best(self):
-        names = [self.algorithm, "Predictive loss", "N rules"]
-        params = [Optimizer._best(self.params), self._best_param(0), self._best_param(1)]
+        names = ["Combined", "Predictive loss", "N rules"]
+        params = [self._best(self.params), self._best_param(0), self._best_param(1)]
         for n, p in zip(names, params):
             self._print_params(n, p[1])
             print()
-        return Optimizer._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
+        return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
 
     def _print_params(self, n, param):
         raise NotImplementedError
 
 
-class GridOptimizer(Optimizer, ABC):
-    def __init__(self, predictor, algorithm, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
+class SKEOptimizer(Optimizer, ABC):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
                  normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
                          patience, normalization, discretization)
         self.predictor = predictor
-        self.max_depth = max_depth
         self.objective = objective
+
+
+class IterativeOptimizer(Optimizer, ABC):
+    def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.max_depth = max_depth
+
+    def _iteration_improvement(self, best, other):
+        if other[0] == best[0]:
+            return (best[1] - other[1]) * 2
+        return 1 / (
+            (1 - other[0] / best[0]) ** self.readability_tradeoff *
+            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
+        )
+
+    def _check_iteration_improvement(self, best, current):
+        improvement = \
+            self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
+        if isinstance(improvement, complex):
+            improvement = 1.0
+        return current, improvement < 1.2
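
Note the behavioral change in scoring above: the old static `__score` bucketed the rule count by a hard-coded divisor of 5, while the new instance-level `_score` weights it by the configured `readability_tradeoff`. A minimal standalone sketch of the new ranking, with illustrative values that are not from the package:

    import numpy as np

    def score(param, readability_tradeoff=0.1):
        # Combined score as in the new Optimizer._score:
        # predictive error scaled by a ceil-bucketed rule count.
        error, n_rules = param
        return error * np.ceil(n_rules * readability_tradeoff)

    candidates = [(0.5, 12), (0.8, 4)]   # (error, number of rules)
    print(min(candidates, key=score))    # -> (0.8, 4): the smaller rule set wins
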
psyke/tuning/crash/__init__.py CHANGED
@@ -1,91 +1,60 @@
 from enum import Enum
 
-import numpy as np
 import pandas as pd
 
-from psyke import Extractor, Clustering
-from psyke.tuning import Objective, Optimizer
+from psyke.tuning import Objective, SKEOptimizer
+from psyke.tuning.orchid import OrCHiD
 from psyke.utils import Target
 
 
-class CRASH(Optimizer):
+class CRASH(SKEOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, algorithm: Algorithm = Algorithm.CREAM, output: Target = Target.CONSTANT,
-                 objective: Objective = Objective.MODEL, normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
-        self.output = output
+                 max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
+                 objective: Objective = Objective.MODEL, normalization=None, discretization=None):
+        super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, objective, output, normalization, discretization)
+        self.max_depth = max_depth
+        self.max_gauss_components = max_gauss_components
 
     def search(self):
-        self.params = self.__search_depth()
+        self.params = []
+        for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
+            self.params += self.__search_algorithm(algorithm)
 
-    def __search_depth(self):
+    def __search_algorithm(self, algorithm):
         params = []
         best = None
 
-        for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
-    def __search_threshold(self, depth):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
-        params = []
-        patience = self.patience
-        while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
-            extractor = Extractor.creepy(
-                self.predictor, depth=depth, error_threshold=threshold, output=self.output,
-                gauss_components=10, normalization=self.normalization,
-                clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
-            )
-            _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
-
-            if len(params) == 0:
-                params.append((mae, n, depth, threshold))
-                threshold += step
-                continue
-
-            if (n == 1) or (mae == 0.0):
-                params.append((mae, n, depth, threshold))
-                break
-
-            if mae > params[0][0] * self.max_mae_increase:
+        for gauss_components in range(2, self.max_gauss_components + 1):
+            data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
+                else self.dataframe
+            current_params = self.__search_components(data, algorithm, gauss_components)
+            current_best = self._best(current_params)[1]
+            if best is not None and self._score(best) <= self._score(current_best):
                 break
+            best = current_best
+            params += current_params
 
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
-
-            if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
-                patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, depth, threshold))
-            threshold += step
         return params
 
+    def __search_components(self, data, algorithm, gauss_components):
+        orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
+                        self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
+                        self.normalization, self.discretization)
+        orchid.search()
+        return [(*p, gauss_components, algorithm) for p in orchid.params]
+
     def _print_params(self, name, params):
-        print("**********************")
+        print("*****************************")
         print(f"Best {name}")
-        print("**********************")
+        print("*****************************")
         print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Algorithm = {params[5]}")
         print(f"Threshold = {params[3]:.2f}")
         print(f"Depth = {params[2]}")
+        print(f"Gaussian components = {params[4]}")
psyke/tuning/orchid/__init__.py CHANGED
@@ -4,60 +4,58 @@ import numpy as np
 import pandas as pd
 
 from psyke import Clustering, EvaluableModel
-from psyke.tuning import Optimizer
+from psyke.tuning import Optimizer, IterativeOptimizer
 from psyke.utils import Target
 
 
-class OrCHiD(Optimizer):
+class OrCHiD(IterativeOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
     def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
-                 patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         patience, normalization, discretization)
-        self.max_depth = max_depth
+                 max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
+                 patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
+        super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
+                         output, normalization, discretization)
+        self.algorithm = algorithm
+        self.gauss_components = gauss_components
 
     def search(self):
         self.params = self.__search_depth()
 
     def __search_depth(self):
-        params = []
-        best = None
+        params, best = [], None
 
         for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
+            current_params = self.__search_threshold(depth)
+            current_best = self._best(current_params)[1]
             print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
 
-            if len(params) > 1 and improvement < 1.2:
+            if len(params) > 1 and to_break:
                 break
         return params
 
     def __search_threshold(self, depth):
         step = 1.0
-        threshold = 1.0  # self.max_mae_increase * 0.9
+        threshold = 1.0
         params = []
         patience = self.patience
         while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
+            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
+                  f"Gaussian components = {self.gauss_components}. ", end="")
             clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
-                depth=depth, error_threshold=threshold, gauss_components=10, output=self.output
+                depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
             )
             clustering.fit(self.dataframe)
             task, metric = \
                 (EvaluableModel.Task.CLASSIFICATION, EvaluableModel.ClassificationScore.INVERSE_ACCURACY) \
                 if self.output == Target.CLASSIFICATION else \
                 (EvaluableModel.Task.REGRESSION, EvaluableModel.RegressionScore.MAE)
-            p, n = clustering.score(self.dataframe, None, False, False, task, [metric])[metric][0], clustering.n_rules
+            p, n = clustering.score(self.dataframe, None, False, False, task=task,
+                                    scoring_function=[metric])[metric][0], clustering.n_rules
 
             print(f"Predictive loss = {p:.2f}, {n} rules")
 
@@ -71,7 +69,7 @@ class OrCHiD(Optimizer):
                 params.append((p, n, depth, threshold))
                 break
 
-            if p > params[0][0] * self.max_mae_increase:
+            if p > params[0][0] * self.max_error_increase:
                 break
 
             improvement = (params[-1][0] / p) + (1 - n / params[-1][1])
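
OrCHiD keeps its depth/threshold search loop but now inherits the stopping logic (`_check_iteration_improvement`) from IterativeOptimizer and exposes `gauss_components` as a parameter. A hypothetical invocation; `df` is a placeholder DataFrame with the target in the last column:

    from psyke.tuning.orchid import OrCHiD
    from psyke.utils import Target

    orchid = OrCHiD(df, OrCHiD.Algorithm.CREAM, output=Target.CLASSIFICATION,
                    max_depth=10, gauss_components=10)
    orchid.search()    # populates orchid.params with (loss, n_rules, depth, threshold)
    orchid.get_best()  # prints the best combined, best-loss, and fewest-rules candidates
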
psyke/tuning/pedro/__init__.py CHANGED
@@ -1,44 +1,73 @@
 import numpy as np
 import pandas as pd
 from enum import Enum
-from psyke import Extractor
+
+from sklearn.metrics import accuracy_score
+
+from psyke import Extractor, Target
 from psyke.extraction.hypercubic import Grid, FeatureRanker
 from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
-from psyke.tuning import Objective, Optimizer
+from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
 
 
-class PEDRO(Optimizer):
+class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
-        GRIDREX = 2
+        GRIDREX = 2,
+        HEX = 3
 
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
                  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
-                 normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
+                 output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
+                              readability_tradeoff, patience, objective, output, normalization, discretization)
+        IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                                    max_depth, patience, output, normalization, discretization)
+        self.algorithm = Extractor.gridrex if algorithm == PEDRO.Algorithm.GRIDREX else \
+            Extractor.gridex if algorithm == PEDRO.Algorithm.GRIDEX else Extractor.hex
+        self.algorithm_name = "GridREx" if algorithm == PEDRO.Algorithm.GRIDREX else \
+            "GridEx" if algorithm == PEDRO.Algorithm.GRIDEX else "HEx"
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
-        self.model_mae = abs(self.predictor.predict(dataframe.iloc[:, :-1]).flatten() -
-                             self.dataframe.iloc[:, -1].values).mean()
+        predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
+        expected = self.dataframe.iloc[:, -1].values
+        self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
+            abs(predictions - expected).mean()
+
+    def _search_depth(self, strategy, critical, max_partitions):
+        params, best = [], None
+
+        for iterations in range(self.max_depth):
+            current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
+            current_best = self._best(current_params)[1]
+            print()
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
+
+            if len(params) > 1 and to_break:
+                break
+        return params
 
     def __search_threshold(self, grid, critical, max_partitions):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.5
+        step = self.error / 2.0
+        threshold = self.error * 0.5
         params = []
         patience = self.patience
         while patience > 0:
-            print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm, grid, threshold), end="")
-            extractor = Extractor.gridrex(self.predictor, grid, threshold=threshold, normalization=self.normalization) \
-                if self.algorithm == PEDRO.Algorithm.GRIDREX \
-                else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
+            print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm_name, grid, threshold), end="")
+            param_dict = dict(min_examples=25, threshold=threshold, normalization=self.normalization)
+            if self.algorithm != Extractor.gridrex:
+                param_dict['output'] = self.output
+            extractor = self.algorithm(self.predictor, grid, **param_dict)
             _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print("MAE = {:.2f}, {} rules".format(mae, n))
+            error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
+                else extractor.mae
+            error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
+                        error_function(self.dataframe)), extractor.n_rules
+            print("MAE = {:.2f}, {} rules".format(error, n))
 
             if len(params) == 0:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 threshold += step
                 continue
 
@@ -46,44 +75,24 @@ class PEDRO(Optimizer):
                 break
 
             if n == 1:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 break
 
-            if mae > params[0][0] * self.max_mae_increase:
+            if error > params[0][0] * self.max_error_increase:
                 break
 
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
+            improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
 
             if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
                 patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
+                step = max(step, abs(error - threshold) / max(patience, 1))
             elif not critical:
                 patience = self.patience
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, threshold, grid))
+            if error != params[-1][0] or n != params[-1][1]:
+                params.append((error, n, threshold, grid))
             threshold += step
         return params
 
-    def __search_depth(self, strategy, critical, max_partitions):
-        params = []
-        best = None
-
-        for iterations in range(self.max_depth):
-            grid = Grid(iterations + 1, strategy)
-            p = self.__search_threshold(grid, critical, max_partitions)
-            b = Optimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
     def __contains(self, strategies, strategy):
         for s in strategies:
             if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -91,21 +100,26 @@ class PEDRO(Optimizer):
         return False
 
     def search(self):
-        base_strategy = FixedStrategy(2)
-        strategies = [base_strategy, FixedStrategy(3)]
-
-        base_partitions = base_strategy.partition_number(self.dataframe.columns[:-1])
+        max_partitions = 200
+        base_partitions = FixedStrategy(2).partition_number(self.dataframe.columns[:-1]) * 3
+        if base_partitions <= max_partitions:
+            strategies = [FixedStrategy(2)]
+            if FixedStrategy(3).partition_number(self.dataframe.columns[:-1]) <= max_partitions:
+                strategies.append(FixedStrategy(3))
+        else:
+            strategies = []
+            base_partitions = max_partitions
 
         for n in [2, 3, 5, 10]:
            for th in [0.99, 0.75, 0.67, 0.5, 0.3]:
                 strategy = AdaptiveStrategy(self.ranked, [(th, n)])
-                if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions * 3 and \
+                if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions and \
                         not self.__contains(strategies, strategy):
                     strategies.append(strategy)
 
         for (a, b) in [(0.33, 0.67), (0.25, 0.75), (0.1, 0.9)]:
             strategy = AdaptiveStrategy(self.ranked, [(a, 2), (b, 3)])
-            if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions * 3 and \
+            if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions and \
                     not self.__contains(strategies, strategy):
                 strategies.append(strategy)
 
@@ -116,16 +130,16 @@ class PEDRO(Optimizer):
 
         params = []
         for strategy in strategies:
-            params += self.__search_depth(strategy,
-                                          strategy.partition_number(self.dataframe.columns[:-1]) > avg,
-                                          base_partitions * 3)
+            params += self._search_depth(strategy,
+                                         strategy.partition_number(self.dataframe.columns[:-1]) > avg,
+                                         base_partitions)
         self.params = params
 
     def _print_params(self, name, params):
         print("**********************")
         print(f"Best {name}")
         print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Error = {params[0]:.2f}, {params[1]} rules")
         print(f"Threshold = {params[2]:.2f}")
         print(f"Iterations = {params[3].iterations}")
         print(f"Strategy = {params[3].strategy}")
psyke/utils/logic.py CHANGED
@@ -123,17 +123,17 @@ def to_var(name: str) -> Var:
     return var(name[0].upper() + name[1:])
 
 
-def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None, sort: bool = True) -> dict[str, Var]:
-    if sort:
-        features = sorted(features, key=lambda x: x.name)
-        dataset = sorted(dataset.columns[:-1]) if dataset is not None else None
-    else:
-        dataset = dataset.columns[:-1] if dataset is not None else None
+def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None) -> dict[str, Var]:
+    dataset = dataset.columns[:-1] if dataset is not None else None
     values = {feature.name: to_var(feature.name) for feature in features} \
-        if len(features) > 0 else {name: to_var(name) for name in dataset}
+        if features else {name: to_var(name) for name in dataset}
     return values
 
 
+def last_in_body(body: Struct) -> Struct:
+    return body.args[-1] if body.args[-1].functor == 'is' else last_in_body(body.args[-1])
+
+
 def create_head(functor: str, variables: Iterable[Var], output) -> Struct:
     if isinstance(output, Var):
         variables += [output]
@@ -321,4 +321,4 @@ def get_not_in_rule(min_included: bool = False, max_included: bool = True) -> Cl
     parser = DEFAULT_CLAUSES_PARSER
     theory = parser.parse_clauses(not_in_textual_rule(LE if min_included else L, GE if max_included else G),
                                   operators=None)
-    return theory[0]
+    return theory[0]
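
The new `last_in_body` helper walks a clause body down its right spine until it reaches the trailing `is/2` term. A stand-in illustration of that recursion using a minimal mock in place of tuprolog's Struct (the real function operates on tuprolog terms):

    from dataclasses import dataclass

    @dataclass
    class Struct:  # minimal stand-in for tuprolog's Struct
        functor: str
        args: list

    def last_in_body(body: Struct) -> Struct:
        return body.args[-1] if body.args[-1].functor == 'is' else last_in_body(body.args[-1])

    # body of a clause like: a, (b, X is E)
    is_term = Struct('is', [Struct('X', []), Struct('E', [])])
    body = Struct(',', [Struct('a', []), Struct(',', [Struct('b', []), is_term])])
    assert last_in_body(body) is is_term
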
psyke/utils/plot.py CHANGED
@@ -7,10 +7,85 @@ import matplotlib.pyplot as plt
 from matplotlib.lines import Line2D
 from tuprolog.solve.prolog import prolog_solver
 from tuprolog.theory import Theory, mutable_theory
-from psyke.utils.logic import data_to_struct, pretty_theory, get_in_rule, get_not_in_rule
+
+from psyke.extraction.hypercubic import HyperCubeExtractor
+from psyke.utils.logic import data_to_struct, get_in_rule, get_not_in_rule
 
 import matplotlib
-matplotlib.use('TkAgg')
+#matplotlib.use('TkAgg')
+
+
+def plot_init(xlim, ylim, xlabel, ylabel, size=(4, 3), equal=False):
+    plt.figure(figsize=size)
+    if equal:
+        plt.gca().set_aspect(1)
+    plt.xlim(xlim)
+    plt.ylim(ylim)
+    plt.gca().set_xlabel(xlabel)
+    plt.gca().set_ylabel(ylabel)
+    plt.gca().set_rasterized(True)
+
+
+def plot_point(x, y, color, marker, ec=None):
+    plt.scatter(x, y, c=color, marker=marker, edgecolors=ec, linewidths=0.6)
+
+
+def plot_classification_samples(dataframe, classes, colors, markers, labels, loc, name, show=True, ec=None):
+    marks = [Line2D([0], [0], color=c, marker=m, lw="0") for c, m in zip(colors, markers)]
+
+    for cl, c, m in zip(classes, colors, markers):
+        df = dataframe[dataframe.target == cl]
+        plot_point(df["petal length"], df["petal width"], c, m, ec=ec)
+
+    plt.gca().legend(marks, labels, loc=loc)
+    plt.savefig("plot/{}.pdf".format(name), dpi=500, bbox_inches='tight')
+    if show:
+        plt.show()
+
+
+def plot_boundaries(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                    a: float = .5, h: str = '////////', ls='-', e=.05, fc='none', ec=None, reverse=False):
+    cubes = extractor._hypercubes.copy()
+    if reverse:
+        cubes.reverse()
+    for cube in cubes:
+        plt.gca().fill_between((cube[x][0] - e, cube[x][1] + e), cube[y][0] - e, cube[y][1] + e,
+                               fc=colors[cube.output] if fc is None else fc,
+                               ec=colors[cube.output] if ec is None else ec, alpha=a, hatch=h, linestyle=ls)
+
+
+def plot_surfaces(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str], ec='r', e=.05):
+    for cube in extractor._hypercubes:
+        plt.gca().fill_between((cube[x][0] - e, cube[x][1] + e), cube[y][0] - e, cube[y][1] + e,
+                               fc='none', ec=ec)
+
+
+def plot_perimeters(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str], n: int = 5,
+                    ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        for corner in cube.perimeter_samples(n):
+            plt.scatter(corner[x], corner[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
+
+
+def plot_centers(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                 ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        center = cube.center
+        plt.scatter(center[x], center[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
+
+
+def plot_corners(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                 ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        for corner in cube.corners():
+            plt.scatter(corner[x], corner[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
+
+
+def plot_barycenters(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                     ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        center = cube.barycenter
+        plt.scatter(center[x], center[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
 
 
 def predict_from_theory(theory: Theory, data: pd.DataFrame) -> list[float or str]:
@@ -95,6 +170,7 @@ def plot_theory(theory: Theory, data: pd.DataFrame = None, output: str = 'plot.p
         pass
         # ax.text2D(0., 0.88, pretty_theory(theory, new_line=False), transform=ax.transAxes, fontsize=8)
     if isinstance(ys[0], str):
-        custom_lines = [Line2D([0], [0], marker='o', markerfacecolor=get_color(c), markersize=20, color='w') for c in classes]
+        custom_lines = [Line2D([0], [0], marker='o', markerfacecolor=get_color(c),
+                        markersize=20, color='w') for c in classes]
         ax.legend(custom_lines, classes, loc='upper left', numpoints=1, ncol=3, fontsize=18, bbox_to_anchor=(0, 0))
         plt.savefig(output, format='pdf')
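
The new helpers all draw onto the current Matplotlib axes, so a typical session composes them between `plot_init` and `plt.show()`. A hypothetical sketch: the trained `extractor` and the color map are placeholders, and the feature names follow the iris-style columns hard-coded in `plot_classification_samples`:

    import matplotlib.pyplot as plt
    from psyke.utils.plot import plot_init, plot_boundaries, plot_centers

    plot_init(xlim=(0, 7), ylim=(0, 3), xlabel="petal length", ylabel="petal width")
    colors = {"setosa": "tab:blue", "versicolor": "tab:orange", "virginica": "tab:green"}
    # fc=None makes plot_boundaries fill each cube with its class color
    plot_boundaries(extractor, "petal length", "petal width", colors, fc=None)
    plot_centers(extractor, "petal length", "petal width", colors)
    plt.show()
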