psyke 0.5.2.dev3__py3-none-any.whl → 0.5.4.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of psyke might be problematic. Click here for more details.

psyke/__init__.py CHANGED
@@ -44,9 +44,9 @@ class EvaluableModel(object):
44
44
  V = 3,
45
45
  FMI = 4
46
46
 
47
- def __init__(self, normalization=None, discretization=None):
48
- self.normalization = normalization
47
+ def __init__(self, discretization=None, normalization=None):
49
48
  self.discretization = discretization
49
+ self.normalization = normalization
50
50
 
51
51
  def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
52
52
  """
@@ -312,13 +312,14 @@ class Extractor(EvaluableModel, ABC):
312
312
  @staticmethod
313
313
  def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
314
314
  gauss_components: int = 2, ranks: [(str, float)] = [], ignore_threshold: float = 0.0,
315
- discretization=None, normalization: dict[str, tuple[float, float]] = None) -> Extractor:
315
+ discretization=None, normalization: dict[str, tuple[float, float]] = None,
316
+ seed: int = get_default_random_seed()) -> Extractor:
316
317
  """
317
318
  Creates a new CReEPy extractor.
318
319
  """
319
320
  from psyke.extraction.hypercubic.creepy import CReEPy
320
321
  return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
321
- discretization, normalization, clustering)
322
+ discretization, normalization, clustering, seed)
322
323
 
323
324
  @staticmethod
324
325
  def real(predictor, discretization=None) -> Extractor:
@@ -341,8 +342,8 @@ class Extractor(EvaluableModel, ABC):
341
342
 
342
343
 
343
344
  class Clustering(EvaluableModel, ABC):
344
- def __init__(self, normalization=None):
345
- super().__init__(normalization)
345
+ def __init__(self, discretization=None, normalization=None):
346
+ super().__init__(discretization, normalization)
346
347
 
347
348
  def fit(self, dataframe: pd.DataFrame):
348
349
  raise NotImplementedError('fit')
@@ -351,18 +352,19 @@ class Clustering(EvaluableModel, ABC):
351
352
  raise NotImplementedError('explain')
352
353
 
353
354
  @staticmethod
354
- def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
355
- gauss_components: int = 2) -> Clustering:
355
+ def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
356
+ discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
356
357
  """
357
358
  Creates a new ExACT instance.
358
359
  """
359
360
  from psyke.clustering.exact import ExACT
360
- return ExACT(depth, error_threshold, output, gauss_components)
361
+ return ExACT(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
361
362
 
362
363
  @staticmethod
363
- def cream(depth: int, error_threshold: float, output, gauss_components: int = 2) -> Clustering:
364
+ def cream(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
365
+ discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
364
366
  """
365
367
  Creates a new CREAM instance.
366
368
  """
367
369
  from psyke.clustering.cream import CREAM
368
- return CREAM(depth, error_threshold, output, gauss_components)
370
+ return CREAM(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
@@ -8,8 +8,8 @@ from psyke.hypercubepredictor import HyperCubePredictor
8
8
 
9
9
  class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):
10
10
 
11
- def __init__(self, output: Target = Target.CONSTANT, normalization=None):
12
- HyperCubePredictor.__init__(self, output=output, normalization=normalization)
11
+ def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
12
+ HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
13
13
 
14
14
  def get_hypercubes(self) -> Iterable[HyperCube]:
15
15
  raise NotImplementedError('get_hypercubes')
@@ -5,7 +5,7 @@ from typing import Iterable
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
 
8
- import psyke.utils
8
+ from psyke.utils import Target, get_default_random_seed
9
9
  from psyke.clustering.exact import ExACT
10
10
  from psyke.extraction.hypercubic import Node, HyperCube, ClosedCube
11
11
  from psyke.clustering.utils import select_gaussian_mixture
@@ -16,9 +16,9 @@ class CREAM(ExACT):
16
16
  Explanator implementing CREAM algorithm.
17
17
  """
18
18
 
19
- def __init__(self, depth: int, error_threshold: float,
20
- output: psyke.utils.Target = psyke.utils.Target.CONSTANT, gauss_components: int = 5):
21
- super().__init__(depth, error_threshold, output, gauss_components)
19
+ def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5,
20
+ discretization=None, normalization=None, seed: int = get_default_random_seed()):
21
+ super().__init__(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
22
22
 
23
23
  def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
24
24
  cubes = []
@@ -13,7 +13,7 @@ from psyke.clustering import HyperCubeClustering
13
13
  from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
14
14
  from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
15
15
  from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube
16
- from psyke.utils import Target
16
+ from psyke.utils import Target, get_default_random_seed
17
17
 
18
18
 
19
19
  class ExACT(HyperCubeClustering, ABC):
@@ -22,13 +22,15 @@ class ExACT(HyperCubeClustering, ABC):
22
22
  """
23
23
 
24
24
  def __init__(self, depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
25
- gauss_components: int = 2, normalization=None):
26
- super().__init__(output, normalization)
25
+ gauss_components: int = 2, discretization=None, normalization=None,
26
+ seed: int = get_default_random_seed()):
27
+ super().__init__(output, discretization, normalization)
27
28
  self.depth = depth
28
29
  self.error_threshold = error_threshold
29
30
  self.gauss_components = gauss_components
30
31
  self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
31
32
  self._predictor.n_neighbors = 1
33
+ self.seed = seed
32
34
 
33
35
  def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
34
36
  cubes = []
@@ -56,6 +58,7 @@ class ExACT(HyperCubeClustering, ABC):
56
58
  )
57
59
 
58
60
  def fit(self, dataframe: pd.DataFrame):
61
+ np.random.seed(self.seed)
59
62
  self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
60
63
  self._hypercubes = \
61
64
  self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
@@ -9,7 +9,7 @@ from tuprolog.theory import Theory
9
9
  from psyke import Clustering
10
10
  from psyke.clustering import HyperCubeClustering
11
11
  from psyke.extraction.hypercubic import HyperCubeExtractor
12
- from psyke.utils import Target
12
+ from psyke.utils import Target, get_default_random_seed
13
13
  from psyke.utils.logic import last_in_body
14
14
 
15
15
 
@@ -20,10 +20,12 @@ class CReEPy(HyperCubeExtractor):
20
20
 
21
21
  def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
22
22
  gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
23
- discretization=None, normalization=None, clustering=Clustering.exact):
23
+ discretization=None, normalization=None, clustering=Clustering.exact,
24
+ seed: int = get_default_random_seed()):
24
25
  super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
25
26
  discretization, normalization)
26
- self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
27
+ self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
28
+ normalization, seed)
27
29
  self.ranks = ranks
28
30
  self.ignore_threshold = ignore_threshold
29
31
 
@@ -11,17 +11,18 @@ from psyke.extraction.hypercubic import RegressionCube, GenericCube, Point
11
11
 
12
12
 
13
13
  class HyperCubePredictor(EvaluableModel):
14
- def __init__(self, output=Target.CONSTANT, normalization=None):
15
- super().__init__(normalization)
14
+ def __init__(self, output=Target.CONSTANT, discretization=None, normalization=None):
15
+ super().__init__(discretization, normalization)
16
16
  self._hypercubes = []
17
17
  self._output = output
18
+ self._surrounding = None
18
19
 
19
20
  def _predict(self, dataframe: pd.DataFrame) -> Iterable:
20
21
  return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
21
22
 
22
23
  def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
23
24
  mapping: dict[str: int] = None) -> Iterable:
24
- predictions = self._predict(dataframe)
25
+ predictions = np.array(self._predict(dataframe))
25
26
  idx = [prediction is None for prediction in predictions]
26
27
  if sum(idx) > 0:
27
28
  if criterion == 'default':
@@ -46,7 +47,7 @@ class HyperCubePredictor(EvaluableModel):
46
47
 
47
48
  def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
48
49
  distances = [(
49
- cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
50
+ cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
50
51
  ) for cube in self._hypercubes]
51
52
  return min(distances)[-1]
52
53
 
psyke/tuning/__init__.py CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
3
3
  import numpy as np
4
4
  import pandas as pd
5
5
 
6
+ from psyke.extraction.hypercubic import Grid
6
7
  from psyke.utils import Target
7
8
 
8
9
 
@@ -12,14 +13,12 @@ class Objective(Enum):
12
13
 
13
14
 
14
15
  class Optimizer:
15
- def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
16
- max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9,
17
- readability_tradeoff: float = 0.1, patience: int = 5,
16
+ def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
17
+ min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
18
18
  normalization=None, discretization=None):
19
19
  self.dataframe = dataframe
20
- self.algorithm = algorithm
21
20
  self.output = output
22
- self.max_mae_increase = max_mae_increase
21
+ self.max_error_increase = max_error_increase
23
22
  self.min_rule_decrease = min_rule_decrease
24
23
  self.readability_tradeoff = readability_tradeoff
25
24
  self.patience = patience
@@ -30,23 +29,13 @@ class Optimizer:
30
29
  def search(self):
31
30
  raise NotImplementedError
32
31
 
33
- def _depth_improvement(self, best, other):
34
- if other[0] == best[0]:
35
- return (best[1] - other[1]) * 2
36
- return 1 / (
37
- (1 - other[0] / best[0]) ** self.readability_tradeoff *
38
- np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
39
- )
40
-
41
- @staticmethod
42
- def _best(params):
43
- param_dict = {Optimizer.__score(t): t for t in params}
32
+ def _best(self, params):
33
+ param_dict = {self._score(t): t for t in params}
44
34
  min_param = min(param_dict)
45
35
  return min_param, param_dict[min_param]
46
36
 
47
- @staticmethod
48
- def __score(param):
49
- return param[0] * np.ceil(param[1] / 5)
37
+ def _score(self, param):
38
+ return param[0] * np.ceil(param[1] * self.readability_tradeoff)
50
39
 
51
40
  def _best_param(self, param):
52
41
  param_dict = {t[param]: t for t in self.params}
@@ -54,24 +43,45 @@ class Optimizer:
54
43
  return min_param, param_dict[min_param]
55
44
 
56
45
  def get_best(self):
57
- names = [self.algorithm, "Predictive loss", "N rules"]
58
- params = [Optimizer._best(self.params), self._best_param(0), self._best_param(1)]
46
+ names = ["Combined", "Predictive loss", "N rules"]
47
+ params = [self._best(self.params), self._best_param(0), self._best_param(1)]
59
48
  for n, p in zip(names, params):
60
49
  self._print_params(n, p[1])
61
50
  print()
62
- return Optimizer._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
51
+ return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
63
52
 
64
53
  def _print_params(self, n, param):
65
54
  raise NotImplementedError
66
55
 
67
56
 
68
- class GridOptimizer(Optimizer, ABC):
69
- def __init__(self, predictor, algorithm, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
70
- min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
71
- patience: int = 5, objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
57
+ class SKEOptimizer(Optimizer, ABC):
58
+ def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
59
+ min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
60
+ objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
72
61
  normalization=None, discretization=None):
73
- super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
62
+ super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
74
63
  patience, normalization, discretization)
75
64
  self.predictor = predictor
76
- self.max_depth = max_depth
77
65
  self.objective = objective
66
+
67
+
68
+ class IterativeOptimizer(Optimizer, ABC):
69
+ def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
70
+ min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
71
+ patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
72
+ super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
73
+ patience, normalization, discretization)
74
+ self.max_depth = max_depth
75
+
76
+ def _iteration_improvement(self, best, other):
77
+ if other[0] == best[0]:
78
+ return (best[1] - other[1]) * 2
79
+ return 1 / (
80
+ (1 - other[0] / best[0]) ** self.readability_tradeoff *
81
+ np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
82
+ )
83
+
84
+ def _check_iteration_improvement(self, best, current):
85
+ improvement = \
86
+ self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
87
+ return current, improvement < 1.2
@@ -1,91 +1,54 @@
1
- from enum import Enum
2
-
3
- import numpy as np
4
1
  import pandas as pd
5
2
 
6
- from psyke import Extractor, Clustering
7
- from psyke.tuning import Objective, Optimizer
3
+ from psyke.tuning import Objective, SKEOptimizer
4
+ from psyke.tuning.orchid import OrCHiD
8
5
  from psyke.utils import Target
9
6
 
10
7
 
11
- class CRASH(Optimizer):
12
- class Algorithm(Enum):
13
- ExACT = 1,
14
- CREAM = 2
15
-
16
- def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
8
+ class CRASH(SKEOptimizer):
9
+ def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
17
10
  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
18
- patience: int = 5, algorithm: Algorithm = Algorithm.CREAM, output: Target = Target.CONSTANT,
19
- objective: Objective = Objective.MODEL, normalization=None):
20
- super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
21
- max_depth, patience, objective, normalization)
22
- self.output = output
11
+ max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
12
+ objective: Objective = Objective.MODEL, normalization=None, discretization=None):
13
+ super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
14
+ patience, objective, output, normalization, discretization)
15
+ self.max_depth = max_depth
16
+ self.max_gauss_components = max_gauss_components
23
17
 
24
18
  def search(self):
25
- self.params = self.__search_depth()
19
+ self.params = []
20
+ for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
21
+ self.params += self.__search_algorithm(algorithm)
26
22
 
27
- def __search_depth(self):
23
+ def __search_algorithm(self, algorithm):
28
24
  params = []
29
25
  best = None
30
26
 
31
- for depth in range(1, self.max_depth + 1):
32
- p = self.__search_threshold(depth)
33
- b = Optimizer._best(p)[1]
34
- print()
35
- improvement = self._depth_improvement(
36
- [best[0], best[1]], [b[0], b[1]]
37
- ) if best is not None else np.inf
38
-
39
- best = b
40
- params += p
41
-
42
- if len(params) > 1 and improvement < 1.2:
43
- break
44
- return params
45
-
46
- def __search_threshold(self, depth):
47
- step = self.model_mae / 2.0
48
- threshold = self.model_mae * 0.9
49
- params = []
50
- patience = self.patience
51
- while patience > 0:
52
- print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
53
- extractor = Extractor.creepy(
54
- self.predictor, depth=depth, error_threshold=threshold, output=self.output,
55
- gauss_components=10, normalization=self.normalization,
56
- clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
57
- )
58
- _ = extractor.extract(self.dataframe)
59
- mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
60
- extractor.mae(self.dataframe)), extractor.n_rules
61
- print(f"MAE = {mae:.2f}, {n} rules")
62
-
63
- if len(params) == 0:
64
- params.append((mae, n, depth, threshold))
65
- threshold += step
66
- continue
67
-
68
- if (n == 1) or (mae == 0.0):
69
- params.append((mae, n, depth, threshold))
27
+ for gauss_components in range(2, self.max_gauss_components + 1):
28
+ data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
29
+ else self.dataframe
30
+ current_params = self.__search_components(data, algorithm, gauss_components)
31
+ current_best = self._best(current_params)[1]
32
+ if best is not None and self._score(best) <= self._score(current_best):
70
33
  break
34
+ best = current_best
35
+ params += current_params
71
36
 
72
- if mae > params[0][0] * self.max_mae_increase:
73
- break
74
-
75
- improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
76
-
77
- if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
78
- patience -= 1
79
- step = max(step, abs(mae - threshold) / max(patience, 1))
80
- if mae != params[-1][0] or n != params[-1][1]:
81
- params.append((mae, n, depth, threshold))
82
- threshold += step
83
37
  return params
84
38
 
39
+ def __search_components(self, data, algorithm, gauss_components):
40
+ orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
41
+ self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
42
+ self.normalization, self.discretization)
43
+ orchid.search()
44
+ return [(*p, gauss_components, algorithm) for p in orchid.params]
45
+
85
46
  def _print_params(self, name, params):
86
- print("**********************")
47
+ print("*****************************")
87
48
  print(f"Best {name}")
88
- print("**********************")
49
+ print("*****************************")
89
50
  print(f"MAE = {params[0]:.2f}, {params[1]} rules")
51
+ print(f"Algorithm = {params[5]}")
90
52
  print(f"Threshold = {params[3]:.2f}")
91
53
  print(f"Depth = {params[2]}")
54
+ print(f"Gaussian components = {params[4]}")
@@ -4,53 +4,50 @@ import numpy as np
4
4
  import pandas as pd
5
5
 
6
6
  from psyke import Clustering, EvaluableModel
7
- from psyke.tuning import Optimizer
7
+ from psyke.tuning import Optimizer, IterativeOptimizer
8
8
  from psyke.utils import Target
9
9
 
10
10
 
11
- class OrCHiD(Optimizer):
11
+ class OrCHiD(IterativeOptimizer):
12
12
  class Algorithm(Enum):
13
13
  ExACT = 1,
14
14
  CREAM = 2
15
15
 
16
16
  def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
17
- max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
18
- patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
19
- super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
20
- patience, normalization, discretization)
21
- self.max_depth = max_depth
17
+ max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
18
+ patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
19
+ super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
20
+ output, normalization, discretization)
21
+ self.algorithm = algorithm
22
+ self.gauss_components = gauss_components
22
23
 
23
24
  def search(self):
24
25
  self.params = self.__search_depth()
25
26
 
26
27
  def __search_depth(self):
27
- params = []
28
- best = None
28
+ params, best = [], None
29
29
 
30
30
  for depth in range(1, self.max_depth + 1):
31
- p = self.__search_threshold(depth)
32
- b = Optimizer._best(p)[1]
31
+ current_params = self.__search_threshold(depth)
32
+ current_best = self._best(current_params)[1]
33
33
  print()
34
- improvement = self._depth_improvement(
35
- [best[0], best[1]], [b[0], b[1]]
36
- ) if best is not None else np.inf
37
-
38
- best = b
39
- params += p
34
+ best, to_break = self._check_iteration_improvement(best, current_best)
35
+ params += current_params
40
36
 
41
- if len(params) > 1 and improvement < 1.2:
37
+ if len(params) > 1 and to_break:
42
38
  break
43
39
  return params
44
40
 
45
41
  def __search_threshold(self, depth):
46
42
  step = 1.0
47
- threshold = 1.0 # self.max_mae_increase * 0.9
43
+ threshold = 1.0
48
44
  params = []
49
45
  patience = self.patience
50
46
  while patience > 0:
51
- print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
47
+ print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
48
+ f"Gaussian components = {self.gauss_components}. ", end="")
52
49
  clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
53
- depth=depth, error_threshold=threshold, gauss_components=10, output=self.output
50
+ depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
54
51
  )
55
52
  clustering.fit(self.dataframe)
56
53
  task, metric = \
@@ -72,7 +69,7 @@ class OrCHiD(Optimizer):
72
69
  params.append((p, n, depth, threshold))
73
70
  break
74
71
 
75
- if p > params[0][0] * self.max_mae_increase:
72
+ if p > params[0][0] * self.max_error_increase:
76
73
  break
77
74
 
78
75
  improvement = (params[-1][0] / p) + (1 - n / params[-1][1])
@@ -1,30 +1,52 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
3
  from enum import Enum
4
- from psyke import Extractor
4
+
5
+ from sklearn.metrics import accuracy_score
6
+
7
+ from psyke import Extractor, Target
5
8
  from psyke.extraction.hypercubic import Grid, FeatureRanker
6
9
  from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
7
- from psyke.tuning import Objective, GridOptimizer
10
+ from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
8
11
 
9
12
 
10
- class PEDRO(GridOptimizer):
13
+ class PEDRO(SKEOptimizer, IterativeOptimizer):
11
14
  class Algorithm(Enum):
12
15
  GRIDEX = 1,
13
16
  GRIDREX = 2
14
17
 
15
- def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
18
+ def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
16
19
  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
17
20
  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
18
- normalization=None):
19
- super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
20
- max_depth, patience, objective, normalization)
21
+ output: Target = Target.CONSTANT, normalization=None, discretization=None):
22
+ SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
23
+ readability_tradeoff, patience, objective, output, normalization, discretization)
24
+ IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
25
+ max_depth, patience, output, normalization, discretization)
26
+ self.algorithm = algorithm
21
27
  self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
22
- self.model_mae = abs(self.predictor.predict(dataframe.iloc[:, :-1]).flatten() -
23
- self.dataframe.iloc[:, -1].values).mean()
28
+ predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
29
+ expected = self.dataframe.iloc[:, -1].values
30
+ self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
31
+ abs(predictions - expected).mean()
32
+
33
+ def _search_depth(self, strategy, critical, max_partitions):
34
+ params, best = [], None
35
+
36
+ for iterations in range(self.max_depth):
37
+ current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
38
+ current_best = self._best(current_params)[1]
39
+ print()
40
+ best, to_break = self._check_iteration_improvement(best, current_best)
41
+ params += current_params
42
+
43
+ if len(params) > 1 and to_break:
44
+ break
45
+ return params
24
46
 
25
47
  def __search_threshold(self, grid, critical, max_partitions):
26
- step = self.model_mae / 2.0
27
- threshold = self.model_mae * 0.5
48
+ step = self.error / 2.0
49
+ threshold = self.error * 0.5
28
50
  params = []
29
51
  patience = self.patience
30
52
  while patience > 0:
@@ -33,12 +55,14 @@ class PEDRO(GridOptimizer):
33
55
  if self.algorithm == PEDRO.Algorithm.GRIDREX \
34
56
  else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
35
57
  _ = extractor.extract(self.dataframe)
36
- mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
37
- extractor.mae(self.dataframe)), extractor.n_rules
38
- print("MAE = {:.2f}, {} rules".format(mae, n))
58
+ error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
59
+ else extractor.mae
60
+ error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
61
+ error_function(self.dataframe)), extractor.n_rules
62
+ print("MAE = {:.2f}, {} rules".format(error, n))
39
63
 
40
64
  if len(params) == 0:
41
- params.append((mae, n, threshold, grid))
65
+ params.append((error, n, threshold, grid))
42
66
  threshold += step
43
67
  continue
44
68
 
@@ -46,44 +70,24 @@ class PEDRO(GridOptimizer):
46
70
  break
47
71
 
48
72
  if n == 1:
49
- params.append((mae, n, threshold, grid))
73
+ params.append((error, n, threshold, grid))
50
74
  break
51
75
 
52
- if mae > params[0][0] * self.max_mae_increase:
76
+ if error > params[0][0] * self.max_error_increase:
53
77
  break
54
78
 
55
- improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
79
+ improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
56
80
 
57
81
  if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
58
82
  patience -= 1
59
- step = max(step, abs(mae - threshold) / max(patience, 1))
83
+ step = max(step, abs(error - threshold) / max(patience, 1))
60
84
  elif not critical:
61
85
  patience = self.patience
62
- if mae != params[-1][0] or n != params[-1][1]:
63
- params.append((mae, n, threshold, grid))
86
+ if error != params[-1][0] or n != params[-1][1]:
87
+ params.append((error, n, threshold, grid))
64
88
  threshold += step
65
89
  return params
66
90
 
67
- def __search_depth(self, strategy, critical, max_partitions):
68
- params = []
69
- best = None
70
-
71
- for iterations in range(self.max_depth):
72
- grid = Grid(iterations + 1, strategy)
73
- p = self.__search_threshold(grid, critical, max_partitions)
74
- b = GridOptimizer._best(p)[1]
75
- print()
76
- improvement = self._depth_improvement(
77
- [best[0], best[1]], [b[0], b[1]]
78
- ) if best is not None else np.inf
79
-
80
- best = b
81
- params += p
82
-
83
- if len(params) > 1 and improvement < 1.2:
84
- break
85
- return params
86
-
87
91
  def __contains(self, strategies, strategy):
88
92
  for s in strategies:
89
93
  if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -116,16 +120,16 @@ class PEDRO(GridOptimizer):
116
120
 
117
121
  params = []
118
122
  for strategy in strategies:
119
- params += self.__search_depth(strategy,
120
- strategy.partition_number(self.dataframe.columns[:-1]) > avg,
121
- base_partitions * 3)
123
+ params += self._search_depth(strategy,
124
+ strategy.partition_number(self.dataframe.columns[:-1]) > avg,
125
+ base_partitions * 3)
122
126
  self.params = params
123
127
 
124
128
  def _print_params(self, name, params):
125
129
  print("**********************")
126
130
  print(f"Best {name}")
127
131
  print("**********************")
128
- print(f"MAE = {params[0]:.2f}, {params[1]} rules")
132
+ print(f"Error = {params[0]:.2f}, {params[1]} rules")
129
133
  print(f"Threshold = {params[2]:.2f}")
130
134
  print(f"Iterations = {params[3].iterations}")
131
135
  print(f"Strategy = {params[3].strategy}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: psyke
3
- Version: 0.5.2.dev3
3
+ Version: 0.5.4.dev1
4
4
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
5
5
  Home-page: https://github.com/psykei/psyke-python
6
6
  Author: Matteo Magnini
@@ -1,9 +1,9 @@
1
- psyke/__init__.py,sha256=u1B61ld8R6c8IrVu8cnw1SO1hiamXxYRg_ffS41HTrU,17194
2
- psyke/hypercubepredictor.py,sha256=ngy0k5ESck3FDwq8UViGw8wEdwm0uU1IJjSCxm_MSrA,4147
3
- psyke/clustering/__init__.py,sha256=lST9WKPZ5fT7sgHHk7BJi3OBd1_8douSA01FRl_xH7s,529
1
+ psyke/__init__.py,sha256=oi97R35NM2IvZ5kjm89sSXuMGFmd9RZuxlgniVywVuo,17575
2
+ psyke/hypercubepredictor.py,sha256=AEhpPzCxqIRUOtAUw-jZ9XueNtCf1zsFSPBvRzLSG6c,4229
3
+ psyke/clustering/__init__.py,sha256=36MokTVwwWR_-o0mesvXHaYEYVTK2pn2m0ZY4G3Y3qU,581
4
4
  psyke/clustering/utils.py,sha256=S0YwCKyHVYp9qUAQVzCMrTwcQFPJ5TD14Jwn10DE-Z4,1616
5
- psyke/clustering/cream/__init__.py,sha256=YDlhlDfrBnop3-1GjEJeFYNlPM68YPhM9Kb7TA1psi8,2864
6
- psyke/clustering/exact/__init__.py,sha256=txIseVHlFMBWcDifMc9mFYdcnIi3W3n3SQ2H1WRfNUc,5066
5
+ psyke/clustering/cream/__init__.py,sha256=W6k7vdjuUdA_azYA4vb5JtpWrofhDJ0DbM2jsnRKzfw,2994
6
+ psyke/clustering/exact/__init__.py,sha256=GpMGOcN2bGn3wfaUKOdis3vnLEtAx9j886qsk-O4N7k,5243
7
7
  psyke/extraction/__init__.py,sha256=_-j8zrRqulumYLmlxJ6qUxKmzT4epZu39kpZZIfLC4s,1622
8
8
  psyke/extraction/cart/__init__.py,sha256=IilEP4DxSAK9_x5ehPTvopuwlQqBMpGMiNRo-f90rqQ,4179
9
9
  psyke/extraction/cart/predictor.py,sha256=2-2mv5fI0lTwwfTaEonxKh0ZUdhxuIEE6OP_rJxgmqc,3019
@@ -12,7 +12,7 @@ psyke/extraction/hypercubic/hypercube.py,sha256=o98MA6yJNSw4DaV9PkLTtowwCMA2V64u
12
12
  psyke/extraction/hypercubic/strategy.py,sha256=X-roIsfcpJyMdo2px5JtbhP7-XE-zUNkaEK7XGXoWA8,1636
13
13
  psyke/extraction/hypercubic/utils.py,sha256=D2FN5CCm_T3h23DmLFoTnIcFo7LvIq__ktl4hjUqkcA,1525
14
14
  psyke/extraction/hypercubic/cosmik/__init__.py,sha256=8eVz_mZizIVU-AkE-FuGG3YBtQsrN3WFXjY-tZzY7Wc,1778
15
- psyke/extraction/hypercubic/creepy/__init__.py,sha256=SBgnc4iKHwXRulDijJnNW3eIDHtkvpA6bzG6N1E97i8,2255
15
+ psyke/extraction/hypercubic/creepy/__init__.py,sha256=pG8O1IH-x14OWRxPUbU8w4N59XYGfpvpfmWShHwKFiY,2410
16
16
  psyke/extraction/hypercubic/divine/__init__.py,sha256=-MO-uWeDkGZDTYu87puEuUi85Mmefo-HYRcA8Jn4K0Q,3496
17
17
  psyke/extraction/hypercubic/gridex/__init__.py,sha256=al2tBUc2YHsiMtu2T4mTNB_-8wY4rqYjV1AYqRdiNoY,5636
18
18
  psyke/extraction/hypercubic/gridrex/__init__.py,sha256=RtPJ5Nokcbk2H9pJAMvua3VzYOnT0HPakbPD4uAfEFk,562
@@ -22,18 +22,18 @@ psyke/extraction/real/utils.py,sha256=eHGU-Y0inn_8jrk9lMcuRUKXpsTkI-s_myXSWz4bAL
22
22
  psyke/extraction/trepan/__init__.py,sha256=1aiV7nZa4qGJhF8vASCeakzyV_vr-ojeO7ONH7oAj0Y,6640
23
23
  psyke/extraction/trepan/utils.py,sha256=iSUJ1ooNQT_VO1KfBZuIUeUsyUbGdQf_pSEE87vMeQg,2320
24
24
  psyke/schema/__init__.py,sha256=gOUWx3gYSkRehlJ5opK0Q16-Tv5fwSTl19k7kzIHALU,15760
25
- psyke/tuning/__init__.py,sha256=f3NS883R5RXinqd7EGEeb0kisY5SwSxAcvzbtLPdKG4,2886
26
- psyke/tuning/crash/__init__.py,sha256=EH129fDnYM3u-6DqqJAhlhETNdiBQ9LNPGGtOm30I_s,3450
27
- psyke/tuning/orchid/__init__.py,sha256=1CvqdQoRNZt23zQSPnqFIEzDU4xeN1Yk296HdEg1_sE,3513
28
- psyke/tuning/pedro/__init__.py,sha256=4E6nCjIu0OEO8OK7yyGQKzO5o8Kbc34IDLpTRDiGYrk,5567
25
+ psyke/tuning/__init__.py,sha256=I-07lLZb02DoIm9AGXPPPOkB55ANu8RU4TMy2j30Pxg,3574
26
+ psyke/tuning/crash/__init__.py,sha256=1c806aCGnRI9mwhDxXamgieX-d4U9i5BV1RqLlF3cho,2535
27
+ psyke/tuning/orchid/__init__.py,sha256=s64iABbteik27CrRPHSVHNZX25JKlDu7YYjhseOizxw,3618
28
+ psyke/tuning/pedro/__init__.py,sha256=cyPPLHmauJw0BEj7Ph8Fvg7hpoRyrknXtUrUn3ubH-o,6180
29
29
  psyke/utils/__init__.py,sha256=F-fgBT9CkthIwW8dDCuF5OoQDVMBNvIsZyvNqkgZNUA,1767
30
30
  psyke/utils/dataframe.py,sha256=cPbCl_paACCtO0twCiHKUcEKIYiT89WDwQ-f5I9oKrg,6841
31
31
  psyke/utils/logic.py,sha256=7bbW6qcKof5PlqoQ0n5Kt3Obcot-KqGAvpE8rMXvEPE,12419
32
32
  psyke/utils/metrics.py,sha256=Oo5BOonOSfo0qYsXWT5dmypZ7jiStByFC2MKEU0uMHg,2250
33
33
  psyke/utils/plot.py,sha256=HVk0psjispUTUQ0do6jnlEUrdZ75q7RQkz7jsj7JUWM,7541
34
34
  psyke/utils/sorted.py,sha256=C3CPW2JisND30BRk5c1sAAHs3Lb_wsRB2qZrYFuRnfM,678
35
- psyke-0.5.2.dev3.dist-info/LICENSE,sha256=KP9K6Hgezf_xdMFW7ORyKz9uA8Y8k52YJn292wcP-_E,11354
36
- psyke-0.5.2.dev3.dist-info/METADATA,sha256=a_gn7Yi-R2fyo2RVDCK-7l3_u7TVQvoB0gJJGx0GiDI,8107
37
- psyke-0.5.2.dev3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
38
- psyke-0.5.2.dev3.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
39
- psyke-0.5.2.dev3.dist-info/RECORD,,
35
+ psyke-0.5.4.dev1.dist-info/LICENSE,sha256=KP9K6Hgezf_xdMFW7ORyKz9uA8Y8k52YJn292wcP-_E,11354
36
+ psyke-0.5.4.dev1.dist-info/METADATA,sha256=yPQIViGqjVMPeYvei9K2jvi61Pmu7RFmCI0WQ4TYRNQ,8107
37
+ psyke-0.5.4.dev1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
38
+ psyke-0.5.4.dev1.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
39
+ psyke-0.5.4.dev1.dist-info/RECORD,,