psyke 0.5.2.dev4__tar.gz → 0.5.4.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (78)
  1. {psyke-0.5.2.dev4/psyke.egg-info → psyke-0.5.4.dev1}/PKG-INFO +1 -1
  2. psyke-0.5.4.dev1/VERSION +1 -0
  3. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/__init__.py +13 -11
  4. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/__init__.py +2 -2
  5. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/cream/__init__.py +4 -4
  6. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/exact/__init__.py +6 -3
  7. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/creepy/__init__.py +5 -3
  8. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/hypercubepredictor.py +5 -4
  9. psyke-0.5.4.dev1/psyke/tuning/__init__.py +87 -0
  10. psyke-0.5.4.dev1/psyke/tuning/crash/__init__.py +54 -0
  11. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/orchid/__init__.py +19 -22
  12. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/pedro/__init__.py +49 -45
  13. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1/psyke.egg-info}/PKG-INFO +1 -1
  14. psyke-0.5.2.dev4/VERSION +0 -1
  15. psyke-0.5.2.dev4/psyke/tuning/__init__.py +0 -77
  16. psyke-0.5.2.dev4/psyke/tuning/crash/__init__.py +0 -91
  17. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/LICENSE +0 -0
  18. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/MANIFEST.in +0 -0
  19. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/README.md +0 -0
  20. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/utils.py +0 -0
  21. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/__init__.py +0 -0
  22. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/cart/__init__.py +0 -0
  23. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/cart/predictor.py +0 -0
  24. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/__init__.py +0 -0
  25. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/cosmik/__init__.py +0 -0
  26. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/divine/__init__.py +0 -0
  27. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  28. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/gridrex/__init__.py +0 -0
  29. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/hypercube.py +0 -0
  30. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  31. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/strategy.py +0 -0
  32. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/utils.py +0 -0
  33. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/real/__init__.py +0 -0
  34. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/real/utils.py +0 -0
  35. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/trepan/__init__.py +0 -0
  36. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/trepan/utils.py +0 -0
  37. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/schema/__init__.py +0 -0
  38. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/__init__.py +0 -0
  39. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/dataframe.py +0 -0
  40. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/logic.py +0 -0
  41. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/metrics.py +0 -0
  42. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/plot.py +0 -0
  43. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/sorted.py +0 -0
  44. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/SOURCES.txt +0 -0
  45. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/dependency_links.txt +0 -0
  46. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/not-zip-safe +0 -0
  47. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/requires.txt +0 -0
  48. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/top_level.txt +0 -0
  49. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/pyproject.toml +0 -0
  50. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/setup.cfg +0 -0
  51. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/setup.py +0 -0
  52. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/__init__.py +0 -0
  53. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/clustering/__init__.py +0 -0
  54. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/__init__.py +0 -0
  55. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/cart/__init__.py +0 -0
  56. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/cart/test_cart.py +0 -0
  57. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/cart/test_simplified_cart.py +0 -0
  58. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/__init__.py +0 -0
  59. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  60. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
  61. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  62. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
  63. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/test_hypercube.py +0 -0
  64. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/real/__init__.py +0 -0
  65. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/real/test_real.py +0 -0
  66. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/real/test_rule.py +0 -0
  67. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/__init__.py +0 -0
  68. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/test_node.py +0 -0
  69. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/test_split.py +0 -0
  70. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/test_trepan.py +0 -0
  71. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/__init__.py +0 -0
  72. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/test_prune.py +0 -0
  73. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/test_simplify.py +0 -0
  74. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/test_simplify_formatter.py +0 -0
  75. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/__init__.py +0 -0
  76. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/datasets/__init__.py +0 -0
  77. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/predictors/__init__.py +0 -0
  78. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/tests/__init__.py +0 -0
{psyke-0.5.2.dev4/psyke.egg-info → psyke-0.5.4.dev1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.5.2.dev4
+Version: 0.5.4.dev1
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
psyke-0.5.4.dev1/VERSION NEW
@@ -0,0 +1 @@
+0.5.4.dev1
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/__init__.py
@@ -44,9 +44,9 @@ class EvaluableModel(object):
         V = 3,
         FMI = 4
 
-    def __init__(self, normalization=None, discretization=None):
-        self.normalization = normalization
+    def __init__(self, discretization=None, normalization=None):
         self.discretization = discretization
+        self.normalization = normalization
 
     def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
         """
@@ -312,13 +312,14 @@ class Extractor(EvaluableModel, ABC):
     @staticmethod
     def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                gauss_components: int = 2, ranks: [(str, float)] = [], ignore_threshold: float = 0.0,
-               discretization=None, normalization: dict[str, tuple[float, float]] = None) -> Extractor:
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new CReEPy extractor.
         """
         from psyke.extraction.hypercubic.creepy import CReEPy
         return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
-                      discretization, normalization, clustering)
+                      discretization, normalization, clustering, seed)
 
     @staticmethod
     def real(predictor, discretization=None) -> Extractor:
@@ -341,8 +342,8 @@ class Extractor(EvaluableModel, ABC):
 
 
 class Clustering(EvaluableModel, ABC):
-    def __init__(self, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
 
     def fit(self, dataframe: pd.DataFrame):
         raise NotImplementedError('fit')
@@ -351,18 +352,19 @@ class Clustering(EvaluableModel, ABC):
         raise NotImplementedError('explain')
 
     @staticmethod
-    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-              gauss_components: int = 2) -> Clustering:
+    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new ExACT instance.
         """
         from psyke.clustering.exact import ExACT
-        return ExACT(depth, error_threshold, output, gauss_components)
+        return ExACT(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
 
     @staticmethod
-    def cream(depth: int, error_threshold: float, output, gauss_components: int = 2) -> Clustering:
+    def cream(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new CREAM instance.
         """
         from psyke.clustering.cream import CREAM
-        return CREAM(depth, error_threshold, output, gauss_components)
+        return CREAM(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
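Two API consequences are worth noting in this file: EvaluableModel.__init__ swaps the order of its normalization and discretization parameters, which silently transposes the two for any caller passing them positionally, and the exact/cream factories gain a seed argument. A minimal usage sketch of the new signatures, assuming a pandas DataFrame df with the target in the last column (df is an assumption, not part of the diff):

    from psyke import Clustering
    from psyke.utils import Target

    # seed is new in 0.5.4.dev1; pass discretization/normalization by keyword
    # to stay safe across the argument reorder.
    clustering = Clustering.exact(depth=2, error_threshold=0.1, output=Target.CONSTANT,
                                  gauss_components=2, seed=42)
    clustering.fit(df)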
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/__init__.py
@@ -8,8 +8,8 @@ from psyke.hypercubepredictor import HyperCubePredictor
 
 class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):
 
-    def __init__(self, output: Target = Target.CONSTANT, normalization=None):
-        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+    def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
+        HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
 
     def get_hypercubes(self) -> Iterable[HyperCube]:
         raise NotImplementedError('get_hypercubes')
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/cream/__init__.py
@@ -5,7 +5,7 @@ from typing import Iterable
 import numpy as np
 import pandas as pd
 
-import psyke.utils
+from psyke.utils import Target, get_default_random_seed
 from psyke.clustering.exact import ExACT
 from psyke.extraction.hypercubic import Node, HyperCube, ClosedCube
 from psyke.clustering.utils import select_gaussian_mixture
@@ -16,9 +16,9 @@ class CREAM(ExACT):
     Explanator implementing CREAM algorithm.
     """
 
-    def __init__(self, depth: int, error_threshold: float,
-                 output: psyke.utils.Target = psyke.utils.Target.CONSTANT, gauss_components: int = 5):
-        super().__init__(depth, error_threshold, output, gauss_components)
+    def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
 
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/exact/__init__.py
@@ -13,7 +13,7 @@ from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
 from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
 from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 
 
 class ExACT(HyperCubeClustering, ABC):
@@ -22,13 +22,15 @@ class ExACT(HyperCubeClustering, ABC):
     """
 
     def __init__(self, depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-                 gauss_components: int = 2, normalization=None):
-        super().__init__(output, normalization)
+                 gauss_components: int = 2, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(output, discretization, normalization)
         self.depth = depth
         self.error_threshold = error_threshold
         self.gauss_components = gauss_components
         self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
         self._predictor.n_neighbors = 1
+        self.seed = seed
 
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
@@ -56,6 +58,7 @@ class ExACT(HyperCubeClustering, ABC):
         )
 
     def fit(self, dataframe: pd.DataFrame):
+        np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
         self._hypercubes = \
             self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
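The np.random.seed(self.seed) call added at the top of ExACT.fit reseeds NumPy's global RNG, so two fits with the same seed should follow the same clustering path. A sketch of the expected contract (assumed behaviour, not asserted by the diff):

    a = Clustering.exact(seed=123)
    a.fit(df)                       # df as above: features, then target column
    b = Clustering.exact(seed=123)
    b.fit(df)
    # a.get_hypercubes() and b.get_hypercubes() should coincide; since the seed
    # is set globally, interleaved NumPy calls elsewhere can still break this.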
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/creepy/__init__.py
@@ -9,7 +9,7 @@ from tuprolog.theory import Theory
 from psyke import Clustering
 from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import HyperCubeExtractor
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 from psyke.utils.logic import last_in_body
 
 
@@ -20,10 +20,12 @@ class CReEPy(HyperCubeExtractor):
 
     def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                  gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
-                 discretization=None, normalization=None, clustering=Clustering.exact):
+                 discretization=None, normalization=None, clustering=Clustering.exact,
+                 seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
                          discretization, normalization)
-        self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
+        self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
+                                     normalization, seed)
         self.ranks = ranks
         self.ignore_threshold = ignore_threshold
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/hypercubepredictor.py
@@ -11,17 +11,18 @@ from psyke.extraction.hypercubic import RegressionCube, GenericCube, Point
 
 
 class HyperCubePredictor(EvaluableModel):
-    def __init__(self, output=Target.CONSTANT, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, output=Target.CONSTANT, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
         self._hypercubes = []
         self._output = output
+        self._surrounding = None
 
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
 
     def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
                        mapping: dict[str: int] = None) -> Iterable:
-        predictions = self._predict(dataframe)
+        predictions = np.array(self._predict(dataframe))
         idx = [prediction is None for prediction in predictions]
         if sum(idx) > 0:
             if criterion == 'default':
@@ -46,7 +47,7 @@ class HyperCubePredictor(EvaluableModel):
 
     def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
         distances = [(
-            cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
+            cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
         ) for cube in self._hypercubes]
         return min(distances)[-1]
 
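The one-character change in _brute_predict_surface is a genuine bug fix: row.values without parentheses is the bound dict method, not the values themselves. A minimal illustration:

    row = {'x': 1.0, 'y': 2.0}
    # list(row.values)  raises TypeError: 'builtin_function_or_method' object is not iterable
    list(row.values())  # [1.0, 2.0]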
psyke-0.5.4.dev1/psyke/tuning/__init__.py NEW
@@ -0,0 +1,87 @@
+from abc import ABC
+from enum import Enum
+import numpy as np
+import pandas as pd
+
+from psyke.extraction.hypercubic import Grid
+from psyke.utils import Target
+
+
+class Objective(Enum):
+    MODEL = 1,
+    DATA = 2
+
+
+class Optimizer:
+    def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 normalization=None, discretization=None):
+        self.dataframe = dataframe
+        self.output = output
+        self.max_error_increase = max_error_increase
+        self.min_rule_decrease = min_rule_decrease
+        self.readability_tradeoff = readability_tradeoff
+        self.patience = patience
+        self.params = None
+        self.normalization = normalization
+        self.discretization = discretization
+
+    def search(self):
+        raise NotImplementedError
+
+    def _best(self, params):
+        param_dict = {self._score(t): t for t in params}
+        min_param = min(param_dict)
+        return min_param, param_dict[min_param]
+
+    def _score(self, param):
+        return param[0] * np.ceil(param[1] * self.readability_tradeoff)
+
+    def _best_param(self, param):
+        param_dict = {t[param]: t for t in self.params}
+        min_param = min(param_dict)
+        return min_param, param_dict[min_param]
+
+    def get_best(self):
+        names = ["Combined", "Predictive loss", "N rules"]
+        params = [self._best(self.params), self._best_param(0), self._best_param(1)]
+        for n, p in zip(names, params):
+            self._print_params(n, p[1])
+            print()
+        return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
+
+    def _print_params(self, n, param):
+        raise NotImplementedError
+
+
+class SKEOptimizer(Optimizer, ABC):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
+                 normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.predictor = predictor
+        self.objective = objective
+
+
+class IterativeOptimizer(Optimizer, ABC):
+    def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.max_depth = max_depth
+
+    def _iteration_improvement(self, best, other):
+        if other[0] == best[0]:
+            return (best[1] - other[1]) * 2
+        return 1 / (
+            (1 - other[0] / best[0]) ** self.readability_tradeoff *
+            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
+        )
+
+    def _check_iteration_improvement(self, best, current):
+        improvement = \
+            self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
+        return current, improvement < 1.2
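Besides the rename from max_mae_increase to max_error_increase, the combined score changes meaning: the old static __score used ceil(n_rules / 5), while the new _score uses ceil(n_rules * readability_tradeoff), so the rule-count penalty is now tunable per optimizer. A standalone reimplementation for illustration (not library API):

    import numpy as np

    def score(error, n_rules, readability_tradeoff=0.1):
        # mirrors Optimizer._score in 0.5.4.dev1
        return error * np.ceil(n_rules * readability_tradeoff)

    score(0.30, 12)  # 0.30 * ceil(1.2) = 0.60; the 0.5.2.dev4 rule gave 0.30 * ceil(12 / 5) = 0.90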
psyke-0.5.4.dev1/psyke/tuning/crash/__init__.py NEW
@@ -0,0 +1,54 @@
+import pandas as pd
+
+from psyke.tuning import Objective, SKEOptimizer
+from psyke.tuning.orchid import OrCHiD
+from psyke.utils import Target
+
+
+class CRASH(SKEOptimizer):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
+                 objective: Objective = Objective.MODEL, normalization=None, discretization=None):
+        super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, objective, output, normalization, discretization)
+        self.max_depth = max_depth
+        self.max_gauss_components = max_gauss_components
+
+    def search(self):
+        self.params = []
+        for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
+            self.params += self.__search_algorithm(algorithm)
+
+    def __search_algorithm(self, algorithm):
+        params = []
+        best = None
+
+        for gauss_components in range(2, self.max_gauss_components + 1):
+            data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
+                else self.dataframe
+            current_params = self.__search_components(data, algorithm, gauss_components)
+            current_best = self._best(current_params)[1]
+            if best is not None and self._score(best) <= self._score(current_best):
+                break
+            best = current_best
+            params += current_params
+
+        return params
+
+    def __search_components(self, data, algorithm, gauss_components):
+        orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
+                        self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
+                        self.normalization, self.discretization)
+        orchid.search()
+        return [(*p, gauss_components, algorithm) for p in orchid.params]
+
+    def _print_params(self, name, params):
+        print("*****************************")
+        print(f"Best {name}")
+        print("*****************************")
+        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Algorithm = {params[5]}")
+        print(f"Threshold = {params[3]:.2f}")
+        print(f"Depth = {params[2]}")
+        print(f"Gaussian components = {params[4]}")
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/orchid/__init__.py
@@ -4,53 +4,50 @@ import numpy as np
 import pandas as pd
 
 from psyke import Clustering, EvaluableModel
-from psyke.tuning import Optimizer
+from psyke.tuning import Optimizer, IterativeOptimizer
 from psyke.utils import Target
 
 
-class OrCHiD(Optimizer):
+class OrCHiD(IterativeOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
     def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
-                 patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         patience, normalization, discretization)
-        self.max_depth = max_depth
+                 max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
+                 patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
+        super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
+                         output, normalization, discretization)
+        self.algorithm = algorithm
+        self.gauss_components = gauss_components
 
     def search(self):
         self.params = self.__search_depth()
 
     def __search_depth(self):
-        params = []
-        best = None
+        params, best = [], None
 
         for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
+            current_params = self.__search_threshold(depth)
+            current_best = self._best(current_params)[1]
             print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
 
-            if len(params) > 1 and improvement < 1.2:
+            if len(params) > 1 and to_break:
                 break
         return params
 
     def __search_threshold(self, depth):
         step = 1.0
-        threshold = 1.0  # self.max_mae_increase * 0.9
+        threshold = 1.0
        params = []
         patience = self.patience
         while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
+            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
+                  f"Gaussian components = {self.gauss_components}. ", end="")
             clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
-                depth=depth, error_threshold=threshold, gauss_components=10, output=self.output
+                depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
             )
             clustering.fit(self.dataframe)
             task, metric = \
@@ -72,7 +69,7 @@ class OrCHiD(Optimizer):
                 params.append((p, n, depth, threshold))
                 break
 
-            if p > params[0][0] * self.max_mae_increase:
+            if p > params[0][0] * self.max_error_increase:
                 break
 
             improvement = (params[-1][0] / p) + (1 - n / params[-1][1])
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/pedro/__init__.py
@@ -1,30 +1,52 @@
 import numpy as np
 import pandas as pd
 from enum import Enum
-from psyke import Extractor
+
+from sklearn.metrics import accuracy_score
+
+from psyke import Extractor, Target
 from psyke.extraction.hypercubic import Grid, FeatureRanker
 from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
-from psyke.tuning import Objective, GridOptimizer
+from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
 
 
-class PEDRO(GridOptimizer):
+class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
         GRIDREX = 2
 
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
                  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
-                 normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
+                 output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
+                              readability_tradeoff, patience, objective, output, normalization, discretization)
+        IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                                    max_depth, patience, output, normalization, discretization)
+        self.algorithm = algorithm
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
-        self.model_mae = abs(self.predictor.predict(dataframe.iloc[:, :-1]).flatten() -
-                             self.dataframe.iloc[:, -1].values).mean()
+        predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
+        expected = self.dataframe.iloc[:, -1].values
+        self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
+            abs(predictions - expected).mean()
+
+    def _search_depth(self, strategy, critical, max_partitions):
+        params, best = [], None
+
+        for iterations in range(self.max_depth):
+            current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
+            current_best = self._best(current_params)[1]
+            print()
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
+
+            if len(params) > 1 and to_break:
+                break
+        return params
 
     def __search_threshold(self, grid, critical, max_partitions):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.5
+        step = self.error / 2.0
+        threshold = self.error * 0.5
         params = []
         patience = self.patience
         while patience > 0:
@@ -33,12 +55,14 @@ class PEDRO(GridOptimizer):
                 if self.algorithm == PEDRO.Algorithm.GRIDREX \
                 else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
             _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print("MAE = {:.2f}, {} rules".format(mae, n))
+            error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
+                else extractor.mae
+            error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
+                        error_function(self.dataframe)), extractor.n_rules
+            print("MAE = {:.2f}, {} rules".format(error, n))
 
             if len(params) == 0:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 threshold += step
                 continue
 
@@ -46,44 +70,24 @@ class PEDRO(GridOptimizer):
                 break
 
             if n == 1:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 break
 
-            if mae > params[0][0] * self.max_mae_increase:
+            if error > params[0][0] * self.max_error_increase:
                 break
 
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
+            improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
 
             if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
                 patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
+                step = max(step, abs(error - threshold) / max(patience, 1))
             elif not critical:
                 patience = self.patience
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, threshold, grid))
+            if error != params[-1][0] or n != params[-1][1]:
+                params.append((error, n, threshold, grid))
             threshold += step
         return params
 
-    def __search_depth(self, strategy, critical, max_partitions):
-        params = []
-        best = None
-
-        for iterations in range(self.max_depth):
-            grid = Grid(iterations + 1, strategy)
-            p = self.__search_threshold(grid, critical, max_partitions)
-            b = GridOptimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
     def __contains(self, strategies, strategy):
         for s in strategies:
             if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -116,16 +120,16 @@ class PEDRO(GridOptimizer):
 
         params = []
         for strategy in strategies:
-            params += self.__search_depth(strategy,
-                                          strategy.partition_number(self.dataframe.columns[:-1]) > avg,
-                                          base_partitions * 3)
+            params += self._search_depth(strategy,
+                                         strategy.partition_number(self.dataframe.columns[:-1]) > avg,
+                                         base_partitions * 3)
         self.params = params
 
     def _print_params(self, name, params):
         print("**********************")
         print(f"Best {name}")
         print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Error = {params[0]:.2f}, {params[1]} rules")
         print(f"Threshold = {params[2]:.2f}")
         print(f"Iterations = {params[3].iterations}")
         print(f"Strategy = {params[3].strategy}")
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1/psyke.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.5.2.dev4
+Version: 0.5.4.dev1
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
psyke-0.5.2.dev4/VERSION DELETED
@@ -1 +0,0 @@
-0.5.2.dev4
psyke-0.5.2.dev4/psyke/tuning/__init__.py DELETED
@@ -1,77 +0,0 @@
-from abc import ABC
-from enum import Enum
-import numpy as np
-import pandas as pd
-
-from psyke.utils import Target
-
-
-class Objective(Enum):
-    MODEL = 1,
-    DATA = 2
-
-
-class Optimizer:
-    def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9,
-                 readability_tradeoff: float = 0.1, patience: int = 5,
-                 normalization=None, discretization=None):
-        self.dataframe = dataframe
-        self.algorithm = algorithm
-        self.output = output
-        self.max_mae_increase = max_mae_increase
-        self.min_rule_decrease = min_rule_decrease
-        self.readability_tradeoff = readability_tradeoff
-        self.patience = patience
-        self.params = None
-        self.normalization = normalization
-        self.discretization = discretization
-
-    def search(self):
-        raise NotImplementedError
-
-    def _depth_improvement(self, best, other):
-        if other[0] == best[0]:
-            return (best[1] - other[1]) * 2
-        return 1 / (
-            (1 - other[0] / best[0]) ** self.readability_tradeoff *
-            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
-        )
-
-    @staticmethod
-    def _best(params):
-        param_dict = {Optimizer.__score(t): t for t in params}
-        min_param = min(param_dict)
-        return min_param, param_dict[min_param]
-
-    @staticmethod
-    def __score(param):
-        return param[0] * np.ceil(param[1] / 5)
-
-    def _best_param(self, param):
-        param_dict = {t[param]: t for t in self.params}
-        min_param = min(param_dict)
-        return min_param, param_dict[min_param]
-
-    def get_best(self):
-        names = [self.algorithm, "Predictive loss", "N rules"]
-        params = [Optimizer._best(self.params), self._best_param(0), self._best_param(1)]
-        for n, p in zip(names, params):
-            self._print_params(n, p[1])
-            print()
-        return Optimizer._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
-
-    def _print_params(self, n, param):
-        raise NotImplementedError
-
-
-class GridOptimizer(Optimizer, ABC):
-    def __init__(self, predictor, algorithm, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
-                 normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         patience, normalization, discretization)
-        self.predictor = predictor
-        self.max_depth = max_depth
-        self.objective = objective
psyke-0.5.2.dev4/psyke/tuning/crash/__init__.py DELETED
@@ -1,91 +0,0 @@
-from enum import Enum
-
-import numpy as np
-import pandas as pd
-
-from psyke import Extractor, Clustering
-from psyke.tuning import Objective, Optimizer
-from psyke.utils import Target
-
-
-class CRASH(Optimizer):
-    class Algorithm(Enum):
-        ExACT = 1,
-        CREAM = 2
-
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, algorithm: Algorithm = Algorithm.CREAM, output: Target = Target.CONSTANT,
-                 objective: Objective = Objective.MODEL, normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
-        self.output = output
-
-    def search(self):
-        self.params = self.__search_depth()
-
-    def __search_depth(self):
-        params = []
-        best = None
-
-        for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
-    def __search_threshold(self, depth):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
-        params = []
-        patience = self.patience
-        while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
-            extractor = Extractor.creepy(
-                self.predictor, depth=depth, error_threshold=threshold, output=self.output,
-                gauss_components=10, normalization=self.normalization,
-                clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
-            )
-            _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
-
-            if len(params) == 0:
-                params.append((mae, n, depth, threshold))
-                threshold += step
-                continue
-
-            if (n == 1) or (mae == 0.0):
-                params.append((mae, n, depth, threshold))
-                break
-
-            if mae > params[0][0] * self.max_mae_increase:
-                break
-
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
-
-            if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
-                patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, depth, threshold))
-            threshold += step
-        return params
-
-    def _print_params(self, name, params):
-        print("**********************")
-        print(f"Best {name}")
-        print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
-        print(f"Threshold = {params[3]:.2f}")
-        print(f"Depth = {params[2]}")