psyke 0.6.1.dev4.tar.gz → 0.7.5.dev5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of psyke might be problematic.

Files changed (77)
  1. {psyke-0.6.1.dev4/psyke.egg-info → psyke-0.7.5.dev5}/PKG-INFO +3 -3
  2. psyke-0.7.5.dev5/VERSION +1 -0
  3. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/__init__.py +13 -3
  4. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/__init__.py +1 -5
  5. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/__init__.py +23 -5
  6. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/creepy/__init__.py +5 -16
  7. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridex/__init__.py +39 -36
  8. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridrex/__init__.py +2 -2
  9. psyke-0.7.5.dev5/psyke/extraction/hypercubic/hex/__init__.py +54 -0
  10. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/hypercube.py +7 -4
  11. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/iter/__init__.py +2 -1
  12. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/pedro/__init__.py +9 -6
  13. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/plot.py +5 -2
  14. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5/psyke.egg-info}/PKG-INFO +3 -3
  15. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/SOURCES.txt +1 -0
  16. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/requires.txt +2 -2
  17. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/setup.py +2 -2
  18. psyke-0.6.1.dev4/VERSION +0 -1
  19. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/LICENSE +0 -0
  20. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/MANIFEST.in +0 -0
  21. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/README.md +0 -0
  22. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/__init__.py +0 -0
  23. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/cream/__init__.py +0 -0
  24. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/exact/__init__.py +0 -0
  25. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/utils.py +0 -0
  26. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/cart/__init__.py +0 -0
  27. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/cart/predictor.py +0 -0
  28. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/cosmik/__init__.py +0 -0
  29. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/divine/__init__.py +0 -0
  30. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/strategy.py +0 -0
  31. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/utils.py +0 -0
  32. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/real/__init__.py +0 -0
  33. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/real/utils.py +0 -0
  34. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/trepan/__init__.py +0 -0
  35. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/trepan/utils.py +0 -0
  36. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/hypercubepredictor.py +0 -0
  37. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/schema/__init__.py +0 -0
  38. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/__init__.py +0 -0
  39. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/crash/__init__.py +0 -0
  40. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/orchid/__init__.py +0 -0
  41. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/__init__.py +0 -0
  42. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/dataframe.py +0 -0
  43. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/logic.py +0 -0
  44. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/metrics.py +0 -0
  45. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/sorted.py +0 -0
  46. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/dependency_links.txt +0 -0
  47. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/not-zip-safe +0 -0
  48. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/top_level.txt +0 -0
  49. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/pyproject.toml +0 -0
  50. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/setup.cfg +0 -0
  51. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/__init__.py +0 -0
  52. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/clustering/__init__.py +0 -0
  53. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/__init__.py +0 -0
  54. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/cart/__init__.py +0 -0
  55. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/cart/test_cart.py +0 -0
  56. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/cart/test_simplified_cart.py +0 -0
  57. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/__init__.py +0 -0
  58. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  59. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
  60. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  61. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
  62. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/test_hypercube.py +0 -0
  63. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/real/__init__.py +0 -0
  64. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/real/test_real.py +0 -0
  65. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/real/test_rule.py +0 -0
  66. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/__init__.py +0 -0
  67. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/test_node.py +0 -0
  68. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/test_split.py +0 -0
  69. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/test_trepan.py +0 -0
  70. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/__init__.py +0 -0
  71. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/test_prune.py +0 -0
  72. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/test_simplify.py +0 -0
  73. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/test_simplify_formatter.py +0 -0
  74. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/__init__.py +0 -0
  75. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/datasets/__init__.py +0 -0
  76. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/predictors/__init__.py +0 -0
  77. {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/tests/__init__.py +0 -0

{psyke-0.6.1.dev4/psyke.egg-info → psyke-0.7.5.dev5}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: psyke
- Version: 0.6.1.dev4
+ Version: 0.7.5.dev5
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
  Home-page: https://github.com/psykei/psyke-python
  Author: Matteo Magnini
@@ -23,8 +23,8 @@ Requires-Python: >=3.9.0, <3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: numpy~=1.26.0
- Requires-Dist: pandas~=2.1.0
- Requires-Dist: scikit-learn~=1.3.0
+ Requires-Dist: pandas~=2.2.0
+ Requires-Dist: scikit-learn~=1.4.0
  Requires-Dist: 2ppy~=0.4.0
  Requires-Dist: kneed~=0.8.1
  Requires-Dist: sympy~=1.11

psyke-0.7.5.dev5/VERSION
@@ -0,0 +1 @@
+ 0.7.5.dev5

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/__init__.py
@@ -291,14 +291,24 @@ class Extractor(EvaluableModel, ABC):
  normalization, output, seed)

  @staticmethod
- def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
- normalization: dict[str, tuple[float, float]] = None,
+ def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output: Target = Target.CONSTANT,
+ discretization=None, normalization: dict[str, tuple[float, float]] = None,
  seed: int = get_default_random_seed()) -> Extractor:
  """
  Creates a new GridEx extractor.
  """
  from psyke.extraction.hypercubic.gridex import GridEx
- return GridEx(predictor, grid, min_examples, threshold, normalization, seed)
+ return GridEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+
+ @staticmethod
+ def hex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output: Target = Target.CONSTANT,
+ discretization=None, normalization: dict[str, tuple[float, float]] = None,
+ seed: int = get_default_random_seed()) -> Extractor:
+ """
+ Creates a new HEx extractor.
+ """
+ from psyke.extraction.hypercubic.hex import HEx
+ return HEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)

  @staticmethod
  def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
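
The new Extractor.hex factory mirrors Extractor.gridex, so HEx can be driven through the same top-level API. A minimal usage sketch follows; it is not part of the diff, and the dataset, the predictor, and the Grid(1, FixedStrategy(2)) arguments are illustrative assumptions.

# Hypothetical usage sketch of the Extractor.hex factory introduced above.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from psyke import Extractor
from psyke.extraction.hypercubic import Grid
from psyke.extraction.hypercubic.strategy import FixedStrategy

dataframe = load_iris(as_frame=True).frame  # target must be the last column
predictor = KNeighborsClassifier().fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])

# Assumed grid: one refinement iteration, every feature split in two bins.
extractor = Extractor.hex(predictor, Grid(1, FixedStrategy(2)), min_examples=100, threshold=0.1)
theory = extractor.extract(dataframe)
print(theory)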

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/__init__.py
@@ -26,11 +26,7 @@ class PedagogicalExtractor(Extractor, ABC):
  new_y = pd.DataFrame(new_y).set_index(dataframe.index)
  data = dataframe.iloc[:, :-1].copy().join(new_y)
  data.columns = dataframe.columns
- theory = self._extract(data, mapping, sort)
- if isinstance(self, HyperCubeExtractor):
- self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
- self._surrounding.update(dataframe, self.predictor)
- return theory
+ return self._extract(data, mapping, sort)

  def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
  raise NotImplementedError('extract')

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/__init__.py
@@ -14,7 +14,7 @@ from psyke.extraction import PedagogicalExtractor
  from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
  GenericCube
  from psyke.hypercubepredictor import HyperCubePredictor
- from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
+ from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier, last_in_body
  from psyke.utils import Target
  from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy

@@ -24,6 +24,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
  HyperCubePredictor.__init__(self, output=output, normalization=normalization)
  PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
  self._surrounding = None
+ self._default_surrounding_cube = False

  def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
  if self._output == Target.CONSTANT:
@@ -37,14 +38,30 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
  cubes.sort()
  self._hypercubes = [cube[2] for cube in cubes]

+ def _last_cube_as_default(self, theory):
+ last_clause = list(theory.clauses)[-1]
+ theory.retract(last_clause)
+ theory.assertZ(clause(
+ last_clause.head, [last_in_body(last_clause.body)] if self._output is Target.REGRESSION else []))
+ last_cube = self._hypercubes[-1]
+ for dimension in last_cube.dimensions.keys():
+ last_cube[dimension] = [-np.inf, np.inf]
+ return theory
+
+ def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+ theory = PedagogicalExtractor.extract(self, dataframe, mapping, sort)
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+ self._surrounding.update(dataframe, self.predictor)
+ return theory
+
  @staticmethod
  def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
  return create_head(dataframe.columns[-1], variables[:-1], output) \
  if not isinstance(output, LinearRegression) else \
  create_head(dataframe.columns[-1], variables[:-1], variables[-1])

- def _ignore_dimensions(self) -> Iterable[str]:
- return []
+ def _ignore_dimensions(self, cube: HyperCube) -> Iterable[str]:
+ return [d for d in cube.dimensions if cube[d][0] == -np.inf or cube[d][1] == np.inf]

  def __drop(self, dataframe: pd.DataFrame):
  self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
@@ -59,9 +76,10 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
  variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
  head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
  self.unscale(cube.output, dataframe.columns[-1]))
- body = cube.body(variables, self._ignore_dimensions(), self.unscale, self.normalization)
+ body = cube.body(variables, self._ignore_dimensions(cube), self.unscale, self.normalization)
  new_theory.assertZ(clause(head, body))
- return HyperCubeExtractor._prettify_theory(new_theory)
+ new_theory = HyperCubeExtractor._prettify_theory(new_theory)
+ return self._last_cube_as_default(new_theory) if self._default_surrounding_cube else new_theory

  @staticmethod
  def _prettify_theory(theory: Theory) -> Theory:

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/creepy/__init__.py
@@ -4,13 +4,11 @@ from collections import Iterable
  import numpy as np
  import pandas as pd
  from sklearn.base import ClassifierMixin
- from tuprolog.core import clause
  from tuprolog.theory import Theory
  from psyke import Clustering
  from psyke.clustering import HyperCubeClustering
  from psyke.extraction.hypercubic import HyperCubeExtractor
  from psyke.utils import Target, get_default_random_seed
- from psyke.utils.logic import last_in_body


  class CReEPy(HyperCubeExtractor):
@@ -28,6 +26,7 @@ class CReEPy(HyperCubeExtractor):
  normalization, seed)
  self.ranks = ranks
  self.ignore_threshold = ignore_threshold
+ self._default_surrounding_cube = True

  def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
  if not isinstance(self.clustering, HyperCubeClustering):
@@ -36,17 +35,7 @@ class CReEPy(HyperCubeExtractor):
  self.clustering.fit(dataframe)
  self._hypercubes = self.clustering.get_hypercubes()
  for cube in self._hypercubes:
- for dimension in self._ignore_dimensions():
- cube[dimension] = [-np.inf, np.inf]
- theory = self._create_theory(dataframe)
- last_clause = list(theory.clauses)[-1]
- theory.retract(last_clause)
- theory.assertZ(clause(
- last_clause.head, [last_in_body(last_clause.body)] if self._output is Target.REGRESSION else []))
- last_cube = self._hypercubes[-1]
- for dimension in last_cube.dimensions.keys():
- last_cube[dimension] = [-np.inf, np.inf]
- return theory
-
- def _ignore_dimensions(self) -> Iterable[str]:
- return [dimension for dimension, relevance in self.ranks if relevance < self.ignore_threshold]
+ for dimension, relevance in self.ranks:
+ if relevance < self.ignore_threshold:
+ cube[dimension] = [-np.inf, np.inf]
+ return self._create_theory(dataframe)
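
In the rewritten CReEPy._extract the relevance filter is applied inline: every dimension whose rank falls below ignore_threshold is widened to (-inf, inf) on each cube, while the last-cube-as-default handling now lives in HyperCubeExtractor. A standalone sketch of the widening step, with invented feature names and rank values:

# Toy sketch of the rank-based widening (values are made up for illustration).
import numpy as np

ranks = [('petal_length', 0.91), ('sepal_width', 0.03)]  # hypothetical (feature, relevance) pairs
ignore_threshold = 0.1
cube = {'petal_length': [1.0, 3.0], 'sepal_width': [2.0, 3.5]}

for dimension, relevance in ranks:
    if relevance < ignore_threshold:
        cube[dimension] = [-np.inf, np.inf]  # irrelevant feature: no constraint in the emitted rule
print(cube)  # {'petal_length': [1.0, 3.0], 'sepal_width': [-inf, inf]}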

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridex/__init__.py
@@ -4,6 +4,7 @@ from itertools import product
  from typing import Iterable
  import numpy as np
  import pandas as pd
+ from sklearn.base import ClassifierMixin
  from tuprolog.theory import Theory
  from psyke import get_default_random_seed
  from psyke.utils import Target
@@ -15,26 +16,45 @@ class GridEx(HyperCubeExtractor):
  Explanator implementing GridEx algorithm, doi:10.1007/978-3-030-82017-6_2.
  """

- def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization=None,
- seed=get_default_random_seed()):
- super().__init__(predictor, Target.CONSTANT, normalization=normalization)
+ def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+ discretization=None, normalization=None, seed: int = get_default_random_seed()):
+ super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
+ discretization, normalization)
  self.grid = grid
  self.min_examples = min_examples
  self.threshold = threshold
- self.__generator = rnd.Random(seed)
+ self._generator = rnd.Random(seed)

  def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
  self._hypercubes = []
- if isinstance(np.array(self.predictor.predict(dataframe.iloc[0:1, :-1])).flatten()[0], str):
- self._output = Target.CLASSIFICATION
  surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
  surrounding.init_diversity(2 * self.threshold)
  self._iterate(surrounding, dataframe)
  return self._create_theory(dataframe, sort)

- def _ignore_dimensions(self) -> Iterable[str]:
- cube = self._hypercubes[0]
- return [d for d in cube.dimensions if all(c[d] == cube[d] for c in self._hypercubes)]
+ def _create_ranges(self, cube, iteration):
+ ranges = {}
+ for (feature, (a, b)) in cube.dimensions.items():
+ n_bins = self.grid.get(feature, iteration)
+ if n_bins == 1:
+ ranges[feature] = [(-np.inf, np.inf)]
+ else:
+ size = (b - a) / n_bins
+ ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
+ return ranges
+
+ def _cubes_to_split(self, cube, surrounding, iteration, dataframe, fake, keep_empty=False):
+ to_split = []
+ for (pn, p) in enumerate(list(product(*self._create_ranges(cube, iteration).values()))):
+ cube = self._default_cube()
+ for i, f in enumerate(dataframe.columns[:-1]):
+ cube.update_dimension(f, p[i])
+ n = cube.count(dataframe)
+ if n > 0 or keep_empty:
+ fake = pd.concat([fake, cube.create_samples(self.min_examples - n, surrounding, self._generator)])
+ cube.update(fake, self.predictor)
+ to_split.append(cube)
+ return to_split, fake

  def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
  fake = dataframe.copy()
@@ -44,31 +64,13 @@
  for iteration in self.grid.iterate():
  next_iteration = []
  for cube in prev:
- to_split = []
  if cube.count(dataframe) == 0:
  continue
  if cube.diversity < self.threshold:
  self._hypercubes += [cube]
  continue
- ranges = {}
- for (feature, (a, b)) in cube.dimensions.items():
- bins = []
- n_bins = self.grid.get(feature, iteration)
- size = (b - a) / n_bins
- for i in range(n_bins):
- bins.append((a + size * i, a + size * (i + 1)))
- ranges[feature] = bins
- for (pn, p) in enumerate(list(product(*ranges.values()))):
- cube = self._default_cube()
- for i, f in enumerate(dataframe.columns[:-1]):
- cube.update_dimension(f, p[i])
- n = cube.count(dataframe)
- if n > 0:
- fake = pd.concat([fake, cube.create_samples(self.min_examples - n, self.__generator)])
- cube.update(fake, self.predictor)
- to_split += [cube]
- to_split = self._merge(to_split, fake)
- next_iteration += [cube for cube in to_split]
+ to_split, fake = self._cubes_to_split(cube, surrounding, iteration, dataframe, fake)
+ next_iteration += [c for c in self._merge(to_split, fake)]
  prev = next_iteration.copy()
  self._hypercubes += [cube for cube in next_iteration]

@@ -102,15 +104,16 @@
  not_in_cache = [cube for cube in to_split]
  adjacent_cache = {}
  merge_cache = {}
- # TODO: refactor this. A while true with a break is as ugly as hunger.
- while True:
+ cont = True
+ while cont:
  to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
  GridEx._find_couples(to_split, not_in_cache, adjacent_cache) if
  self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
  if len(to_merge) == 0:
- break
- sorted(to_merge, key=lambda c: c[1].diversity)
- best = to_merge[0]
- to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
- not_in_cache = [best[1]]
+ cont = False
+ else:
+ sorted(to_merge, key=lambda c: c[1].diversity)
+ best = to_merge[0]
+ to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
+ not_in_cache = [best[1]]
  return to_split
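
The extracted _create_ranges helper keeps GridEx's equal-width partitioning but returns (-inf, inf) for features assigned a single bin. A standalone sketch of that split, with invented bounds and bin counts:

# Standalone sketch of the equal-width binning performed by _create_ranges.
import numpy as np

def create_ranges(dimensions, bins_per_feature):
    ranges = {}
    for feature, (a, b) in dimensions.items():
        n_bins = bins_per_feature[feature]
        if n_bins == 1:
            ranges[feature] = [(-np.inf, np.inf)]  # single bin: leave the feature unconstrained
        else:
            size = (b - a) / n_bins
            ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
    return ranges

print(create_ranges({'x': (0.0, 1.0), 'y': (0.0, 6.0)}, {'x': 2, 'y': 3}))
# {'x': [(0.0, 0.5), (0.5, 1.0)], 'y': [(0.0, 2.0), (2.0, 4.0), (4.0, 6.0)]}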

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridrex/__init__.py
@@ -1,4 +1,4 @@
- from psyke import get_default_random_seed
+ from psyke import get_default_random_seed, Target
  from psyke.extraction.hypercubic import Grid, RegressionCube
  from psyke.extraction.hypercubic.gridex import GridEx

@@ -10,7 +10,7 @@ class GridREx(GridEx):
  def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization,
  seed=get_default_random_seed()):
- super().__init__(predictor, grid, min_examples, threshold, normalization, seed)
+ super().__init__(predictor, grid, min_examples, threshold, Target.REGRESSION, None, normalization, seed)

  def _default_cube(self) -> RegressionCube:
  return RegressionCube()

psyke-0.7.5.dev5/psyke/extraction/hypercubic/hex/__init__.py
@@ -0,0 +1,54 @@
+ import numpy as np
+ import pandas as pd
+ from sklearn.linear_model import LinearRegression
+
+ from psyke import get_default_random_seed, Target
+ from psyke.extraction.hypercubic import Grid, HyperCube, GenericCube, ClassificationCube
+ from psyke.extraction.hypercubic.gridex import GridEx
+
+
+ class HEx(GridEx):
+ """
+ Explanator implementing HEx algorithm.
+ """
+
+ def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+ discretization=None, normalization=None, seed: int = get_default_random_seed()):
+ super().__init__(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+ self._default_surrounding_cube = True
+
+ def _different_output(self, this_cube: GenericCube, other_cube: GenericCube) -> bool:
+ if isinstance(this_cube.output, str) and this_cube.output == other_cube.output:
+ return False
+ if isinstance(this_cube.output, float) and abs(this_cube.output - other_cube.output) < self.threshold:
+ return False
+ if isinstance(this_cube.output, LinearRegression):
+ raise NotImplementedError
+ return True
+
+ def _gain(self, parent_cube: GenericCube, new_cube: GenericCube) -> float:
+ if isinstance(parent_cube, ClassificationCube):
+ return parent_cube.output != new_cube.output
+ return parent_cube.diversity - new_cube.diversity > self.threshold / 3.0
+
+ def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
+ fake = dataframe.copy()
+ surrounding.update(dataframe, self.predictor)
+ prev = [surrounding]
+ next_iteration = []
+
+ for iteration in self.grid.iterate():
+ next_iteration = []
+ for cube in prev:
+ # subcubes =
+ # [c for c in self._merge(self._cubes_to_split(cube, iteration, dataframe, fake, True), fake)]
+ subcubes, fake = self._cubes_to_split(cube, surrounding, iteration, dataframe, fake, True)
+ cleaned = [c for c in subcubes if c.count(dataframe) > 0 and self._gain(cube, c)]
+ if len(subcubes) > len(cleaned):
+ if len(cleaned) > 0:
+ idx = np.any([c.filter_indices(fake.iloc[:, :-1]) for c in cleaned], axis=0)
+ cube.update(fake[~idx], self.predictor)
+ self._hypercubes = [cube] + self._hypercubes
+ next_iteration += self._merge(cleaned, fake)
+ prev = next_iteration.copy()
+ self._hypercubes = [cube for cube in next_iteration] + self._hypercubes

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/hypercube.py
@@ -158,8 +158,9 @@ class HyperCube:
  return '*'
  raise Exception('Too many limits for this feature')

- def create_samples(self, n: int = 1, generator: Random = Random(get_default_random_seed())) -> pd.DataFrame:
- return pd.DataFrame([self._create_tuple(generator) for _ in range(n)])
+ def create_samples(self, n: int = 1, surrounding: GenericCube = None,
+ generator: Random = Random(get_default_random_seed())) -> pd.DataFrame:
+ return pd.DataFrame([self._create_tuple(generator, surrounding) for _ in range(n)])

  @staticmethod
  def check_overlap(to_check: Iterable[HyperCube], hypercubes: Iterable[HyperCube]) -> bool:
@@ -208,8 +209,10 @@ class HyperCube:
  return RegressionCube(dimensions)
  return HyperCube(dimensions)

- def _create_tuple(self, generator: Random) -> dict:
- return {k: generator.uniform(self.get_first(k), self.get_second(k)) for k in self._dimensions.keys()}
+ def _create_tuple(self, generator: Random, surrounding: GenericCube) -> dict:
+ minmax = {k: (self[k][0] if np.isfinite(self[k][0]) else surrounding[k][0],
+ self[k][1] if np.isfinite(self[k][1]) else surrounding[k][1]) for k in self._dimensions.keys()}
+ return {k: generator.uniform(minmax[k][0], minmax[k][1]) for k in self._dimensions.keys()}

  @staticmethod
  def cube_from_point(point: dict[str, float], output=None) -> GenericCube:
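
create_samples now threads an optional surrounding cube down to _create_tuple, which falls back to the surrounding bounds whenever one of the cube's own bounds is infinite (as happens for widened dimensions). A standalone sketch of that fallback, with invented bounds:

# Standalone sketch of the bound fallback used by _create_tuple (illustrative values).
import numpy as np
from random import Random

cube = {'x': (-np.inf, np.inf), 'y': (0.2, 0.4)}  # one dimension widened to infinity
surrounding = {'x': (0.0, 1.0), 'y': (0.0, 1.0)}  # dataset-wide bounds
generator = Random(0)

sample = {}
for k, (lo, hi) in cube.items():
    lo = lo if np.isfinite(lo) else surrounding[k][0]  # fall back to the surrounding cube
    hi = hi if np.isfinite(hi) else surrounding[k][1]
    sample[k] = generator.uniform(lo, hi)
print(sample)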

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/iter/__init__.py
@@ -38,7 +38,8 @@ class ITER(HyperCubeExtractor):
  expansions = []
  for limit in cubes:
  count = limit.cube.count(dataframe)
- dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count, self.__generator)])
+ dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count,
+ generator=self.__generator)])
  limit.cube.update(dataframe, self.predictor)
  expansions.append(Expansion(
  limit.cube, limit.feature, limit.direction,

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/pedro/__init__.py
@@ -13,7 +13,8 @@ from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
  class PEDRO(SKEOptimizer, IterativeOptimizer):
  class Algorithm(Enum):
  GRIDEX = 1,
- GRIDREX = 2
+ GRIDREX = 2,
+ HEX = 3

  def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
@@ -23,7 +24,10 @@ class PEDRO(SKEOptimizer, IterativeOptimizer):
  readability_tradeoff, patience, objective, output, normalization, discretization)
  IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
  max_depth, patience, output, normalization, discretization)
- self.algorithm = algorithm
+ self.algorithm = Extractor.gridrex if algorithm == PEDRO.Algorithm.GRIDREX else \
+ Extractor.gridex if algorithm == PEDRO.Algorithm.GRIDEX else Extractor.hex
+ self.algorithm_name = "GridREx" if algorithm == PEDRO.Algorithm.GRIDREX else \
+ "GridEx" if algorithm == PEDRO.Algorithm.GRIDEX else "HEx"
  self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
  predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
  expected = self.dataframe.iloc[:, -1].values
@@ -50,10 +54,9 @@ class PEDRO(SKEOptimizer, IterativeOptimizer):
  params = []
  patience = self.patience
  while patience > 0:
- print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm, grid, threshold), end="")
- extractor = Extractor.gridrex(self.predictor, grid, threshold=threshold, normalization=self.normalization) \
- if self.algorithm == PEDRO.Algorithm.GRIDREX \
- else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
+ print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm_name, grid, threshold), end="")
+ extractor = self.algorithm(self.predictor, grid, min_examples=25,
+ threshold=threshold, normalization=self.normalization)
  _ = extractor.extract(self.dataframe)
  error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
  else extractor.mae

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/plot.py
@@ -44,8 +44,11 @@ def plot_classification_samples(dataframe, classes, colors, markers, labels, loc


  def plot_boundaries(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
- a: float = .5, h: str = '////////', ls='-', e=.05, fc='none', ec=None):
- for cube in extractor._hypercubes:
+ a: float = .5, h: str = '////////', ls='-', e=.05, fc='none', ec=None, reverse=False):
+ cubes = extractor._hypercubes.copy()
+ if reverse:
+ cubes.reverse()
+ for cube in cubes:
  plt.gca().fill_between((cube[x][0] - e, cube[x][1] + e), cube[y][0] - e, cube[y][1] + e,
  fc=colors[cube.output] if fc is None else fc,
  ec=colors[cube.output] if ec is None else ec, alpha=a, hatch=h, linestyle=ls)

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5/psyke.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: psyke
- Version: 0.6.1.dev4
+ Version: 0.7.5.dev5
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
  Home-page: https://github.com/psykei/psyke-python
  Author: Matteo Magnini
@@ -23,8 +23,8 @@ Requires-Python: >=3.9.0, <3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: numpy~=1.26.0
- Requires-Dist: pandas~=2.1.0
- Requires-Dist: scikit-learn~=1.3.0
+ Requires-Dist: pandas~=2.2.0
+ Requires-Dist: scikit-learn~=1.4.0
  Requires-Dist: 2ppy~=0.4.0
  Requires-Dist: kneed~=0.8.1
  Requires-Dist: sympy~=1.11

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/SOURCES.txt
@@ -28,6 +28,7 @@ psyke/extraction/hypercubic/creepy/__init__.py
  psyke/extraction/hypercubic/divine/__init__.py
  psyke/extraction/hypercubic/gridex/__init__.py
  psyke/extraction/hypercubic/gridrex/__init__.py
+ psyke/extraction/hypercubic/hex/__init__.py
  psyke/extraction/hypercubic/iter/__init__.py
  psyke/extraction/real/__init__.py
  psyke/extraction/real/utils.py

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/requires.txt
@@ -1,6 +1,6 @@
  numpy~=1.26.0
- pandas~=2.1.0
- scikit-learn~=1.3.0
+ pandas~=2.2.0
+ scikit-learn~=1.4.0
  2ppy~=0.4.0
  kneed~=0.8.1
  sympy~=1.11

{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/setup.py
@@ -15,8 +15,8 @@ EPOCHS: int = 50
  BATCH_SIZE: int = 16
  REQUIREMENTS = [
  'numpy~=1.26.0',
- 'pandas~=2.1.0',
- 'scikit-learn~=1.3.0',
+ 'pandas~=2.2.0',
+ 'scikit-learn~=1.4.0',
  '2ppy~=0.4.0',
  'kneed~=0.8.1',
  'sympy~=1.11'
psyke-0.6.1.dev4/VERSION DELETED
@@ -1 +0,0 @@
- 0.6.1.dev4