psyke 0.8.2.dev18__tar.gz → 0.8.3.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of psyke might be problematic. Click here for more details.

Files changed (78)
  1. {psyke-0.8.2.dev18/psyke.egg-info → psyke-0.8.3.dev2}/PKG-INFO +1 -1
  2. psyke-0.8.3.dev2/VERSION +1 -0
  3. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/clustering/exact/__init__.py +2 -2
  4. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/__init__.py +12 -11
  5. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/creepy/__init__.py +1 -0
  6. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/divine/__init__.py +1 -0
  7. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/gridex/__init__.py +10 -11
  8. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/hex/__init__.py +14 -10
  9. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/hypercube.py +74 -32
  10. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/iter/__init__.py +30 -42
  11. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/hypercubepredictor.py +3 -6
  12. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2/psyke.egg-info}/PKG-INFO +1 -1
  13. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/setup.py +2 -2
  14. psyke-0.8.3.dev2/test/psyke/__init__.py +89 -0
  15. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/cart/test_simplified_cart.py +6 -9
  16. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/hypercubic/test_hypercube.py +2 -2
  17. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/trepan/test_node.py +18 -18
  18. psyke-0.8.2.dev18/VERSION +0 -1
  19. psyke-0.8.2.dev18/test/psyke/__init__.py +0 -149
  20. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/LICENSE +0 -0
  21. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/MANIFEST.in +0 -0
  22. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/README.md +0 -0
  23. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/__init__.py +0 -0
  24. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/clustering/__init__.py +0 -0
  25. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/clustering/cream/__init__.py +0 -0
  26. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/clustering/utils.py +0 -0
  27. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/__init__.py +0 -0
  28. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/cart/__init__.py +0 -0
  29. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/cart/predictor.py +0 -0
  30. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/cosmik/__init__.py +0 -0
  31. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/gridrex/__init__.py +0 -0
  32. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/strategy.py +0 -0
  33. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/utils.py +0 -0
  34. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/real/__init__.py +0 -0
  35. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/real/utils.py +0 -0
  36. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/trepan/__init__.py +0 -0
  37. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/trepan/utils.py +0 -0
  38. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/schema/__init__.py +0 -0
  39. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/tuning/__init__.py +0 -0
  40. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/tuning/crash/__init__.py +0 -0
  41. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/tuning/orchid/__init__.py +0 -0
  42. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/tuning/pedro/__init__.py +0 -0
  43. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/utils/__init__.py +0 -0
  44. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/utils/dataframe.py +0 -0
  45. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/utils/logic.py +0 -0
  46. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/utils/metrics.py +0 -0
  47. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/utils/plot.py +0 -0
  48. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/utils/sorted.py +0 -0
  49. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke.egg-info/SOURCES.txt +0 -0
  50. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke.egg-info/dependency_links.txt +0 -0
  51. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke.egg-info/not-zip-safe +0 -0
  52. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke.egg-info/requires.txt +0 -0
  53. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke.egg-info/top_level.txt +0 -0
  54. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/pyproject.toml +0 -0
  55. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/setup.cfg +0 -0
  56. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/clustering/__init__.py +0 -0
  57. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/__init__.py +0 -0
  58. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/cart/__init__.py +0 -0
  59. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/cart/test_cart.py +0 -0
  60. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/hypercubic/__init__.py +0 -0
  61. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  62. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
  63. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  64. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
  65. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/real/__init__.py +0 -0
  66. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/real/test_real.py +0 -0
  67. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/real/test_rule.py +0 -0
  68. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/trepan/__init__.py +0 -0
  69. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/trepan/test_split.py +0 -0
  70. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/extraction/trepan/test_trepan.py +0 -0
  71. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/utils/__init__.py +0 -0
  72. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/utils/test_prune.py +0 -0
  73. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/utils/test_simplify.py +0 -0
  74. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/psyke/utils/test_simplify_formatter.py +0 -0
  75. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/resources/__init__.py +0 -0
  76. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/resources/datasets/__init__.py +0 -0
  77. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/resources/predictors/__init__.py +0 -0
  78. {psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/test/resources/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: psyke
3
- Version: 0.8.2.dev18
3
+ Version: 0.8.3.dev2
4
4
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
5
5
  Home-page: https://github.com/psykei/psyke-python
6
6
  Author: Matteo Magnini
@@ -0,0 +1 @@
1
+ 0.8.3.dev2
@@ -60,8 +60,8 @@ class ExACT(HyperCubeClustering, ABC):
60
60
  def fit(self, dataframe: pd.DataFrame):
61
61
  np.random.seed(self.seed)
62
62
  self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
63
- self._hypercubes = \
64
- self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
63
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, True, self._output)
64
+ self._hypercubes = self._iterate(Node(dataframe, self._surrounding))
65
65
 
66
66
  def get_hypercubes(self) -> Iterable[HyperCube]:
67
67
  return list(self._hypercubes)
@@ -8,7 +8,6 @@ from sklearn.feature_selection import SelectKBest, f_regression, f_classif
8
8
  from sklearn.linear_model import LinearRegression
9
9
  from tuprolog.core import Var, Struct, clause
10
10
  from tuprolog.theory import Theory, mutable_theory
11
- from psyke import logger
12
11
  from psyke.extraction import PedagogicalExtractor
13
12
  from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
14
13
  GenericCube
@@ -23,7 +22,6 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
23
22
  def __init__(self, predictor, output, discretization=None, normalization=None):
24
23
  HyperCubePredictor.__init__(self, output=output, normalization=normalization)
25
24
  PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
26
- self._surrounding = None
27
25
  self._default_surrounding_cube = False
28
26
 
29
27
  def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
@@ -72,7 +70,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
72
70
  output += "The extracted knowledge is not exhaustive; impossible to predict this instance"
73
71
  else:
74
72
  prediction = self._predict_from_cubes(data)
75
- output += f"The output is {prediction}\n"
73
+ output += f"The output is {prediction}"
76
74
 
77
75
  point = Point(list(data.keys()), list(data.values()))
78
76
  cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
@@ -82,7 +80,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
82
80
  for _, _, _, c in cubes:
83
81
  if c.output not in outputs:
84
82
  outputs.append(c.output)
85
- output += f"The output may be {c.output} if"
83
+ output += f"\nThe output may be {c.output} if"
86
84
 
87
85
  for d in point.dimensions.keys():
88
86
  lower, upper = c[d]
@@ -98,12 +96,10 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
98
96
  return prediction, different_prediction_reasons
99
97
 
100
98
  def __get_local_conditions(self, data: dict[str, float], cube: GenericCube) -> dict[list[Value]]:
101
- conditions = {d: [] for d in cube.dimensions}
102
- for d in cube.finite_dimensions:
103
- conditions[d].append(Between(*cube.dimensions[d]))
99
+ conditions = {d: [Between(*cube.dimensions[d])] for d in cube.dimensions}
104
100
  subcubes = cube.subcubes(self._hypercubes)
105
101
  for c in [c for c in subcubes if sum(c in sc and c != sc for sc in subcubes) == 0]:
106
- for d in [d for d in c.finite_dimensions if d in data]:
102
+ for d in [d for d in c.dimensions if d in data]:
107
103
  if c.dimensions[d][0] > data[d] or c.dimensions[d][1] < data[d]:
108
104
  conditions[d].append(Outside(*c.dimensions[d]))
109
105
  return conditions
@@ -171,14 +167,19 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
171
167
  self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
172
168
 
173
169
  def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
174
- self.__drop(dataframe)
170
+ # self.__drop(dataframe)
171
+ for cube in self._hypercubes:
172
+ for dimension in cube.dimensions:
173
+ if abs(cube[dimension][0] - self._surrounding[dimension][0]) < HyperCube.EPSILON * 2:
174
+ cube.set_infinite(dimension, '-')
175
+ if abs(cube[dimension][1] - self._surrounding[dimension][1]) < HyperCube.EPSILON * 2:
176
+ cube.set_infinite(dimension, '+')
177
+
175
178
  if self._default_surrounding_cube:
176
179
  self._hypercubes[-1].set_default()
177
180
 
178
181
  new_theory = mutable_theory()
179
182
  for cube in self._hypercubes:
180
- logger.info(cube.output)
181
- logger.info(cube.dimensions)
182
183
  variables = create_variable_list([], dataframe)
183
184
  variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
184
185
  head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
@@ -33,4 +33,5 @@ class CReEPy(HyperCubeExtractor):
33
33
 
34
34
  self.clustering.fit(dataframe)
35
35
  self._hypercubes = self.clustering.get_hypercubes()
36
+ self._surrounding = self._hypercubes[-1]
36
37
  return self._create_theory(dataframe)
@@ -59,6 +59,7 @@ class DiViNE(HyperCubeExtractor):
59
59
  return idx[np.argmin(distance)][-1]
60
60
 
61
61
  def _extract(self, dataframe: pd.DataFrame) -> Theory:
62
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=Target.CLASSIFICATION)
62
63
  np.random.seed(self.seed)
63
64
  data = self.__clean(dataframe)
64
65
 
@@ -1,5 +1,4 @@
1
1
  from __future__ import annotations
2
- import random as rnd
3
2
  from itertools import product
4
3
  from typing import Iterable
5
4
  import numpy as np
@@ -23,13 +22,13 @@ class GridEx(HyperCubeExtractor):
23
22
  self.grid = grid
24
23
  self.min_examples = min_examples
25
24
  self.threshold = threshold
26
- self._generator = rnd.Random(seed)
25
+ np.random.seed(seed)
27
26
 
28
27
  def _extract(self, dataframe: pd.DataFrame) -> Theory:
29
28
  self._hypercubes = []
30
- surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
31
- surrounding.init_diversity(2 * self.threshold)
32
- self._iterate(surrounding, dataframe)
29
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
30
+ self._surrounding.init_diversity(2 * self.threshold)
31
+ self._iterate(dataframe)
33
32
  return self._create_theory(dataframe)
34
33
 
35
34
  def _create_ranges(self, cube, iteration):
@@ -44,22 +43,22 @@ class GridEx(HyperCubeExtractor):
44
43
  ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
45
44
  return ranges
46
45
 
47
- def _cubes_to_split(self, cube, surrounding, iteration, dataframe, fake, keep_empty=False):
46
+ def _cubes_to_split(self, cube, iteration, dataframe, fake, keep_empty=False):
48
47
  to_split = []
49
- for (pn, p) in enumerate(list(product(*self._create_ranges(cube, iteration).values()))):
48
+ for p in product(*self._create_ranges(cube, iteration).values()):
50
49
  cube = self._default_cube()
51
50
  for i, f in enumerate(dataframe.columns[:-1]):
52
51
  cube.update_dimension(f, p[i])
53
52
  n = cube.count(dataframe)
54
53
  if n > 0 or keep_empty:
55
- fake = pd.concat([fake, cube.create_samples(self.min_examples - n, surrounding, self._generator)])
54
+ fake = pd.concat([fake, cube.create_samples(self.min_examples - n)])
56
55
  cube.update(fake, self.predictor)
57
56
  to_split.append(cube)
58
57
  return to_split, fake
59
58
 
60
- def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
59
+ def _iterate(self, dataframe: pd.DataFrame):
61
60
  fake = dataframe.copy()
62
- prev = [surrounding]
61
+ prev = [self._surrounding]
63
62
  next_iteration = []
64
63
 
65
64
  for iteration in self.grid.iterate():
@@ -70,7 +69,7 @@ class GridEx(HyperCubeExtractor):
70
69
  if cube.diversity < self.threshold:
71
70
  self._hypercubes += [cube]
72
71
  continue
73
- to_split, fake = self._cubes_to_split(cube, surrounding, iteration, dataframe, fake)
72
+ to_split, fake = self._cubes_to_split(cube, iteration, dataframe, fake)
74
73
  next_iteration += [c for c in self._merge(to_split, fake)]
75
74
  prev = next_iteration.copy()
76
75
  self._hypercubes += [cube for cube in next_iteration]
@@ -58,9 +58,10 @@ class HEx(GridEx):
58
58
  self.cube.update(dataframe[self.indices(dataframe) & ~idx], predictor)
59
59
  return cleaned
60
60
 
61
- def linearize(self, dataframe):
62
- children = [c.linearize(dataframe) for c in self.permanent_children(dataframe)]
63
- return [cc for c in children for cc in c if c != []] + list(self.permanent_children(dataframe))
61
+ def linearize(self, dataframe, depth=1):
62
+ children = [c.linearize(dataframe, depth + 1) for c in self.permanent_children(dataframe)]
63
+ return [(cc, dd) for c in children for cc, dd in c if c != []] + \
64
+ [(c, depth) for c in self.permanent_children(dataframe)]
64
65
 
65
66
  def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
66
67
  discretization=None, normalization=None, seed: int = get_default_random_seed()):
@@ -72,10 +73,10 @@ class HEx(GridEx):
72
73
  return parent_cube.output != new_cube.output
73
74
  return parent_cube.error - new_cube.error > self.threshold * .6
74
75
 
75
- def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
76
+ def _iterate(self, dataframe: pd.DataFrame):
76
77
  fake = dataframe.copy()
77
- surrounding.update(dataframe, self.predictor)
78
- root = HEx.Node(surrounding, threshold=self.threshold)
78
+ self._surrounding.update(dataframe, self.predictor)
79
+ root = HEx.Node(self._surrounding, threshold=self.threshold)
79
80
  current = [root]
80
81
 
81
82
  for iteration in self.grid.iterate():
@@ -83,7 +84,7 @@ class HEx(GridEx):
83
84
  for node in current:
84
85
  if node.cube.diversity < self.threshold:
85
86
  continue
86
- children, fake = self._cubes_to_split(node.cube, surrounding, iteration, dataframe, fake, True)
87
+ children, fake = self._cubes_to_split(node.cube, iteration, dataframe, fake, True)
87
88
  node.children = [HEx.Node(c, node, threshold=self.threshold) for c in children]
88
89
  cleaned = node.update(fake, self.predictor, False)
89
90
  node.children = [HEx.Node(c, node, threshold=self.threshold) for c in self._merge(
@@ -92,9 +93,12 @@ class HEx(GridEx):
92
93
 
93
94
  current = next_iteration.copy()
94
95
  _ = root.update(fake, self.predictor, True)
95
- self._hypercubes = [c.cube for c in root.linearize(fake)]
96
+ self._hypercubes = []
97
+ linearized = root.linearize(fake)
98
+ for depth in sorted(np.unique([d for (_, d) in linearized]), reverse=True):
99
+ self._hypercubes += self._merge([c.cube for (c, d) in linearized if d == depth], fake)
96
100
 
97
101
  if len(self._hypercubes) == 0:
98
- self._hypercubes = [surrounding]
102
+ self._hypercubes = [self._surrounding]
99
103
  elif not min(np.any([c.filter_indices(dataframe.iloc[:, :-1]) for c in self._hypercubes], axis=0)):
100
- self._hypercubes = self._hypercubes + [surrounding]
104
+ self._hypercubes = self._hypercubes + [self._surrounding]
@@ -8,7 +8,7 @@ import pandas as pd
8
8
  from numpy import ndarray
9
9
 
10
10
  from psyke.extraction.hypercubic.utils import Dimension, Dimensions, MinUpdate, ZippedDimension, Limit, Expansion
11
- from psyke.schema import Between
11
+ from psyke.schema import Between, GreaterThan, LessThan
12
12
  from psyke.utils import get_default_precision, get_int_precision, Target, get_default_random_seed
13
13
  from psyke.utils.logic import create_term, to_rounded_real, linear_function_creator
14
14
  from sklearn.linear_model import LinearRegression
@@ -68,7 +68,7 @@ class Point:
68
68
 
69
69
  class HyperCube:
70
70
  """
71
- An N-dimensional cube holding a numeric value.
71
+ An N-dimensional cube holding an output numeric value.
72
72
  """
73
73
 
74
74
  EPSILON = get_default_precision() # Precision used when comparing two hypercubes
@@ -83,6 +83,7 @@ class HyperCube:
83
83
  self._error = 0.0
84
84
  self._barycenter = Point([], [])
85
85
  self._default = False
86
+ self._infinite_dimensions = {}
86
87
 
87
88
  def __contains__(self, obj: dict[str, float] | HyperCube) -> bool:
88
89
  """
@@ -92,17 +93,35 @@ class HyperCube:
92
93
  :return: true if the object is inside the hypercube, false otherwise
93
94
  """
94
95
  if isinstance(obj, HyperCube):
95
- return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
96
- for k in obj.dimensions])
96
+ for k in obj.dimensions:
97
+ if k not in self._infinite_dimensions:
98
+ if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) < self.get_second(k)):
99
+ return False
100
+ elif len(self._infinite_dimensions[k]) == 2:
101
+ continue
102
+ elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
103
+ return False
104
+ elif '-' in self._infinite_dimensions[k] and obj.get_second(k) >= self.get_second(k):
105
+ return False
97
106
  elif isinstance(obj, dict):
98
- return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in obj.items()])
107
+ for k, v in obj.items():
108
+ if k not in self._infinite_dimensions:
109
+ if not (self.get_first(k) <= v < self.get_second(k)):
110
+ return False
111
+ elif len(self._infinite_dimensions[k]) == 2:
112
+ continue
113
+ elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
114
+ return False
115
+ elif '-' in self._infinite_dimensions[k] and v >= self.get_second(k):
116
+ return False
99
117
  else:
100
118
  raise TypeError("Invalid type for obj parameter")
119
+ return True
101
120
 
102
121
  def __eq__(self, other: HyperCube) -> bool:
103
122
  return all([(abs(dimension.this_dimension[0] - dimension.other_dimension[0]) < HyperCube.EPSILON)
104
123
  & (abs(dimension.this_dimension[1] - dimension.other_dimension[1]) < HyperCube.EPSILON)
105
- for dimension in self._zip_dimensions(other, True)])
124
+ for dimension in self._zip_dimensions(other)])
106
125
 
107
126
  def __getitem__(self, feature: str) -> Dimension:
108
127
  if feature in self._dimensions.keys():
@@ -124,14 +143,16 @@ class HyperCube:
124
143
  def set_default(self):
125
144
  self._default = True
126
145
 
146
+ def set_infinite(self, dimension: str, direction: str):
147
+ if dimension in self._infinite_dimensions:
148
+ self._infinite_dimensions[dimension].append(direction)
149
+ else:
150
+ self._infinite_dimensions[dimension] = [direction]
151
+
127
152
  @property
128
153
  def dimensions(self) -> Dimensions:
129
154
  return self._dimensions
130
155
 
131
- @property
132
- def finite_dimensions(self) -> Dimensions:
133
- return {k: v for k, v in self._dimensions.items() if np.isfinite(v[0]) and np.isfinite(v[1])}
134
-
135
156
  @property
136
157
  def limit_count(self) -> int:
137
158
  return len(self._limits)
@@ -175,10 +196,8 @@ class HyperCube:
175
196
  def filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
176
197
  return dataset[self.filter_indices(dataset)]
177
198
 
178
- def _zip_dimensions(self, other: HyperCube, check_finite: bool = False) -> list[ZippedDimension]:
179
- dimensions = set(self.finite_dimensions).union(set(other.finite_dimensions)) if check_finite else \
180
- set(self.dimensions)
181
- return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in dimensions]
199
+ def _zip_dimensions(self, other: HyperCube) -> list[ZippedDimension]:
200
+ return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in self.dimensions]
182
201
 
183
202
  def add_limit(self, limit_or_feature: Limit | str, direction: str = None) -> None:
184
203
  if isinstance(limit_or_feature, Limit):
@@ -196,9 +215,8 @@ class HyperCube:
196
215
  return '*'
197
216
  raise Exception('Too many limits for this feature')
198
217
 
199
- def create_samples(self, n: int = 1, surrounding: GenericCube = None,
200
- generator: Random = Random(get_default_random_seed())) -> pd.DataFrame:
201
- return pd.DataFrame([self._create_tuple(generator, surrounding) for _ in range(n)])
218
+ def create_samples(self, n: int = 1) -> pd.DataFrame:
219
+ return pd.DataFrame([self._create_tuple() for _ in range(n)])
202
220
 
203
221
  @staticmethod
204
222
  def check_overlap(to_check: Iterable[HyperCube], hypercubes: Iterable[HyperCube]) -> bool:
@@ -218,10 +236,20 @@ class HyperCube:
218
236
  def count(self, dataset: pd.DataFrame) -> int:
219
237
  return self.filter_dataframe(dataset.iloc[:, :-1]).shape[0]
220
238
 
239
+ def _interval_to_value(self, dimension, unscale):
240
+ if dimension not in self._infinite_dimensions:
241
+ return Between(unscale(self[dimension][0], dimension), unscale(self[dimension][1], dimension))
242
+ if len(self._infinite_dimensions[dimension]) == 2:
243
+ return
244
+ if '+' in self._infinite_dimensions[dimension]:
245
+ return GreaterThan(unscale(self[dimension][0], dimension))
246
+ if '-' in self._infinite_dimensions[dimension]:
247
+ return LessThan(unscale(self[dimension][1], dimension))
248
+
221
249
  def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
222
- dimensions = dict(self.dimensions)
223
- return [create_term(variables[name], Between(unscale(values[0], name), unscale(values[1], name)))
224
- for name, values in dimensions.items() if name not in ignore and not self.is_default]
250
+ values = [(dim, self._interval_to_value(dim, unscale)) for dim in self.dimensions if dim not in ignore]
251
+ return [create_term(variables[name], value) for name, value in values
252
+ if not self.is_default and value is not None]
225
253
 
226
254
  @staticmethod
227
255
  def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False,
@@ -243,10 +271,8 @@ class HyperCube:
243
271
  return RegressionCube(dimensions)
244
272
  return HyperCube(dimensions)
245
273
 
246
- def _create_tuple(self, generator: Random, surrounding: GenericCube) -> dict:
247
- minmax = {k: (self[k][0] if np.isfinite(self[k][0]) else surrounding[k][0],
248
- self[k][1] if np.isfinite(self[k][1]) else surrounding[k][1]) for k in self._dimensions.keys()}
249
- return {k: generator.uniform(minmax[k][0], minmax[k][1]) for k in self._dimensions.keys()}
274
+ def _create_tuple(self) -> dict:
275
+ return {k: np.random.uniform(self[k][0], self[k][1]) for k in self._dimensions.keys()}
250
276
 
251
277
  @staticmethod
252
278
  def cube_from_point(point: dict[str, float], output=None) -> GenericCube:
@@ -286,12 +312,10 @@ class HyperCube:
286
312
  return self[feature][1]
287
313
 
288
314
  def has_volume(self) -> bool:
289
- return all([dimension[1] - dimension[0] > HyperCube.EPSILON for dimension in self._dimensions.values()
290
- if np.isfinite(dimension[0]) and np.isfinite(dimension[1])])
315
+ return all([dimension[1] - dimension[0] > HyperCube.EPSILON for dimension in self._dimensions.values()])
291
316
 
292
317
  def volume(self) -> float:
293
- return reduce(lambda a, b: a * b, [dimension[1] - dimension[0] for dimension in self._dimensions.values()
294
- if np.isfinite(dimension[0]) and np.isfinite(dimension[1])], 1)
318
+ return reduce(lambda a, b: a * b, [dimension[1] - dimension[0] for dimension in self._dimensions.values()], 1)
295
319
 
296
320
  def diagonal(self) -> float:
297
321
  return reduce(
@@ -477,13 +501,31 @@ class ClosedCube(HyperCube):
477
501
  :param obj: an N-dimensional object (point or hypercube)
478
502
  :return: true if the object is inside the hypercube, false otherwise
479
503
  """
480
- if isinstance(obj, ClosedCube):
481
- return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
482
- for k in obj.dimensions])
504
+ if isinstance(obj, HyperCube):
505
+ for k in obj.dimensions:
506
+ if k not in self._infinite_dimensions:
507
+ if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k)):
508
+ return False
509
+ elif len(self._infinite_dimensions[k]) == 2:
510
+ continue
511
+ elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
512
+ return False
513
+ elif '-' in self._infinite_dimensions[k] and obj.get_second(k) > self.get_second(k):
514
+ return False
483
515
  elif isinstance(obj, dict):
484
- return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in obj.items()])
516
+ for k, v in obj.items():
517
+ if k not in self._infinite_dimensions:
518
+ if not (self.get_first(k) <= v <= self.get_second(k)):
519
+ return False
520
+ elif len(self._infinite_dimensions[k]) == 2:
521
+ continue
522
+ elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
523
+ return False
524
+ elif '-' in self._infinite_dimensions[k] and v > self.get_second(k):
525
+ return False
485
526
  else:
486
527
  raise TypeError("Invalid type for obj parameter")
528
+ return True
487
529
 
488
530
  def filter_indices(self, dataset: pd.DataFrame) -> ndarray:
489
531
  v = np.array([v for _, v in self._dimensions.items()])
@@ -1,5 +1,4 @@
1
1
  from __future__ import annotations
2
- from random import Random
3
2
  from typing import Iterable
4
3
  import numpy as np
5
4
  import pandas as pd
@@ -10,8 +9,6 @@ from psyke.extraction.hypercubic.hypercube import GenericCube
10
9
  from psyke.extraction.hypercubic.utils import MinUpdate, Expansion
11
10
  from psyke.utils import get_default_random_seed, Target
12
11
 
13
- DomainProperties = (Iterable[MinUpdate], GenericCube)
14
-
15
12
 
16
13
  class ITER(HyperCubeExtractor):
17
14
  """
@@ -32,14 +29,13 @@ class ITER(HyperCubeExtractor):
32
29
  self.fill_gaps = fill_gaps
33
30
  self._output = Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else \
34
31
  output if output is not None else Target.CONSTANT
35
- self.__generator = Random(seed)
32
+ self.seed = seed
36
33
 
37
34
  def _best_cube(self, dataframe: pd.DataFrame, cube: GenericCube, cubes: Iterable[Expansion]) -> Expansion | None:
38
35
  expansions = []
39
36
  for limit in cubes:
40
37
  count = limit.cube.count(dataframe)
41
- dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count,
42
- generator=self.__generator)])
38
+ dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count)])
43
39
  limit.cube.update(dataframe, self.predictor)
44
40
  expansions.append(Expansion(
45
41
  limit.cube, limit.feature, limit.direction,
@@ -50,24 +46,21 @@ class ITER(HyperCubeExtractor):
50
46
  return sorted(expansions, key=lambda e: e.distance)[0]
51
47
  return None
52
48
 
53
- def _calculate_min_updates(self, surrounding: GenericCube) -> Iterable[MinUpdate]:
49
+ def _calculate_min_updates(self) -> Iterable[MinUpdate]:
54
50
  return [MinUpdate(name, (interval[1] - interval[0]) * self.min_update) for (name, interval) in
55
- surrounding.dimensions.items()]
51
+ self._surrounding.dimensions.items()]
56
52
 
57
- @staticmethod
58
- def _create_range(cube: GenericCube, domain: DomainProperties, feature: str, direction: str)\
53
+ def _create_range(self, cube: GenericCube, min_updates: Iterable[MinUpdate], feature: str, direction: str)\
59
54
  -> tuple[GenericCube, tuple[float, float]]:
60
- min_updates, surrounding = domain
61
55
  a, b = cube[feature]
62
56
  size = [min_update for min_update in min_updates if min_update.name == feature][0].value
63
- return (cube.copy(), (max(a - size, surrounding.get_first(feature)), a)
64
- if direction == '-' else (b, min(b + size, surrounding.get_second(feature))))
57
+ return (cube.copy(), (max(a - size, self._surrounding.get_first(feature)), a)
58
+ if direction == '-' else (b, min(b + size, self._surrounding.get_second(feature))))
65
59
 
66
- @staticmethod
67
- def _create_temp_cube(cube: GenericCube, domain: DomainProperties,
60
+ def _create_temp_cube(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
68
61
  hypercubes: Iterable[GenericCube], feature: str,
69
62
  direction: str) -> Iterable[Expansion]:
70
- temp_cube, values = ITER._create_range(cube, domain, feature, direction)
63
+ temp_cube, values = self._create_range(cube, min_updates, feature, direction)
71
64
  temp_cube.update_dimension(feature, values)
72
65
  overlap = temp_cube.overlap(hypercubes)
73
66
  while (overlap is not None) & (temp_cube.has_volume()):
@@ -77,23 +70,22 @@ class ITER(HyperCubeExtractor):
77
70
  else:
78
71
  cube.add_limit(feature, direction)
79
72
 
80
- @staticmethod
81
- def _create_temp_cubes(cube: GenericCube, domain: DomainProperties,
73
+ def _create_temp_cubes(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
82
74
  hypercubes: Iterable[GenericCube]) -> Iterable[Expansion]:
83
75
  tmp_cubes = []
84
- for feature in domain[1].dimensions.keys():
76
+ for feature in self._surrounding.dimensions.keys():
85
77
  limit = cube.check_limits(feature)
86
78
  if limit == '*':
87
79
  continue
88
80
  for x in {'-', '+'} - {limit}:
89
- tmp_cubes += ITER._create_temp_cube(cube, domain, hypercubes, feature, x)
81
+ tmp_cubes += self._create_temp_cube(cube, min_updates, hypercubes, feature, x)
90
82
  return tmp_cubes
91
83
 
92
84
  def _cubes_to_update(self, dataframe: pd.DataFrame, to_expand: Iterable[GenericCube],
93
- hypercubes: Iterable[GenericCube], domain: DomainProperties) \
85
+ hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate]) \
94
86
  -> Iterable[tuple[GenericCube, Expansion]]:
95
87
  results = [(hypercube, self._best_cube(dataframe, hypercube, self._create_temp_cubes(
96
- hypercube, domain, hypercubes))) for hypercube in to_expand]
88
+ hypercube, min_updates, hypercubes))) for hypercube in to_expand]
97
89
  return sorted([result for result in results if result[1] is not None], key=lambda x: x[1].distance)
98
90
 
99
91
  def _expand_or_create(self, cube: GenericCube, expansion: Expansion, hypercubes: Iterable[GenericCube]) -> None:
@@ -103,7 +95,7 @@ class ITER(HyperCubeExtractor):
103
95
  cube.expand(expansion, hypercubes)
104
96
 
105
97
  @staticmethod
106
- def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, tuple]:
98
+ def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, float]:
107
99
  if isinstance(output, str):
108
100
  close_sample = dataframe[dataframe.iloc[:, -1] == output].iloc[0].to_dict()
109
101
  else:
@@ -126,36 +118,32 @@ class ITER(HyperCubeExtractor):
126
118
  return [HyperCube.cube_from_point(ITER._find_closer_sample(dataframe, point), output=self._output)
127
119
  for point in points]
128
120
 
129
- def _initialize(self, dataframe: pd.DataFrame) -> tuple[Iterable[GenericCube], DomainProperties]:
121
+ def _initialize(self, dataframe: pd.DataFrame) -> Iterable[MinUpdate]:
130
122
  self._fake_dataframe = dataframe.copy()
131
- surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
132
- min_updates = self._calculate_min_updates(surrounding)
133
- self._hypercubes = self._init_hypercubes(dataframe, min_updates, surrounding)
123
+ self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
124
+ min_updates = self._calculate_min_updates()
125
+ self._init_hypercubes(dataframe, min_updates)
134
126
  for hypercube in self._hypercubes:
135
127
  hypercube.update(dataframe, self.predictor)
136
- return self._hypercubes, (min_updates, surrounding)
137
-
138
- def _init_hypercubes(
139
- self,
140
- dataframe: pd.DataFrame,
141
- min_updates: Iterable[MinUpdate],
142
- surrounding: GenericCube
143
- ) -> Iterable[GenericCube]:
128
+ return min_updates
129
+
130
+ def _init_hypercubes(self, dataframe: pd.DataFrame, min_updates: Iterable[MinUpdate]):
144
131
  while True:
145
132
  hypercubes = self._generate_starting_points(dataframe)
146
133
  for hypercube in hypercubes:
147
- hypercube.expand_all(min_updates, surrounding)
134
+ hypercube.expand_all(min_updates, self._surrounding)
148
135
  self.n_points = self.n_points - 1
149
136
  if not HyperCube.check_overlap(hypercubes, hypercubes):
150
137
  break
151
- return hypercubes
138
+ self._hypercubes = hypercubes
152
139
 
153
- def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], domain: DomainProperties,
140
+ def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate],
154
141
  left_iteration: int) -> int:
142
+ np.random.seed(self.seed)
155
143
  iterations = 0
156
144
  to_expand = [cube for cube in hypercubes if cube.limit_count < (len(dataframe.columns) - 1) * 2]
157
145
  while (len(to_expand) > 0) and (iterations < left_iteration):
158
- updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, domain))
146
+ updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, min_updates))
159
147
  if len(updates) > 0:
160
148
  self._expand_or_create(updates[0][0], updates[0][1], hypercubes)
161
149
  iterations += 1
@@ -171,12 +159,12 @@ class ITER(HyperCubeExtractor):
171
159
  return cube.overlap(hypercubes)
172
160
 
173
161
  def _extract(self, dataframe: pd.DataFrame) -> Theory:
174
- self._hypercubes, domain = self._initialize(dataframe)
162
+ min_updates = self._initialize(dataframe)
175
163
  temp_train = dataframe.copy()
176
164
  fake = dataframe.copy()
177
165
  iterations = 0
178
166
  while temp_train.shape[0] > 0:
179
- iterations += self._iterate(fake, self._hypercubes, domain, self.max_iterations - iterations)
167
+ iterations += self._iterate(fake, self._hypercubes, min_updates, self.max_iterations - iterations)
180
168
  if (iterations >= self.max_iterations) or (not self.fill_gaps):
181
169
  break
182
170
  temp_train = temp_train.iloc[[p is None for p in self.predict(temp_train.iloc[:, :-1])]]
@@ -188,7 +176,7 @@ class ITER(HyperCubeExtractor):
188
176
  if not new_cube.has_volume():
189
177
  break
190
178
  new_cube = HyperCube.cube_from_point(point, self._output)
191
- new_cube.expand_all(domain[0], domain[1], ratio)
179
+ new_cube.expand_all(min_updates, self._surrounding, ratio)
192
180
  overlap = new_cube.overlap(self._hypercubes)
193
181
  ratio *= 2
194
182
  if new_cube.has_volume():
@@ -78,14 +78,11 @@ class HyperCubePredictor(EvaluableModel):
78
78
  def _find_cube(self, data: dict[str, float]) -> GenericCube | None:
79
79
  for dimension in self._dimensions_to_ignore:
80
80
  del data[dimension]
81
- found = None
82
81
  for cube in self._hypercubes:
83
82
  if data in cube:
84
- found = cube.copy()
85
- break
86
- if found is None and self._hypercubes[-1].is_default:
87
- found = self._hypercubes[-1].copy()
88
- return found
83
+ return cube.copy()
84
+ if self._hypercubes[-1].is_default:
85
+ return self._hypercubes[-1].copy()
89
86
 
90
87
  @property
91
88
  def n_rules(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: psyke
3
- Version: 0.8.2.dev18
3
+ Version: 0.8.3.dev2
4
4
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
5
5
  Home-page: https://github.com/psykei/psyke-python
6
6
  Author: Matteo Magnini
@@ -81,7 +81,6 @@ class CreateTestPredictors(distutils.cmd.Command):
81
81
  pass
82
82
 
83
83
  def run(self):
84
- from test.psyke import Predictor
85
84
  from psyke.utils import get_default_random_seed
86
85
  from psyke.utils.dataframe import get_discrete_dataset
87
86
  from sklearn.model_selection import train_test_split
@@ -90,6 +89,7 @@ class CreateTestPredictors(distutils.cmd.Command):
90
89
  import ast
91
90
  import pandas as pd
92
91
  from tensorflow.keras import Model
92
+ from test import Predictor
93
93
 
94
94
  # Read the required predictors to run the tests:
95
95
  # model | model_options | dataset
@@ -105,7 +105,7 @@ class CreateTestPredictors(distutils.cmd.Command):
105
105
  if row['bins'] > 0:
106
106
  schema = get_schema(dataset) # int(row['bins'])
107
107
  dataset = get_discrete_dataset(dataset.iloc[:, :-1], schema).join(dataset.iloc[:, -1])
108
- model = get_model(row['model'], options)
108
+ model, _ = get_model(row['model'], options)
109
109
  training_set, test_set = train_test_split(dataset, test_size=0.5,
110
110
  random_state=get_default_random_seed())
111
111
  if isinstance(model, Model):