psyke 0.7.11.dev2__tar.gz → 0.8.0.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of psyke might be problematic. Click here for more details.

Files changed (78)
  1. {psyke-0.7.11.dev2/psyke.egg-info → psyke-0.8.0.dev11}/PKG-INFO +1 -1
  2. psyke-0.8.0.dev11/VERSION +1 -0
  3. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/__init__.py +22 -16
  4. psyke-0.8.0.dev11/psyke/extraction/__init__.py +21 -0
  5. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/cart/__init__.py +6 -15
  6. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/__init__.py +94 -5
  7. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/cosmik/__init__.py +2 -2
  8. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/creepy/__init__.py +1 -1
  9. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/divine/__init__.py +2 -2
  10. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/gridex/__init__.py +2 -2
  11. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/hex/__init__.py +12 -8
  12. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/hypercube.py +48 -11
  13. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/iter/__init__.py +2 -2
  14. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/real/__init__.py +5 -9
  15. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/trepan/__init__.py +2 -2
  16. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/hypercubepredictor.py +14 -10
  17. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11/psyke.egg-info}/PKG-INFO +1 -1
  18. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/__init__.py +3 -8
  19. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/test_hypercube.py +1 -1
  20. psyke-0.7.11.dev2/VERSION +0 -1
  21. psyke-0.7.11.dev2/psyke/extraction/__init__.py +0 -32
  22. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/LICENSE +0 -0
  23. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/MANIFEST.in +0 -0
  24. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/README.md +0 -0
  25. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/clustering/__init__.py +0 -0
  26. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/clustering/cream/__init__.py +0 -0
  27. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/clustering/exact/__init__.py +0 -0
  28. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/clustering/utils.py +0 -0
  29. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/cart/predictor.py +0 -0
  30. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/gridrex/__init__.py +0 -0
  31. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/strategy.py +0 -0
  32. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/utils.py +0 -0
  33. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/real/utils.py +0 -0
  34. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/trepan/utils.py +0 -0
  35. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/schema/__init__.py +0 -0
  36. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/tuning/__init__.py +0 -0
  37. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/tuning/crash/__init__.py +0 -0
  38. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/tuning/orchid/__init__.py +0 -0
  39. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/tuning/pedro/__init__.py +0 -0
  40. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/utils/__init__.py +0 -0
  41. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/utils/dataframe.py +0 -0
  42. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/utils/logic.py +0 -0
  43. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/utils/metrics.py +0 -0
  44. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/utils/plot.py +0 -0
  45. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/utils/sorted.py +0 -0
  46. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke.egg-info/SOURCES.txt +0 -0
  47. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke.egg-info/dependency_links.txt +0 -0
  48. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke.egg-info/not-zip-safe +0 -0
  49. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke.egg-info/requires.txt +0 -0
  50. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke.egg-info/top_level.txt +0 -0
  51. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/pyproject.toml +0 -0
  52. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/setup.cfg +0 -0
  53. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/setup.py +0 -0
  54. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/clustering/__init__.py +0 -0
  55. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/__init__.py +0 -0
  56. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/cart/__init__.py +0 -0
  57. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/cart/test_cart.py +0 -0
  58. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/cart/test_simplified_cart.py +0 -0
  59. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/__init__.py +0 -0
  60. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  61. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
  62. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  63. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
  64. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/real/__init__.py +0 -0
  65. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/real/test_real.py +0 -0
  66. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/real/test_rule.py +0 -0
  67. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/trepan/__init__.py +0 -0
  68. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/trepan/test_node.py +0 -0
  69. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/trepan/test_split.py +0 -0
  70. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/trepan/test_trepan.py +0 -0
  71. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/utils/__init__.py +0 -0
  72. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/utils/test_prune.py +0 -0
  73. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/utils/test_simplify.py +0 -0
  74. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/utils/test_simplify_formatter.py +0 -0
  75. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/resources/__init__.py +0 -0
  76. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/resources/datasets/__init__.py +0 -0
  77. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/resources/predictors/__init__.py +0 -0
  78. {psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/resources/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: psyke
3
- Version: 0.7.11.dev2
3
+ Version: 0.8.0.dev11
4
4
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
5
5
  Home-page: https://github.com/psykei/psyke-python
6
6
  Author: Matteo Magnini
@@ -0,0 +1 @@
1
+ 0.8.0.dev11
@@ -48,34 +48,28 @@ class EvaluableModel(object):
48
48
  self.discretization = [] if discretization is None else list(discretization)
49
49
  self.normalization = normalization
50
50
 
51
- def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
51
+ def predict(self, dataframe: pd.DataFrame) -> Iterable:
52
52
  """
53
53
  Predicts the output values of every sample in dataset.
54
54
 
55
55
  :param dataframe: is the set of instances to predict.
56
- :param mapping: for one-hot encoding.
57
56
  :return: a list of predictions.
58
57
  """
59
- return self.__convert(self._predict(dataframe), mapping)
58
+ return self.__convert(self._predict(dataframe))
60
59
 
61
60
  def _predict(self, dataframe: pd.DataFrame) -> Iterable:
62
61
  raise NotImplementedError('predict')
63
62
 
64
- def __convert(self, ys: Iterable, mapping: dict[str: int] = None) -> Iterable:
65
- if mapping is not None:
66
- inverse_mapping = {v: k for k, v in mapping.items()}
67
- ys = [inverse_mapping[y] for y in ys]
63
+ def __convert(self, ys: Iterable) -> Iterable:
68
64
  if self.normalization is not None:
69
65
  m, s = self.normalization[list(self.normalization.keys())[-1]]
70
66
  ys = [prediction if prediction is None else prediction * s + m for prediction in ys]
71
67
  return ys
72
68
 
73
- def brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
74
- mapping: dict[str: int] = None) -> Iterable:
75
- return self.__convert(self._brute_predict(dataframe, criterion, n, mapping), mapping)
69
+ def brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
70
+ return self.__convert(self._brute_predict(dataframe, criterion, n))
76
71
 
77
- def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
78
- mapping: dict[str: int] = None) -> Iterable:
72
+ def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
79
73
  raise NotImplementedError('brute_predict')
80
74
 
81
75
  def unscale(self, values, name):
@@ -151,24 +145,36 @@ class Extractor(EvaluableModel, ABC):
151
145
  ----------
152
146
  predictor : the underling black box predictor.
153
147
  discretization : A collection of sets of discretised features.
154
- Each set corresponds to a set of features derived from a single non-discrete feature.
148
+ Each set corresponds to a set of features derived from a single non-discrete feature.
155
149
  """
156
150
 
157
151
  def __init__(self, predictor, discretization: Iterable[DiscreteFeature] = None, normalization=None):
158
152
  super().__init__(discretization, normalization)
159
153
  self.predictor = predictor
160
154
 
161
- def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
155
+ def extract(self, dataframe: pd.DataFrame) -> Theory:
162
156
  """
163
157
  Extracts rules from the underlying predictor.
164
158
 
165
159
  :param dataframe: is the set of instances to be used for the extraction.
166
- :param mapping: for one-hot encoding.
167
- :param sort: alphabetically sort the variables of the head of the rules.
168
160
  :return: the theory created from the extracted rules.
169
161
  """
170
162
  raise NotImplementedError('extract')
171
163
 
164
+ def predict_why(self, data: dict[str, float]):
165
+ """
166
+ Provides a prediction and the corresponding explanation.
167
+ :param data: is the instance to predict.
168
+ """
169
+ raise NotImplementedError('predict_why')
170
+
171
+ def predict_counter(self, data: dict[str, float]):
172
+ """
173
+ Provides a prediction and counterfactual explanations.
174
+ :param data: is the instance to predict.
175
+ """
176
+ raise NotImplementedError('predict_counter')
177
+
172
178
  def mae(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
173
179
  n: int = 3) -> float:
174
180
  """
@@ -0,0 +1,21 @@
1
+ from abc import ABC
2
+
3
+ import pandas as pd
4
+ from tuprolog.theory import Theory
5
+
6
+ from psyke import Extractor
7
+
8
+
9
+ class PedagogicalExtractor(Extractor, ABC):
10
+
11
+ def __init__(self, predictor, discretization=None, normalization=None):
12
+ Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
13
+
14
+ def extract(self, dataframe: pd.DataFrame) -> Theory:
15
+ new_y = pd.DataFrame(self.predictor.predict(dataframe.iloc[:, :-1])).set_index(dataframe.index)
16
+ data = dataframe.iloc[:, :-1].copy().join(new_y)
17
+ data.columns = dataframe.columns
18
+ return self._extract(data)
19
+
20
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
21
+ raise NotImplementedError('extract')
@@ -47,7 +47,7 @@ class Cart(PedagogicalExtractor):
47
47
  simplified.append(nodes.pop(0))
48
48
  return simplified
49
49
 
50
- def _create_theory(self, data: pd.DataFrame, mapping: dict[str: int], sort: bool = True) -> Theory:
50
+ def _create_theory(self, data: pd.DataFrame) -> Theory:
51
51
  new_theory = mutable_theory()
52
52
  nodes = [node for node in self._cart_predictor]
53
53
  nodes = Cart._simplify_nodes(nodes) if self._simplify else nodes
@@ -55,12 +55,7 @@ class Cart(PedagogicalExtractor):
55
55
  if self.normalization is not None:
56
56
  m, s = self.normalization[data.columns[-1]]
57
57
  prediction = prediction * s + m
58
- if mapping is not None and prediction in mapping.values():
59
- for k, v in mapping.items():
60
- if v == prediction:
61
- prediction = k
62
- break
63
- variables = create_variable_list(self.discretization, data, sort)
58
+ variables = create_variable_list(self.discretization, data)
64
59
  new_theory.assertZ(
65
60
  clause(
66
61
  create_head(data.columns[-1], list(variables.values()), prediction),
@@ -69,15 +64,11 @@ class Cart(PedagogicalExtractor):
69
64
  )
70
65
  return new_theory
71
66
 
72
- def _extract(self, data: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
73
- self._cart_predictor.predictor = DecisionTreeClassifier(random_state=TREE_SEED) \
74
- if isinstance(data.iloc[0, -1], str) or mapping is not None else DecisionTreeRegressor(random_state=TREE_SEED)
75
- if mapping is not None:
76
- data.iloc[:, -1] = data.iloc[:, -1].apply(lambda x: mapping[x] if x in mapping.keys() else x)
77
- self._cart_predictor.predictor.max_depth = self.depth
78
- self._cart_predictor.predictor.max_leaf_nodes = self.leaves
67
+ def _extract(self, data: pd.DataFrame) -> Theory:
68
+ tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
69
+ self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth, max_leaf_nodes=self.leaves)
79
70
  self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
80
- return self._create_theory(data, mapping, sort)
71
+ return self._create_theory(data)
81
72
 
82
73
  def _predict(self, dataframe: pd.DataFrame) -> Iterable:
83
74
  return self._cart_predictor.predict(dataframe)
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from abc import ABC
4
+ from itertools import groupby
4
5
  from typing import Iterable
5
6
  import numpy as np
6
7
  import pandas as pd
@@ -14,7 +15,8 @@ from psyke.extraction import PedagogicalExtractor
14
15
  from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
15
16
  GenericCube
16
17
  from psyke.hypercubepredictor import HyperCubePredictor
17
- from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier, last_in_body
18
+ from psyke.schema import Between, Outside, Value
19
+ from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier, last_in_body, PRECISION
18
20
  from psyke.utils import Target
19
21
  from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
20
22
 
@@ -48,12 +50,99 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
48
50
  last_cube[dimension] = [-np.inf, np.inf]
49
51
  return theory
50
52
 
51
- def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
52
- theory = PedagogicalExtractor.extract(self, dataframe, mapping, sort)
53
+ def extract(self, dataframe: pd.DataFrame) -> Theory:
54
+ theory = PedagogicalExtractor.extract(self, dataframe)
53
55
  self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
54
56
  self._surrounding.update(dataframe, self.predictor)
55
57
  return theory
56
58
 
59
+ def predict_counter(self, data: dict[str, float]):
60
+ cube = self._find_cube(data)
61
+ if cube is None:
62
+ print("The extracted knowledge is not exhaustive; impossible to predict this instance")
63
+ else:
64
+ print("The output is", self._predict_from_cubes(data))
65
+
66
+ point = Point(list(data.keys()), list(data.values()))
67
+ cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
68
+ cubes = sorted([(cube.surface_distance(point), cube.volume(), cube) for cube in cubes])
69
+ outputs = []
70
+ for _, _, c in cubes:
71
+ if c.output not in outputs:
72
+ outputs.append(c.output)
73
+ print("The output may be", c.output, 'if')
74
+
75
+ for d in c.dimensions.keys():
76
+ lower, upper = c[d]
77
+ p = point[d]
78
+ if p < lower:
79
+ print(' ', d, '=', round(lower, 1))
80
+ elif p > upper:
81
+ print(' ', d, '=', round(upper, 1))
82
+
83
+ def __get_local_conditions(self, cube: GenericCube) -> dict[list[Value]]:
84
+ conditions = {d: [] for d in cube.dimensions}
85
+ for d in cube.finite_dimensions:
86
+ conditions[d].append(Between(*cube.dimensions[d]))
87
+ subcubes = cube.subcubes(self._hypercubes)
88
+ for c in [c for c in subcubes if sum(c in sc and c != sc for sc in subcubes) == 0]:
89
+ for d in c.finite_dimensions:
90
+ conditions[d].append(Outside(*c.dimensions[d]))
91
+ return conditions
92
+
93
+ def predict_why(self, data: dict[str, float]):
94
+ cube = self._find_cube(data)
95
+ if cube is None:
96
+ print("The extracted knowledge is not exhaustive; impossible to predict this instance")
97
+ else:
98
+ output = self._predict_from_cubes(data)
99
+ print(f"The output is {output} because")
100
+ conditions = self.__get_local_conditions(cube)
101
+ for d in conditions:
102
+ simplified = HyperCubeExtractor.__simplify(conditions[d])
103
+ for i, condition in enumerate(simplified):
104
+ if i == 0:
105
+ print(' ', d, 'is', end=' ')
106
+ else:
107
+ print('and', end=' ')
108
+ if isinstance(condition, Outside):
109
+ print('not', end=' ')
110
+ print('between', round(condition.lower, 1), 'and', round(condition.upper, 1), end=' ')
111
+ if i + 1 == len(simplified):
112
+ print()
113
+
114
+ @staticmethod
115
+ def __simplify(conditions):
116
+ simplified = []
117
+ for condition in conditions:
118
+ to_add = True
119
+ for i, simple in enumerate(simplified):
120
+ if isinstance(condition, Outside) and isinstance(simple, Outside):
121
+ if simple.lower <= condition.lower <= simple.upper or \
122
+ simple.lower <= condition.upper <= simple.upper or \
123
+ condition.lower <= simple.lower <= simple.upper <= condition.upper:
124
+ simplified[i].upper = max(condition.upper, simple.upper)
125
+ simplified[i].lower = min(condition.lower, simple.lower)
126
+ to_add = False
127
+ break
128
+ elif isinstance(condition, Outside) and isinstance(simple, Between):
129
+ if simple.lower >= condition.upper or simple.upper <= condition.lower:
130
+ to_add = False
131
+ break
132
+ elif condition.lower <= simple.lower <= condition.upper <= simple.upper:
133
+ simplified[i].lower = condition.upper
134
+ to_add = False
135
+ break
136
+ elif simple.lower <= condition.lower <= simple.upper <= condition.upper:
137
+ simplified[i].upper = condition.lower
138
+ to_add = False
139
+ break
140
+ elif condition.lower <= simple.lower <= simple.upper <= condition.upper:
141
+ raise ValueError
142
+ if to_add:
143
+ simplified.append(condition)
144
+ return simplified
145
+
57
146
  @staticmethod
58
147
  def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
59
148
  return create_head(dataframe.columns[-1], variables[:-1], output) \
@@ -66,13 +155,13 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
66
155
  def __drop(self, dataframe: pd.DataFrame):
67
156
  self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
68
157
 
69
- def _create_theory(self, dataframe: pd.DataFrame, sort: bool = False) -> Theory:
158
+ def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
70
159
  self.__drop(dataframe)
71
160
  new_theory = mutable_theory()
72
161
  for cube in self._hypercubes:
73
162
  logger.info(cube.output)
74
163
  logger.info(cube.dimensions)
75
- variables = create_variable_list([], dataframe, sort)
164
+ variables = create_variable_list([], dataframe)
76
165
  variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
77
166
  head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
78
167
  self.unscale(cube.output, dataframe.columns[-1]))
@@ -24,7 +24,7 @@ class COSMiK(HyperCubeExtractor):
24
24
  self.close_to_center = close_to_center
25
25
  self.seed = seed
26
26
 
27
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
27
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
28
28
  np.random.seed(self.seed)
29
29
  X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]
30
30
 
@@ -44,4 +44,4 @@ class COSMiK(HyperCubeExtractor):
44
44
  cube.update(dataframe, self.predictor)
45
45
 
46
46
  self._sort_cubes()
47
- return self._create_theory(dataframe, sort)
47
+ return self._create_theory(dataframe)
@@ -28,7 +28,7 @@ class CReEPy(HyperCubeExtractor):
28
28
  self.ignore_threshold = ignore_threshold
29
29
  self._default_surrounding_cube = True
30
30
 
31
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
31
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
32
32
  if not isinstance(self.clustering, HyperCubeClustering):
33
33
  raise TypeError("clustering must be a HyperCubeClustering")
34
34
 
@@ -58,7 +58,7 @@ class DiViNE(HyperCubeExtractor):
58
58
  distance, idx = tree.query([list(point.dimensions.values()) for point in cube.corners()], k=1)
59
59
  return idx[np.argmin(distance)][-1]
60
60
 
61
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
61
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
62
62
  np.random.seed(self.seed)
63
63
  data = self.__clean(dataframe)
64
64
 
@@ -82,4 +82,4 @@ class DiViNE(HyperCubeExtractor):
82
82
  if len(discarded) > 0:
83
83
  data = pd.concat([data] + [d.to_dataframe() for d in discarded]).reset_index(drop=True)
84
84
  self._sort_cubes()
85
- return self._create_theory(dataframe, sort)
85
+ return self._create_theory(dataframe)
@@ -25,12 +25,12 @@ class GridEx(HyperCubeExtractor):
25
25
  self.threshold = threshold
26
26
  self._generator = rnd.Random(seed)
27
27
 
28
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
28
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
29
29
  self._hypercubes = []
30
30
  surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
31
31
  surrounding.init_diversity(2 * self.threshold)
32
32
  self._iterate(surrounding, dataframe)
33
- return self._create_theory(dataframe, sort)
33
+ return self._create_theory(dataframe)
34
34
 
35
35
  def _create_ranges(self, cube, iteration):
36
36
  ranges = {}
@@ -16,19 +16,23 @@ class HEx(GridEx):
16
16
  """
17
17
 
18
18
  class Node:
19
- def __init__(self, cube: GenericCube, parent: HEx.Node = None, gain: bool = True, threshold: float = None):
19
+ def __init__(self, cube: GenericCube, parent: HEx.Node = None, threshold: float = None):
20
20
  self.cube = cube
21
21
  self.parent = parent
22
22
  self.children: Iterable[HEx.Node] = []
23
- self.gain = gain if not threshold else self.check(threshold)
23
+ self.threshold = threshold
24
+ self.gain = True if parent is None else self.check()
24
25
 
25
- def check(self, threshold: float) -> bool:
26
+ def check(self) -> bool:
26
27
  other = self.parent
27
- while not other.gain:
28
- other = other.parent
28
+ try:
29
+ while not other.gain:
30
+ other = other.parent
31
+ except AttributeError:
32
+ return True
29
33
  if isinstance(other.cube, ClassificationCube):
30
34
  return other.cube.output != self.cube.output
31
- return other.cube.error - self.cube.error > threshold * .6
35
+ return other.cube.error - self.cube.error > self.threshold * .6
32
36
 
33
37
  def indices(self, dataframe: pd.DataFrame):
34
38
  return self.cube.filter_indices(dataframe.iloc[:, :-1])
@@ -71,7 +75,7 @@ class HEx(GridEx):
71
75
  def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
72
76
  fake = dataframe.copy()
73
77
  surrounding.update(dataframe, self.predictor)
74
- root = HEx.Node(surrounding)
78
+ root = HEx.Node(surrounding, threshold=self.threshold)
75
79
  current = [root]
76
80
 
77
81
  for iteration in self.grid.iterate():
@@ -82,7 +86,7 @@ class HEx(GridEx):
82
86
  cleaned = node.update(fake, self.predictor, False)
83
87
  node.children = [HEx.Node(c, node, threshold=self.threshold) for c in self._merge(
84
88
  [c for c, _ in cleaned], fake)]
85
- next_iteration += [n for n in node.permanent_children(fake)]
89
+ next_iteration += [n for n in node.children]
86
90
 
87
91
  current = next_iteration.copy()
88
92
  _ = root.update(fake, self.predictor, True)
@@ -45,6 +45,16 @@ class Point:
45
45
  def __eq__(self, other: Point) -> bool:
46
46
  return all([abs(self[dimension] - other[dimension]) < Point.EPSILON for dimension in self._dimensions])
47
47
 
48
+ def distance(self, other: Point, metric: str='Euclidean') -> float:
49
+ distances = [abs(self[dimension] - other[dimension]) for dimension in self._dimensions]
50
+ if metric == 'Euclidean':
51
+ distance = sum(np.array(distances)**2)**0.5
52
+ elif metric == 'Manhattan':
53
+ distance = sum(distances)
54
+ else:
55
+ raise ValueError("metric should be 'Euclidean' or 'Manhattan'")
56
+ return distance
57
+
48
58
  @property
49
59
  def dimensions(self) -> dict[str, float | str]:
50
60
  return self._dimensions
@@ -73,19 +83,25 @@ class HyperCube:
73
83
  self._error = 0.0
74
84
  self._barycenter = Point([], [])
75
85
 
76
- def __contains__(self, point: dict[str, float]) -> bool:
86
+ def __contains__(self, obj: dict[str, float] | HyperCube) -> bool:
77
87
  """
78
- Note that a point (dict[str, float]) is inside a hypercube if ALL its dimensions' values satisfy:
79
- min_dim <= value < max_dim
80
- :param point: an N-dimensional point
81
- :return: true if the point is inside the hypercube, false otherwise
88
+ Note that a point is inside a hypercube if ALL its dimensions' values satisfy:
89
+ min_dim <= object dimension < max_dim
90
+ :param obj: an N-dimensional object (point or hypercube)
91
+ :return: true if the object is inside the hypercube, false otherwise
82
92
  """
83
- return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in point.items()])
93
+ if isinstance(obj, HyperCube):
94
+ return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
95
+ for k in obj.dimensions])
96
+ elif isinstance(obj, dict):
97
+ return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in obj.items()])
98
+ else:
99
+ raise TypeError("Invalid type for obj parameter")
84
100
 
85
101
  def __eq__(self, other: HyperCube) -> bool:
86
102
  return all([(abs(dimension.this_dimension[0] - dimension.other_dimension[0]) < HyperCube.EPSILON)
87
103
  & (abs(dimension.this_dimension[1] - dimension.other_dimension[1]) < HyperCube.EPSILON)
88
- for dimension in self._zip_dimensions(other)])
104
+ for dimension in self._zip_dimensions(other, True)])
89
105
 
90
106
  def __getitem__(self, feature: str) -> Dimension:
91
107
  if feature in self._dimensions.keys():
@@ -104,6 +120,10 @@ class HyperCube:
104
120
  def dimensions(self) -> Dimensions:
105
121
  return self._dimensions
106
122
 
123
+ @property
124
+ def finite_dimensions(self) -> Dimensions:
125
+ return {k: v for k, v in self._dimensions.items() if np.isfinite(v[0]) and np.isfinite(v[1])}
126
+
107
127
  @property
108
128
  def limit_count(self) -> int:
109
129
  return len(self._limits)
@@ -124,6 +144,9 @@ class HyperCube:
124
144
  def barycenter(self) -> Point:
125
145
  return self._barycenter
126
146
 
147
+ def subcubes(self, cubes: Iterable[GenericCube]) -> Iterable[GenericCube]:
148
+ return [c for c in cubes if c in self and c != self]
149
+
127
150
  def _fit_dimension(self, dimension: dict[str, tuple[float, float]]) -> dict[str, tuple[float, float]]:
128
151
  new_dimension: dict[str, tuple[float, float]] = {}
129
152
  for key, value in dimension.items():
@@ -144,8 +167,10 @@ class HyperCube:
144
167
  def filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
145
168
  return dataset[self.filter_indices(dataset)]
146
169
 
147
- def _zip_dimensions(self, other: HyperCube) -> list[ZippedDimension]:
148
- return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in self._dimensions.keys()]
170
+ def _zip_dimensions(self, other: HyperCube, check_finite: bool = False) -> list[ZippedDimension]:
171
+ dimensions = set(self.finite_dimensions).union(set(other.finite_dimensions)) if check_finite else \
172
+ set(self.dimensions)
173
+ return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in dimensions]
149
174
 
150
175
  def add_limit(self, limit_or_feature: Limit | str, direction: str = None) -> None:
151
176
  if isinstance(limit_or_feature, Limit):
@@ -433,8 +458,20 @@ class ClosedCube(HyperCube):
433
458
  def __init__(self, dimension: dict[str, tuple] = None):
434
459
  super().__init__(dimension=dimension)
435
460
 
436
- def __contains__(self, point: dict[str, float]) -> bool:
437
- return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in point.items()])
461
+ def __contains__(self, obj: dict[str, float] | ClosedCube) -> bool:
462
+ """
463
+ Note that an object is inside a hypercube if ALL its dimensions' values satisfy:
464
+ min_dim <= object dimension <= max_dim
465
+ :param obj: an N-dimensional object (point or hypercube)
466
+ :return: true if the object is inside the hypercube, false otherwise
467
+ """
468
+ if isinstance(obj, ClosedCube):
469
+ return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
470
+ for k in obj.dimensions])
471
+ elif isinstance(obj, dict):
472
+ return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in obj.items()])
473
+ else:
474
+ raise TypeError("Invalid type for obj parameter")
438
475
 
439
476
  def filter_indices(self, dataset: pd.DataFrame) -> ndarray:
440
477
  v = np.array([v for _, v in self._dimensions.items()])
@@ -170,7 +170,7 @@ class ITER(HyperCubeExtractor):
170
170
  min(overlapping_cube.get_first(feature), b) if direction == '+' else b)
171
171
  return cube.overlap(hypercubes)
172
172
 
173
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
173
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
174
174
  self._hypercubes, domain = self._initialize(dataframe)
175
175
  temp_train = dataframe.copy()
176
176
  fake = dataframe.copy()
@@ -193,4 +193,4 @@ class ITER(HyperCubeExtractor):
193
193
  ratio *= 2
194
194
  if new_cube.has_volume():
195
195
  self._hypercubes += [new_cube]
196
- return self._create_theory(dataframe, sort)
196
+ return self._create_theory(dataframe)
@@ -58,10 +58,10 @@ class REAL(PedagogicalExtractor):
58
58
  rules.append(self._create_new_rule(sample))
59
59
  return ruleset.optimize()
60
60
 
61
- def _create_theory(self, dataset: pd.DataFrame, ruleset: IndexedRuleSet, sort: bool = True) -> MutableTheory:
61
+ def _create_theory(self, dataset: pd.DataFrame, ruleset: IndexedRuleSet) -> MutableTheory:
62
62
  theory = mutable_theory()
63
63
  for key, rule in ruleset.flatten():
64
- variables = create_variable_list(self.discretization, sort=sort)
64
+ variables = create_variable_list(self.discretization)
65
65
  theory.assertZ(self._create_clause(dataset, variables, key, rule))
66
66
  return theory
67
67
 
@@ -111,16 +111,12 @@ class REAL(PedagogicalExtractor):
111
111
  samples_all = samples_0.append(samples_1)
112
112
  return samples_all, len(set(self.predictor.predict(samples_all))) == 1
113
113
 
114
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
114
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
115
115
  # Order the dataset by column to preserve reproducibility.
116
116
  dataframe = dataframe.sort_values(by=list(dataframe.columns.values), ascending=False)
117
- # Always perform output mapping in the same (sorted) way to preserve reproducibility.
118
- if mapping is None:
119
- self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
120
- else:
121
- self._output_mapping = {value: index for index, value in enumerate(sorted(set(mapping[dataframe.iloc[:, -1]])))}
117
+ self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
122
118
  self._ruleset = self._get_or_set(HashableDataFrame(dataframe))
123
- return self._create_theory(dataframe, self._ruleset, sort)
119
+ return self._create_theory(dataframe, self._ruleset)
124
120
 
125
121
  def _predict(self, dataframe) -> Iterable:
126
122
  return np.array([self._internal_predict(data.transpose()) for _, data in dataframe.iterrows()])
@@ -136,7 +136,7 @@ class Trepan(PedagogicalExtractor):
136
136
  nodes.append(child)
137
137
  return len(to_remove)
138
138
 
139
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
139
+ def _extract(self, dataframe: pd.DataFrame) -> Theory:
140
140
  queue = self._init(dataframe)
141
141
  while len(queue) > 0:
142
142
  node = queue.pop()
@@ -149,7 +149,7 @@ class Trepan(PedagogicalExtractor):
149
149
  queue.add_all(best)
150
150
  node.children += list(best)
151
151
  self._optimize()
152
- return self._create_theory(dataframe.columns[-1], sort)
152
+ return self._create_theory(dataframe.columns[-1])
153
153
 
154
154
  def _predict(self, dataframe: pd.DataFrame) -> Iterable:
155
155
  return np.array(
@@ -20,8 +20,7 @@ class HyperCubePredictor(EvaluableModel):
20
20
  def _predict(self, dataframe: pd.DataFrame) -> Iterable:
21
21
  return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
22
22
 
23
- def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
24
- mapping: dict[str: int] = None) -> Iterable:
23
+ def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
25
24
  predictions = np.array(self._predict(dataframe))
26
25
  idx = [prediction is None for prediction in predictions]
27
26
  if sum(idx) > 0:
@@ -46,10 +45,9 @@ class HyperCubePredictor(EvaluableModel):
46
45
  return HyperCubePredictor._get_cube_output(cubes[idx], row)
47
46
 
48
47
  def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
49
- distances = [(
50
- cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
51
- ) for cube in self._hypercubes]
52
- return min(distances)[-1]
48
+ return min([(
49
+ cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
50
+ ) for cube in self._hypercubes])[-1]
53
51
 
54
52
  def _create_brute_tree(self, criterion: str = 'center', n: int = 2) -> (BallTree, list[GenericCube]):
55
53
  admissible_criteria = ['surface', 'center', 'corner', 'perimeter', 'density', 'default']
@@ -68,12 +66,18 @@ class HyperCubePredictor(EvaluableModel):
68
66
  [point[1] for point in points]
69
67
 
70
68
  def _predict_from_cubes(self, data: dict[str, float]) -> float | str | None:
69
+ cube = self._find_cube(data)
70
+ if cube is None:
71
+ return None
72
+ elif self._output == Target.CLASSIFICATION:
73
+ return HyperCubePredictor._get_cube_output(cube, data)
74
+ else:
75
+ return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
76
+
77
+ def _find_cube(self, data: dict[str, float]) -> GenericCube | None:
71
78
  for cube in self._hypercubes:
72
79
  if data in cube:
73
- if self._output == Target.CLASSIFICATION:
74
- return HyperCubePredictor._get_cube_output(cube, data)
75
- else:
76
- return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
80
+ return cube
77
81
  return None
78
82
 
79
83
  @property
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: psyke
3
- Version: 0.7.11.dev2
3
+ Version: 0.8.0.dev11
4
4
  Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
5
5
  Home-page: https://github.com/psykei/psyke-python
6
6
  Author: Matteo Magnini
@@ -66,8 +66,7 @@ def initialize(file: str) -> list[dict[str:Theory]]:
66
66
  params['grid'] = Grid(int(row['grid']), AdaptiveStrategy(ranked, n))
67
67
 
68
68
  extractor = get_extractor(row['extractor_type'], params)
69
- mapping = None if 'output_mapping' not in row.keys() or row['output_mapping'] == '' else ast.literal_eval(row['output_mapping'])
70
- theory = extractor.extract(training_set, mapping) if mapping is not None else extractor.extract(training_set)
69
+ theory = extractor.extract(training_set)
71
70
 
72
71
  # Compute predictions from rules
73
72
  index = test_set.shape[1] - 1
@@ -78,12 +77,8 @@ def initialize(file: str) -> list[dict[str:Theory]]:
78
77
  solver = prolog_solver(static_kb=mutable_theory(theory).assertZ(get_in_rule()).assertZ(get_not_in_rule()))
79
78
  substitutions = [solver.solveOnce(data_to_struct(data)) for _, data in ordered_test_set.iterrows()]
80
79
  expected = [cast(query.solved_query.get_arg_at(index)) for query in substitutions if query.is_yes]
81
- if mapping is not None:
82
- predictions = [prediction for prediction in extractor.predict(test_set_for_predictor.iloc[:, :-1], mapping)
83
- if prediction is not None]
84
- else:
85
- predictions = [prediction for prediction in extractor.predict(test_set_for_predictor.iloc[:, :-1])
86
- if prediction is not None]
80
+ predictions = [prediction for prediction in extractor.predict(test_set_for_predictor.iloc[:, :-1])
81
+ if prediction is not None]
87
82
 
88
83
  yield {
89
84
  'extractor': extractor,
@@ -246,7 +246,7 @@ class TestHypercube(AbstractTestHypercube):
246
246
 
247
247
  def test_zip_dimensions(self):
248
248
  cube = HyperCube({'X': self.y, 'Y': self.x})
249
- expected = [ZippedDimension(d, self.cube[d], cube[d]) for d in self.dimensions.keys()]
249
+ expected = [ZippedDimension(d, self.cube[d], cube[d]) for d in set(self.dimensions)]
250
250
  self.assertEqual(self.cube._zip_dimensions(cube), expected)
251
251
 
252
252
  def test_fit_dimension(self):
psyke-0.7.11.dev2/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.7.11.dev2
@@ -1,32 +0,0 @@
1
- from abc import ABC
2
-
3
- import pandas as pd
4
- from numpy import argmax
5
- from tuprolog.theory import Theory
6
-
7
- from psyke import Extractor
8
-
9
-
10
- class PedagogicalExtractor(Extractor, ABC):
11
-
12
- def __init__(self, predictor, discretization=None, normalization=None):
13
- Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
14
-
15
- def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
16
- from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube
17
- new_y = self.predictor.predict(dataframe.iloc[:, :-1])
18
- if mapping is not None:
19
- if hasattr(new_y[0], 'shape'):
20
- # One-hot encoding for multi-class tasks
21
- if len(new_y[0].shape) > 0 and new_y[0].shape[0] > 1:
22
- new_y = [argmax(y, axis=0) for y in new_y]
23
- # One-hot encoding for binary class tasks
24
- else:
25
- new_y = [round(y[0]) for y in new_y]
26
- new_y = pd.DataFrame(new_y).set_index(dataframe.index)
27
- data = dataframe.iloc[:, :-1].copy().join(new_y)
28
- data.columns = dataframe.columns
29
- return self._extract(data, mapping, sort)
30
-
31
- def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
32
- raise NotImplementedError('extract')
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes