psyke 0.6.1.dev4__tar.gz → 0.7.5.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {psyke-0.6.1.dev4/psyke.egg-info → psyke-0.7.5.dev5}/PKG-INFO +3 -3
- psyke-0.7.5.dev5/VERSION +1 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/__init__.py +13 -3
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/__init__.py +1 -5
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/__init__.py +23 -5
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/creepy/__init__.py +5 -16
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridex/__init__.py +39 -36
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridrex/__init__.py +2 -2
- psyke-0.7.5.dev5/psyke/extraction/hypercubic/hex/__init__.py +54 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/hypercube.py +7 -4
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/iter/__init__.py +2 -1
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/pedro/__init__.py +9 -6
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/plot.py +5 -2
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5/psyke.egg-info}/PKG-INFO +3 -3
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/SOURCES.txt +1 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/requires.txt +2 -2
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/setup.py +2 -2
- psyke-0.6.1.dev4/VERSION +0 -1
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/LICENSE +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/MANIFEST.in +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/README.md +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/cream/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/exact/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/clustering/utils.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/cart/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/cart/predictor.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/cosmik/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/divine/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/strategy.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/utils.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/real/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/real/utils.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/trepan/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/trepan/utils.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/hypercubepredictor.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/schema/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/crash/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/orchid/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/dataframe.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/logic.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/metrics.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/sorted.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/dependency_links.txt +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/not-zip-safe +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/top_level.txt +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/pyproject.toml +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/setup.cfg +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/clustering/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/cart/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/cart/test_cart.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/cart/test_simplified_cart.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/hypercubic/test_hypercube.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/real/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/real/test_real.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/real/test_rule.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/test_node.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/test_split.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/extraction/trepan/test_trepan.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/test_prune.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/test_simplify.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/psyke/utils/test_simplify_formatter.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/datasets/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/predictors/__init__.py +0 -0
- {psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/test/resources/tests/__init__.py +0 -0
{psyke-0.6.1.dev4/psyke.egg-info → psyke-0.7.5.dev5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.6.1.dev4
+Version: 0.7.5.dev5
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
@@ -23,8 +23,8 @@ Requires-Python: >=3.9.0, <3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy~=1.26.0
-Requires-Dist: pandas~=2.
-Requires-Dist: scikit-learn~=1.
+Requires-Dist: pandas~=2.2.0
+Requires-Dist: scikit-learn~=1.4.0
 Requires-Dist: 2ppy~=0.4.0
 Requires-Dist: kneed~=0.8.1
 Requires-Dist: sympy~=1.11
psyke-0.7.5.dev5/VERSION
ADDED
@@ -0,0 +1 @@
+0.7.5.dev5
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/__init__.py
@@ -291,14 +291,24 @@ class Extractor(EvaluableModel, ABC):
                          normalization, output, seed)

     @staticmethod
-    def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
-               normalization: dict[str, tuple[float, float]] = None,
+    def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output: Target = Target.CONSTANT,
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
                seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new GridEx extractor.
         """
         from psyke.extraction.hypercubic.gridex import GridEx
-        return GridEx(predictor, grid, min_examples, threshold, normalization, seed)
+        return GridEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+
+    @staticmethod
+    def hex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output: Target = Target.CONSTANT,
+            discretization=None, normalization: dict[str, tuple[float, float]] = None,
+            seed: int = get_default_random_seed()) -> Extractor:
+        """
+        Creates a new HEx extractor.
+        """
+        from psyke.extraction.hypercubic.hex import HEx
+        return HEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)

     @staticmethod
     def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
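The hunk above adds an output/discretization pair to the Extractor.gridex factory and introduces a matching Extractor.hex factory. A minimal usage sketch of the new signatures, assuming a scikit-learn classifier trained on a dataframe whose last column is the target; the Grid(1, FixedStrategy(2)) construction is an assumption not shown in this diff and may need adjusting to your psyke version.

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from psyke import Extractor
from psyke.extraction.hypercubic import Grid
from psyke.extraction.hypercubic.strategy import FixedStrategy
from psyke.utils import Target

iris = load_iris(as_frame=True).frame                 # features plus 'target' as the last column
predictor = DecisionTreeClassifier().fit(iris.iloc[:, :-1], iris.iloc[:, -1])
grid = Grid(1, FixedStrategy(2))                      # assumed constructor: 1 iteration, 2 bins per feature

# Both factories now accept output and discretization before normalization.
gridex = Extractor.gridex(predictor, grid, min_examples=100, threshold=0.1, output=Target.CLASSIFICATION)
hex_ = Extractor.hex(predictor, grid, min_examples=100, threshold=0.1, output=Target.CLASSIFICATION)
theory = hex_.extract(iris)                           # returns a tuProlog Theory of extracted rules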
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/__init__.py
@@ -26,11 +26,7 @@ class PedagogicalExtractor(Extractor, ABC):
         new_y = pd.DataFrame(new_y).set_index(dataframe.index)
         data = dataframe.iloc[:, :-1].copy().join(new_y)
         data.columns = dataframe.columns
-
-        if isinstance(self, HyperCubeExtractor):
-            self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
-            self._surrounding.update(dataframe, self.predictor)
-        return theory
+        return self._extract(data, mapping, sort)

     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
         raise NotImplementedError('extract')
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/__init__.py
@@ -14,7 +14,7 @@ from psyke.extraction import PedagogicalExtractor
 from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
     GenericCube
 from psyke.hypercubepredictor import HyperCubePredictor
-from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
+from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier, last_in_body
 from psyke.utils import Target
 from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy

@@ -24,6 +24,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
         HyperCubePredictor.__init__(self, output=output, normalization=normalization)
         PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
         self._surrounding = None
+        self._default_surrounding_cube = False

     def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
         if self._output == Target.CONSTANT:
@@ -37,14 +38,30 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
         cubes.sort()
         self._hypercubes = [cube[2] for cube in cubes]

+    def _last_cube_as_default(self, theory):
+        last_clause = list(theory.clauses)[-1]
+        theory.retract(last_clause)
+        theory.assertZ(clause(
+            last_clause.head, [last_in_body(last_clause.body)] if self._output is Target.REGRESSION else []))
+        last_cube = self._hypercubes[-1]
+        for dimension in last_cube.dimensions.keys():
+            last_cube[dimension] = [-np.inf, np.inf]
+        return theory
+
+    def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+        theory = PedagogicalExtractor.extract(self, dataframe, mapping, sort)
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+        self._surrounding.update(dataframe, self.predictor)
+        return theory
+
     @staticmethod
     def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
         return create_head(dataframe.columns[-1], variables[:-1], output) \
             if not isinstance(output, LinearRegression) else \
             create_head(dataframe.columns[-1], variables[:-1], variables[-1])

-    def _ignore_dimensions(self) -> Iterable[str]:
-        return []
+    def _ignore_dimensions(self, cube: HyperCube) -> Iterable[str]:
+        return [d for d in cube.dimensions if cube[d][0] == -np.inf or cube[d][1] == np.inf]

     def __drop(self, dataframe: pd.DataFrame):
         self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
@@ -59,9 +76,10 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
             variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
             head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
                                                    self.unscale(cube.output, dataframe.columns[-1]))
-            body = cube.body(variables, self._ignore_dimensions(), self.unscale, self.normalization)
+            body = cube.body(variables, self._ignore_dimensions(cube), self.unscale, self.normalization)
             new_theory.assertZ(clause(head, body))
-
+        new_theory = HyperCubeExtractor._prettify_theory(new_theory)
+        return self._last_cube_as_default(new_theory) if self._default_surrounding_cube else new_theory

     @staticmethod
     def _prettify_theory(theory: Theory) -> Theory:
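Two behavioural notes on the hunk above: _ignore_dimensions now receives the cube and skips every dimension whose interval is unbounded on either side, and theory creation can turn the last clause into a default rule when _default_surrounding_cube is set. A self-contained sketch of the dimension-skipping rule (plain Python, cubes modelled here as dicts of (lower, upper) bounds rather than psyke's own classes):

import numpy as np

def unbounded_dimensions(dimensions: dict[str, tuple[float, float]]) -> list[str]:
    # A dimension widened to an infinite bound carries no constraint, so it is left out of the rule body.
    return [d for d, (lo, hi) in dimensions.items() if lo == -np.inf or hi == np.inf]

cube = {'petal_length': (1.0, 2.5), 'petal_width': (-np.inf, np.inf)}
print(unbounded_dimensions(cube))    # ['petal_width']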
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/creepy/__init__.py
@@ -4,13 +4,11 @@ from collections import Iterable
 import numpy as np
 import pandas as pd
 from sklearn.base import ClassifierMixin
-from tuprolog.core import clause
 from tuprolog.theory import Theory
 from psyke import Clustering
 from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import HyperCubeExtractor
 from psyke.utils import Target, get_default_random_seed
-from psyke.utils.logic import last_in_body


 class CReEPy(HyperCubeExtractor):
@@ -28,6 +26,7 @@ class CReEPy(HyperCubeExtractor):
                          normalization, seed)
         self.ranks = ranks
         self.ignore_threshold = ignore_threshold
+        self._default_surrounding_cube = True

     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
         if not isinstance(self.clustering, HyperCubeClustering):
@@ -36,17 +35,7 @@ class CReEPy(HyperCubeExtractor):
         self.clustering.fit(dataframe)
         self._hypercubes = self.clustering.get_hypercubes()
         for cube in self._hypercubes:
-            for dimension in self.
-
-
-
-        theory.retract(last_clause)
-        theory.assertZ(clause(
-            last_clause.head, [last_in_body(last_clause.body)] if self._output is Target.REGRESSION else []))
-        last_cube = self._hypercubes[-1]
-        for dimension in last_cube.dimensions.keys():
-            last_cube[dimension] = [-np.inf, np.inf]
-        return theory
-
-    def _ignore_dimensions(self) -> Iterable[str]:
-        return [dimension for dimension, relevance in self.ranks if relevance < self.ignore_threshold]
+            for dimension, relevance in self.ranks:
+                if relevance < self.ignore_threshold:
+                    cube[dimension] = [-np.inf, np.inf]
+        return self._create_theory(dataframe)
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridex/__init__.py
@@ -4,6 +4,7 @@ from itertools import product
 from typing import Iterable
 import numpy as np
 import pandas as pd
+from sklearn.base import ClassifierMixin
 from tuprolog.theory import Theory
 from psyke import get_default_random_seed
 from psyke.utils import Target
@@ -15,26 +16,45 @@ class GridEx(HyperCubeExtractor):
     Explanator implementing GridEx algorithm, doi:10.1007/978-3-030-82017-6_2.
     """

-    def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float,
-                 seed=get_default_random_seed()):
-        super().__init__(predictor, Target.
+    def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
+                         discretization, normalization)
         self.grid = grid
         self.min_examples = min_examples
         self.threshold = threshold
-        self.
+        self._generator = rnd.Random(seed)

     def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
         self._hypercubes = []
-        if isinstance(np.array(self.predictor.predict(dataframe.iloc[0:1, :-1])).flatten()[0], str):
-            self._output = Target.CLASSIFICATION
         surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
         surrounding.init_diversity(2 * self.threshold)
         self._iterate(surrounding, dataframe)
         return self._create_theory(dataframe, sort)

-    def
-
-
+    def _create_ranges(self, cube, iteration):
+        ranges = {}
+        for (feature, (a, b)) in cube.dimensions.items():
+            n_bins = self.grid.get(feature, iteration)
+            if n_bins == 1:
+                ranges[feature] = [(-np.inf, np.inf)]
+            else:
+                size = (b - a) / n_bins
+                ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
+        return ranges
+
+    def _cubes_to_split(self, cube, surrounding, iteration, dataframe, fake, keep_empty=False):
+        to_split = []
+        for (pn, p) in enumerate(list(product(*self._create_ranges(cube, iteration).values()))):
+            cube = self._default_cube()
+            for i, f in enumerate(dataframe.columns[:-1]):
+                cube.update_dimension(f, p[i])
+            n = cube.count(dataframe)
+            if n > 0 or keep_empty:
+                fake = pd.concat([fake, cube.create_samples(self.min_examples - n, surrounding, self._generator)])
+                cube.update(fake, self.predictor)
+                to_split.append(cube)
+        return to_split, fake

     def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
         fake = dataframe.copy()
@@ -44,31 +64,13 @@ class GridEx(HyperCubeExtractor):
         for iteration in self.grid.iterate():
             next_iteration = []
             for cube in prev:
-                to_split = []
                 if cube.count(dataframe) == 0:
                     continue
                 if cube.diversity < self.threshold:
                     self._hypercubes += [cube]
                     continue
-
-
-                    bins = []
-                    n_bins = self.grid.get(feature, iteration)
-                    size = (b - a) / n_bins
-                    for i in range(n_bins):
-                        bins.append((a + size * i, a + size * (i + 1)))
-                    ranges[feature] = bins
-                for (pn, p) in enumerate(list(product(*ranges.values()))):
-                    cube = self._default_cube()
-                    for i, f in enumerate(dataframe.columns[:-1]):
-                        cube.update_dimension(f, p[i])
-                    n = cube.count(dataframe)
-                    if n > 0:
-                        fake = pd.concat([fake, cube.create_samples(self.min_examples - n, self.__generator)])
-                        cube.update(fake, self.predictor)
-                        to_split += [cube]
-                to_split = self._merge(to_split, fake)
-                next_iteration += [cube for cube in to_split]
+                to_split, fake = self._cubes_to_split(cube, surrounding, iteration, dataframe, fake)
+                next_iteration += [c for c in self._merge(to_split, fake)]
             prev = next_iteration.copy()
             self._hypercubes += [cube for cube in next_iteration]

@@ -102,15 +104,16 @@ class GridEx(HyperCubeExtractor):
         not_in_cache = [cube for cube in to_split]
         adjacent_cache = {}
         merge_cache = {}
-
-        while
+        cont = True
+        while cont:
             to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
                         GridEx._find_couples(to_split, not_in_cache, adjacent_cache) if
                         self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
            if len(to_merge) == 0:
-
-
-
-
-
+                cont = False
+            else:
+                sorted(to_merge, key=lambda c: c[1].diversity)
+                best = to_merge[0]
+                to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
+                not_in_cache = [best[1]]
         return to_split
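The _create_ranges helper factored out above splits each feature's interval into equal-width bins, collapsing single-bin features to an unconstrained range. A standalone sketch of the same partitioning arithmetic (plain Python, independent of psyke's Grid object):

import numpy as np

def create_ranges(dimensions: dict[str, tuple[float, float]], bins_per_feature: dict[str, int]) -> dict:
    ranges = {}
    for feature, (a, b) in dimensions.items():
        n_bins = bins_per_feature[feature]
        if n_bins == 1:
            ranges[feature] = [(-np.inf, np.inf)]    # a single bin places no constraint on the feature
        else:
            size = (b - a) / n_bins                  # equal-width partition of [a, b]
            ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
    return ranges

print(create_ranges({'x': (0.0, 1.0), 'y': (0.0, 4.0)}, {'x': 1, 'y': 2}))
# {'x': [(-inf, inf)], 'y': [(0.0, 2.0), (2.0, 4.0)]}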
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/gridrex/__init__.py
@@ -1,4 +1,4 @@
-from psyke import get_default_random_seed
+from psyke import get_default_random_seed, Target
 from psyke.extraction.hypercubic import Grid, RegressionCube
 from psyke.extraction.hypercubic.gridex import GridEx

@@ -10,7 +10,7 @@ class GridREx(GridEx):

     def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization,
                  seed=get_default_random_seed()):
-        super().__init__(predictor, grid, min_examples, threshold, normalization, seed)
+        super().__init__(predictor, grid, min_examples, threshold, Target.REGRESSION, None, normalization, seed)

     def _default_cube(self) -> RegressionCube:
         return RegressionCube()
psyke-0.7.5.dev5/psyke/extraction/hypercubic/hex/__init__.py
ADDED
@@ -0,0 +1,54 @@
+import numpy as np
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+
+from psyke import get_default_random_seed, Target
+from psyke.extraction.hypercubic import Grid, HyperCube, GenericCube, ClassificationCube
+from psyke.extraction.hypercubic.gridex import GridEx
+
+
+class HEx(GridEx):
+    """
+    Explanator implementing HEx algorithm.
+    """
+
+    def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+        self._default_surrounding_cube = True
+
+    def _different_output(self, this_cube: GenericCube, other_cube: GenericCube) -> bool:
+        if isinstance(this_cube.output, str) and this_cube.output == other_cube.output:
+            return False
+        if isinstance(this_cube.output, float) and abs(this_cube.output - other_cube.output) < self.threshold:
+            return False
+        if isinstance(this_cube.output, LinearRegression):
+            raise NotImplementedError
+        return True
+
+    def _gain(self, parent_cube: GenericCube, new_cube: GenericCube) -> float:
+        if isinstance(parent_cube, ClassificationCube):
+            return parent_cube.output != new_cube.output
+        return parent_cube.diversity - new_cube.diversity > self.threshold / 3.0
+
+    def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
+        fake = dataframe.copy()
+        surrounding.update(dataframe, self.predictor)
+        prev = [surrounding]
+        next_iteration = []
+
+        for iteration in self.grid.iterate():
+            next_iteration = []
+            for cube in prev:
+                # subcubes =
+                #     [c for c in self._merge(self._cubes_to_split(cube, iteration, dataframe, fake, True), fake)]
+                subcubes, fake = self._cubes_to_split(cube, surrounding, iteration, dataframe, fake, True)
+                cleaned = [c for c in subcubes if c.count(dataframe) > 0 and self._gain(cube, c)]
+                if len(subcubes) > len(cleaned):
+                    if len(cleaned) > 0:
+                        idx = np.any([c.filter_indices(fake.iloc[:, :-1]) for c in cleaned], axis=0)
+                        cube.update(fake[~idx], self.predictor)
+                    self._hypercubes = [cube] + self._hypercubes
+                next_iteration += self._merge(cleaned, fake)
+            prev = next_iteration.copy()
+        self._hypercubes = [cube for cube in next_iteration] + self._hypercubes
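The _gain check in the new module decides whether a candidate subcube improves on its parent: for classification it must predict a different class, for regression it must cut diversity by more than a third of the threshold. A minimal numeric sketch of that criterion (plain values rather than psyke cube objects):

def gain(parent_output, child_output, parent_diversity, child_diversity,
         threshold: float, classification: bool) -> bool:
    if classification:
        return parent_output != child_output                        # keep only class-changing subcubes
    return parent_diversity - child_diversity > threshold / 3.0     # keep only clearly purer subcubes

print(gain('setosa', 'versicolor', None, None, 0.1, classification=True))    # True
print(gain(None, None, 0.50, 0.48, 0.1, classification=False))               # False: improvement too small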
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/hypercube.py
@@ -158,8 +158,9 @@ class HyperCube:
             return '*'
         raise Exception('Too many limits for this feature')

-    def create_samples(self, n: int = 1,
-
+    def create_samples(self, n: int = 1, surrounding: GenericCube = None,
+                       generator: Random = Random(get_default_random_seed())) -> pd.DataFrame:
+        return pd.DataFrame([self._create_tuple(generator, surrounding) for _ in range(n)])

     @staticmethod
     def check_overlap(to_check: Iterable[HyperCube], hypercubes: Iterable[HyperCube]) -> bool:
@@ -208,8 +209,10 @@ class HyperCube:
             return RegressionCube(dimensions)
         return HyperCube(dimensions)

-    def _create_tuple(self, generator: Random) -> dict:
-
+    def _create_tuple(self, generator: Random, surrounding: GenericCube) -> dict:
+        minmax = {k: (self[k][0] if np.isfinite(self[k][0]) else surrounding[k][0],
+                      self[k][1] if np.isfinite(self[k][1]) else surrounding[k][1]) for k in self._dimensions.keys()}
+        return {k: generator.uniform(minmax[k][0], minmax[k][1]) for k in self._dimensions.keys()}

     @staticmethod
     def cube_from_point(point: dict[str, float], output=None) -> GenericCube:
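The extended create_samples/_create_tuple above fall back to the surrounding cube's bounds whenever one of the cube's own bounds is infinite, so cubes widened to (-inf, inf) can still be sampled from a finite region. A self-contained sketch of that fallback (plain Python, cubes modelled as dicts):

import numpy as np
from random import Random

def sample_point(cube: dict[str, tuple[float, float]],
                 surrounding: dict[str, tuple[float, float]],
                 generator: Random) -> dict[str, float]:
    # Replace any infinite bound with the corresponding bound of the surrounding cube.
    minmax = {k: (lo if np.isfinite(lo) else surrounding[k][0],
                  hi if np.isfinite(hi) else surrounding[k][1]) for k, (lo, hi) in cube.items()}
    return {k: generator.uniform(lo, hi) for k, (lo, hi) in minmax.items()}

cube = {'x': (0.2, 0.4), 'y': (-np.inf, np.inf)}
surrounding = {'x': (0.0, 1.0), 'y': (0.0, 10.0)}
print(sample_point(cube, surrounding, Random(123)))   # 'y' is drawn from the surrounding bounds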
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/extraction/hypercubic/iter/__init__.py
@@ -38,7 +38,8 @@ class ITER(HyperCubeExtractor):
         expansions = []
         for limit in cubes:
             count = limit.cube.count(dataframe)
-            dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count,
+            dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count,
+                                                                        generator=self.__generator)])
             limit.cube.update(dataframe, self.predictor)
             expansions.append(Expansion(
                 limit.cube, limit.feature, limit.direction,
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/tuning/pedro/__init__.py
@@ -13,7 +13,8 @@ from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
 class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
-        GRIDREX = 2
+        GRIDREX = 2,
+        HEX = 3

     def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
@@ -23,7 +24,10 @@ class PEDRO(SKEOptimizer, IterativeOptimizer):
                               readability_tradeoff, patience, objective, output, normalization, discretization)
         IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
                                     max_depth, patience, output, normalization, discretization)
-        self.algorithm = algorithm
+        self.algorithm = Extractor.gridrex if algorithm == PEDRO.Algorithm.GRIDREX else \
+            Extractor.gridex if algorithm == PEDRO.Algorithm.GRIDEX else Extractor.hex
+        self.algorithm_name = "GridREx" if algorithm == PEDRO.Algorithm.GRIDREX else \
+            "GridEx" if algorithm == PEDRO.Algorithm.GRIDEX else "HEx"
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
         predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
         expected = self.dataframe.iloc[:, -1].values
@@ -50,10 +54,9 @@ class PEDRO(SKEOptimizer, IterativeOptimizer):
         params = []
         patience = self.patience
         while patience > 0:
-            print("{}. {}. Threshold = {:.2f}. ".format(self.
-            extractor =
-
-                else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
+            print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm_name, grid, threshold), end="")
+            extractor = self.algorithm(self.predictor, grid, min_examples=25,
+                                       threshold=threshold, normalization=self.normalization)
             _ = extractor.extract(self.dataframe)
             error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
                 else extractor.mae
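With the hunk above, PEDRO resolves the chosen Algorithm to the matching Extractor factory at construction time instead of storing the raw enum; the factory is later called with the predictor, the grid and the tuned threshold. A short sketch of that dispatch, using only names visible in this diff:

from psyke import Extractor
from psyke.tuning.pedro import PEDRO

def select_factory(algorithm: PEDRO.Algorithm):
    # Mirrors the chained conditional expression in PEDRO.__init__.
    return Extractor.gridrex if algorithm == PEDRO.Algorithm.GRIDREX else \
        Extractor.gridex if algorithm == PEDRO.Algorithm.GRIDEX else Extractor.hex

factory = select_factory(PEDRO.Algorithm.HEX)   # -> Extractor.hex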
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke/utils/plot.py
@@ -44,8 +44,11 @@ def plot_classification_samples(dataframe, classes, colors, markers, labels, loc


 def plot_boundaries(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
-                    a: float = .5, h: str = '////////', ls='-', e=.05, fc='none', ec=None):
-
+                    a: float = .5, h: str = '////////', ls='-', e=.05, fc='none', ec=None, reverse=False):
+    cubes = extractor._hypercubes.copy()
+    if reverse:
+        cubes.reverse()
+    for cube in cubes:
         plt.gca().fill_between((cube[x][0] - e, cube[x][1] + e), cube[y][0] - e, cube[y][1] + e,
                                fc=colors[cube.output] if fc is None else fc,
                                ec=colors[cube.output] if ec is None else ec, alpha=a, hatch=h, linestyle=ls)
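The new reverse flag lets the catch-all default cube (appended last by the extractors above) be drawn first, so more specific cubes are painted on top of it. A hedged usage sketch, assuming hex_ is the fitted extractor from the earlier Extractor.hex example, that matplotlib is installed, and that the colour keys match the outputs stored in the extracted cubes:

import matplotlib.pyplot as plt
from psyke.utils.plot import plot_boundaries

colors = {0: 'tab:blue', 1: 'tab:orange', 2: 'tab:green'}        # one colour per predicted class label
plot_boundaries(hex_, x='petal length (cm)', y='petal width (cm)', colors=colors, reverse=True)
plt.show()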
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5/psyke.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.6.1.dev4
+Version: 0.7.5.dev5
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
@@ -23,8 +23,8 @@ Requires-Python: >=3.9.0, <3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy~=1.26.0
-Requires-Dist: pandas~=2.
-Requires-Dist: scikit-learn~=1.
+Requires-Dist: pandas~=2.2.0
+Requires-Dist: scikit-learn~=1.4.0
 Requires-Dist: 2ppy~=0.4.0
 Requires-Dist: kneed~=0.8.1
 Requires-Dist: sympy~=1.11
{psyke-0.6.1.dev4 → psyke-0.7.5.dev5}/psyke.egg-info/SOURCES.txt
@@ -28,6 +28,7 @@ psyke/extraction/hypercubic/creepy/__init__.py
 psyke/extraction/hypercubic/divine/__init__.py
 psyke/extraction/hypercubic/gridex/__init__.py
 psyke/extraction/hypercubic/gridrex/__init__.py
+psyke/extraction/hypercubic/hex/__init__.py
 psyke/extraction/hypercubic/iter/__init__.py
 psyke/extraction/real/__init__.py
 psyke/extraction/real/utils.py
psyke-0.6.1.dev4/VERSION
DELETED
@@ -1 +0,0 @@
-0.6.1.dev4