psyke 0.5.2.dev4__py3-none-any.whl → 0.5.4.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psyke/__init__.py +13 -11
- psyke/clustering/__init__.py +2 -2
- psyke/clustering/cream/__init__.py +4 -4
- psyke/clustering/exact/__init__.py +6 -3
- psyke/extraction/hypercubic/creepy/__init__.py +5 -3
- psyke/hypercubepredictor.py +5 -4
- psyke/tuning/__init__.py +38 -28
- psyke/tuning/crash/__init__.py +33 -70
- psyke/tuning/orchid/__init__.py +19 -22
- psyke/tuning/pedro/__init__.py +49 -45
- {psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/METADATA +1 -1
- {psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/RECORD +15 -15
- {psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/LICENSE +0 -0
- {psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/WHEEL +0 -0
- {psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/top_level.txt +0 -0
psyke/__init__.py
CHANGED
@@ -44,9 +44,9 @@ class EvaluableModel(object):
         V = 3,
         FMI = 4
 
-    def __init__(self, normalization=None, discretization=None):
-        self.normalization = normalization
+    def __init__(self, discretization=None, normalization=None):
         self.discretization = discretization
+        self.normalization = normalization
 
     def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
         """
@@ -312,13 +312,14 @@ class Extractor(EvaluableModel, ABC):
     @staticmethod
     def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                gauss_components: int = 2, ranks: [(str, float)] = [], ignore_threshold: float = 0.0,
-               discretization=None, normalization: dict[str, tuple[float, float]] = None) -> Extractor:
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new CReEPy extractor.
         """
         from psyke.extraction.hypercubic.creepy import CReEPy
         return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
-                      discretization, normalization, clustering)
+                      discretization, normalization, clustering, seed)
 
     @staticmethod
     def real(predictor, discretization=None) -> Extractor:
@@ -341,8 +342,8 @@ class Extractor(EvaluableModel, ABC):
 
 
 class Clustering(EvaluableModel, ABC):
-    def __init__(self, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
 
     def fit(self, dataframe: pd.DataFrame):
         raise NotImplementedError('fit')
@@ -351,18 +352,19 @@ class Clustering(EvaluableModel, ABC):
         raise NotImplementedError('explain')
 
     @staticmethod
-    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-              gauss_components: int = 2) -> Clustering:
+    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
        """
        Creates a new ExACT instance.
        """
        from psyke.clustering.exact import ExACT
-       return ExACT(depth, error_threshold, output, gauss_components)
+       return ExACT(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
 
     @staticmethod
-    def cream(depth: int, error_threshold: float, output, gauss_components: int = 2) -> Clustering:
+    def cream(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
        """
        Creates a new CREAM instance.
        """
        from psyke.clustering.cream import CREAM
-       return CREAM(depth, error_threshold, output, gauss_components)
+       return CREAM(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
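Net effect of these signature changes: discretization is now threaded through every EvaluableModel subclass, and the clustering-based factories accept a seed for reproducible runs. A minimal usage sketch of the new keywords, assuming a fitted predictor and a dataframe df with the target in the last column (both placeholders, not from the diff):

    from psyke import Extractor, Clustering
    from psyke.utils import Target

    # The seed is forwarded from the factory into the underlying clustering,
    # so repeated extractions over the same data yield the same rules.
    extractor = Extractor.creepy(predictor, clustering=Clustering.exact,
                                 depth=2, error_threshold=0.1,
                                 output=Target.CLASSIFICATION, seed=123)
    theory = extractor.extract(df)

    # The clustering factories take the same seed (plus discretization).
    clustering = Clustering.cream(depth=2, error_threshold=0.1, seed=123)
    clustering.fit(df)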
psyke/clustering/__init__.py
CHANGED
@@ -8,8 +8,8 @@ from psyke.hypercubepredictor import HyperCubePredictor
 
 class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):
 
-    def __init__(self, output: Target = Target.CONSTANT, normalization=None):
-        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+    def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
+        HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
 
     def get_hypercubes(self) -> Iterable[HyperCube]:
         raise NotImplementedError('get_hypercubes')
psyke/clustering/cream/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Iterable
 import numpy as np
 import pandas as pd
 
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 from psyke.clustering.exact import ExACT
 from psyke.extraction.hypercubic import Node, HyperCube, ClosedCube
 from psyke.clustering.utils import select_gaussian_mixture
@@ -16,9 +16,9 @@ class CREAM(ExACT):
     Explanator implementing CREAM algorithm.
     """
 
-    def __init__(self, depth: int, error_threshold: float,
-                 output: Target = Target.CONSTANT, gauss_components: int = 5):
-        super().__init__(depth, error_threshold, output, gauss_components)
+    def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
 
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
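One detail worth noting across the two entry points: the Clustering.cream factory defaults to gauss_components=2, while CREAM.__init__ itself defaults to 5, so omitting the argument gives different models depending on how CREAM is built. A quick illustration, assuming only the defaults shown in the diffs:

    from psyke import Clustering
    from psyke.clustering.cream import CREAM

    via_factory = Clustering.cream(depth=2, error_threshold=0.1)  # gauss_components=2
    direct = CREAM(depth=2, error_threshold=0.1)                  # gauss_components=5
    assert via_factory.gauss_components != direct.gauss_components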
psyke/clustering/exact/__init__.py
CHANGED
@@ -13,7 +13,7 @@ from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
 from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
 from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 
 
 class ExACT(HyperCubeClustering, ABC):
@@ -22,13 +22,15 @@ class ExACT(HyperCubeClustering, ABC):
     """
 
     def __init__(self, depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-                 gauss_components: int = 2, normalization=None):
-        super().__init__(output, normalization)
+                 gauss_components: int = 2, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(output, discretization, normalization)
         self.depth = depth
         self.error_threshold = error_threshold
         self.gauss_components = gauss_components
         self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
         self._predictor.n_neighbors = 1
+        self.seed = seed
 
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
@@ -56,6 +58,7 @@ class ExACT(HyperCubeClustering, ABC):
         )
 
     def fit(self, dataframe: pd.DataFrame):
+        np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
         self._hypercubes = \
             self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
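Since fit now seeds NumPy's global generator before the Gaussian-mixture selection, two fits with the same seed and data should produce the same hypercubes. A sketch of the expected behaviour (df is a placeholder dataframe with the target in the last column, and comparing hypercube counts assumes the concrete class implements get_hypercubes):

    from psyke import Clustering

    a = Clustering.exact(depth=2, error_threshold=0.1, seed=42)
    b = Clustering.exact(depth=2, error_threshold=0.1, seed=42)
    a.fit(df)
    b.fit(df)

    # Same seed, same data: the induced partitions should match run to run.
    assert len(list(a.get_hypercubes())) == len(list(b.get_hypercubes()))

Note that np.random.seed mutates NumPy's process-wide state, so the call inside fit also affects any other NumPy-based randomness in the same process.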
psyke/extraction/hypercubic/creepy/__init__.py
CHANGED
@@ -9,7 +9,7 @@ from tuprolog.theory import Theory
 from psyke import Clustering
 from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import HyperCubeExtractor
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 from psyke.utils.logic import last_in_body
 
 
@@ -20,10 +20,12 @@ class CReEPy(HyperCubeExtractor):
 
     def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                  gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
-                 discretization=None, normalization=None, clustering=Clustering.exact):
+                 discretization=None, normalization=None, clustering=Clustering.exact,
+                 seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
                          discretization, normalization)
-        self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
+        self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
+                                     normalization, seed)
         self.ranks = ranks
         self.ignore_threshold = ignore_threshold
 
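Because CReEPy now calls the clustering factory with seven positional arguments, any custom callable passed as clustering must accept the extended signature. A minimal conforming stub (hypothetical, for illustration only):

    from psyke.clustering.exact import ExACT

    def my_clustering(depth, error_threshold, output, gauss_components,
                      discretization, normalization, seed):
        # Delegate to ExACT, e.g. pinning the mixture size regardless of caller.
        return ExACT(depth, error_threshold, output, 3,
                     discretization, normalization, seed)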
psyke/hypercubepredictor.py
CHANGED
@@ -11,17 +11,18 @@ from psyke.extraction.hypercubic import RegressionCube, GenericCube, Point
 
 
 class HyperCubePredictor(EvaluableModel):
-    def __init__(self, output=Target.CONSTANT, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, output=Target.CONSTANT, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
         self._hypercubes = []
         self._output = output
+        self._surrounding = None
 
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
 
     def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
                        mapping: dict[str: int] = None) -> Iterable:
-        predictions = self._predict(dataframe)
+        predictions = np.array(self._predict(dataframe))
         idx = [prediction is None for prediction in predictions]
         if sum(idx) > 0:
             if criterion == 'default':
@@ -46,7 +47,7 @@ class HyperCubePredictor(EvaluableModel):
 
     def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
         distances = [(
-            cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
+            cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
         ) for cube in self._hypercubes]
         return min(distances)[-1]
 
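The _brute_predict_surface change fixes a genuine bug: row is a plain dict at that point, so row.values without parentheses is the bound method object rather than the values, and iterating over it fails. A standalone illustration (not psyke code):

    row = {'petal_length': 1.4, 'petal_width': 0.2}
    list(row.values())  # [1.4, 0.2], what Point expects
    list(row.values)    # TypeError: 'builtin_function_or_method' object is not iterable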
psyke/tuning/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
 import numpy as np
 import pandas as pd
 
+from psyke.extraction.hypercubic import Grid
 from psyke.utils import Target
 
 
@@ -12,14 +13,12 @@ class Objective(Enum):
 
 
 class Optimizer:
-    def __init__(self, dataframe: pd.DataFrame,
-                 algorithm, output: Target = Target.CONSTANT, max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9,
-                 readability_tradeoff: float = 0.1, patience: int = 5,
+    def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
                  normalization=None, discretization=None):
         self.dataframe = dataframe
-        self.algorithm = algorithm
         self.output = output
-        self.max_mae_increase = max_mae_increase
+        self.max_error_increase = max_error_increase
         self.min_rule_decrease = min_rule_decrease
         self.readability_tradeoff = readability_tradeoff
         self.patience = patience
@@ -30,23 +29,13 @@ class Optimizer:
     def search(self):
         raise NotImplementedError
 
-    def _depth_improvement(self, best, other):
-        if other[0] == best[0]:
-            return (best[1] - other[1]) * 2
-        return 1 / (
-            (1 - other[0] / best[0]) ** self.readability_tradeoff *
-            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
-        )
-
-    @staticmethod
-    def _best(params):
-        param_dict = {Optimizer.__score(t): t for t in params}
+    def _best(self, params):
+        param_dict = {self._score(t): t for t in params}
         min_param = min(param_dict)
         return min_param, param_dict[min_param]
 
-    @staticmethod
-    def __score(param):
-        return param[0] * np.ceil(param[1] / 5)
+    def _score(self, param):
+        return param[0] * np.ceil(param[1] * self.readability_tradeoff)
 
     def _best_param(self, param):
         param_dict = {t[param]: t for t in self.params}
@@ -54,24 +43,45 @@ class Optimizer:
         return min_param, param_dict[min_param]
 
     def get_best(self):
-        names = [
-        params = [
+        names = ["Combined", "Predictive loss", "N rules"]
+        params = [self._best(self.params), self._best_param(0), self._best_param(1)]
         for n, p in zip(names, params):
             self._print_params(n, p[1])
             print()
-        return
+        return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
 
     def _print_params(self, n, param):
         raise NotImplementedError
 
 
-class GridOptimizer(Optimizer):
-    def __init__(self, predictor,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
-                 max_depth: int = 10, patience: int = 5, objective: Objective = Objective.MODEL,
+class SKEOptimizer(Optimizer, ABC):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
                  normalization=None, discretization=None):
-        super().__init__(dataframe,
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
                          patience, normalization, discretization)
         self.predictor = predictor
-        self.max_depth = max_depth
         self.objective = objective
+
+
+class IterativeOptimizer(Optimizer, ABC):
+    def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.max_depth = max_depth
+
+    def _iteration_improvement(self, best, other):
+        if other[0] == best[0]:
+            return (best[1] - other[1]) * 2
+        return 1 / (
+            (1 - other[0] / best[0]) ** self.readability_tradeoff *
+            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
+        )
+
+    def _check_iteration_improvement(self, best, current):
+        improvement = \
+            self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
+        return current, improvement < 1.2
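The refactoring splits the old single optimizer into two abstract bases: SKEOptimizer keeps the predictor and objective for extractor tuning, while IterativeOptimizer owns the depth loop and the improvement test. The shared _score now weights the rule count by readability_tradeoff instead of a fixed divisor of 5, so readability acts in coarse ceiling steps. A worked example of how _best would rank candidate (error, n_rules) pairs, with invented values:

    import numpy as np

    readability_tradeoff = 0.1

    def score(param):
        # error * ceil(n_rules * readability_tradeoff): fewer rules only help
        # once they change the ceiling.
        return param[0] * np.ceil(param[1] * readability_tradeoff)

    candidates = [(0.30, 4), (0.25, 12), (0.40, 2)]
    print(sorted(candidates, key=score))
    # scores: (0.30, 4) -> 0.30, (0.40, 2) -> 0.40, (0.25, 12) -> 0.50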
psyke/tuning/crash/__init__.py
CHANGED
@@ -1,91 +1,54 @@
-from enum import Enum
-
-import numpy as np
 import pandas as pd
 
-from psyke import Extractor, Clustering
-from psyke.tuning import Objective, Optimizer
+from psyke.tuning import Objective, SKEOptimizer
+from psyke.tuning.orchid import OrCHiD
 from psyke.utils import Target
 
 
-class CRASH(Optimizer):
-    class Algorithm(Enum):
-        ExACT = 1,
-        CREAM = 2
-
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+class CRASH(SKEOptimizer):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, output: Target = Target.CONSTANT,
-                 objective: Objective = Objective.MODEL, normalization=None):
-        super().__init__(predictor,
-        self.
+                 max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
+                 objective: Objective = Objective.MODEL, normalization=None, discretization=None):
+        super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, objective, output, normalization, discretization)
+        self.max_depth = max_depth
+        self.max_gauss_components = max_gauss_components
 
     def search(self):
-        self.params = self.__search_depth()
+        self.params = []
+        for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
+            self.params += self.__search_algorithm(algorithm)
 
-    def __search_depth(self):
+    def __search_algorithm(self, algorithm):
         params = []
         best = None
 
-        for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = self._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
-    def __search_threshold(self, depth):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
-        params = []
-        patience = self.patience
-        while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
-            extractor = Extractor.creepy(
-                self.predictor, depth=depth, error_threshold=threshold, output=self.output,
-                gauss_components=10, normalization=self.normalization,
-                clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
-            )
-            _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
-
-            if len(params) == 0:
-                params.append((mae, n, depth, threshold))
-                threshold += step
-                continue
-
-            if (n == 1) or (mae == 0.0):
-                params.append((mae, n, depth, threshold))
+        for gauss_components in range(2, self.max_gauss_components + 1):
+            data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
+                else self.dataframe
+            current_params = self.__search_components(data, algorithm, gauss_components)
+            current_best = self._best(current_params)[1]
+            if best is not None and self._score(best) <= self._score(current_best):
                 break
+            best = current_best
+            params += current_params
 
-            if mae > params[0][0] * self.max_mae_increase:
-                break
-
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
-
-            if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
-                patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, depth, threshold))
-            threshold += step
         return params
 
+    def __search_components(self, data, algorithm, gauss_components):
+        orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
+                        self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
+                        self.normalization, self.discretization)
+        orchid.search()
+        return [(*p, gauss_components, algorithm) for p in orchid.params]
+
     def _print_params(self, name, params):
-        print("
+        print("*****************************")
         print(f"Best {name}")
-        print("
+        print("*****************************")
         print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Algorithm = {params[5]}")
         print(f"Threshold = {params[3]:.2f}")
         print(f"Depth = {params[2]}")
+        print(f"Gaussian components = {params[4]}")
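CRASH is now a thin driver over OrCHiD: it reruns the clustering search for both algorithms while increasing the number of Gaussian components, sampling the dataframe down to roughly 100 rows per component. A usage sketch (predictor and df are placeholders):

    from psyke.tuning import Objective
    from psyke.tuning.crash import CRASH
    from psyke.utils import Target

    crash = CRASH(predictor, df, max_depth=5, max_gauss_components=4,
                  output=Target.CONSTANT, objective=Objective.MODEL)
    crash.search()
    combined, lowest_error, fewest_rules = crash.get_best()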
psyke/tuning/orchid/__init__.py
CHANGED
@@ -4,53 +4,50 @@ import numpy as np
 import pandas as pd
 
 from psyke import Clustering, EvaluableModel
-from psyke.tuning import Optimizer
+from psyke.tuning import Optimizer, IterativeOptimizer
 from psyke.utils import Target
 
 
-class OrCHiD(Optimizer):
+class OrCHiD(IterativeOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
     def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
-        super().__init__(dataframe,
-        self.
+                 max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
+                 patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
+        super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
+                         output, normalization, discretization)
+        self.algorithm = algorithm
+        self.gauss_components = gauss_components
 
     def search(self):
         self.params = self.__search_depth()
 
     def __search_depth(self):
-        params = []
-        best = None
+        params, best = [], None
 
         for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = self._best(p)[1]
+            current_params = self.__search_threshold(depth)
+            current_best = self._best(current_params)[1]
             print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
 
-            if len(params) > 1 and improvement < 1.2:
+            if len(params) > 1 and to_break:
                 break
         return params
 
     def __search_threshold(self, depth):
         step = 1.0
-        threshold = 1.0
+        threshold = 1.0
         params = []
         patience = self.patience
         while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
+            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
+                  f"Gaussian components = {self.gauss_components}. ", end="")
             clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
-                depth=depth, error_threshold=threshold, gauss_components=10, output=self.output
+                depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
             )
             clustering.fit(self.dataframe)
             task, metric = \
@@ -72,7 +69,7 @@ class OrCHiD(Optimizer):
                 params.append((p, n, depth, threshold))
                 break
 
-            if p > params[0][0] * self.max_mae_increase:
+            if p > params[0][0] * self.max_error_increase:
                 break
 
             improvement = (params[-1][0] / p) + (1 - n / params[-1][1])
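With gauss_components promoted to a constructor argument (previously hard-coded to 10 inside the threshold search), a single OrCHiD run can be pinned to one mixture size, which is exactly what CRASH exploits above. Direct use, with df a placeholder dataframe:

    from psyke.tuning.orchid import OrCHiD

    orchid = OrCHiD(df, OrCHiD.Algorithm.CREAM, gauss_components=4,
                    max_depth=5, patience=3)
    orchid.search()
    orchid.get_best()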
psyke/tuning/pedro/__init__.py
CHANGED
@@ -1,30 +1,52 @@
 import numpy as np
 import pandas as pd
 from enum import Enum
-from psyke import Extractor
+
+from sklearn.metrics import accuracy_score
+
+from psyke import Extractor, Target
 from psyke.extraction.hypercubic import Grid, FeatureRanker
 from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
-from psyke.tuning import Objective, GridOptimizer
+from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
 
 
-class PEDRO(GridOptimizer):
+class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
         GRIDREX = 2
 
-    def __init__(self, predictor, dataframe: pd.DataFrame,
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
                  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
-                 normalization=None):
+                 output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
+                              readability_tradeoff, patience, objective, output, normalization, discretization)
+        IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                                    max_depth, patience, output, normalization, discretization)
+        self.algorithm = algorithm
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
+        predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
+        expected = self.dataframe.iloc[:, -1].values
+        self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
+            abs(predictions - expected).mean()
+
+    def _search_depth(self, strategy, critical, max_partitions):
+        params, best = [], None
+
+        for iterations in range(self.max_depth):
+            current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
+            current_best = self._best(current_params)[1]
+            print()
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
+
+            if len(params) > 1 and to_break:
+                break
+        return params
 
     def __search_threshold(self, grid, critical, max_partitions):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
+        step = self.error / 2.0
+        threshold = self.error * 0.5
         params = []
         patience = self.patience
         while patience > 0:
@@ -33,12 +55,14 @@ class PEDRO(GridOptimizer):
                 if self.algorithm == PEDRO.Algorithm.GRIDREX \
                 else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
             _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
+            error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
+                else extractor.mae
+            error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
+                        error_function(self.dataframe)), extractor.n_rules
+            print("MAE = {:.2f}, {} rules".format(error, n))
 
             if len(params) == 0:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 threshold += step
                 continue
@@ -46,44 +70,24 @@ class PEDRO(GridOptimizer):
                 break
 
             if n == 1:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 break
 
-            if mae > params[0][0] * self.max_mae_increase:
+            if error > params[0][0] * self.max_error_increase:
                 break
 
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
+            improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
 
             if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
                 patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
+                step = max(step, abs(error - threshold) / max(patience, 1))
             elif not critical:
                 patience = self.patience
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, threshold, grid))
+            if error != params[-1][0] or n != params[-1][1]:
+                params.append((error, n, threshold, grid))
             threshold += step
         return params
 
-    def __search_depth(self, strategy, critical, max_partitions):
-        params = []
-        best = None
-
-        for iterations in range(self.max_depth):
-            grid = Grid(iterations + 1, strategy)
-            p = self.__search_threshold(grid, critical, max_partitions)
-            b = GridOptimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
     def __contains(self, strategies, strategy):
         for s in strategies:
             if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -116,16 +120,16 @@ class PEDRO(GridOptimizer):
 
         params = []
         for strategy in strategies:
-            params += self.__search_depth(strategy,
-                                          strategy.partition_number(self.dataframe.columns[:-1]) > avg,
-                                          base_partitions * 3)
+            params += self._search_depth(strategy,
+                                         strategy.partition_number(self.dataframe.columns[:-1]) > avg,
+                                         base_partitions * 3)
         self.params = params
 
     def _print_params(self, name, params):
         print("**********************")
         print(f"Best {name}")
         print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Error = {params[0]:.2f}, {params[1]} rules")
         print(f"Threshold = {params[2]:.2f}")
         print(f"Iterations = {params[3].iterations}")
         print(f"Strategy = {params[3].strategy}")
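The main functional change here is classification support: PEDRO now derives its search error from 1 - accuracy when output is Target.CLASSIFICATION, and from the mean absolute error otherwise, computed once against the predictor at construction time. A usage sketch (classifier and df are placeholders):

    from psyke.tuning import Objective
    from psyke.tuning.pedro import PEDRO
    from psyke.utils import Target

    pedro = PEDRO(classifier, df, max_depth=3, patience=3,
                  algorithm=PEDRO.Algorithm.GRIDEX,
                  objective=Objective.MODEL, output=Target.CLASSIFICATION)
    pedro.search()
    pedro.get_best()

One residual quirk visible in the diff: the per-step log line still prints the metric under the "MAE" label even when it is actually 1 - accuracy.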
{psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-psyke/__init__.py,sha256=
-psyke/hypercubepredictor.py,sha256=
-psyke/clustering/__init__.py,sha256=
+psyke/__init__.py,sha256=oi97R35NM2IvZ5kjm89sSXuMGFmd9RZuxlgniVywVuo,17575
+psyke/hypercubepredictor.py,sha256=AEhpPzCxqIRUOtAUw-jZ9XueNtCf1zsFSPBvRzLSG6c,4229
+psyke/clustering/__init__.py,sha256=36MokTVwwWR_-o0mesvXHaYEYVTK2pn2m0ZY4G3Y3qU,581
 psyke/clustering/utils.py,sha256=S0YwCKyHVYp9qUAQVzCMrTwcQFPJ5TD14Jwn10DE-Z4,1616
-psyke/clustering/cream/__init__.py,sha256=
-psyke/clustering/exact/__init__.py,sha256=
+psyke/clustering/cream/__init__.py,sha256=W6k7vdjuUdA_azYA4vb5JtpWrofhDJ0DbM2jsnRKzfw,2994
+psyke/clustering/exact/__init__.py,sha256=GpMGOcN2bGn3wfaUKOdis3vnLEtAx9j886qsk-O4N7k,5243
 psyke/extraction/__init__.py,sha256=_-j8zrRqulumYLmlxJ6qUxKmzT4epZu39kpZZIfLC4s,1622
 psyke/extraction/cart/__init__.py,sha256=IilEP4DxSAK9_x5ehPTvopuwlQqBMpGMiNRo-f90rqQ,4179
 psyke/extraction/cart/predictor.py,sha256=2-2mv5fI0lTwwfTaEonxKh0ZUdhxuIEE6OP_rJxgmqc,3019
@@ -12,7 +12,7 @@ psyke/extraction/hypercubic/hypercube.py,sha256=o98MA6yJNSw4DaV9PkLTtowwCMA2V64u
 psyke/extraction/hypercubic/strategy.py,sha256=X-roIsfcpJyMdo2px5JtbhP7-XE-zUNkaEK7XGXoWA8,1636
 psyke/extraction/hypercubic/utils.py,sha256=D2FN5CCm_T3h23DmLFoTnIcFo7LvIq__ktl4hjUqkcA,1525
 psyke/extraction/hypercubic/cosmik/__init__.py,sha256=8eVz_mZizIVU-AkE-FuGG3YBtQsrN3WFXjY-tZzY7Wc,1778
-psyke/extraction/hypercubic/creepy/__init__.py,sha256=
+psyke/extraction/hypercubic/creepy/__init__.py,sha256=pG8O1IH-x14OWRxPUbU8w4N59XYGfpvpfmWShHwKFiY,2410
 psyke/extraction/hypercubic/divine/__init__.py,sha256=-MO-uWeDkGZDTYu87puEuUi85Mmefo-HYRcA8Jn4K0Q,3496
 psyke/extraction/hypercubic/gridex/__init__.py,sha256=al2tBUc2YHsiMtu2T4mTNB_-8wY4rqYjV1AYqRdiNoY,5636
 psyke/extraction/hypercubic/gridrex/__init__.py,sha256=RtPJ5Nokcbk2H9pJAMvua3VzYOnT0HPakbPD4uAfEFk,562
@@ -22,18 +22,18 @@ psyke/extraction/real/utils.py,sha256=eHGU-Y0inn_8jrk9lMcuRUKXpsTkI-s_myXSWz4bAL
 psyke/extraction/trepan/__init__.py,sha256=1aiV7nZa4qGJhF8vASCeakzyV_vr-ojeO7ONH7oAj0Y,6640
 psyke/extraction/trepan/utils.py,sha256=iSUJ1ooNQT_VO1KfBZuIUeUsyUbGdQf_pSEE87vMeQg,2320
 psyke/schema/__init__.py,sha256=gOUWx3gYSkRehlJ5opK0Q16-Tv5fwSTl19k7kzIHALU,15760
-psyke/tuning/__init__.py,sha256=
-psyke/tuning/crash/__init__.py,sha256=
-psyke/tuning/orchid/__init__.py,sha256=
-psyke/tuning/pedro/__init__.py,sha256=
+psyke/tuning/__init__.py,sha256=I-07lLZb02DoIm9AGXPPPOkB55ANu8RU4TMy2j30Pxg,3574
+psyke/tuning/crash/__init__.py,sha256=1c806aCGnRI9mwhDxXamgieX-d4U9i5BV1RqLlF3cho,2535
+psyke/tuning/orchid/__init__.py,sha256=s64iABbteik27CrRPHSVHNZX25JKlDu7YYjhseOizxw,3618
+psyke/tuning/pedro/__init__.py,sha256=cyPPLHmauJw0BEj7Ph8Fvg7hpoRyrknXtUrUn3ubH-o,6180
 psyke/utils/__init__.py,sha256=F-fgBT9CkthIwW8dDCuF5OoQDVMBNvIsZyvNqkgZNUA,1767
 psyke/utils/dataframe.py,sha256=cPbCl_paACCtO0twCiHKUcEKIYiT89WDwQ-f5I9oKrg,6841
 psyke/utils/logic.py,sha256=7bbW6qcKof5PlqoQ0n5Kt3Obcot-KqGAvpE8rMXvEPE,12419
 psyke/utils/metrics.py,sha256=Oo5BOonOSfo0qYsXWT5dmypZ7jiStByFC2MKEU0uMHg,2250
 psyke/utils/plot.py,sha256=HVk0psjispUTUQ0do6jnlEUrdZ75q7RQkz7jsj7JUWM,7541
 psyke/utils/sorted.py,sha256=C3CPW2JisND30BRk5c1sAAHs3Lb_wsRB2qZrYFuRnfM,678
-psyke-0.5.2.dev4.dist-info/LICENSE,sha256=KP9K6Hgezf_xdMFW7ORyKz9uA8Y8k52YJn292wcP-_E,11354
-psyke-0.5.2.dev4.dist-info/METADATA,sha256=
-psyke-0.5.2.dev4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-psyke-0.5.2.dev4.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
-psyke-0.5.2.dev4.dist-info/RECORD,,
+psyke-0.5.4.dev1.dist-info/LICENSE,sha256=KP9K6Hgezf_xdMFW7ORyKz9uA8Y8k52YJn292wcP-_E,11354
+psyke-0.5.4.dev1.dist-info/METADATA,sha256=yPQIViGqjVMPeYvei9K2jvi61Pmu7RFmCI0WQ4TYRNQ,8107
+psyke-0.5.4.dev1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+psyke-0.5.4.dev1.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
+psyke-0.5.4.dev1.dist-info/RECORD,,
{psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/LICENSE
File without changes
{psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/WHEEL
File without changes
{psyke-0.5.2.dev4.dist-info → psyke-0.5.4.dev1.dist-info}/top_level.txt
File without changes