PyPI - psyke - Versions diffs - 0.5.2.dev3__py3-none-any.whl → 0.5.4.dev1__py3-none-any.whl - Mend

psyke 0.5.2.dev3-py3-none-any.whl → 0.5.4.dev1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of psyke might be problematic. Click here for more details.

Files changed (15)

psyke/__init__.py +13 -11
psyke/clustering/__init__.py +2 -2
psyke/clustering/cream/__init__.py +4 -4
psyke/clustering/exact/__init__.py +6 -3
psyke/extraction/hypercubic/creepy/__init__.py +5 -3
psyke/hypercubepredictor.py +5 -4
psyke/tuning/__init__.py +38 -28
psyke/tuning/crash/__init__.py +33 -70
psyke/tuning/orchid/__init__.py +19 -22
psyke/tuning/pedro/__init__.py +49 -45
{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/METADATA +1 -1
{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/RECORD +15 -15
{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/LICENSE +0 -0
{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/WHEEL +0 -0
{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/top_level.txt +0 -0

psyke/__init__.py CHANGED Viewed

@@ -44,9 +44,9 @@ class EvaluableModel(object):
         V = 3,
         FMI = 4
-    def __init__(self, normalization=None, discretization=None):
-        self.normalization = normalization
+    def __init__(self, discretization=None, normalization=None):
         self.discretization = discretization
+        self.normalization = normalization
     def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
         """
@@ -312,13 +312,14 @@ class Extractor(EvaluableModel, ABC):
     @staticmethod
     def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                gauss_components: int = 2, ranks: [(str, float)] = [], ignore_threshold: float = 0.0,
-               discretization=None, normalization: dict[str, tuple[float, float]] = None) -> Extractor:
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new CReEPy extractor.
         """
         from psyke.extraction.hypercubic.creepy import CReEPy
         return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
-                      discretization, normalization, clustering)
+                      discretization, normalization, clustering, seed)
     @staticmethod
     def real(predictor, discretization=None) -> Extractor:
@@ -341,8 +342,8 @@ class Extractor(EvaluableModel, ABC):
 class Clustering(EvaluableModel, ABC):
-    def __init__(self, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
     def fit(self, dataframe: pd.DataFrame):
         raise NotImplementedError('fit')
@@ -351,18 +352,19 @@ class Clustering(EvaluableModel, ABC):
         raise NotImplementedError('explain')
     @staticmethod
-    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-              gauss_components: int = 2) -> Clustering:
+    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new ExACT instance.
         """
         from psyke.clustering.exact import ExACT
-        return ExACT(depth, error_threshold, output, gauss_components)
+        return ExACT(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
     @staticmethod
-    def cream(depth: int, error_threshold: float, output, gauss_components: int = 2) -> Clustering:
+    def cream(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new CREAM instance.
         """
         from psyke.clustering.cream import CREAM
-        return CREAM(depth, error_threshold, output, gauss_components)
+        return CREAM(depth, error_threshold, output, gauss_components, discretization, normalization, seed)

psyke/clustering/__init__.py CHANGED Viewed

@@ -8,8 +8,8 @@ from psyke.hypercubepredictor import HyperCubePredictor
 class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):
-    def __init__(self, output: Target = Target.CONSTANT, normalization=None):
-        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+    def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
+        HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
     def get_hypercubes(self) -> Iterable[HyperCube]:
         raise NotImplementedError('get_hypercubes')

psyke/clustering/cream/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Iterable
 import numpy as np
 import pandas as pd
-import psyke.utils
+from psyke.utils import Target, get_default_random_seed
 from psyke.clustering.exact import ExACT
 from psyke.extraction.hypercubic import Node, HyperCube, ClosedCube
 from psyke.clustering.utils import select_gaussian_mixture
@@ -16,9 +16,9 @@ class CREAM(ExACT):
     Explanator implementing CREAM algorithm.
     """
-    def __init__(self, depth: int, error_threshold: float,
-                 output: psyke.utils.Target = psyke.utils.Target.CONSTANT, gauss_components: int = 5):
-        super().__init__(depth, error_threshold, output, gauss_components)
+    def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []

psyke/clustering/exact/__init__.py CHANGED Viewed

@@ -13,7 +13,7 @@ from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
 from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
 from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 class ExACT(HyperCubeClustering, ABC):
@@ -22,13 +22,15 @@ class ExACT(HyperCubeClustering, ABC):
     """
     def __init__(self, depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-                 gauss_components: int = 2, normalization=None):
-        super().__init__(output, normalization)
+                 gauss_components: int = 2, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(output, discretization, normalization)
         self.depth = depth
         self.error_threshold = error_threshold
         self.gauss_components = gauss_components
         self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
         self._predictor.n_neighbors = 1
+        self.seed = seed
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
@@ -56,6 +58,7 @@ class ExACT(HyperCubeClustering, ABC):
         )
     def fit(self, dataframe: pd.DataFrame):
+        np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
         self._hypercubes = \
             self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))

psyke/extraction/hypercubic/creepy/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from tuprolog.theory import Theory
 from psyke import Clustering
 from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import HyperCubeExtractor
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 from psyke.utils.logic import last_in_body
@@ -20,10 +20,12 @@ class CReEPy(HyperCubeExtractor):
     def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                  gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
-                 discretization=None, normalization=None, clustering=Clustering.exact):
+                 discretization=None, normalization=None, clustering=Clustering.exact,
+                 seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
                          discretization, normalization)
-        self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
+        self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
+                                     normalization, seed)
         self.ranks = ranks
         self.ignore_threshold = ignore_threshold

psyke/hypercubepredictor.py CHANGED Viewed

@@ -11,17 +11,18 @@ from psyke.extraction.hypercubic import RegressionCube, GenericCube, Point
 class HyperCubePredictor(EvaluableModel):
-    def __init__(self, output=Target.CONSTANT, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, output=Target.CONSTANT, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
         self._hypercubes = []
         self._output = output
+        self._surrounding = None
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
     def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
                        mapping: dict[str: int] = None) -> Iterable:
-        predictions = self._predict(dataframe)
+        predictions = np.array(self._predict(dataframe))
         idx = [prediction is None for prediction in predictions]
         if sum(idx) > 0:
             if criterion == 'default':
@@ -46,7 +47,7 @@ class HyperCubePredictor(EvaluableModel):
     def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
         distances = [(
-            cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
+            cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
         ) for cube in self._hypercubes]
         return min(distances)[-1]

psyke/tuning/__init__.py CHANGED Viewed

@@ -3,6 +3,7 @@ from enum import Enum
 import numpy as np
 import pandas as pd
+from psyke.extraction.hypercubic import Grid
 from psyke.utils import Target
@@ -12,14 +13,12 @@ class Objective(Enum):
 class Optimizer:
-    def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9,
-                 readability_tradeoff: float = 0.1, patience: int = 5,
+    def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
                  normalization=None, discretization=None):
         self.dataframe = dataframe
-        self.algorithm = algorithm
         self.output = output
-        self.max_mae_increase = max_mae_increase
+        self.max_error_increase = max_error_increase
         self.min_rule_decrease = min_rule_decrease
         self.readability_tradeoff = readability_tradeoff
         self.patience = patience
@@ -30,23 +29,13 @@ class Optimizer:
     def search(self):
         raise NotImplementedError
-    def _depth_improvement(self, best, other):
-        if other[0] == best[0]:
-            return (best[1] - other[1]) * 2
-        return 1 / (
-                (1 - other[0] / best[0]) ** self.readability_tradeoff *
-                np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
-        )
-    @staticmethod
-    def _best(params):
-        param_dict = {Optimizer.__score(t): t for t in params}
+    def _best(self, params):
+        param_dict = {self._score(t): t for t in params}
         min_param = min(param_dict)
         return min_param, param_dict[min_param]
-    @staticmethod
-    def __score(param):
-        return param[0] * np.ceil(param[1] / 5)
+    def _score(self, param):
+        return param[0] * np.ceil(param[1] * self.readability_tradeoff)
     def _best_param(self, param):
         param_dict = {t[param]: t for t in self.params}
@@ -54,24 +43,45 @@ class Optimizer:
         return min_param, param_dict[min_param]
     def get_best(self):
-        names = [self.algorithm, "Predictive loss", "N rules"]
-        params = [Optimizer._best(self.params), self._best_param(0), self._best_param(1)]
+        names = ["Combined", "Predictive loss", "N rules"]
+        params = [self._best(self.params), self._best_param(0), self._best_param(1)]
         for n, p in zip(names, params):
             self._print_params(n, p[1])
             print()
-        return Optimizer._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
+        return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
     def _print_params(self, n, param):
         raise NotImplementedError
-class GridOptimizer(Optimizer, ABC):
-    def __init__(self, predictor, algorithm, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
+class SKEOptimizer(Optimizer, ABC):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
                  normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
                          patience, normalization, discretization)
         self.predictor = predictor
-        self.max_depth = max_depth
         self.objective = objective
+class IterativeOptimizer(Optimizer, ABC):
+    def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.max_depth = max_depth
+    def _iteration_improvement(self, best, other):
+        if other[0] == best[0]:
+            return (best[1] - other[1]) * 2
+        return 1 / (
+                (1 - other[0] / best[0]) ** self.readability_tradeoff *
+                np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
+        )
+    def _check_iteration_improvement(self, best, current):
+        improvement = \
+            self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
+        return current, improvement < 1.2

psyke/tuning/crash/__init__.py CHANGED Viewed

@@ -1,91 +1,54 @@
-from enum import Enum
-import numpy as np
 import pandas as pd
-from psyke import Extractor, Clustering
-from psyke.tuning import Objective, Optimizer
+from psyke.tuning import Objective, SKEOptimizer
+from psyke.tuning.orchid import OrCHiD
 from psyke.utils import Target
-class CRASH(Optimizer):
-    class Algorithm(Enum):
-        ExACT = 1,
-        CREAM = 2
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+class CRASH(SKEOptimizer):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, algorithm: Algorithm = Algorithm.CREAM, output: Target = Target.CONSTANT,
-                 objective: Objective = Objective.MODEL, normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
-        self.output = output
+                 max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
+                 objective: Objective = Objective.MODEL, normalization=None, discretization=None):
+        super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, objective, output, normalization, discretization)
+        self.max_depth = max_depth
+        self.max_gauss_components = max_gauss_components
     def search(self):
-        self.params = self.__search_depth()
+        self.params = []
+        for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
+            self.params += self.__search_algorithm(algorithm)
-    def __search_depth(self):
+    def __search_algorithm(self, algorithm):
         params = []
         best = None
-        for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-            best = b
-            params += p
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-    def __search_threshold(self, depth):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
-        params = []
-        patience = self.patience
-        while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
-            extractor = Extractor.creepy(
-                self.predictor, depth=depth, error_threshold=threshold, output=self.output,
-                gauss_components=10, normalization=self.normalization,
-                clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
-            )
-            _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
-            if len(params) == 0:
-                params.append((mae, n, depth, threshold))
-                threshold += step
-                continue
-            if (n == 1) or (mae == 0.0):
-                params.append((mae, n, depth, threshold))
+        for gauss_components in range(2, self.max_gauss_components + 1):
+            data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
+                else self.dataframe
+            current_params = self.__search_components(data, algorithm, gauss_components)
+            current_best = self._best(current_params)[1]
+            if best is not None and self._score(best) <= self._score(current_best):
                 break
+            best = current_best
+            params += current_params
-            if mae > params[0][0] * self.max_mae_increase:
-                break
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
-            if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
-                patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, depth, threshold))
-            threshold += step
         return params
+    def __search_components(self, data, algorithm, gauss_components):
+        orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
+                        self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
+                        self.normalization, self.discretization)
+        orchid.search()
+        return [(*p, gauss_components, algorithm) for p in orchid.params]
     def _print_params(self, name, params):
-        print("**********************")
+        print("*****************************")
         print(f"Best {name}")
-        print("**********************")
+        print("*****************************")
         print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Algorithm = {params[5]}")
         print(f"Threshold = {params[3]:.2f}")
         print(f"Depth = {params[2]}")
+        print(f"Gaussian components = {params[4]}")

psyke/tuning/orchid/__init__.py CHANGED Viewed

@@ -4,53 +4,50 @@ import numpy as np
 import pandas as pd
 from psyke import Clustering, EvaluableModel
-from psyke.tuning import Optimizer
+from psyke.tuning import Optimizer, IterativeOptimizer
 from psyke.utils import Target
-class OrCHiD(Optimizer):
+class OrCHiD(IterativeOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
     def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
-                 patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         patience, normalization, discretization)
-        self.max_depth = max_depth
+                 max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
+                 patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
+        super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
+                         output, normalization, discretization)
+        self.algorithm = algorithm
+        self.gauss_components = gauss_components
     def search(self):
         self.params = self.__search_depth()
     def __search_depth(self):
-        params = []
-        best = None
+        params, best = [], None
         for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
+            current_params = self.__search_threshold(depth)
+            current_best = self._best(current_params)[1]
             print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-            best = b
-            params += p
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
-            if len(params) > 1 and improvement < 1.2:
+            if len(params) > 1 and to_break:
                 break
         return params
     def __search_threshold(self, depth):
         step = 1.0
-        threshold = 1.0  # self.max_mae_increase * 0.9
+        threshold = 1.0
         params = []
         patience = self.patience
         while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
+            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
+                  f"Gaussian components = {self.gauss_components}. ", end="")
             clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
-                depth=depth, error_threshold=threshold, gauss_components=10, output=self.output
+                depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
             )
             clustering.fit(self.dataframe)
             task, metric = \
@@ -72,7 +69,7 @@ class OrCHiD(Optimizer):
                 params.append((p, n, depth, threshold))
                 break
-            if p > params[0][0] * self.max_mae_increase:
+            if p > params[0][0] * self.max_error_increase:
                 break
             improvement = (params[-1][0] / p) + (1 - n / params[-1][1])

psyke/tuning/pedro/__init__.py CHANGED Viewed

@@ -1,30 +1,52 @@
 import numpy as np
 import pandas as pd
 from enum import Enum
-from psyke import Extractor
+from sklearn.metrics import accuracy_score
+from psyke import Extractor, Target
 from psyke.extraction.hypercubic import Grid, FeatureRanker
 from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
-from psyke.tuning import Objective, GridOptimizer
+from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
-class PEDRO(GridOptimizer):
+class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
         GRIDREX = 2
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
                  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
-                 normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
+                 output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
+                              readability_tradeoff, patience, objective, output, normalization, discretization)
+        IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                                    max_depth, patience, output, normalization, discretization)
+        self.algorithm = algorithm
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
-        self.model_mae = abs(self.predictor.predict(dataframe.iloc[:, :-1]).flatten() -
-                             self.dataframe.iloc[:, -1].values).mean()
+        predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
+        expected = self.dataframe.iloc[:, -1].values
+        self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
+            abs(predictions - expected).mean()
+    def _search_depth(self, strategy, critical, max_partitions):
+        params, best = [], None
+        for iterations in range(self.max_depth):
+            current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
+            current_best = self._best(current_params)[1]
+            print()
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
+            if len(params) > 1 and to_break:
+                break
+        return params
     def __search_threshold(self, grid, critical, max_partitions):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.5
+        step = self.error / 2.0
+        threshold = self.error * 0.5
         params = []
         patience = self.patience
         while patience > 0:
@@ -33,12 +55,14 @@ class PEDRO(GridOptimizer):
                 if self.algorithm == PEDRO.Algorithm.GRIDREX \
                 else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
             _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print("MAE = {:.2f}, {} rules".format(mae, n))
+            error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
+                else extractor.mae
+            error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
+                        error_function(self.dataframe)), extractor.n_rules
+            print("MAE = {:.2f}, {} rules".format(error, n))
             if len(params) == 0:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 threshold += step
                 continue
@@ -46,44 +70,24 @@ class PEDRO(GridOptimizer):
                 break
             if n == 1:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 break
-            if mae > params[0][0] * self.max_mae_increase:
+            if error > params[0][0] * self.max_error_increase:
                 break
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
+            improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
             if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
                 patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
+                step = max(step, abs(error - threshold) / max(patience, 1))
             elif not critical:
                 patience = self.patience
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, threshold, grid))
+            if error != params[-1][0] or n != params[-1][1]:
+                params.append((error, n, threshold, grid))
             threshold += step
         return params
-    def __search_depth(self, strategy, critical, max_partitions):
-        params = []
-        best = None
-        for iterations in range(self.max_depth):
-            grid = Grid(iterations + 1, strategy)
-            p = self.__search_threshold(grid, critical, max_partitions)
-            b = GridOptimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-            best = b
-            params += p
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
     def __contains(self, strategies, strategy):
         for s in strategies:
             if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -116,16 +120,16 @@ class PEDRO(GridOptimizer):
         params = []
         for strategy in strategies:
-            params += self.__search_depth(strategy,
-                                          strategy.partition_number(self.dataframe.columns[:-1]) > avg,
-                                          base_partitions * 3)
+            params += self._search_depth(strategy,
+                                         strategy.partition_number(self.dataframe.columns[:-1]) > avg,
+                                         base_partitions * 3)
         self.params = params
     def _print_params(self, name, params):
         print("**********************")
         print(f"Best {name}")
         print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Error = {params[0]:.2f}, {params[1]} rules")
         print(f"Threshold = {params[2]:.2f}")
         print(f"Iterations = {params[3].iterations}")
         print(f"Strategy = {params[3].strategy}")

{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.5.2.dev3
+Version: 0.5.4.dev1
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini

{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-psyke/__init__.py,sha256=u1B61ld8R6c8IrVu8cnw1SO1hiamXxYRg_ffS41HTrU,17194
-psyke/hypercubepredictor.py,sha256=ngy0k5ESck3FDwq8UViGw8wEdwm0uU1IJjSCxm_MSrA,4147
-psyke/clustering/__init__.py,sha256=lST9WKPZ5fT7sgHHk7BJi3OBd1_8douSA01FRl_xH7s,529
+psyke/__init__.py,sha256=oi97R35NM2IvZ5kjm89sSXuMGFmd9RZuxlgniVywVuo,17575
+psyke/hypercubepredictor.py,sha256=AEhpPzCxqIRUOtAUw-jZ9XueNtCf1zsFSPBvRzLSG6c,4229
+psyke/clustering/__init__.py,sha256=36MokTVwwWR_-o0mesvXHaYEYVTK2pn2m0ZY4G3Y3qU,581
 psyke/clustering/utils.py,sha256=S0YwCKyHVYp9qUAQVzCMrTwcQFPJ5TD14Jwn10DE-Z4,1616
-psyke/clustering/cream/__init__.py,sha256=YDlhlDfrBnop3-1GjEJeFYNlPM68YPhM9Kb7TA1psi8,2864
-psyke/clustering/exact/__init__.py,sha256=txIseVHlFMBWcDifMc9mFYdcnIi3W3n3SQ2H1WRfNUc,5066
+psyke/clustering/cream/__init__.py,sha256=W6k7vdjuUdA_azYA4vb5JtpWrofhDJ0DbM2jsnRKzfw,2994
+psyke/clustering/exact/__init__.py,sha256=GpMGOcN2bGn3wfaUKOdis3vnLEtAx9j886qsk-O4N7k,5243
 psyke/extraction/__init__.py,sha256=_-j8zrRqulumYLmlxJ6qUxKmzT4epZu39kpZZIfLC4s,1622
 psyke/extraction/cart/__init__.py,sha256=IilEP4DxSAK9_x5ehPTvopuwlQqBMpGMiNRo-f90rqQ,4179
 psyke/extraction/cart/predictor.py,sha256=2-2mv5fI0lTwwfTaEonxKh0ZUdhxuIEE6OP_rJxgmqc,3019
@@ -12,7 +12,7 @@ psyke/extraction/hypercubic/hypercube.py,sha256=o98MA6yJNSw4DaV9PkLTtowwCMA2V64u
 psyke/extraction/hypercubic/strategy.py,sha256=X-roIsfcpJyMdo2px5JtbhP7-XE-zUNkaEK7XGXoWA8,1636
 psyke/extraction/hypercubic/utils.py,sha256=D2FN5CCm_T3h23DmLFoTnIcFo7LvIq__ktl4hjUqkcA,1525
 psyke/extraction/hypercubic/cosmik/__init__.py,sha256=8eVz_mZizIVU-AkE-FuGG3YBtQsrN3WFXjY-tZzY7Wc,1778
-psyke/extraction/hypercubic/creepy/__init__.py,sha256=SBgnc4iKHwXRulDijJnNW3eIDHtkvpA6bzG6N1E97i8,2255
+psyke/extraction/hypercubic/creepy/__init__.py,sha256=pG8O1IH-x14OWRxPUbU8w4N59XYGfpvpfmWShHwKFiY,2410
 psyke/extraction/hypercubic/divine/__init__.py,sha256=-MO-uWeDkGZDTYu87puEuUi85Mmefo-HYRcA8Jn4K0Q,3496
 psyke/extraction/hypercubic/gridex/__init__.py,sha256=al2tBUc2YHsiMtu2T4mTNB_-8wY4rqYjV1AYqRdiNoY,5636
 psyke/extraction/hypercubic/gridrex/__init__.py,sha256=RtPJ5Nokcbk2H9pJAMvua3VzYOnT0HPakbPD4uAfEFk,562
@@ -22,18 +22,18 @@ psyke/extraction/real/utils.py,sha256=eHGU-Y0inn_8jrk9lMcuRUKXpsTkI-s_myXSWz4bAL
 psyke/extraction/trepan/__init__.py,sha256=1aiV7nZa4qGJhF8vASCeakzyV_vr-ojeO7ONH7oAj0Y,6640
 psyke/extraction/trepan/utils.py,sha256=iSUJ1ooNQT_VO1KfBZuIUeUsyUbGdQf_pSEE87vMeQg,2320
 psyke/schema/__init__.py,sha256=gOUWx3gYSkRehlJ5opK0Q16-Tv5fwSTl19k7kzIHALU,15760
-psyke/tuning/__init__.py,sha256=f3NS883R5RXinqd7EGEeb0kisY5SwSxAcvzbtLPdKG4,2886
-psyke/tuning/crash/__init__.py,sha256=EH129fDnYM3u-6DqqJAhlhETNdiBQ9LNPGGtOm30I_s,3450
-psyke/tuning/orchid/__init__.py,sha256=1CvqdQoRNZt23zQSPnqFIEzDU4xeN1Yk296HdEg1_sE,3513
-psyke/tuning/pedro/__init__.py,sha256=4E6nCjIu0OEO8OK7yyGQKzO5o8Kbc34IDLpTRDiGYrk,5567
+psyke/tuning/__init__.py,sha256=I-07lLZb02DoIm9AGXPPPOkB55ANu8RU4TMy2j30Pxg,3574
+psyke/tuning/crash/__init__.py,sha256=1c806aCGnRI9mwhDxXamgieX-d4U9i5BV1RqLlF3cho,2535
+psyke/tuning/orchid/__init__.py,sha256=s64iABbteik27CrRPHSVHNZX25JKlDu7YYjhseOizxw,3618
+psyke/tuning/pedro/__init__.py,sha256=cyPPLHmauJw0BEj7Ph8Fvg7hpoRyrknXtUrUn3ubH-o,6180
 psyke/utils/__init__.py,sha256=F-fgBT9CkthIwW8dDCuF5OoQDVMBNvIsZyvNqkgZNUA,1767
 psyke/utils/dataframe.py,sha256=cPbCl_paACCtO0twCiHKUcEKIYiT89WDwQ-f5I9oKrg,6841
 psyke/utils/logic.py,sha256=7bbW6qcKof5PlqoQ0n5Kt3Obcot-KqGAvpE8rMXvEPE,12419
 psyke/utils/metrics.py,sha256=Oo5BOonOSfo0qYsXWT5dmypZ7jiStByFC2MKEU0uMHg,2250
 psyke/utils/plot.py,sha256=HVk0psjispUTUQ0do6jnlEUrdZ75q7RQkz7jsj7JUWM,7541
 psyke/utils/sorted.py,sha256=C3CPW2JisND30BRk5c1sAAHs3Lb_wsRB2qZrYFuRnfM,678
-psyke-0.5.2.dev3.dist-info/LICENSE,sha256=KP9K6Hgezf_xdMFW7ORyKz9uA8Y8k52YJn292wcP-_E,11354
-psyke-0.5.2.dev3.dist-info/METADATA,sha256=a_gn7Yi-R2fyo2RVDCK-7l3_u7TVQvoB0gJJGx0GiDI,8107
-psyke-0.5.2.dev3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-psyke-0.5.2.dev3.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
-psyke-0.5.2.dev3.dist-info/RECORD,,
+psyke-0.5.4.dev1.dist-info/LICENSE,sha256=KP9K6Hgezf_xdMFW7ORyKz9uA8Y8k52YJn292wcP-_E,11354
+psyke-0.5.4.dev1.dist-info/METADATA,sha256=yPQIViGqjVMPeYvei9K2jvi61Pmu7RFmCI0WQ4TYRNQ,8107
+psyke-0.5.4.dev1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+psyke-0.5.4.dev1.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
+psyke-0.5.4.dev1.dist-info/RECORD,,

{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/LICENSE RENAMED Viewed

File without changes

{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/WHEEL RENAMED Viewed

File without changes

{psyke-0.5.2.dev3.dist-info → psyke-0.5.4.dev1.dist-info}/top_level.txt RENAMED Viewed

File without changes

psyke 0.5.2.dev3-py3-none-any.whl → 0.5.4.dev1-py3-none-any.whl

Potentially problematic release.

psyke 0.5.2.dev3-py3-none-any.whl → 0.5.4.dev1-py3-none-any.whl