psyke 0.5.2.dev4__tar.gz → 0.5.4.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (78)
  1. {psyke-0.5.2.dev4/psyke.egg-info → psyke-0.5.4.dev1}/PKG-INFO +1 -1
  2. psyke-0.5.4.dev1/VERSION +1 -0
  3. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/__init__.py +13 -11
  4. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/__init__.py +2 -2
  5. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/cream/__init__.py +4 -4
  6. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/exact/__init__.py +6 -3
  7. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/creepy/__init__.py +5 -3
  8. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/hypercubepredictor.py +5 -4
  9. psyke-0.5.4.dev1/psyke/tuning/__init__.py +87 -0
  10. psyke-0.5.4.dev1/psyke/tuning/crash/__init__.py +54 -0
  11. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/orchid/__init__.py +19 -22
  12. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/pedro/__init__.py +49 -45
  13. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1/psyke.egg-info}/PKG-INFO +1 -1
  14. psyke-0.5.2.dev4/VERSION +0 -1
  15. psyke-0.5.2.dev4/psyke/tuning/__init__.py +0 -77
  16. psyke-0.5.2.dev4/psyke/tuning/crash/__init__.py +0 -91
  17. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/LICENSE +0 -0
  18. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/MANIFEST.in +0 -0
  19. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/README.md +0 -0
  20. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/utils.py +0 -0
  21. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/__init__.py +0 -0
  22. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/cart/__init__.py +0 -0
  23. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/cart/predictor.py +0 -0
  24. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/__init__.py +0 -0
  25. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/cosmik/__init__.py +0 -0
  26. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/divine/__init__.py +0 -0
  27. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  28. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/gridrex/__init__.py +0 -0
  29. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/hypercube.py +0 -0
  30. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  31. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/strategy.py +0 -0
  32. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/utils.py +0 -0
  33. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/real/__init__.py +0 -0
  34. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/real/utils.py +0 -0
  35. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/trepan/__init__.py +0 -0
  36. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/trepan/utils.py +0 -0
  37. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/schema/__init__.py +0 -0
  38. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/__init__.py +0 -0
  39. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/dataframe.py +0 -0
  40. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/logic.py +0 -0
  41. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/metrics.py +0 -0
  42. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/plot.py +0 -0
  43. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/utils/sorted.py +0 -0
  44. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/SOURCES.txt +0 -0
  45. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/dependency_links.txt +0 -0
  46. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/not-zip-safe +0 -0
  47. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/requires.txt +0 -0
  48. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke.egg-info/top_level.txt +0 -0
  49. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/pyproject.toml +0 -0
  50. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/setup.cfg +0 -0
  51. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/setup.py +0 -0
  52. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/__init__.py +0 -0
  53. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/clustering/__init__.py +0 -0
  54. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/__init__.py +0 -0
  55. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/cart/__init__.py +0 -0
  56. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/cart/test_cart.py +0 -0
  57. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/cart/test_simplified_cart.py +0 -0
  58. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/__init__.py +0 -0
  59. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  60. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
  61. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  62. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
  63. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/hypercubic/test_hypercube.py +0 -0
  64. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/real/__init__.py +0 -0
  65. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/real/test_real.py +0 -0
  66. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/real/test_rule.py +0 -0
  67. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/__init__.py +0 -0
  68. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/test_node.py +0 -0
  69. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/test_split.py +0 -0
  70. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/extraction/trepan/test_trepan.py +0 -0
  71. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/__init__.py +0 -0
  72. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/test_prune.py +0 -0
  73. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/test_simplify.py +0 -0
  74. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/psyke/utils/test_simplify_formatter.py +0 -0
  75. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/__init__.py +0 -0
  76. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/datasets/__init__.py +0 -0
  77. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/predictors/__init__.py +0 -0
  78. {psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/test/resources/tests/__init__.py +0 -0
{psyke-0.5.2.dev4/psyke.egg-info → psyke-0.5.4.dev1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.5.2.dev4
+Version: 0.5.4.dev1
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
psyke-0.5.4.dev1/VERSION NEW
@@ -0,0 +1 @@
+0.5.4.dev1
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/__init__.py
@@ -44,9 +44,9 @@ class EvaluableModel(object):
         V = 3,
         FMI = 4
 
-    def __init__(self, normalization=None, discretization=None):
-        self.normalization = normalization
+    def __init__(self, discretization=None, normalization=None):
         self.discretization = discretization
+        self.normalization = normalization
 
     def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
         """
@@ -312,13 +312,14 @@ class Extractor(EvaluableModel, ABC):
     @staticmethod
     def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                gauss_components: int = 2, ranks: [(str, float)] = [], ignore_threshold: float = 0.0,
-               discretization=None, normalization: dict[str, tuple[float, float]] = None) -> Extractor:
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new CReEPy extractor.
         """
         from psyke.extraction.hypercubic.creepy import CReEPy
         return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
-                      discretization, normalization, clustering)
+                      discretization, normalization, clustering, seed)
 
     @staticmethod
     def real(predictor, discretization=None) -> Extractor:
@@ -341,8 +342,8 @@ class Extractor(EvaluableModel, ABC):
 
 
 class Clustering(EvaluableModel, ABC):
-    def __init__(self, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
 
     def fit(self, dataframe: pd.DataFrame):
         raise NotImplementedError('fit')
@@ -351,18 +352,19 @@ class Clustering(EvaluableModel, ABC):
         raise NotImplementedError('explain')
 
     @staticmethod
-    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-              gauss_components: int = 2) -> Clustering:
+    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new ExACT instance.
         """
         from psyke.clustering.exact import ExACT
-        return ExACT(depth, error_threshold, output, gauss_components)
+        return ExACT(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
 
     @staticmethod
-    def cream(depth: int, error_threshold: float, output, gauss_components: int = 2) -> Clustering:
+    def cream(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new CREAM instance.
         """
         from psyke.clustering.cream import CREAM
-        return CREAM(depth, error_threshold, output, gauss_components)
+        return CREAM(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
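Two API consequences are worth noting in this file: EvaluableModel.__init__ swaps the order of its normalization and discretization parameters, which silently transposes the two for any caller passing them positionally, and the exact/cream factories gain a seed argument. A minimal usage sketch of the new signatures, assuming a pandas DataFrame df with the target in the last column (df is an assumption, not part of the diff):

    from psyke import Clustering
    from psyke.utils import Target

    # seed is new in 0.5.4.dev1; pass discretization/normalization by keyword
    # to stay safe across the argument reorder.
    clustering = Clustering.exact(depth=2, error_threshold=0.1, output=Target.CONSTANT,
                                  gauss_components=2, seed=42)
    clustering.fit(df)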
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/__init__.py
@@ -8,8 +8,8 @@ from psyke.hypercubepredictor import HyperCubePredictor
 
 class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):
 
-    def __init__(self, output: Target = Target.CONSTANT, normalization=None):
-        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+    def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
+        HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
 
     def get_hypercubes(self) -> Iterable[HyperCube]:
         raise NotImplementedError('get_hypercubes')
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/cream/__init__.py
@@ -5,7 +5,7 @@ from typing import Iterable
 import numpy as np
 import pandas as pd
 
-import psyke.utils
+from psyke.utils import Target, get_default_random_seed
 from psyke.clustering.exact import ExACT
 from psyke.extraction.hypercubic import Node, HyperCube, ClosedCube
 from psyke.clustering.utils import select_gaussian_mixture
@@ -16,9 +16,9 @@ class CREAM(ExACT):
     Explanator implementing CREAM algorithm.
     """
 
-    def __init__(self, depth: int, error_threshold: float,
-                 output: psyke.utils.Target = psyke.utils.Target.CONSTANT, gauss_components: int = 5):
-        super().__init__(depth, error_threshold, output, gauss_components)
+    def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
 
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/clustering/exact/__init__.py
@@ -13,7 +13,7 @@ from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
 from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
 from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 
 
 class ExACT(HyperCubeClustering, ABC):
@@ -22,13 +22,15 @@ class ExACT(HyperCubeClustering, ABC):
     """
 
     def __init__(self, depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-                 gauss_components: int = 2, normalization=None):
-        super().__init__(output, normalization)
+                 gauss_components: int = 2, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(output, discretization, normalization)
         self.depth = depth
         self.error_threshold = error_threshold
         self.gauss_components = gauss_components
         self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
         self._predictor.n_neighbors = 1
+        self.seed = seed
 
     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
@@ -56,6 +58,7 @@ class ExACT(HyperCubeClustering, ABC):
         )
 
     def fit(self, dataframe: pd.DataFrame):
+        np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
         self._hypercubes = \
             self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
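The np.random.seed(self.seed) call added at the top of ExACT.fit reseeds NumPy's global RNG, so two fits with the same seed should follow the same clustering path. A sketch of the expected contract (assumed behaviour, not asserted by the diff):

    a = Clustering.exact(seed=123)
    a.fit(df)                       # df as above: features, then target column
    b = Clustering.exact(seed=123)
    b.fit(df)
    # a.get_hypercubes() and b.get_hypercubes() should coincide; since the seed
    # is set globally, interleaved NumPy calls elsewhere can still break this.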
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/extraction/hypercubic/creepy/__init__.py
@@ -9,7 +9,7 @@ from tuprolog.theory import Theory
 from psyke import Clustering
 from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import HyperCubeExtractor
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
 from psyke.utils.logic import last_in_body
 
 
@@ -20,10 +20,12 @@ class CReEPy(HyperCubeExtractor):
 
     def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
                  gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
-                 discretization=None, normalization=None, clustering=Clustering.exact):
+                 discretization=None, normalization=None, clustering=Clustering.exact,
+                 seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
                          discretization, normalization)
-        self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
+        self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
+                                     normalization, seed)
         self.ranks = ranks
         self.ignore_threshold = ignore_threshold
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/hypercubepredictor.py
@@ -11,17 +11,18 @@ from psyke.extraction.hypercubic import RegressionCube, GenericCube, Point
 
 
 class HyperCubePredictor(EvaluableModel):
-    def __init__(self, output=Target.CONSTANT, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, output=Target.CONSTANT, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)
         self._hypercubes = []
         self._output = output
+        self._surrounding = None
 
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
 
     def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
                        mapping: dict[str: int] = None) -> Iterable:
-        predictions = self._predict(dataframe)
+        predictions = np.array(self._predict(dataframe))
         idx = [prediction is None for prediction in predictions]
         if sum(idx) > 0:
             if criterion == 'default':
@@ -46,7 +47,7 @@ class HyperCubePredictor(EvaluableModel):
 
     def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
         distances = [(
-            cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
+            cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
         ) for cube in self._hypercubes]
         return min(distances)[-1]
 
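The one-character change in _brute_predict_surface is a genuine bug fix: row.values without parentheses is the bound dict method, not the values themselves. A minimal illustration:

    row = {'x': 1.0, 'y': 2.0}
    # list(row.values)  raises TypeError: 'builtin_function_or_method' object is not iterable
    list(row.values())  # [1.0, 2.0]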
psyke-0.5.4.dev1/psyke/tuning/__init__.py NEW
@@ -0,0 +1,87 @@
+from abc import ABC
+from enum import Enum
+import numpy as np
+import pandas as pd
+
+from psyke.extraction.hypercubic import Grid
+from psyke.utils import Target
+
+
+class Objective(Enum):
+    MODEL = 1,
+    DATA = 2
+
+
+class Optimizer:
+    def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 normalization=None, discretization=None):
+        self.dataframe = dataframe
+        self.output = output
+        self.max_error_increase = max_error_increase
+        self.min_rule_decrease = min_rule_decrease
+        self.readability_tradeoff = readability_tradeoff
+        self.patience = patience
+        self.params = None
+        self.normalization = normalization
+        self.discretization = discretization
+
+    def search(self):
+        raise NotImplementedError
+
+    def _best(self, params):
+        param_dict = {self._score(t): t for t in params}
+        min_param = min(param_dict)
+        return min_param, param_dict[min_param]
+
+    def _score(self, param):
+        return param[0] * np.ceil(param[1] * self.readability_tradeoff)
+
+    def _best_param(self, param):
+        param_dict = {t[param]: t for t in self.params}
+        min_param = min(param_dict)
+        return min_param, param_dict[min_param]
+
+    def get_best(self):
+        names = ["Combined", "Predictive loss", "N rules"]
+        params = [self._best(self.params), self._best_param(0), self._best_param(1)]
+        for n, p in zip(names, params):
+            self._print_params(n, p[1])
+            print()
+        return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
+
+    def _print_params(self, n, param):
+        raise NotImplementedError
+
+
+class SKEOptimizer(Optimizer, ABC):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
+                 normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.predictor = predictor
+        self.objective = objective
+
+
+class IterativeOptimizer(Optimizer, ABC):
+    def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.max_depth = max_depth
+
+    def _iteration_improvement(self, best, other):
+        if other[0] == best[0]:
+            return (best[1] - other[1]) * 2
+        return 1 / (
+            (1 - other[0] / best[0]) ** self.readability_tradeoff *
+            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
+        )
+
+    def _check_iteration_improvement(self, best, current):
+        improvement = \
+            self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
+        return current, improvement < 1.2
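Besides the rename from max_mae_increase to max_error_increase, the combined score changes meaning: the old static __score used ceil(n_rules / 5), while the new _score uses ceil(n_rules * readability_tradeoff), so the rule-count penalty is now tunable per optimizer. A standalone reimplementation for illustration (not library API):

    import numpy as np

    def score(error, n_rules, readability_tradeoff=0.1):
        # mirrors Optimizer._score in 0.5.4.dev1
        return error * np.ceil(n_rules * readability_tradeoff)

    score(0.30, 12)  # 0.30 * ceil(1.2) = 0.60; the 0.5.2.dev4 rule gave 0.30 * ceil(12 / 5) = 0.90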
psyke-0.5.4.dev1/psyke/tuning/crash/__init__.py NEW
@@ -0,0 +1,54 @@
+import pandas as pd
+
+from psyke.tuning import Objective, SKEOptimizer
+from psyke.tuning.orchid import OrCHiD
+from psyke.utils import Target
+
+
+class CRASH(SKEOptimizer):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
+                 objective: Objective = Objective.MODEL, normalization=None, discretization=None):
+        super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, objective, output, normalization, discretization)
+        self.max_depth = max_depth
+        self.max_gauss_components = max_gauss_components
+
+    def search(self):
+        self.params = []
+        for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
+            self.params += self.__search_algorithm(algorithm)
+
+    def __search_algorithm(self, algorithm):
+        params = []
+        best = None
+
+        for gauss_components in range(2, self.max_gauss_components + 1):
+            data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
+                else self.dataframe
+            current_params = self.__search_components(data, algorithm, gauss_components)
+            current_best = self._best(current_params)[1]
+            if best is not None and self._score(best) <= self._score(current_best):
+                break
+            best = current_best
+            params += current_params
+
+        return params
+
+    def __search_components(self, data, algorithm, gauss_components):
+        orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
+                        self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
+                        self.normalization, self.discretization)
+        orchid.search()
+        return [(*p, gauss_components, algorithm) for p in orchid.params]
+
+    def _print_params(self, name, params):
+        print("*****************************")
+        print(f"Best {name}")
+        print("*****************************")
+        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Algorithm = {params[5]}")
+        print(f"Threshold = {params[3]:.2f}")
+        print(f"Depth = {params[2]}")
+        print(f"Gaussian components = {params[4]}")
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/orchid/__init__.py
@@ -4,53 +4,50 @@ import numpy as np
 import pandas as pd
 
 from psyke import Clustering, EvaluableModel
-from psyke.tuning import Optimizer
+from psyke.tuning import Optimizer, IterativeOptimizer
 from psyke.utils import Target
 
 
-class OrCHiD(Optimizer):
+class OrCHiD(IterativeOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
     def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
-                 patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         patience, normalization, discretization)
-        self.max_depth = max_depth
+                 max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
+                 patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
+        super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
+                         output, normalization, discretization)
+        self.algorithm = algorithm
+        self.gauss_components = gauss_components
 
     def search(self):
         self.params = self.__search_depth()
 
     def __search_depth(self):
-        params = []
-        best = None
+        params, best = [], None
 
         for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
+            current_params = self.__search_threshold(depth)
+            current_best = self._best(current_params)[1]
             print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
 
-            if len(params) > 1 and improvement < 1.2:
+            if len(params) > 1 and to_break:
                 break
         return params
 
     def __search_threshold(self, depth):
         step = 1.0
-        threshold = 1.0  # self.max_mae_increase * 0.9
+        threshold = 1.0
        params = []
         patience = self.patience
         while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
+            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
+                  f"Gaussian components = {self.gauss_components}. ", end="")
             clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
-                depth=depth, error_threshold=threshold, gauss_components=10, output=self.output
+                depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
             )
             clustering.fit(self.dataframe)
             task, metric = \
@@ -72,7 +69,7 @@ class OrCHiD(Optimizer):
                 params.append((p, n, depth, threshold))
                 break
 
-            if p > params[0][0] * self.max_mae_increase:
+            if p > params[0][0] * self.max_error_increase:
                 break
 
             improvement = (params[-1][0] / p) + (1 - n / params[-1][1])
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1}/psyke/tuning/pedro/__init__.py
@@ -1,30 +1,52 @@
 import numpy as np
 import pandas as pd
 from enum import Enum
-from psyke import Extractor
+
+from sklearn.metrics import accuracy_score
+
+from psyke import Extractor, Target
 from psyke.extraction.hypercubic import Grid, FeatureRanker
 from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
-from psyke.tuning import Objective, GridOptimizer
+from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
 
 
-class PEDRO(GridOptimizer):
+class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
         GRIDREX = 2
 
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
                  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
-                 normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
+                 output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
+                              readability_tradeoff, patience, objective, output, normalization, discretization)
+        IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                                    max_depth, patience, output, normalization, discretization)
+        self.algorithm = algorithm
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
-        self.model_mae = abs(self.predictor.predict(dataframe.iloc[:, :-1]).flatten() -
-                             self.dataframe.iloc[:, -1].values).mean()
+        predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
+        expected = self.dataframe.iloc[:, -1].values
+        self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
+            abs(predictions - expected).mean()
+
+    def _search_depth(self, strategy, critical, max_partitions):
+        params, best = [], None
+
+        for iterations in range(self.max_depth):
+            current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
+            current_best = self._best(current_params)[1]
+            print()
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
+
+            if len(params) > 1 and to_break:
+                break
+        return params
 
     def __search_threshold(self, grid, critical, max_partitions):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.5
+        step = self.error / 2.0
+        threshold = self.error * 0.5
         params = []
         patience = self.patience
         while patience > 0:
@@ -33,12 +55,14 @@ class PEDRO(GridOptimizer):
                 if self.algorithm == PEDRO.Algorithm.GRIDREX \
                 else Extractor.gridex(self.predictor, grid, threshold=threshold, normalization=self.normalization)
             _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print("MAE = {:.2f}, {} rules".format(mae, n))
+            error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
+                else extractor.mae
+            error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
+                        error_function(self.dataframe)), extractor.n_rules
+            print("MAE = {:.2f}, {} rules".format(error, n))
 
             if len(params) == 0:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 threshold += step
                 continue
 
@@ -46,44 +70,24 @@ class PEDRO(GridOptimizer):
                 break
 
             if n == 1:
-                params.append((mae, n, threshold, grid))
+                params.append((error, n, threshold, grid))
                 break
 
-            if mae > params[0][0] * self.max_mae_increase:
+            if error > params[0][0] * self.max_error_increase:
                 break
 
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
+            improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
 
             if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
                 patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
+                step = max(step, abs(error - threshold) / max(patience, 1))
             elif not critical:
                 patience = self.patience
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, threshold, grid))
+            if error != params[-1][0] or n != params[-1][1]:
+                params.append((error, n, threshold, grid))
             threshold += step
         return params
 
-    def __search_depth(self, strategy, critical, max_partitions):
-        params = []
-        best = None
-
-        for iterations in range(self.max_depth):
-            grid = Grid(iterations + 1, strategy)
-            p = self.__search_threshold(grid, critical, max_partitions)
-            b = GridOptimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
     def __contains(self, strategies, strategy):
         for s in strategies:
             if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -116,16 +120,16 @@ class PEDRO(GridOptimizer):
 
         params = []
         for strategy in strategies:
-            params += self.__search_depth(strategy,
-                                          strategy.partition_number(self.dataframe.columns[:-1]) > avg,
-                                          base_partitions * 3)
+            params += self._search_depth(strategy,
+                                         strategy.partition_number(self.dataframe.columns[:-1]) > avg,
+                                         base_partitions * 3)
         self.params = params
 
     def _print_params(self, name, params):
         print("**********************")
         print(f"Best {name}")
         print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Error = {params[0]:.2f}, {params[1]} rules")
         print(f"Threshold = {params[2]:.2f}")
         print(f"Iterations = {params[3].iterations}")
         print(f"Strategy = {params[3].strategy}")
{psyke-0.5.2.dev4 → psyke-0.5.4.dev1/psyke.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.5.2.dev4
+Version: 0.5.4.dev1
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
psyke-0.5.2.dev4/VERSION DELETED
@@ -1 +0,0 @@
-0.5.2.dev4
psyke-0.5.2.dev4/psyke/tuning/__init__.py DELETED
@@ -1,77 +0,0 @@
-from abc import ABC
-from enum import Enum
-import numpy as np
-import pandas as pd
-
-from psyke.utils import Target
-
-
-class Objective(Enum):
-    MODEL = 1,
-    DATA = 2
-
-
-class Optimizer:
-    def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 max_mae_increase: float = 1.2, min_rule_decrease: float = 0.9,
-                 readability_tradeoff: float = 0.1, patience: int = 5,
-                 normalization=None, discretization=None):
-        self.dataframe = dataframe
-        self.algorithm = algorithm
-        self.output = output
-        self.max_mae_increase = max_mae_increase
-        self.min_rule_decrease = min_rule_decrease
-        self.readability_tradeoff = readability_tradeoff
-        self.patience = patience
-        self.params = None
-        self.normalization = normalization
-        self.discretization = discretization
-
-    def search(self):
-        raise NotImplementedError
-
-    def _depth_improvement(self, best, other):
-        if other[0] == best[0]:
-            return (best[1] - other[1]) * 2
-        return 1 / (
-            (1 - other[0] / best[0]) ** self.readability_tradeoff *
-            np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
-        )
-
-    @staticmethod
-    def _best(params):
-        param_dict = {Optimizer.__score(t): t for t in params}
-        min_param = min(param_dict)
-        return min_param, param_dict[min_param]
-
-    @staticmethod
-    def __score(param):
-        return param[0] * np.ceil(param[1] / 5)
-
-    def _best_param(self, param):
-        param_dict = {t[param]: t for t in self.params}
-        min_param = min(param_dict)
-        return min_param, param_dict[min_param]
-
-    def get_best(self):
-        names = [self.algorithm, "Predictive loss", "N rules"]
-        params = [Optimizer._best(self.params), self._best_param(0), self._best_param(1)]
-        for n, p in zip(names, params):
-            self._print_params(n, p[1])
-            print()
-        return Optimizer._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
-
-    def _print_params(self, n, param):
-        raise NotImplementedError
-
-
-class GridOptimizer(Optimizer, ABC):
-    def __init__(self, predictor, algorithm, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
-                 normalization=None, discretization=None):
-        super().__init__(dataframe, algorithm, output, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         patience, normalization, discretization)
-        self.predictor = predictor
-        self.max_depth = max_depth
-        self.objective = objective
psyke-0.5.2.dev4/psyke/tuning/crash/__init__.py DELETED
@@ -1,91 +0,0 @@
-from enum import Enum
-
-import numpy as np
-import pandas as pd
-
-from psyke import Extractor, Clustering
-from psyke.tuning import Objective, Optimizer
-from psyke.utils import Target
-
-
-class CRASH(Optimizer):
-    class Algorithm(Enum):
-        ExACT = 1,
-        CREAM = 2
-
-    def __init__(self, predictor, dataframe: pd.DataFrame, max_mae_increase: float = 1.2,
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 patience: int = 5, algorithm: Algorithm = Algorithm.CREAM, output: Target = Target.CONSTANT,
-                 objective: Objective = Objective.MODEL, normalization=None):
-        super().__init__(predictor, algorithm, dataframe, max_mae_increase, min_rule_decrease, readability_tradeoff,
-                         max_depth, patience, objective, normalization)
-        self.output = output
-
-    def search(self):
-        self.params = self.__search_depth()
-
-    def __search_depth(self):
-        params = []
-        best = None
-
-        for depth in range(1, self.max_depth + 1):
-            p = self.__search_threshold(depth)
-            b = Optimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
-    def __search_threshold(self, depth):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
-        params = []
-        patience = self.patience
-        while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
-            extractor = Extractor.creepy(
-                self.predictor, depth=depth, error_threshold=threshold, output=self.output,
-                gauss_components=10, normalization=self.normalization,
-                clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
-            )
-            _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
-
-            if len(params) == 0:
-                params.append((mae, n, depth, threshold))
-                threshold += step
-                continue
-
-            if (n == 1) or (mae == 0.0):
-                params.append((mae, n, depth, threshold))
-                break
-
-            if mae > params[0][0] * self.max_mae_increase:
-                break
-
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
-
-            if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
-                patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, depth, threshold))
-            threshold += step
-        return params
-
-    def _print_params(self, name, params):
-        print("**********************")
-        print(f"Best {name}")
-        print("**********************")
-        print(f"MAE = {params[0]:.2f}, {params[1]} rules")
-        print(f"Threshold = {params[3]:.2f}")
-        print(f"Depth = {params[2]}")