psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psyke/__init__.py +231 -85
- psyke/clustering/__init__.py +9 -4
- psyke/clustering/cream/__init__.py +6 -10
- psyke/clustering/exact/__init__.py +17 -11
- psyke/clustering/utils.py +0 -1
- psyke/extraction/__init__.py +25 -0
- psyke/extraction/cart/CartPredictor.py +128 -0
- psyke/extraction/cart/FairTree.py +205 -0
- psyke/extraction/cart/FairTreePredictor.py +56 -0
- psyke/extraction/cart/__init__.py +48 -62
- psyke/extraction/hypercubic/__init__.py +187 -47
- psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
- psyke/extraction/hypercubic/creepy/__init__.py +24 -29
- psyke/extraction/hypercubic/divine/__init__.py +86 -0
- psyke/extraction/hypercubic/ginger/__init__.py +100 -0
- psyke/extraction/hypercubic/gridex/__init__.py +45 -84
- psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
- psyke/extraction/hypercubic/hex/__init__.py +104 -0
- psyke/extraction/hypercubic/hypercube.py +275 -72
- psyke/extraction/hypercubic/iter/__init__.py +45 -46
- psyke/extraction/hypercubic/strategy.py +13 -9
- psyke/extraction/real/__init__.py +24 -29
- psyke/extraction/real/utils.py +2 -2
- psyke/extraction/trepan/__init__.py +24 -19
- psyke/genetic/__init__.py +0 -0
- psyke/genetic/fgin/__init__.py +74 -0
- psyke/genetic/gin/__init__.py +144 -0
- psyke/hypercubepredictor.py +102 -0
- psyke/schema/__init__.py +230 -36
- psyke/tuning/__init__.py +40 -28
- psyke/tuning/crash/__init__.py +33 -64
- psyke/tuning/orchid/__init__.py +21 -23
- psyke/tuning/pedro/__init__.py +70 -56
- psyke/utils/logic.py +8 -8
- psyke/utils/plot.py +79 -3
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
- psyke-1.0.4.dev10.dist-info/RECORD +46 -0
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
- psyke/extraction/cart/predictor.py +0 -73
- psyke-0.4.9.dev6.dist-info/RECORD +0 -36
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/tuning/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
 import numpy as np
 import pandas as pd
 
+from psyke.extraction.hypercubic import Grid
 from psyke.utils import Target
 
 
@@ -12,14 +13,12 @@ class Objective(Enum):
 
 
 class Optimizer:
-    def __init__(self, dataframe: pd.DataFrame, …
-        …
-                 readability_tradeoff: float = 0.1, patience: int = 5,
+    def __init__(self, dataframe: pd.DataFrame, output: Target = Target.CONSTANT, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
                  normalization=None, discretization=None):
         self.dataframe = dataframe
-        self.algorithm = algorithm
         self.output = output
-        self.…
+        self.max_error_increase = max_error_increase
         self.min_rule_decrease = min_rule_decrease
         self.readability_tradeoff = readability_tradeoff
         self.patience = patience
@@ -30,23 +29,13 @@ class Optimizer:
     def search(self):
         raise NotImplementedError
 
-    def …
-        …
-            return (best[1] - other[1]) * 2
-        return 1 / (
-                (1 - other[0] / best[0]) ** self.readability_tradeoff *
-                np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
-        )
-
-    @staticmethod
-    def _best(params):
-        param_dict = {Optimizer.__score(t): t for t in params}
+    def _best(self, params):
+        param_dict = {self._score(t): t for t in params}
         min_param = min(param_dict)
         return min_param, param_dict[min_param]
 
-    …
-        return param[0] * np.ceil(param[1] / 5)
+    def _score(self, param):
+        return param[0] * np.ceil(param[1] * self.readability_tradeoff)
 
     def _best_param(self, param):
         param_dict = {t[param]: t for t in self.params}
@@ -54,24 +43,47 @@
         return min_param, param_dict[min_param]
 
     def get_best(self):
-        names = […
-        params = […
+        names = ["Combined", "Predictive loss", "N rules"]
+        params = [self._best(self.params), self._best_param(0), self._best_param(1)]
         for n, p in zip(names, params):
             self._print_params(n, p[1])
             print()
-        return …
+        return self._best(self.params)[1], self._best_param(0)[1], self._best_param(1)[1]
 
     def _print_params(self, n, param):
         raise NotImplementedError
 
 
-class …
-    def __init__(self, predictor, …
-                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, …
-                 …
+class SKEOptimizer(Optimizer, ABC):
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, patience: int = 5,
+                 objective: Objective = Objective.MODEL, output: Target = Target.CONSTANT,
                  normalization=None, discretization=None):
-        super().__init__(dataframe, …
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
                          patience, normalization, discretization)
         self.predictor = predictor
-        self.max_depth = max_depth
         self.objective = objective
+
+
+class IterativeOptimizer(Optimizer, ABC):
+    def __init__(self, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
+                 min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
+                 patience: int = 5, output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        super().__init__(dataframe, output, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, normalization, discretization)
+        self.max_depth = max_depth
+
+    def _iteration_improvement(self, best, other):
+        if other[0] == best[0]:
+            return (best[1] - other[1]) * 2
+        return 1 / (
+                (1 - other[0] / best[0]) ** self.readability_tradeoff *
+                np.ceil(other[1] / self.readability_tradeoff) / np.ceil(best[1] / self.readability_tradeoff)
+        )
+
+    def _check_iteration_improvement(self, best, current):
+        improvement = \
+            self._iteration_improvement([best[0], best[1]], [current[0], current[1]]) if best is not None else np.inf
+        if isinstance(improvement, complex):
+            improvement = 1.0
+        return current, improvement < 1.2
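The refactor above centralizes the error/rule-count trade-off in `Optimizer._score` and `Optimizer._best`. A minimal worked sketch of that selection rule (standalone, not part of the package): each candidate is an `(error, n_rules)` pair and the combined score is `error * ceil(n_rules * readability_tradeoff)`; the candidate with the lowest score wins.

import numpy as np

readability_tradeoff = 0.1
candidates = [(0.30, 12), (0.35, 4), (0.50, 2)]   # (predictive error, number of rules)

def score(candidate):
    # same formula as Optimizer._score in the new version
    error, n_rules = candidate
    return error * np.ceil(n_rules * readability_tradeoff)

# scores: 0.60, 0.35, 0.50 -> the 4-rule model wins despite its slightly higher error
best = min(candidates, key=score)
print(best)   # (0.35, 4)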
psyke/tuning/crash/__init__.py
CHANGED
@@ -1,91 +1,60 @@
 from enum import Enum
 
-import numpy as np
 import pandas as pd
 
-from psyke import …
-from psyke.tuning import …
+from psyke.tuning import Objective, SKEOptimizer
+from psyke.tuning.orchid import OrCHiD
 from psyke.utils import Target
 
 
-class CRASH(…
+class CRASH(SKEOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
-    def __init__(self, predictor, dataframe: pd.DataFrame, …
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 10,
-                 …
-                 objective: Objective = Objective.MODEL, normalization=None):
-        super().__init__(predictor, …
-        …
-        self.…
+                 max_gauss_components: int = 5, patience: int = 5, output: Target = Target.CONSTANT,
+                 objective: Objective = Objective.MODEL, normalization=None, discretization=None):
+        super().__init__(predictor, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                         patience, objective, output, normalization, discretization)
+        self.max_depth = max_depth
+        self.max_gauss_components = max_gauss_components
 
     def search(self):
-        self.params = …
+        self.params = []
+        for algorithm in [OrCHiD.Algorithm.ExACT, OrCHiD.Algorithm.CREAM]:
+            self.params += self.__search_algorithm(algorithm)
 
-    def …
+    def __search_algorithm(self, algorithm):
         params = []
         best = None
 
-        for …
-            …
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
-    def __search_threshold(self, depth):
-        step = self.model_mae / 2.0
-        threshold = self.model_mae * 0.9
-        params = []
-        patience = self.patience
-        while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. ", end="")
-            extractor = Extractor.creepy(
-                self.predictor, depth=depth, error_threshold=threshold, output=self.output,
-                gauss_components=10, normalization=self.normalization,
-                clustering=Clustering.cream if self.algorithm == CRASH.Algorithm.CREAM else Clustering.exact
-            )
-            _ = extractor.extract(self.dataframe)
-            mae, n = (extractor.mae(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
-                      extractor.mae(self.dataframe)), extractor.n_rules
-            print(f"MAE = {mae:.2f}, {n} rules")
-
-            if len(params) == 0:
-                params.append((mae, n, depth, threshold))
-                threshold += step
-                continue
-
-            if (n == 1) or (mae == 0.0):
-                params.append((mae, n, depth, threshold))
-                break
-
-            if mae > params[0][0] * self.max_mae_increase:
+        for gauss_components in range(2, self.max_gauss_components + 1):
+            data = self.dataframe.sample(n=gauss_components * 100) if gauss_components * 100 < len(self.dataframe) \
+                else self.dataframe
+            current_params = self.__search_components(data, algorithm, gauss_components)
+            current_best = self._best(current_params)[1]
+            if best is not None and self._score(best) <= self._score(current_best):
                 break
+            best = current_best
+            params += current_params
 
-            improvement = (params[-1][0] / mae) + (1 - n / params[-1][1])
-
-            if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
-                patience -= 1
-                step = max(step, abs(mae - threshold) / max(patience, 1))
-            if mae != params[-1][0] or n != params[-1][1]:
-                params.append((mae, n, depth, threshold))
-            threshold += step
         return params
 
+    def __search_components(self, data, algorithm, gauss_components):
+        orchid = OrCHiD(data, algorithm, self.output, self.max_error_increase, self.min_rule_decrease,
+                        self.readability_tradeoff, self.patience, self.max_depth, gauss_components,
+                        self.normalization, self.discretization)
+        orchid.search()
+        return [(*p, gauss_components, algorithm) for p in orchid.params]
+
     def _print_params(self, name, params):
-        print("…
+        print("*****************************")
         print(f"Best {name}")
-        print("…
+        print("*****************************")
         print(f"MAE = {params[0]:.2f}, {params[1]} rules")
+        print(f"Algorithm = {params[5]}")
         print(f"Threshold = {params[3]:.2f}")
         print(f"Depth = {params[2]}")
+        print(f"Gaussian components = {params[4]}")
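A hypothetical usage sketch of the rewritten CRASH optimizer, based only on the constructor and methods visible in this diff; `model` (a fitted black-box predictor) and `df` (a DataFrame whose last column is the target) are assumptions.

from psyke.tuning import Objective
from psyke.tuning.crash import CRASH
from psyke.utils import Target

crash = CRASH(model, df, max_depth=5, max_gauss_components=3,
              output=Target.CONSTANT, objective=Objective.MODEL)
crash.search()          # tries ExACT and CREAM over an increasing number of Gaussian components
best_combined, lowest_error, fewest_rules = crash.get_best()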
psyke/tuning/orchid/__init__.py
CHANGED
@@ -4,60 +4,58 @@ import numpy as np
 import pandas as pd
 
 from psyke import Clustering, EvaluableModel
-from psyke.tuning import Optimizer
+from psyke.tuning import Optimizer, IterativeOptimizer
 from psyke.utils import Target
 
 
-class OrCHiD(…
+class OrCHiD(IterativeOptimizer):
     class Algorithm(Enum):
         ExACT = 1,
         CREAM = 2
 
     def __init__(self, dataframe: pd.DataFrame, algorithm, output: Target = Target.CONSTANT,
-                 …
-                 patience: int = 5, max_depth: int = 10, normalization=None, discretization=None):
-        super().__init__(dataframe, …
-        …
-        self.…
+                 max_error_increase: float = 1.2, min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1,
+                 patience: int = 5, max_depth: int = 10, gauss_components=10, normalization=None, discretization=None):
+        super().__init__(dataframe, max_error_increase, min_rule_decrease, readability_tradeoff, max_depth, patience,
+                         output, normalization, discretization)
+        self.algorithm = algorithm
+        self.gauss_components = gauss_components
 
     def search(self):
         self.params = self.__search_depth()
 
     def __search_depth(self):
-        params = []
-        best = None
+        params, best = [], None
 
         for depth in range(1, self.max_depth + 1):
-            …
+            current_params = self.__search_threshold(depth)
+            current_best = self._best(current_params)[1]
             print()
-            …
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
 
-            if len(params) > 1 and …
+            if len(params) > 1 and to_break:
                 break
         return params
 
     def __search_threshold(self, depth):
         step = 1.0
-        threshold = 1.0
+        threshold = 1.0
         params = []
         patience = self.patience
         while patience > 0:
-            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "…
+            print(f"{self.algorithm}. Depth: {depth}. Threshold = {threshold:.2f}. "
+                  f"Gaussian components = {self.gauss_components}. ", end="")
             clustering = (Clustering.cream if self.algorithm == OrCHiD.Algorithm.CREAM else Clustering.exact)(
-                depth=depth, error_threshold=threshold, gauss_components=…
+                depth=depth, error_threshold=threshold, gauss_components=self.gauss_components, output=self.output
             )
             clustering.fit(self.dataframe)
             task, metric = \
                 (EvaluableModel.Task.CLASSIFICATION, EvaluableModel.ClassificationScore.INVERSE_ACCURACY) \
                 if self.output == Target.CLASSIFICATION else \
                 (EvaluableModel.Task.REGRESSION, EvaluableModel.RegressionScore.MAE)
-            p, n = clustering.score(self.dataframe, None, False, False, task, …
+            p, n = clustering.score(self.dataframe, None, False, False, task=task,
+                                    scoring_function=[metric])[metric][0], clustering.n_rules
 
             print(f"Predictive loss = {p:.2f}, {n} rules")
 
@@ -71,7 +69,7 @@ class OrCHiD(Optimizer):
                 params.append((p, n, depth, threshold))
                 break
 
-            if p > params[0][0] * self.…
+            if p > params[0][0] * self.max_error_increase:
                 break
 
             improvement = (params[-1][0] / p) + (1 - n / params[-1][1])
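OrCHiD now inherits the depth loop from `IterativeOptimizer` and exposes `gauss_components` directly. A hypothetical sketch of driving it on its own, using only parameter names taken from the constructor above and assuming a labelled DataFrame `df`.

from psyke.tuning.orchid import OrCHiD
from psyke.utils import Target

orchid = OrCHiD(df, OrCHiD.Algorithm.CREAM, output=Target.CLASSIFICATION,
                max_depth=3, gauss_components=5)
orchid.search()         # fills orchid.params with (loss, n_rules, depth, threshold) tuples
best = orchid.get_best()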
psyke/tuning/pedro/__init__.py
CHANGED
@@ -1,44 +1,73 @@
 import numpy as np
 import pandas as pd
 from enum import Enum
-…
+
+from sklearn.metrics import accuracy_score
+
+from psyke import Extractor, Target
 from psyke.extraction.hypercubic import Grid, FeatureRanker
 from psyke.extraction.hypercubic.strategy import AdaptiveStrategy, FixedStrategy
-from psyke.tuning import Objective, …
+from psyke.tuning import Objective, IterativeOptimizer, SKEOptimizer
 
 
-class PEDRO(…
+class PEDRO(SKEOptimizer, IterativeOptimizer):
     class Algorithm(Enum):
         GRIDEX = 1,
-        GRIDREX = 2
+        GRIDREX = 2,
+        HEX = 3
 
-    def __init__(self, predictor, dataframe: pd.DataFrame, …
+    def __init__(self, predictor, dataframe: pd.DataFrame, max_error_increase: float = 1.2,
                  min_rule_decrease: float = 0.9, readability_tradeoff: float = 0.1, max_depth: int = 3,
                  patience: int = 3, algorithm: Algorithm = Algorithm.GRIDREX, objective: Objective = Objective.MODEL,
-                 normalization=None):
-        …
+                 output: Target = Target.CONSTANT, normalization=None, discretization=None):
+        SKEOptimizer.__init__(self, predictor, dataframe, max_error_increase, min_rule_decrease,
+                              readability_tradeoff, patience, objective, output, normalization, discretization)
+        IterativeOptimizer.__init__(self, dataframe, max_error_increase, min_rule_decrease, readability_tradeoff,
+                                    max_depth, patience, output, normalization, discretization)
+        self.algorithm = Extractor.gridrex if algorithm == PEDRO.Algorithm.GRIDREX else \
+            Extractor.gridex if algorithm == PEDRO.Algorithm.GRIDEX else Extractor.hex
+        self.algorithm_name = "GridREx" if algorithm == PEDRO.Algorithm.GRIDREX else \
+            "GridEx" if algorithm == PEDRO.Algorithm.GRIDEX else "HEx"
         self.ranked = FeatureRanker(dataframe.columns[:-1]).fit(predictor, dataframe.iloc[:, :-1]).rankings()
-        …
+        predictions = self.predictor.predict(dataframe.iloc[:, :-1]).flatten()
+        expected = self.dataframe.iloc[:, -1].values
+        self.error = 1 - accuracy_score(predictions, expected) if output == Target.CLASSIFICATION else \
+            abs(predictions - expected).mean()
+
+    def _search_depth(self, strategy, critical, max_partitions):
+        params, best = [], None
+
+        for iterations in range(self.max_depth):
+            current_params = self.__search_threshold(Grid(iterations + 1, strategy), critical, max_partitions)
+            current_best = self._best(current_params)[1]
+            print()
+            best, to_break = self._check_iteration_improvement(best, current_best)
+            params += current_params
+
+            if len(params) > 1 and to_break:
+                break
+        return params
 
     def __search_threshold(self, grid, critical, max_partitions):
-        step = self.…
-        threshold = self.…
+        step = self.error / 2.0
+        threshold = self.error * 0.5
         params = []
         patience = self.patience
         while patience > 0:
-            print("{}. {}. Threshold = {:.2f}. ".format(self.…
+            print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm_name, grid, threshold), end="")
+            param_dict = dict(min_examples=25, threshold=threshold, normalization=self.normalization)
+            if self.algorithm != Extractor.gridrex:
+                param_dict['output'] = self.output
+            extractor = self.algorithm(self.predictor, grid, **param_dict)
             _ = extractor.extract(self.dataframe)
-            …
+            error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
+                else extractor.mae
+            error, n = (error_function(self.dataframe, self.predictor) if self.objective == Objective.MODEL else
+                        error_function(self.dataframe)), extractor.n_rules
+            print("MAE = {:.2f}, {} rules".format(error, n))
 
             if len(params) == 0:
-                params.append((…
+                params.append((error, n, threshold, grid))
                 threshold += step
                 continue
 
@@ -46,44 +75,24 @@ class PEDRO(Optimizer):
                 break
 
             if n == 1:
-                params.append((…
+                params.append((error, n, threshold, grid))
                 break
 
-            if …
+            if error > params[0][0] * self.max_error_increase:
                 break
 
-            improvement = (params[-1][0] / …
+            improvement = (params[-1][0] / error) + (1 - n / params[-1][1])
 
             if improvement <= 1 or n > np.ceil(params[-1][1] * self.min_rule_decrease):
                 patience -= 1
-                step = max(step, abs(…
+                step = max(step, abs(error - threshold) / max(patience, 1))
             elif not critical:
                 patience = self.patience
-            if …
-                params.append((…
+            if error != params[-1][0] or n != params[-1][1]:
+                params.append((error, n, threshold, grid))
             threshold += step
         return params
 
-    def __search_depth(self, strategy, critical, max_partitions):
-        params = []
-        best = None
-
-        for iterations in range(self.max_depth):
-            grid = Grid(iterations + 1, strategy)
-            p = self.__search_threshold(grid, critical, max_partitions)
-            b = Optimizer._best(p)[1]
-            print()
-            improvement = self._depth_improvement(
-                [best[0], best[1]], [b[0], b[1]]
-            ) if best is not None else np.inf
-
-            best = b
-            params += p
-
-            if len(params) > 1 and improvement < 1.2:
-                break
-        return params
-
     def __contains(self, strategies, strategy):
         for s in strategies:
             if strategy.equals(s, self.dataframe.columns[:-1]):
@@ -91,21 +100,26 @@ class PEDRO(Optimizer):
         return False
 
     def search(self):
-        …
+        max_partitions = 200
+        base_partitions = FixedStrategy(2).partition_number(self.dataframe.columns[:-1]) * 3
+        if base_partitions <= max_partitions:
+            strategies = [FixedStrategy(2)]
+            if FixedStrategy(3).partition_number(self.dataframe.columns[:-1]) <= max_partitions:
+                strategies.append(FixedStrategy(3))
+        else:
+            strategies = []
+            base_partitions = max_partitions
 
         for n in [2, 3, 5, 10]:
             for th in [0.99, 0.75, 0.67, 0.5, 0.3]:
                 strategy = AdaptiveStrategy(self.ranked, [(th, n)])
-                if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions…
+                if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions and \
                         not self.__contains(strategies, strategy):
                     strategies.append(strategy)
 
         for (a, b) in [(0.33, 0.67), (0.25, 0.75), (0.1, 0.9)]:
             strategy = AdaptiveStrategy(self.ranked, [(a, 2), (b, 3)])
-            if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions…
+            if strategy.partition_number(self.dataframe.columns[:-1]) < base_partitions and \
                     not self.__contains(strategies, strategy):
                 strategies.append(strategy)
 
@@ -116,16 +130,16 @@ class PEDRO(Optimizer):
 
         params = []
         for strategy in strategies:
-            params += self.…
+            params += self._search_depth(strategy,
+                                         strategy.partition_number(self.dataframe.columns[:-1]) > avg,
+                                         base_partitions)
         self.params = params
 
     def _print_params(self, name, params):
         print("**********************")
         print(f"Best {name}")
         print("**********************")
-        print(f"…
+        print(f"Error = {params[0]:.2f}, {params[1]} rules")
         print(f"Threshold = {params[2]:.2f}")
         print(f"Iterations = {params[3].iterations}")
         print(f"Strategy = {params[3].strategy}")
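PEDRO now mixes `SKEOptimizer` and `IterativeOptimizer` and can target HEx in addition to GridEx and GridREx. A hypothetical sketch using only the constructor arguments shown above; as before, `model` (a fitted predictor) and `df` (a labelled DataFrame) are assumptions.

from psyke.tuning import Objective
from psyke.tuning.pedro import PEDRO
from psyke.utils import Target

pedro = PEDRO(model, df, max_depth=2, patience=3,
              algorithm=PEDRO.Algorithm.GRIDEX,
              objective=Objective.MODEL, output=Target.CLASSIFICATION)
pedro.search()          # explores fixed and adaptive grid strategies, then error thresholds
best_combined, lowest_error, fewest_rules = pedro.get_best()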
psyke/utils/logic.py
CHANGED
@@ -123,17 +123,17 @@ def to_var(name: str) -> Var:
     return var(name[0].upper() + name[1:])
 
 
-def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None…
-    if …
-        features = sorted(features, key=lambda x: x.name)
-        dataset = sorted(dataset.columns[:-1]) if dataset is not None else None
-    else:
-        dataset = dataset.columns[:-1] if dataset is not None else None
+def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None) -> dict[str, Var]:
+    dataset = dataset.columns[:-1] if dataset is not None else None
     values = {feature.name: to_var(feature.name) for feature in features} \
-        if …
+        if features else {name: to_var(name) for name in dataset}
     return values
 
 
+def last_in_body(body: Struct) -> Struct:
+    return body.args[-1] if body.args[-1].functor == 'is' else last_in_body(body.args[-1])
+
+
 def create_head(functor: str, variables: Iterable[Var], output) -> Struct:
     if isinstance(output, Var):
         variables += [output]
@@ -321,4 +321,4 @@ def get_not_in_rule(min_included: bool = False, max_included: bool = True) -> Cl
     parser = DEFAULT_CLAUSES_PARSER
     theory = parser.parse_clauses(not_in_textual_rule(LE if min_included else L, GE if max_included else G),
                                   operators=None)
-    return theory[0]
+    return theory[0]
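`create_variable_list` no longer sorts its inputs, and the new `last_in_body` walks a clause body down to its final `is/2` term. A minimal sketch of the simplified variable helper, assuming a DataFrame `df` whose last column is the target.

from psyke.utils.logic import create_variable_list

# With no discrete features, one tuProlog Var is created per feature column
# (every column except the last), keyed by the column name.
variables = create_variable_list([], df)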
psyke/utils/plot.py
CHANGED
@@ -7,10 +7,85 @@ import matplotlib.pyplot as plt
 from matplotlib.lines import Line2D
 from tuprolog.solve.prolog import prolog_solver
 from tuprolog.theory import Theory, mutable_theory
-…
+
+from psyke.extraction.hypercubic import HyperCubeExtractor
+from psyke.utils.logic import data_to_struct, get_in_rule, get_not_in_rule
 
 import matplotlib
-matplotlib.use('TkAgg')
+#matplotlib.use('TkAgg')
+
+
+def plot_init(xlim, ylim, xlabel, ylabel, size=(4, 3), equal=False):
+    plt.figure(figsize=size)
+    if equal:
+        plt.gca().set_aspect(1)
+    plt.xlim(xlim)
+    plt.ylim(ylim)
+    plt.gca().set_xlabel(xlabel)
+    plt.gca().set_ylabel(ylabel)
+    plt.gca().set_rasterized(True)
+
+
+def plot_point(x, y, color, marker, ec=None):
+    plt.scatter(x, y, c=color, marker=marker, edgecolors=ec, linewidths=0.6)
+
+
+def plot_classification_samples(dataframe, classes, colors, markers, labels, loc, name, show=True, ec=None):
+    marks = [Line2D([0], [0], color=c, marker=m, lw="0") for c, m in zip(colors, markers)]
+
+    for cl, c, m in zip(classes, colors, markers):
+        df = dataframe[dataframe.target == cl]
+        plot_point(df["petal length"], df["petal width"], c, m, ec=ec)
+
+    plt.gca().legend(marks, labels, loc=loc)
+    plt.savefig("plot/{}.pdf".format(name), dpi=500, bbox_inches='tight')
+    if show:
+        plt.show()
+
+
+def plot_boundaries(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                    a: float = .5, h: str = '////////', ls='-', e=.05, fc='none', ec=None, reverse=False):
+    cubes = extractor._hypercubes.copy()
+    if reverse:
+        cubes.reverse()
+    for cube in cubes:
+        plt.gca().fill_between((cube[x][0] - e, cube[x][1] + e), cube[y][0] - e, cube[y][1] + e,
+                               fc=colors[cube.output] if fc is None else fc,
+                               ec=colors[cube.output] if ec is None else ec, alpha=a, hatch=h, linestyle=ls)
+
+
+def plot_surfaces(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str], ec='r', e=.05):
+    for cube in extractor._hypercubes:
+        plt.gca().fill_between((cube[x][0] - e, cube[x][1] + e), cube[y][0] - e, cube[y][1] + e,
+                               fc='none', ec=ec)
+
+
+def plot_perimeters(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str], n: int = 5,
+                    ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        for corner in cube.perimeter_samples(n):
+            plt.scatter(corner[x], corner[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
+
+
+def plot_centers(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                 ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        center = cube.center
+        plt.scatter(center[x], center[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
+
+
+def plot_corners(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                 ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        for corner in cube.corners():
+            plt.scatter(corner[x], corner[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
+
+
+def plot_barycenters(extractor: HyperCubeExtractor, x: str, y: str, colors: dict[str, str],
+                     ec: str = 'r', m: str = '*', s: int = 60, z: float = 1e10, lw: float = 0.8):
+    for cube in extractor._hypercubes:
+        center = cube.barycenter
+        plt.scatter(center[x], center[y], c=colors[cube.output], marker=m, edgecolor=ec, s=s, zorder=z, linewidth=lw)
 
 
 def predict_from_theory(theory: Theory, data: pd.DataFrame) -> list[float or str]:
@@ -95,6 +170,7 @@ def plot_theory(theory: Theory, data: pd.DataFrame = None, output: str = 'plot.p
         pass
     # ax.text2D(0., 0.88, pretty_theory(theory, new_line=False), transform=ax.transAxes, fontsize=8)
     if isinstance(ys[0], str):
-        custom_lines = [Line2D([0], [0], marker='o', markerfacecolor=get_color(c), …
+        custom_lines = [Line2D([0], [0], marker='o', markerfacecolor=get_color(c),
+                        markersize=20, color='w') for c in classes]
         ax.legend(custom_lines, classes, loc='upper left', numpoints=1, ncol=3, fontsize=18, bbox_to_anchor=(0, 0))
         plt.savefig(output, format='pdf')
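The new plotting helpers are thin wrappers over Matplotlib. A usage sketch under assumptions not stated in the diff (a trained hypercubic `extractor` whose cube outputs are class names, plus Iris-style feature names); it only chains the functions defined above.

import matplotlib.pyplot as plt
from psyke.utils.plot import plot_init, plot_point, plot_boundaries

# Set up axes, scatter two hand-made points, then overlay the extractor's cube boundaries.
plot_init(xlim=(0, 7), ylim=(0, 3), xlabel="petal length", ylabel="petal width")
plot_point([1.4, 5.1], [0.2, 1.8], color=["tab:blue", "tab:red"], marker="o")
plot_boundaries(extractor, "petal length", "petal width",
                colors={"setosa": "tab:blue", "virginica": "tab:red"})
plt.show()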