psyke 0.9.1.dev12__py3-none-any.whl → 0.9.1.dev43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of psyke might be problematic.

psyke/__init__.py CHANGED
@@ -65,7 +65,7 @@ class EvaluableModel(object):
         raise NotImplementedError('predict')

     def __convert(self, ys: Iterable) -> Iterable:
-        if self.normalization is not None and not isinstance([p for p in ys if p is not None][0], str):
+        if self.normalization is not None and len(ys) > 0 and not isinstance([p for p in ys if p is not None][0], str):
             m, s = self.normalization[list(self.normalization.keys())[-1]]
             ys = [prediction if prediction is None else prediction * s + m for prediction in ys]
         return ys
@@ -231,7 +231,7 @@ class Extractor(EvaluableModel, ABC):

         for i in range(len(output['labels'])):
             for j in range(len(groups)):
-                plt.gca().text(j, i, f'{abs(int(data[i, j]))}%', ha="center", va="center", color="k")
+                plt.gca().text(j, i, f'{abs(data[i, j]):.2f}%', ha="center", va="center", color="k")

         plt.gca().set_xticks([i + .5 for i in range(len(groups))], minor=True)
         plt.gca().set_yticks([i + .5 for i in range(len(output['labels']))], minor=True)
@@ -394,6 +394,19 @@ class Extractor(EvaluableModel, ABC):
         from psyke.extraction.hypercubic.hex import HEx
         return HEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)

+    @staticmethod
+    def ginger(predictor, features: Iterable[str], sigmas: Iterable[float], max_slices: int, min_rules: int = 1,
+               max_poly: int = 1, alpha: float = 0.5, indpb: float = 0.5, tournsize: int = 3, metric: str = 'R2',
+               n_gen: int = 50, n_pop: int = 50, threshold=None, valid=None,
+               normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
+        """
+        Creates a new GInGER extractor.
+        """
+        from psyke.extraction.hypercubic.ginger import GInGER
+        return GInGER(predictor, features, sigmas, max_slices, min_rules, max_poly, alpha, indpb, tournsize, metric,
+                      n_gen, n_pop, threshold, valid, normalization, seed)
+
     @staticmethod
     def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
                 normalization: dict[str, tuple[float, float]] = None,
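
For orientation, a minimal usage sketch of the new entry point; the random-forest predictor and toy dataframe below are illustrative stand-ins, not part of the diff:

    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from psyke import Extractor

    # Toy regression data; psyke extractors expect the target as the last column.
    rng = np.random.default_rng(0)
    df = pd.DataFrame({'x1': rng.uniform(0, 1, 200), 'x2': rng.uniform(0, 1, 200)})
    df['y'] = 2 * df['x1'] + np.sin(6 * df['x2'])

    predictor = RandomForestRegressor().fit(df.iloc[:, :-1], df.iloc[:, -1])
    ginger = Extractor.ginger(predictor, features=['x1', 'x2'], sigmas=[0.1, 0.1],
                              max_slices=2, threshold=0.5)
    theory = ginger.extract(df)  # a tuProlog Theory of extracted rules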
psyke/clustering/__init__.py CHANGED
@@ -10,6 +10,10 @@ class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):

     def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
         HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
+        self._protected_features = []

     def get_hypercubes(self) -> Iterable[HyperCube]:
         raise NotImplementedError('get_hypercubes')
+
+    def make_fair(self, features: Iterable[str]):
+        self._protected_features = features
psyke/clustering/cream/__init__.py CHANGED
@@ -46,11 +46,7 @@ class CREAM(ExACT):
     def _iterate(self, surrounding: Node) -> Iterable[HyperCube]:
         to_split = [(self.error_threshold * 10, 1, 1, surrounding)]
         while len(to_split) > 0:
-            to_split.sort(reverse=True)
-            (_, depth, _, node) = to_split.pop()
-            data = ExACT._remove_string_label(node.dataframe)
-            gauss_params = select_gaussian_mixture(data, self.gauss_components)
-            gauss_pred = gauss_params[2].predict(data)
+            node, depth, gauss_pred, gauss_params = self._get_gauss_predictions(to_split)
             cubes = self.__eligible_cubes(gauss_pred, node, gauss_params[1])
             if len(cubes) < 1:
                 continue
@@ -65,4 +61,4 @@ class CREAM(ExACT):
             (error, depth + 1, np.random.uniform(), n) for (n, error) in
             zip(node.children, [right[0].diversity, left[0].diversity]) if error > self.error_threshold
         ]
-        return self._node_to_cubes(surrounding)
+        return self._node_to_cubes(surrounding)
psyke/clustering/exact/__init__.py CHANGED
@@ -54,13 +54,13 @@ class ExACT(HyperCubeClustering, ABC):
         dbscan_pred = DBSCAN(eps=select_dbscan_epsilon(data, clusters)).fit_predict(data.iloc[:, :-1])
         return HyperCube.create_surrounding_cube(
             dataframe.iloc[np.where(dbscan_pred == Counter(dbscan_pred).most_common(1)[0][0])],
-            True, self._output
+            True, self._output, self._protected_features
         )

     def fit(self, dataframe: pd.DataFrame):
         np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
-        self._surrounding = HyperCube.create_surrounding_cube(dataframe, True, self._output)
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, True, self._output, self._protected_features)
         self._hypercubes = self._iterate(Node(dataframe, self._surrounding))

     def get_hypercubes(self) -> Iterable[HyperCube]:
@@ -79,14 +79,17 @@ class ExACT(HyperCubeClustering, ABC):
             enumerate(dataframe.iloc[:, -1].unique())
         ).items()}}) if isinstance(dataframe.iloc[0, -1], str) else dataframe

+    def _get_gauss_predictions(self, to_split):
+        to_split.sort(reverse=True)
+        (_, depth, _, node) = to_split.pop()
+        data = ExACT._remove_string_label(node.dataframe)
+        gauss_params = select_gaussian_mixture(data.drop(self._protected_features, axis=1), self.gauss_components)
+        return node, depth, gauss_params[2].predict(data.drop(self._protected_features, axis=1)), gauss_params
+
     def _iterate(self, surrounding: Node) -> Iterable[HyperCube]:
         to_split = [(self.error_threshold * 10, 1, 1, surrounding)]
         while len(to_split) > 0:
-            to_split.sort(reverse=True)
-            (_, depth, _, node) = to_split.pop()
-            data = ExACT._remove_string_label(node.dataframe)
-            gauss_params = select_gaussian_mixture(data, self.gauss_components)
-            gauss_pred = gauss_params[2].predict(data)
+            node, depth, gauss_pred, gauss_params = self._get_gauss_predictions(to_split)
             cubes, indices = self.__eligible_cubes(gauss_pred, node, gauss_params[1])
             cubes = [(c.volume(), len(idx), i, idx, c) for i, (c, idx) in enumerate(zip(cubes, indices))
                      if (idx is not None) and (not node.cube.equal(c))]
psyke/clustering/utils.py CHANGED
@@ -11,7 +11,6 @@ def select_gaussian_mixture(data: pd.DataFrame, max_components) -> tuple[float,
     try:
         models = [GaussianMixture(n_components=n).fit(data) for n in components if n <= len(data)]
     except ValueError:
-        print(data)
         print(len(data))
     return min([(m.bic(data) / (i + 2), (i + 2), m) for i, m in enumerate(models)])

psyke/extraction/cart/FairTree.py CHANGED
@@ -77,15 +77,16 @@ class FairTree:
         sorted_indices = np.argsort(X)
         X = np.array(X)[sorted_indices]
         y = np.array(y)[sorted_indices]
-        return np.array([(X[i] + X[i - 1]) / 2.0 for i in range(1, len(X)) if y[i] != y[i - 1]])
+        # X = np.array(np.unique(np.unique(list(zip(X, y)), axis=0)[:, 0]), dtype=float)
+        return np.array([(X[:-1][i] + X[1:][i]) / 2.0 for i in range(len(X) - 1) if y[i] != y[i + 1]])

     def _best_split(self, X, y):
         best_gain = -float('inf')
         split_idx, split_threshold = None, None

         for feature in [feature for feature in X.columns if feature not in self.protected_attr]:
-            # for threshold in np.unique(np.quantile(X[feature], np.linspace(0, 1, num=25))):
-            for threshold in self.generate_thresholds(X[feature], y):
+            # for threshold in self.generate_thresholds(X[feature], y):
+            for threshold in np.unique(np.quantile(X[feature], np.linspace(0, 1, num=25))):
                 left_idxs = X[feature] <= threshold
                 right_idxs = X[feature] > threshold

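A quick worked example of the revised threshold rule (note that the patched _best_split now iterates 25 quantile-based thresholds and keeps the generate_thresholds call commented out): with X sorted, a candidate split is the midpoint between consecutive values whose labels differ. The values below are invented; the comprehension mirrors the patch:

    import numpy as np

    X = np.array([1.0, 2.0, 3.0, 4.0])  # already sorted
    y = np.array([0, 0, 1, 1])
    # Same comprehension as the patched generate_thresholds:
    thresholds = np.array([(X[:-1][i] + X[1:][i]) / 2.0
                           for i in range(len(X) - 1) if y[i] != y[i + 1]])
    print(thresholds)  # [2.5] -- the only boundary where the class flips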
psyke/extraction/hypercubic/__init__.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations

 from abc import ABC
 from collections import Iterable
+from itertools import combinations

 import numpy as np
 import pandas as pd
@@ -25,6 +26,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
         HyperCubePredictor.__init__(self, output=output, normalization=normalization)
         PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
         self._default_surrounding_cube = False
+        self.threshold = None

     def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
         if self._output == Target.CONSTANT:
@@ -33,11 +35,53 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
             return RegressionCube()
         return ClassificationCube()

+    @staticmethod
+    def _find_couples(to_split: Iterable[HyperCube], not_in_cache: set[HyperCube],
+                      adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
+            Iterable[tuple[HyperCube, HyperCube, str]]:
+
+        for cube1, cube2 in combinations(to_split, 2):
+            key = (cube1, cube2) if id(cube1) < id(cube2) else (cube2, cube1)
+
+            if (cube1 in not_in_cache) or (cube2 in not_in_cache):
+                adjacent_cache[key] = cube1.is_adjacent(cube2)
+            feature = adjacent_cache.get(key)
+            if feature is not None:
+                yield cube1, cube2, feature
+
+    def _evaluate_merge(self, not_in_cache: Iterable[HyperCube], dataframe: pd.DataFrame, feature: str,
+                        cube: HyperCube, other_cube: HyperCube,
+                        merge_cache: dict[tuple[HyperCube, HyperCube], HyperCube | None]) -> bool:
+        if (cube in not_in_cache) or (other_cube in not_in_cache):
+            merged_cube = cube.merge_along_dimension(other_cube, feature)
+            merged_cube.update(dataframe, self.predictor)
+            merge_cache[(cube, other_cube)] = merged_cube
+        return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
+            merge_cache[(cube, other_cube)].diversity < self.threshold
+
     def _sort_cubes(self):
         cubes = [(cube.diversity, i, cube) for i, cube in enumerate(self._hypercubes)]
         cubes.sort()
         self._hypercubes = [cube[2] for cube in cubes]

+    def _merge(self, to_split: list[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
+        not_in_cache = set(to_split)
+        adjacent_cache = {}
+        merge_cache = {}
+        while True:
+            to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
+                        HyperCubeExtractor._find_couples(to_split, not_in_cache, adjacent_cache) if
+                        self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
+
+            if len(to_merge) == 0:
+                break
+            best = min(to_merge, key=lambda c: c[1].diversity)
+            for cube in best[0]:
+                to_split.remove(cube)
+            to_split.append(best[1])
+            not_in_cache = [best[1]]
+        return to_split
+
     def extract(self, dataframe: pd.DataFrame) -> Theory:
         theory = PedagogicalExtractor.extract(self, dataframe)
         self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
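
One detail worth noting in the hoisted _find_couples: the adjacency-cache key is now ordered by id(), so each unordered pair of cubes maps to a single cache slot regardless of iteration order. A standalone illustration of the keying trick (the two objects are arbitrary placeholders):

    a, b = object(), object()
    key_ab = (a, b) if id(a) < id(b) else (b, a)
    key_ba = (b, a) if id(b) < id(a) else (a, b)
    assert key_ab == key_ba  # both orderings hit the same cache entry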
psyke/extraction/hypercubic/creepy/__init__.py CHANGED
@@ -1,7 +1,8 @@
 from __future__ import annotations

 from collections import Iterable
-import numpy as np
+from typing import Callable, Any
+
 import pandas as pd
 from sklearn.base import ClassifierMixin
 from tuprolog.theory import Theory
@@ -16,16 +17,23 @@ class CReEPy(HyperCubeExtractor):
     Explanator implementing CReEPy algorithm.
     """

-    def __init__(self, predictor, clustering=Clustering.exact, depth: int = 3, error_threshold: float = 0.1,
-                 output: Target = Target.CONSTANT, gauss_components: int = 5, ranks: Iterable[(str, float)] = tuple(),
-                 ignore_threshold: float = 0.0, discretization=None, normalization=None,
-                 seed: int = get_default_random_seed()):
+    ClusteringType = Callable[[int, float, Target, int, Any, Any, int], HyperCubeClustering]
+
+    def __init__(self, predictor, clustering: ClusteringType = Clustering.exact, depth: int = 3,
+                 error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0, discretization=None,
+                 normalization=None, seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
                          discretization, normalization)
         self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
                                      normalization, seed)
         self._default_surrounding_cube = True
         self._dimensions_to_ignore = set([dimension for dimension, relevance in ranks if relevance < ignore_threshold])
+        self._protected_features = []
+
+    def make_fair(self, features: Iterable[str]):
+        self.clustering.make_fair(features)
+        self._dimensions_to_ignore.update(features)

     def _extract(self, dataframe: pd.DataFrame) -> Theory:
         if not isinstance(self.clustering, HyperCubeClustering):
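
A hedged sketch of the new fairness hook; the classifier, CSV path, and the 'sex' column are placeholders for whatever dataset and protected feature apply:

    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier
    from psyke.extraction.hypercubic.creepy import CReEPy

    df = pd.read_csv('adult.csv')  # placeholder: target in the last column
    predictor = DecisionTreeClassifier().fit(df.iloc[:, :-1], df.iloc[:, -1])

    creepy = CReEPy(predictor, depth=3, error_threshold=0.1)
    creepy.make_fair(['sex'])  # forwarded to clustering and added to the ignored dimensions
    theory = creepy.extract(df)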
psyke/extraction/hypercubic/ginger/__init__.py ADDED
@@ -0,0 +1,98 @@
+import itertools
+from typing import Iterable
+
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import PolynomialFeatures
+from tuprolog.theory import Theory
+
+from psyke import get_default_random_seed
+from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube, RegressionCube
+
+from deap import base, creator
+
+from psyke.genetic.gin import GIn
+
+
+class GInGER(HyperCubeExtractor):
+    """
+    Explanator implementing GInGER algorithm.
+    """
+
+    def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
+                 tournsize=3, metric='R2', n_gen=50, n_pop=50, threshold=None, valid=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(predictor, normalization)
+        self.threshold = threshold
+        np.random.seed(seed)
+
+        self.features = features
+        self.max_features = len(features)
+        self.sigmas = sigmas
+        self.max_slices = max_slices
+        self.min_rules = min_rules
+        self.poly = max_poly
+        self.trained_poly = None
+
+        self.alpha = alpha
+        self.indpb = indpb
+        self.tournsize = tournsize
+        self.metric = metric
+
+        self.n_gen = n_gen
+        self.n_pop = n_pop
+        self.valid = valid
+
+        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+        creator.create("Individual", list, fitness=creator.FitnessMax)
+
+    def __poly_names(self):
+        return [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
+                         for pp, n in zip(p, self.trained_poly.feature_names_in_)])[:-3]
+                for p in self.trained_poly.powers_]
+
+    def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+        dataframe = pd.DataFrame(self.trained_poly.fit_transform(dataframe), columns=self.__poly_names())
+        return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
+
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        best = {}
+        for poly in range(self.poly):
+            for slices in list(itertools.product(range(1, self.max_slices + 1), repeat=self.max_features)):
+                gr = GIn((dataframe.iloc[:, :-1], dataframe.iloc[:, -1]), self.valid, self.features, self.sigmas,
+                         slices, min_rules=self.min_rules, poly=poly + 1, alpha=self.alpha,
+                         indpb=self.indpb, tournsize=self.tournsize, metric=self.metric, warm=True)
+
+                b, score, _, _ = gr.run(n_gen=self.n_gen, n_pop=self.n_pop)
+                best[(score, poly + 1, slices)] = b
+        m = min(best)
+        poly, slices, best = m[1], m[2], best[m]
+        self.trained_poly = PolynomialFeatures(degree=poly, include_bias=False)
+        transformed = pd.DataFrame(self.trained_poly.fit_transform(dataframe.iloc[:, :-1]), columns=self.__poly_names())
+        transformed[dataframe.columns[-1]] = dataframe.iloc[:, -1].values
+
+        self._surrounding = HyperCube.create_surrounding_cube(transformed, output=self._output)
+
+        cuts = [sorted(best[sum(slices[:i]):sum(slices[:i + 1])]) for i in range(len(slices))]
+
+        intervals = [[(transformed[self.features[i]].min(), cut[0])] +
+                     [(cut[i], cut[i + 1]) for i in range(len(cut) - 1)] +
+                     [(cut[-1], transformed[self.features[i]].max())] for i, cut in enumerate(cuts)]
+
+        hypercubes = [{f: iv for f, iv in zip(self.features, combo)} for combo in itertools.product(*intervals)]
+        mi_ma = {f: (transformed[f].min(), transformed[f].max()) for f in transformed.columns if f not in self.features}
+        self._hypercubes = [RegressionCube({feat: h[feat] if feat in self.features else mi_ma[feat]
+                                            for feat in transformed.columns[:-1]}) for h in hypercubes]
+        self._hypercubes = [c for c in self._hypercubes if c.count(transformed) >= 2]
+        for c in self._hypercubes:
+            for feature in transformed.columns:
+                if feature not in self.features:
+                    for direction in ['+', '-']:
+                        c.set_infinite(feature, direction)
+            c.update(transformed)
+        if self.threshold is not None:
+            self._hypercubes = self._merge(self._hypercubes, transformed)
+        return self._create_theory(transformed)
+
+    def make_fair(self, features: Iterable[str]):
+        self._dimensions_to_ignore.update(features)
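
To see what _extract does with the evolved cut points, here is the cuts-to-grid step in isolation, with invented values (the construction mirrors the intervals comprehension above):

    import itertools

    cuts = [[0.3, 0.7], [0.5]]         # evolved cut points for two features
    bounds = [(0.0, 1.0), (0.0, 1.0)]  # per-feature (min, max) of the data
    intervals = [[(lo, cut[0])] +
                 [(cut[i], cut[i + 1]) for i in range(len(cut) - 1)] +
                 [(cut[-1], hi)]
                 for cut, (lo, hi) in zip(cuts, bounds)]
    cells = list(itertools.product(*intervals))
    print(len(cells))  # 6: a 3 x 2 grid of candidate hypercubes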
psyke/extraction/hypercubic/gridex/__init__.py CHANGED
@@ -59,7 +59,6 @@ class GridEx(HyperCubeExtractor):
     def _iterate(self, dataframe: pd.DataFrame):
         fake = dataframe.copy()
         prev = [self._surrounding]
-        next_iteration = []

         for iteration in self.grid.iterate():
             next_iteration = []
@@ -67,56 +66,12 @@ class GridEx(HyperCubeExtractor):
                 if cube.count(dataframe) == 0:
                     continue
                 if cube.diversity < self.threshold:
-                    self._hypercubes += [cube]
+                    self._hypercubes.append(cube)
                     continue
                 to_split, fake = self._cubes_to_split(cube, iteration, dataframe, fake)
-                next_iteration += [c for c in self._merge(to_split, fake)]
-            prev = next_iteration.copy()
-            self._hypercubes += [cube for cube in next_iteration]
-
-    @staticmethod
-    def _find_couples(to_split: Iterable[HyperCube], not_in_cache: Iterable[HyperCube],
-                      adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
-            Iterable[tuple[HyperCube, HyperCube, str]]:
-        checked = []
-        eligible = []
-        for cube in to_split:
-            checked.append(cube)
-            for other_cube in [c for c in to_split if c not in checked]:
-                if (cube in not_in_cache) or (other_cube in not_in_cache):
-                    adjacent_cache[(cube, other_cube)] = cube.is_adjacent(other_cube)
-                adjacent_feature = adjacent_cache[(cube, other_cube)]
-                eligible.append((cube, other_cube, adjacent_feature))
-        return [couple for couple in eligible if couple[2] is not None]
-
-    def _evaluate_merge(self, not_in_cache: Iterable[HyperCube],
-                        dataframe: pd.DataFrame, feature: str,
-                        cube: HyperCube, other_cube: HyperCube,
-                        merge_cache: dict[(HyperCube, HyperCube), HyperCube | None]) -> bool:
-        if (cube in not_in_cache) or (other_cube in not_in_cache):
-            merged_cube = cube.merge_along_dimension(other_cube, feature)
-            merged_cube.update(dataframe, self.predictor)
-            merge_cache[(cube, other_cube)] = merged_cube
-        return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
-            merge_cache[(cube, other_cube)].diversity < self.threshold
-
-    def _merge(self, to_split: Iterable[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
-        not_in_cache = [cube for cube in to_split]
-        adjacent_cache = {}
-        merge_cache = {}
-        cont = True
-        while cont:
-            to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
-                        GridEx._find_couples(to_split, not_in_cache, adjacent_cache) if
-                        self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
-            if len(to_merge) == 0:
-                cont = False
-            else:
-                sorted(to_merge, key=lambda c: c[1].diversity)
-                best = to_merge[0]
-                to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
-                not_in_cache = [best[1]]
-        return to_split
+                next_iteration.extend(self._merge(to_split, fake))
+            prev = next_iteration
+        self._hypercubes.extend(prev)

     def make_fair(self, features: Iterable[str]):
         self.grid.make_fair(features)
psyke/extraction/hypercubic/hypercube.py CHANGED
@@ -143,10 +143,9 @@ class HyperCube:
         self._default = True

     def set_infinite(self, dimension: str, direction: str):
-        if dimension in self._infinite_dimensions:
-            self._infinite_dimensions[dimension].append(direction)
-        else:
-            self._infinite_dimensions[dimension] = [direction]
+        if dimension not in self._infinite_dimensions:
+            self._infinite_dimensions[dimension] = set()
+        self._infinite_dimensions[dimension].add(direction)

     def copy_infinite_dimensions(self, dimensions: dict[str, str]):
         self._infinite_dimensions = dimensions.copy()
@@ -260,13 +259,15 @@ class HyperCube:
                 if not self.is_default and value is not None]

     @staticmethod
-    def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False,
-                                output=None) -> GenericCube:
+    def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False, output=None,
+                                features_to_ignore: Iterable[str] = []) -> GenericCube:
         output = Target.CONSTANT if output is None else output
         dimensions = {
             column: (min(dataset[column]) - HyperCube.EPSILON * 2, max(dataset[column]) + HyperCube.EPSILON * 2)
             for column in dataset.columns[:-1]
         }
+        for column in features_to_ignore:
+            dimensions[column] = (-np.inf, np.inf)
         if closed:
             if output == Target.CONSTANT:
                 return ClosedCube(dimensions)
@@ -432,14 +433,16 @@ class HyperCube:
         else:
             self.update_dimension(feature, (lower, upper))

-    def update(self, dataset: pd.DataFrame, predictor) -> None:
-        filtered = self.filter_dataframe(dataset.iloc[:, :-1])
-        predictions = predictor.predict(filtered)
-        self._output = np.mean(predictions)
-        self._diversity = np.std(predictions)
-        self._error = (abs(predictions - self._output)).mean()
-        means = filtered.describe().loc['mean']
-        self._barycenter = Point(means.index.values, means.values)
+    def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+        idx = self.filter_indices(dataset.iloc[:, :-1])
+        filtered = dataset.iloc[idx, :-1]
+        if len(filtered > 0):
+            predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
+            self._output = np.mean(predictions)
+            self._diversity = np.std(predictions)
+            self._error = (abs(predictions - self._output)).mean()
+            means = filtered.describe().loc['mean']
+            self._barycenter = Point(means.index.values, means.values)

     # TODO: why this is not a property?
     def init_diversity(self, std: float) -> None:
@@ -450,10 +453,11 @@ class RegressionCube(HyperCube):
     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output=None):
         super().__init__(dimension=dimension, limits=limits, output=LinearRegression() if output is None else output)

-    def update(self, dataset: pd.DataFrame, predictor) -> None:
-        filtered = self.filter_dataframe(dataset.iloc[:, :-1])
+    def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+        idx = self.filter_indices(dataset.iloc[:, :-1])
+        filtered = dataset.iloc[idx, :-1]
         if len(filtered > 0):
-            predictions = predictor.predict(filtered)
+            predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
             self._output.fit(filtered, predictions)
             self._diversity = self._error = (abs(self._output.predict(filtered) - predictions)).mean()
             means = filtered.describe().loc['mean']
@@ -489,10 +493,11 @@ class ClassificationCube(HyperCube):
     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output: str = ""):
         super().__init__(dimension=dimension, limits=limits, output=output)

-    def update(self, dataset: pd.DataFrame, predictor) -> None:
-        filtered = self.filter_dataframe(dataset.iloc[:, :-1])
+    def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+        idx = self.filter_indices(dataset.iloc[:, :-1])
+        filtered = dataset.iloc[idx, :-1]
         if len(filtered > 0):
-            predictions = predictor.predict(filtered)
+            predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
             self._output = mode(predictions)
             self._diversity = self._error = 1 - sum(p == self.output for p in predictions) / len(predictions)
             means = filtered.describe().loc['mean']
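
Taken together, the two signature changes let a cube be refreshed straight from labelled data; a minimal sketch with a toy dataframe (no fitted model needed, and the column names are invented):

    import pandas as pd
    from psyke.extraction.hypercubic import HyperCube

    df = pd.DataFrame({'a': [0.1, 0.4, 0.9], 'b': [1.0, 2.0, 3.0], 'y': [0.0, 1.0, 1.0]})

    # Ignored features get (-inf, inf) bounds, so the cube never constrains them.
    cube = HyperCube.create_surrounding_cube(df, features_to_ignore=['b'])

    cube.update(df)            # predictor=None: the last column supplies the targets
    # cube.update(df, model)   # or query a fitted predictor, as before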
psyke/genetic/__init__.py ADDED
File without changes
psyke/genetic/gin/__init__.py ADDED
@@ -0,0 +1,106 @@
+import numpy as np
+from deap import base, creator, tools, algorithms
+import random
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_absolute_error, r2_score
+from sklearn.preprocessing import PolynomialFeatures
+
+
+class GIn:
+
+    def __init__(self, train, valid, features, sigmas, slices, min_rules=1, poly=1,
+                 alpha=0.5, indpb=0.5, tournsize=3, metric='R2', warm=False):
+        self.X, self.y = train
+        self.valid = valid
+
+        self.features = features
+        self.sigmas = sigmas
+        self.slices = slices
+        self.min_rules = min_rules
+        self.poly = PolynomialFeatures(degree=poly, include_bias=False)
+
+        self.alpha = alpha
+        self.indpb = indpb
+        self.tournsize = tournsize
+        self.metric = metric
+
+        self.toolbox = None
+        self.stats = None
+        self.hof = None
+
+        self.setup(warm)
+
+    def region(self, X, cuts):
+        indices = [np.searchsorted(np.array(cut), X[f].to_numpy(), side='right')
+                   for cut, f in zip(cuts, self.features)]
+
+        regions = np.zeros(len(X), dtype=int)
+        multiplier = 1
+        for idx, n in zip(reversed(indices), reversed([len(cut) + 1 for cut in cuts])):
+            regions += idx * multiplier
+            multiplier *= n
+
+        return regions
+
+    def evaluate(self, individual):
+        to_pred, true = self.valid or (self.X, self.y)
+        boundaries = np.cumsum([0] + list(self.slices))
+        cuts = [sorted(individual[boundaries[i]:boundaries[i + 1]]) for i in range(len(self.slices))]
+
+        regions = self.region(to_pred, cuts)
+        regionsT = self.region(self.X, cuts)
+
+        y_pred = np.zeros(len(to_pred))
+        valid_regions = 0
+
+        for r in range(np.prod([s + 1 for s in self.slices])):
+            mask = regions == r
+            maskT = regionsT == r
+            if min(mask.sum(), maskT.sum()) < 3:
+                y_pred[mask] = np.mean(self.y)
+                continue
+            y_pred[mask] = LinearRegression().fit(self.poly.fit_transform(self.X)[maskT], self.y[maskT]).predict(
+                self.poly.fit_transform(to_pred)[mask])
+            valid_regions += 1
+
+        if valid_regions < self.min_rules:
+            return -9999,
+
+        return (r2_score(true, y_pred) if self.metric == 'R2' else -mean_absolute_error(true, y_pred)),
+
+    def setup(self, warm=False):
+        if not warm:
+            creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+            creator.create("Individual", list, fitness=creator.FitnessMax)
+
+        self.toolbox = base.Toolbox()
+        for f in self.features:
+            self.toolbox.register(f, random.uniform, self.X[f].min(), self.X[f].max())
+
+        self.toolbox.register("individual", tools.initCycle, creator.Individual,
+                              (sum([[getattr(self.toolbox, f) for i in range(s)]
+                                    for f, s in zip(self.features, self.slices)], [])), n=1)
+
+        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
+
+        self.toolbox.register("mate", tools.cxBlend, alpha=self.alpha)
+        self.toolbox.register("mutate", tools.mutGaussian, indpb=self.indpb, mu=0,
+                              sigma=sum([[sig] * s for sig, s in zip(self.sigmas, self.slices)], []))
+        self.toolbox.register("select", tools.selTournament, tournsize=self.tournsize)
+        self.toolbox.register("evaluate", self.evaluate)
+
+        self.stats = tools.Statistics(lambda ind: ind.fitness.values[0])
+        self.stats.register("avg", np.mean)
+        # self.stats.register("min", np.min)
+        self.stats.register("max", np.max)
+        # self.stats.register("std", np.std)
+
+        self.hof = tools.HallOfFame(1)
+
+    def run(self, n_pop=30, cxpb=0.8, mutpb=0.5, n_gen=50, seed=123):
+        random.seed(seed)
+        pop = self.toolbox.population(n=n_pop)
+        result, log = algorithms.eaSimple(pop, self.toolbox, cxpb=cxpb, mutpb=mutpb, ngen=n_gen,
+                                          stats=self.stats, halloffame=self.hof, verbose=False)
+        best = tools.selBest(pop, 1)[0]
+        return best, self.evaluate(best)[0], result, log
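
And a corresponding sketch of driving GIn directly (inside the package it is normally constructed by GInGER); the toy data and parameter values are illustrative:

    import numpy as np
    import pandas as pd
    from psyke.genetic.gin import GIn

    rng = np.random.default_rng(0)
    X = pd.DataFrame({'x1': rng.uniform(0, 1, 200), 'x2': rng.uniform(0, 1, 200)})
    y = pd.Series(2 * X['x1'] + np.sin(6 * X['x2']))

    # One evolved cut on x1, two on x2; sigmas drive the Gaussian mutation.
    gin = GIn((X, y), valid=None, features=['x1', 'x2'], sigmas=[0.1, 0.1], slices=(1, 2))
    best, score, _, _ = gin.run(n_gen=10, n_pop=20)
    print(score)  # fitness (R2 by default) of the best individual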
psyke-0.9.1.dev12.dist-info/METADATA → psyke-0.9.1.dev43.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: psyke
-Version: 0.9.1.dev12
+Version: 0.9.1.dev43
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
psyke-0.9.1.dev12.dist-info/RECORD → psyke-0.9.1.dev43.dist-info/RECORD RENAMED
@@ -1,22 +1,23 @@
-psyke/__init__.py,sha256=J8lzVDJaGAin9TpDB6cXgAQegopVBuqURaWkkxjptR8,22490
+psyke/__init__.py,sha256=4GYagtqZnAOBz3VKyOgEp-yS5-16J7x2J9PoOkdQ9-U,23282
 psyke/hypercubepredictor.py,sha256=Pg8F2R_NHrNgFHx92s32BorYHMVvaxpEh4GtCsoyB2U,4620
-psyke/clustering/__init__.py,sha256=36MokTVwwWR_-o0mesvXHaYEYVTK2pn2m0ZY4G3Y3qU,581
-psyke/clustering/utils.py,sha256=S0YwCKyHVYp9qUAQVzCMrTwcQFPJ5TD14Jwn10DE-Z4,1616
-psyke/clustering/cream/__init__.py,sha256=W6k7vdjuUdA_azYA4vb5JtpWrofhDJ0DbM2jsnRKzfw,2994
-psyke/clustering/exact/__init__.py,sha256=s4MPvGZ6gle3X9WH3YFHOEdinGcXIXh-7EFRcElWzsQ,5275
+psyke/clustering/__init__.py,sha256=LfLZY2UwHY9xlFT4SMGGbyFY5S6sMXndY-UMaJIJtd8,714
+psyke/clustering/utils.py,sha256=BqMPKJ-r6CdxXwyk-2AvkPV4DBnZF5WUNz2fKiXbhlw,1596
+psyke/clustering/cream/__init__.py,sha256=TtUd5IyfavSUZeSuSAr06ZftVhK30ZLZGUUfH3ZQG2w,2799
+psyke/clustering/exact/__init__.py,sha256=OMMxc_lIKHouZTpbLF7dt4dOA3chL4XFpjOCyc6GTTY,5545
 psyke/extraction/__init__.py,sha256=Q0i6wMzCdU7CkxhzWoD8H_a6XId6bfEx6LZbSJmTqm0,936
 psyke/extraction/cart/CartPredictor.py,sha256=YhEuaENLWixu379sIXZkFeCNc8GBnxLnR6TPCQR7sps,5743
-psyke/extraction/cart/FairTree.py,sha256=49ciVmqgSa6hNL2axYi-oN4DSsqrHdewH76TTYiR8x0,7529
+psyke/extraction/cart/FairTree.py,sha256=mccoLDrSNy6iivqFZ23m33hxIB_kPXa3mNL1ukfb5Ls,7624
 psyke/extraction/cart/FairTreePredictor.py,sha256=7z4oLqflkRMqqVW_UIlrGsQrvROM4sXUfY7LPQJ662g,2321
 psyke/extraction/cart/__init__.py,sha256=SsjAJiL4n6q_GNR6H8PNfhTkAZ67Ka7NRvVRxCULBhQ,3191
-psyke/extraction/hypercubic/__init__.py,sha256=SK-I9IPQEdpYVTkFGa8No803QMwYSqgTTzinry4KLew,10896
-psyke/extraction/hypercubic/hypercube.py,sha256=s1fuGOZfN2ZE21C7f6-b1T3Ta_934c4rwDLD_pBWwFk,25847
+psyke/extraction/hypercubic/__init__.py,sha256=AxvPJxEQzL9Diyi7l2kX3zZESZ9xbh6RFp7ffs0w2ic,13112
+psyke/extraction/hypercubic/hypercube.py,sha256=Pz-F6RkAKLT5e86L29khqLjKTJ7k2TZszdRdxddVFtA,26275
 psyke/extraction/hypercubic/strategy.py,sha256=m9BGSrKc-VadgEQTOPow85hBPFqMIt0J99nCFIh4NUs,1839
 psyke/extraction/hypercubic/utils.py,sha256=D2FN5CCm_T3h23DmLFoTnIcFo7LvIq__ktl4hjUqkcA,1525
 psyke/extraction/hypercubic/cosmik/__init__.py,sha256=XQUvOtMFpR0vMHYtwIVl3G626HMqN8Clt6BqNm4nvFs,1880
-psyke/extraction/hypercubic/creepy/__init__.py,sha256=Cglj1mmj6VM_YBKgfNN0uDIbZb2_YS1RtPc75ftXWP8,1744
+psyke/extraction/hypercubic/creepy/__init__.py,sha256=x8a1ftoYHixGpiDfM3u-6QBEDYmaSlPIRIuAOCx573w,2056
 psyke/extraction/hypercubic/divine/__init__.py,sha256=ClO8CITKKXoo7nhlBJagR1yAachsxLHYQlqggl-9eGE,3665
-psyke/extraction/hypercubic/gridex/__init__.py,sha256=_g_JC6eFKLeg_CtkQawsUpVUAVxiVPQFJbfRVhMaBLg,5632
+psyke/extraction/hypercubic/ginger/__init__.py,sha256=GZOKy_iLmYOuXcRrPhNEDP5ZA2Ez2Cjb6GuervSAD4Y,4476
+psyke/extraction/hypercubic/gridex/__init__.py,sha256=tPPLGRJ-7fCt-OB-qq6W7EV0hqEuQVUGlXs2yyABo98,3161
 psyke/extraction/hypercubic/gridrex/__init__.py,sha256=h9usK5tFqd6ngBmRydsgkfQ1jlcQKj2uG72Tr1puFHk,595
 psyke/extraction/hypercubic/hex/__init__.py,sha256=553AZjOT9thfqDGtVDI6WtgYNex2Y6dg53cEyuf7Q80,4805
 psyke/extraction/hypercubic/iter/__init__.py,sha256=bb0neiPcNlyyr-OUUjgw4vdkehnAsoyJzVJ88jAHtQ8,10233
@@ -24,6 +25,8 @@ psyke/extraction/real/__init__.py,sha256=zAE_syurDqmFiopD5oLeIs9bROiuXy06wxoHmVq
 psyke/extraction/real/utils.py,sha256=4NNL15Eu7cmkG9b29GBP6CKgMTV1cmiJVS0k1MbWpIs,2148
 psyke/extraction/trepan/__init__.py,sha256=H8F_wpFLPcfyx2tgOOno8FwUomxfVxVl1vxlb0ClP1g,6931
 psyke/extraction/trepan/utils.py,sha256=iSUJ1ooNQT_VO1KfBZuIUeUsyUbGdQf_pSEE87vMeQg,2320
+psyke/genetic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+psyke/genetic/gin/__init__.py,sha256=liRG5kenjPnMlr4pDGIshLDGmwK-GYrFCKFlWUDk1YU,4179
 psyke/schema/__init__.py,sha256=axv4ejZY0ItUwrC9IXb_yAhaQL5f1vwvXXmaIAHJmt0,26063
 psyke/tuning/__init__.py,sha256=yd_ForFmHeYbtRXltY1fOa-mPJvpE6ijzg50M_8Sdxw,3649
 psyke/tuning/crash/__init__.py,sha256=zIHEF75EFy_mRIieqzP04qKLG3GLsSc_mYZHpPfkzxU,2623
@@ -35,8 +38,8 @@ psyke/utils/logic.py,sha256=ioP25WMTYNYEzaRDNDe3kGNWqZ6DA_63t19d-ky_2kM,12227
 psyke/utils/metrics.py,sha256=Oo5BOonOSfo0qYsXWT5dmypZ7jiStByFC2MKEU0uMHg,2250
 psyke/utils/plot.py,sha256=dE8JJ6tQ0Ezosid-r2jqAisREjFe5LqExRzsVi5Ns-c,7785
 psyke/utils/sorted.py,sha256=C3CPW2JisND30BRk5c1sAAHs3Lb_wsRB2qZrYFuRnfM,678
-psyke-0.9.1.dev12.dist-info/licenses/LICENSE,sha256=G3mPaubObvkBXbsgTTeYGLk_pNEW8tc7HZr4u_wLEpU,11398
-psyke-0.9.1.dev12.dist-info/METADATA,sha256=R8bTf2T9Hf2Qc35iVKAINxAm5kiqRpF79HRUFYPwXcc,8395
-psyke-0.9.1.dev12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-psyke-0.9.1.dev12.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
-psyke-0.9.1.dev12.dist-info/RECORD,,
+psyke-0.9.1.dev43.dist-info/licenses/LICENSE,sha256=G3mPaubObvkBXbsgTTeYGLk_pNEW8tc7HZr4u_wLEpU,11398
+psyke-0.9.1.dev43.dist-info/METADATA,sha256=8P6RB9OhVsNHokg8Q1meg5zpigQoaigC58y8EtrQHCo,8395
+psyke-0.9.1.dev43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+psyke-0.9.1.dev43.dist-info/top_level.txt,sha256=q1HglxOqqoIRukFtyis_ZNHczZg4gANRUPWkD7HAUTU,6
+psyke-0.9.1.dev43.dist-info/RECORD,,