PyPI - psyke - Versions diffs - 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl - Mend

psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

psyke/__init__.py +231 -85
psyke/clustering/__init__.py +9 -4
psyke/clustering/cream/__init__.py +6 -10
psyke/clustering/exact/__init__.py +17 -11
psyke/clustering/utils.py +0 -1
psyke/extraction/__init__.py +25 -0
psyke/extraction/cart/CartPredictor.py +128 -0
psyke/extraction/cart/FairTree.py +205 -0
psyke/extraction/cart/FairTreePredictor.py +56 -0
psyke/extraction/cart/__init__.py +48 -62
psyke/extraction/hypercubic/__init__.py +187 -47
psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
psyke/extraction/hypercubic/creepy/__init__.py +24 -29
psyke/extraction/hypercubic/divine/__init__.py +86 -0
psyke/extraction/hypercubic/ginger/__init__.py +100 -0
psyke/extraction/hypercubic/gridex/__init__.py +45 -84
psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
psyke/extraction/hypercubic/hex/__init__.py +104 -0
psyke/extraction/hypercubic/hypercube.py +275 -72
psyke/extraction/hypercubic/iter/__init__.py +45 -46
psyke/extraction/hypercubic/strategy.py +13 -9
psyke/extraction/real/__init__.py +24 -29
psyke/extraction/real/utils.py +2 -2
psyke/extraction/trepan/__init__.py +24 -19
psyke/genetic/__init__.py +0 -0
psyke/genetic/fgin/__init__.py +74 -0
psyke/genetic/gin/__init__.py +144 -0
psyke/hypercubepredictor.py +102 -0
psyke/schema/__init__.py +230 -36
psyke/tuning/__init__.py +40 -28
psyke/tuning/crash/__init__.py +33 -64
psyke/tuning/orchid/__init__.py +21 -23
psyke/tuning/pedro/__init__.py +70 -56
psyke/utils/logic.py +8 -8
psyke/utils/plot.py +79 -3
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
psyke-1.0.4.dev10.dist-info/RECORD +46 -0
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
psyke/extraction/cart/predictor.py +0 -73
psyke-0.4.9.dev6.dist-info/RECORD +0 -36
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0

psyke/extraction/hypercubic/__init__.py CHANGED Viewed

@@ -1,7 +1,9 @@
 from __future__ import annotations
 from abc import ABC
-from typing import Iterable
+from collections.abc import Iterable
+from itertools import combinations
 import numpy as np
 import pandas as pd
 from sklearn.base import ClassifierMixin
@@ -9,57 +11,180 @@ from sklearn.feature_selection import SelectKBest, f_regression, f_classif
 from sklearn.linear_model import LinearRegression
 from tuprolog.core import Var, Struct, clause
 from tuprolog.theory import Theory, mutable_theory
-from psyke import logger, PedagogicalExtractor
-from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube
+from psyke.extraction import PedagogicalExtractor
+from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
+    GenericCube
+from psyke.hypercubepredictor import HyperCubePredictor
+from psyke.schema import Value
 from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
-from psyke.utils import Target, get_int_precision
+from psyke.utils import Target
 from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
-class HyperCubePredictor:
-    def __init__(self, cubes=[], output=Target.CONSTANT, normalization=None):
-        self._hypercubes = cubes
-        self._output = output
-        self.normalization = normalization
+class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
+    def __init__(self, predictor, output, discretization=None, normalization=None):
+        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+        PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
+        self._default_surrounding_cube = False
+        self.threshold = None
-    def _predict(self, dataframe: pd.DataFrame) -> Iterable:
-        return np.array([self._predict_from_cubes(dict(row.to_dict())) for _, row in dataframe.iterrows()])
+    def _default_cube(self, dimensions=None) -> HyperCube | RegressionCube | ClassificationCube:
+        if self._output == Target.CONSTANT:
+            return HyperCube(dimensions)
+        if self._output == Target.REGRESSION:
+            return RegressionCube(dimensions)
+        return ClassificationCube(dimensions)
-    def _predict_from_cubes(self, data: dict[str, float]) -> float | None:
-        data = {k: v for k, v in data.items()}
-        for cube in self._hypercubes:
-            if cube.__contains__(data):
-                if self._output == Target.CLASSIFICATION:
-                    return HyperCubePredictor._get_cube_output(cube, data)
-                else:
-                    return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
-        return None
+    @staticmethod
+    def _find_couples(to_split: Iterable[HyperCube], not_in_cache: set[HyperCube],
+                      adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
+            Iterable[tuple[HyperCube, HyperCube, str]]:
-    @property
-    def n_rules(self):
-        return len(list(self._hypercubes))
+        for cube1, cube2 in combinations(to_split, 2):
+            key = (cube1, cube2) if id(cube1) < id(cube2) else (cube2, cube1)
-    @property
-    def volume(self):
-        return sum([cube.volume() for cube in self._hypercubes])
+            if (cube1 in not_in_cache) or (cube2 in not_in_cache):
+                adjacent_cache[key] = cube1.is_adjacent(cube2)
+            feature = adjacent_cache.get(key)
+            if feature is not None:
+                yield cube1, cube2, feature
+    def _evaluate_merge(self, not_in_cache: Iterable[HyperCube], dataframe: pd.DataFrame, feature: str,
+                        cube: HyperCube, other_cube: HyperCube,
+                        merge_cache: dict[tuple[HyperCube, HyperCube], HyperCube | None]) -> bool:
+        if (cube in not_in_cache) or (other_cube in not_in_cache):
+            merged_cube = cube.merge_along_dimension(other_cube, feature)
+            merged_cube.update(dataframe, self.predictor)
+            merge_cache[(cube, other_cube)] = merged_cube
+        return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
+            merge_cache[(cube, other_cube)].diversity < self.threshold
+    def _sort_cubes(self):
+        cubes = [(cube.diversity, i, cube) for i, cube in enumerate(self._hypercubes)]
+        cubes.sort()
+        self._hypercubes = [cube[2] for cube in cubes]
+    def _merge(self, to_split: list[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
+        not_in_cache = set(to_split)
+        adjacent_cache = {}
+        merge_cache = {}
+        while True:
+            to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
+                        HyperCubeExtractor._find_couples(to_split, not_in_cache, adjacent_cache) if
+                        self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
+            if len(to_merge) == 0:
+                break
+            best = min(to_merge, key=lambda c: c[1].diversity)
+            for cube in best[0]:
+                to_split.remove(cube)
+            to_split.append(best[1])
+            not_in_cache = [best[1]]
+        return to_split
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
+        theory = PedagogicalExtractor.extract(self, dataframe)
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+        self._surrounding.update(dataframe, self.predictor)
+        return theory
+    def pairwise_fairness(self, data: dict[str, float], neighbor: dict[str, float]):
+        cube1 = self._find_cube(data)
+        cube2 = self._find_cube(neighbor)
+        different_prediction_reasons = []
+        if cube1.output == cube2.output:
+            print("Prediction", cube1.output, "is FAIR")
+        else:
+            print("Prediction", cube1.output, "may be UNFAIR")
+            print("It could be", cube2.output, "if:")
+            for d in data:
+                a, b = cube2.dimensions[d]
+                if data[d] < a:
+                    print('    ', d, 'increases above', round(a, 1))
+                    different_prediction_reasons.append(d)
+                elif data[d] > b:
+                    print('    ', d, 'decreases below', round(b, 1))
+                    different_prediction_reasons.append(d)
+        return different_prediction_reasons
+    def predict_counter(self, data: dict[str, float], verbose=True, only_first=True):
+        output = ""
+        prediction = None
+        cube = self._find_cube(data)
+        if cube is None:
+            output += "The extracted knowledge is not exhaustive; impossible to predict this instance"
+        else:
+            prediction = self._predict_from_cubes(data)
+            output += f"The output is {prediction}\n"
+        point = Point(list(data.keys()), list(data.values()))
+        cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
+        cubes = sorted([(cube.surface_distance(point), cube.volume(), i, cube) for i, cube in enumerate(cubes)])
+        counter_conditions = []
+        for _, _, _, c in cubes:
+            if not only_first or c.output not in [o for o, _ in counter_conditions]:
+                counter_conditions.append((c.output, {c: [val for val in v if val is not None and not val.is_in(
+                    self.unscale(data[c], c))] for c, v in self.__get_conditions(data, c).items()}))
+        if verbose:
+            for o, conditions in counter_conditions:
+                output += f"The output may be {o} if\n" + HyperCubeExtractor.__conditions_to_string(conditions)
+            print(output)
+        return prediction, counter_conditions
     @staticmethod
-    def _get_cube_output(cube, data: dict[str, float]) -> float:
-        return cube.output.predict(pd.DataFrame([data])).flatten()[0] if \
-            isinstance(cube, RegressionCube) else cube.output
+    def __conditions_to_string(conditions: dict[str, list[Value]]) -> str:
+        output = ""
+        for d in conditions:
+            for i, condition in enumerate(conditions[d]):
+                if i == 0:
+                    output += f'     {d} is '
+                else:
+                    output += ' and '
+                output += condition.print()
+                if i + 1 == len(conditions[d]):
+                    output += '\n'
+        return output
+    def __get_conditions(self, data: dict[str, float], cube: GenericCube) -> dict[str, list[Value]]:
+        conditions = {d: [cube.interval_to_value(d, self.unscale)] for d in data.keys()
+                      if d not in self._dimensions_to_ignore}
+        for c in cube.subcubes(self._hypercubes):
+            for d in conditions:
+                condition = c.interval_to_value(d, self.unscale)
+                if condition is None:
+                    continue
+                elif conditions[d][-1] is None:
+                    conditions[d][-1] = -condition
+                else:
+                    try:
+                        conditions[d][-1] *= -condition
+                    except Exception:
+                        conditions[d].append(-condition)
+        return conditions
+    def predict_why(self, data: dict[str, float], verbose=True):
+        cube = self._find_cube(data)
+        output = ""
+        if cube is None:
+            output += "The extracted knowledge is not exhaustive; impossible to predict this instance\n"
+            if verbose:
+                print(output)
+            return None, {}
+        prediction = self._predict_from_cubes(data)
+        output += f"The output is {prediction} because\n"
+        conditions = {c: [val for val in v if val is not None and val.is_in(self.unscale(data[c], c))]
+                      for c, v in self.__get_conditions(data, cube).items()}
-class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
-    def __init__(self, predictor, output, normalization):
-        PedagogicalExtractor.__init__(self, predictor, normalization=normalization)
-        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+        if verbose:
+            output += HyperCubeExtractor.__conditions_to_string(conditions)
+            print(output)
-    def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
-        if self._output == Target.CONSTANT:
-            return HyperCube()
-        if self._output == Target.REGRESSION:
-            return RegressionCube()
-        return ClassificationCube()
+        return prediction, conditions
     @staticmethod
     def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
@@ -67,19 +192,28 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
             if not isinstance(output, LinearRegression) else \
             create_head(dataframe.columns[-1], variables[:-1], variables[-1])
-    def _ignore_dimensions(self) -> Iterable[str]:
-        return []
+    def __drop(self, dataframe: pd.DataFrame):
+        self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
+    def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
+        # self.__drop(dataframe)
+        for cube in self._hypercubes:
+            for dimension in cube.dimensions:
+                if abs(cube[dimension][0] - self._surrounding[dimension][0]) < HyperCube.EPSILON * 2:
+                    cube.set_infinite(dimension, '-')
+                if abs(cube[dimension][1] - self._surrounding[dimension][1]) < HyperCube.EPSILON * 2:
+                    cube.set_infinite(dimension, '+')
+        if self._default_surrounding_cube:
+            self._hypercubes[-1].set_default()
-    def _create_theory(self, dataframe: pd.DataFrame, sort: bool = True) -> Theory:
         new_theory = mutable_theory()
         for cube in self._hypercubes:
-            logger.info(cube.output)
-            logger.info(cube.dimensions)
-            variables = create_variable_list([], dataframe, sort)
+            variables = create_variable_list([], dataframe)
             variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
             head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
                                                    self.unscale(cube.output, dataframe.columns[-1]))
-            body = cube.body(variables, self._ignore_dimensions(), self.unscale, self.normalization)
+            body = cube.body(variables, self._dimensions_to_ignore, self.unscale, self.normalization)
             new_theory.assertZ(clause(head, body))
         return HyperCubeExtractor._prettify_theory(new_theory)
@@ -120,10 +254,16 @@ class FeatureRanker:
 class Grid:
-    def __init__(self, iterations: int = 1, strategy: Strategy | list[Strategy] = FixedStrategy()):
+    def __init__(self, iterations: int = 1, strategy: Strategy | Iterable[Strategy] = FixedStrategy()):
         self.iterations = iterations
         self.strategy = strategy
+    def make_fair(self, features: Iterable[str]):
+        if isinstance(self.strategy, Strategy):
+            self.strategy.make_fair(features)
+        elif isinstance(self.strategy, Iterable):
+            [strategy.make_fair(features) for strategy in self.strategy]
     def get(self, feature: str, depth: int) -> int:
         if isinstance(self.strategy, list):
             return self.strategy[depth].get(feature)

psyke/extraction/hypercubic/cosmik/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+import numpy as np
+import pandas as pd
+from sklearn.mixture import GaussianMixture
+from tuprolog.theory import Theory
+from psyke import Target, Extractor, get_default_random_seed
+from psyke.clustering.utils import select_gaussian_mixture
+from psyke.extraction.hypercubic import HyperCube, HyperCubeExtractor, RegressionCube
+class COSMiK(HyperCubeExtractor):
+    """
+    Explanator implementing COSMiK algorithm.
+    """
+    def __init__(self, predictor, max_components: int = 4, k: int = 5, patience: int = 15, close_to_center: bool = True,
+                 output: Target = Target.CONSTANT, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(predictor, Target.REGRESSION, discretization, normalization)
+        self.max = max_components
+        self.k = k
+        self.patience = patience
+        self.output = output
+        self.close_to_center = close_to_center
+        self.seed = seed
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        np.random.seed(self.seed)
+        X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]
+        _, n, _ = select_gaussian_mixture(dataframe, self.max)
+        gmm = GaussianMixture(n)
+        gmm.fit(X, y)
+        divine = Extractor.divine(gmm, self.k, self.patience, self.close_to_center,
+                                  self.discretization, self.normalization)
+        df = X.join(pd.DataFrame(gmm.predict(X)))
+        df.columns = dataframe.columns
+        divine.extract(df)
+        self._hypercubes = [HyperCube(cube.dimensions.copy()) if self.output == Target.CONSTANT else
+                            RegressionCube(cube.dimensions.copy()) for cube in divine._hypercubes]
+        for cube in self._hypercubes:
+            cube.update(dataframe, self.predictor)
+        self._sort_cubes()
+        return self._create_theory(dataframe)

psyke/extraction/hypercubic/creepy/__init__.py CHANGED Viewed

@@ -1,50 +1,45 @@
 from __future__ import annotations
-from abc import ABC
-from collections import Iterable
-import numpy as np
+from collections.abc import Iterable
+from typing import Callable, Any
 import pandas as pd
 from sklearn.base import ClassifierMixin
-from tuprolog.core import clause
 from tuprolog.theory import Theory
 from psyke import Clustering
 from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import HyperCubeExtractor
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed
-class CReEPy(HyperCubeExtractor, ABC):
+class CReEPy(HyperCubeExtractor):
     """
     Explanator implementing CReEPy algorithm.
     """
-    def __init__(self, predictor, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
-                 gauss_components: int = 5, ranks: list[(str, float)] = [], ignore_threshold: float = 0.0,
-                 normalization=None, clustering=Clustering.exact):
+    ClusteringType = Callable[[int, float, Target, int, Any, Any, int], HyperCubeClustering]
+    def __init__(self, predictor, clustering: ClusteringType = Clustering.exact, depth: int = 3,
+                 error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0, discretization=None,
+                 normalization=None, seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
-                         normalization)
-        self.clustering = clustering(depth, error_threshold, self._output, gauss_components)
-        self.ranks = ranks
-        self.ignore_threshold = ignore_threshold
+                         discretization, normalization)
+        self.clustering = clustering(depth, error_threshold, self._output, gauss_components, discretization,
+                                     normalization, seed)
+        self._default_surrounding_cube = True
+        self._dimensions_to_ignore = set([dimension for dimension, relevance in ranks if relevance < ignore_threshold])
+        self._protected_features = []
+    def make_fair(self, features: Iterable[str]):
+        self.clustering.make_fair(features)
+        self._dimensions_to_ignore.update(features)
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         if not isinstance(self.clustering, HyperCubeClustering):
             raise TypeError("clustering must be a HyperCubeClustering")
         self.clustering.fit(dataframe)
         self._hypercubes = self.clustering.get_hypercubes()
-        for cube in self._hypercubes:
-            for dimension in self._ignore_dimensions():
-                cube[dimension] = [-np.inf, np.inf]
-        theory = self._create_theory(dataframe)
-        last_clause = list(theory.clauses)[-1]
-        theory.retract(last_clause)
-        theory.assertZ(clause(
-            last_clause.head, [list(last_clause.body)[-1]] if self._output is Target.REGRESSION else []))
-        last_cube = self._hypercubes[-1]
-        for dimension in last_cube.dimensions.keys():
-            last_cube[dimension] = [-np.inf, np.inf]
-        return theory
-    def _ignore_dimensions(self) -> Iterable[str]:
-        return [dimension for dimension, relevance in self.ranks if relevance < self.ignore_threshold]
+        self._surrounding = self._hypercubes[-1]
+        return self._create_theory(dataframe)

psyke/extraction/hypercubic/divine/__init__.py ADDED Viewed

@@ -0,0 +1,86 @@
+import numpy as np
+import pandas as pd
+from tuprolog.theory import Theory
+from psyke import Target, get_default_random_seed
+from psyke.extraction.hypercubic import HyperCubeExtractor
+from psyke.extraction.hypercubic.hypercube import Point, GenericCube, HyperCube
+from sklearn.neighbors import BallTree
+class DiViNE(HyperCubeExtractor):
+    """
+    Explanator implementing DiViNE algorithm.
+    """
+    def __init__(self, predictor, k: int = 5, patience: int = 15, close_to_center: bool = True,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(predictor, Target.CLASSIFICATION, discretization, normalization)
+        self.k = k
+        self.patience = patience
+        self.vicinity_function = DiViNE.closest_to_center if close_to_center else DiViNE.closest_to_corners
+        self.seed = seed
+    @staticmethod
+    def __pop(data: pd.DataFrame, idx: int = None) -> (Point, pd.DataFrame):
+        if idx is None:
+            idx = data.sample(1).index.values[0]
+        t = data.T
+        return DiViNE.__to_point(t.pop(idx)), t.T.reset_index(drop=True)
+    @staticmethod
+    def __to_point(instance) -> Point:
+        point = Point(instance.index.values, instance.values)
+        return point
+    def __to_cube(self, point: Point) -> GenericCube:
+        cube = HyperCube.cube_from_point(point.dimensions, self._output)
+        cube._output = list(point.dimensions.values())[-1]
+        return cube
+    def __clean(self, data: pd.DataFrame) -> pd.DataFrame:
+        _, idx = BallTree(data.iloc[:, :-1]).query(data.iloc[:, :-1], k=self.k)
+        # how many output classes are associated with the k neighbors
+        count = np.array(list(map(lambda indices: len(data.iloc[indices].iloc[:, -1].unique()), idx)))
+        # instances with neighbors of different classes are discarded
+        return data[count == 1]
+    def __closest(self, data: pd.DataFrame, cube: GenericCube) -> (Point, pd.DataFrame):
+        return DiViNE.__pop(data, self.vicinity_function(BallTree(data.iloc[:, :-1]), cube))
+    @staticmethod
+    def closest_to_center(tree: BallTree, cube: GenericCube):
+        return tree.query([list(cube.center.dimensions.values())], k=1)[1][0][-1]
+    @staticmethod
+    def closest_to_corners(tree: BallTree, cube: GenericCube):
+        distance, idx = tree.query([list(point.dimensions.values()) for point in cube.corners()], k=1)
+        return idx[np.argmin(distance)][-1]
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=Target.CLASSIFICATION)
+        np.random.seed(self.seed)
+        data = self.__clean(dataframe)
+        while len(data) > 0:
+            discarded = []
+            patience = self.patience
+            point, data = self.__pop(data)
+            cube = self.__to_cube(point)
+            while patience > 0 and len(data) > 0:
+                other, data = self.__closest(data, cube)
+                if cube.output == list(other.dimensions.values())[-1]:
+                    cube = cube.merge_with_point(other)
+                    data = data[~(cube.filter_indices(data.iloc[:, :-1]))].reset_index(drop=True)
+                else:
+                    patience -= 1
+                    discarded.append(other)
+            if cube.volume() > 0:
+                cube.update(dataframe, self.predictor)
+                self._hypercubes.append(cube)
+            if len(discarded) > 0:
+                data = pd.concat([data] + [d.to_dataframe() for d in discarded]).reset_index(drop=True)
+        self._sort_cubes()
+        return self._create_theory(dataframe)

psyke/extraction/hypercubic/ginger/__init__.py ADDED Viewed

@@ -0,0 +1,100 @@
+import itertools
+from typing import Iterable
+import numpy as np
+import pandas as pd
+from sklearn.base import ClassifierMixin
+from sklearn.preprocessing import PolynomialFeatures
+from tuprolog.theory import Theory
+from psyke import get_default_random_seed, Target
+from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube, RegressionCube
+from deap import base, creator
+from psyke.genetic.gin import GIn
+class GInGER(HyperCubeExtractor):
+    """
+    Explanator implementing GInGER algorithm.
+    """
+    def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
+                 tournsize=3, metric='R2', n_gen=50, n_pop=50, threshold=None, valid=None,
+                 output: Target = Target.REGRESSION, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(predictor, output=Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
+                         normalization=normalization)
+        self.threshold = threshold
+        np.random.seed(seed)
+        self.features = features
+        self.max_features = len(features)
+        self.sigmas = sigmas
+        self.max_slices = max_slices
+        self.min_rules = min_rules
+        self.poly = max_poly
+        self.trained_poly = None
+        self.alpha = alpha
+        self.indpb = indpb
+        self.tournsize = tournsize
+        self.metric = metric
+        self.n_gen = n_gen
+        self.n_pop = n_pop
+        self.valid = valid
+        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+        creator.create("Individual", list, fitness=creator.FitnessMax)
+    def __poly_names(self):
+        return [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
+                         for pp, n in zip(p, self.trained_poly.feature_names_in_)])[:-3]
+                for p in self.trained_poly.powers_]
+    def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+        dataframe = pd.DataFrame(self.trained_poly.fit_transform(dataframe), columns=self.__poly_names())
+        return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        best = {}
+        for poly in range(self.poly):
+            for slices in list(itertools.product(range(1, self.max_slices + 1), repeat=self.max_features)):
+                gr = GIn((dataframe.iloc[:, :-1], dataframe.iloc[:, -1]), self.valid, self.features, self.sigmas,
+                         slices, min_rules=self.min_rules, poly=poly + 1, alpha=self.alpha, indpb=self.indpb,
+                         tournsize=self.tournsize, metric=self.metric, output=self._output, warm=True)
+                b, score, _, _ = gr.run(n_gen=self.n_gen, n_pop=self.n_pop)
+                best[(score, poly + 1, slices)] = b
+        m = min(best)
+        poly, slices, best = m[1], m[2], best[m]
+        self.trained_poly = PolynomialFeatures(degree=poly, include_bias=False)
+        transformed = pd.DataFrame(self.trained_poly.fit_transform(dataframe.iloc[:, :-1]), columns=self.__poly_names())
+        transformed[dataframe.columns[-1]] = dataframe.iloc[:, -1].values
+        self._surrounding = HyperCube.create_surrounding_cube(transformed, output=self._output)
+        cuts = [sorted(best[sum(slices[:i]):sum(slices[:i + 1])]) for i in range(len(slices))]
+        intervals = [[(transformed[self.features[i]].min(), cut[0])] +
+                     [(cut[i], cut[i + 1]) for i in range(len(cut) - 1)] +
+                     [(cut[-1], transformed[self.features[i]].max())] for i, cut in enumerate(cuts)]
+        hypercubes = [{f: iv for f, iv in zip(self.features, combo)} for combo in itertools.product(*intervals)]
+        mi_ma = {f: (transformed[f].min(), transformed[f].max()) for f in transformed.columns if f not in self.features}
+        self._hypercubes = [self._default_cube({feat: h[feat] if feat in self.features else mi_ma[feat]
+                                                for feat in transformed.columns[:-1]}) for h in hypercubes]
+        self._hypercubes = [c for c in self._hypercubes if c.count(transformed) >= 2]
+        for c in self._hypercubes:
+            for feature in transformed.columns:
+                if feature not in self.features:
+                    for direction in ['+', '-']:
+                        c.set_infinite(feature, direction)
+            c.update(transformed)
+        if self.threshold is not None:
+            self._hypercubes = self._merge(self._hypercubes, transformed)
+        return self._create_theory(transformed)
+    def make_fair(self, features: Iterable[str]):
+        self._dimensions_to_ignore.update(features)

psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl