PyPI - psyke - Versions diffs - 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl - Mend

psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

psyke/__init__.py +231 -85
psyke/clustering/__init__.py +9 -4
psyke/clustering/cream/__init__.py +6 -10
psyke/clustering/exact/__init__.py +17 -11
psyke/clustering/utils.py +0 -1
psyke/extraction/__init__.py +25 -0
psyke/extraction/cart/CartPredictor.py +128 -0
psyke/extraction/cart/FairTree.py +205 -0
psyke/extraction/cart/FairTreePredictor.py +56 -0
psyke/extraction/cart/__init__.py +48 -62
psyke/extraction/hypercubic/__init__.py +187 -47
psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
psyke/extraction/hypercubic/creepy/__init__.py +24 -29
psyke/extraction/hypercubic/divine/__init__.py +86 -0
psyke/extraction/hypercubic/ginger/__init__.py +100 -0
psyke/extraction/hypercubic/gridex/__init__.py +45 -84
psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
psyke/extraction/hypercubic/hex/__init__.py +104 -0
psyke/extraction/hypercubic/hypercube.py +275 -72
psyke/extraction/hypercubic/iter/__init__.py +45 -46
psyke/extraction/hypercubic/strategy.py +13 -9
psyke/extraction/real/__init__.py +24 -29
psyke/extraction/real/utils.py +2 -2
psyke/extraction/trepan/__init__.py +24 -19
psyke/genetic/__init__.py +0 -0
psyke/genetic/fgin/__init__.py +74 -0
psyke/genetic/gin/__init__.py +144 -0
psyke/hypercubepredictor.py +102 -0
psyke/schema/__init__.py +230 -36
psyke/tuning/__init__.py +40 -28
psyke/tuning/crash/__init__.py +33 -64
psyke/tuning/orchid/__init__.py +21 -23
psyke/tuning/pedro/__init__.py +70 -56
psyke/utils/logic.py +8 -8
psyke/utils/plot.py +79 -3
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
psyke-1.0.4.dev10.dist-info/RECORD +46 -0
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
psyke/extraction/cart/predictor.py +0 -73
psyke-0.4.9.dev6.dist-info/RECORD +0 -36
{psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0

psyke/extraction/hypercubic/gridex/__init__.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from __future__ import annotations
-import random as rnd
 from itertools import product
 from typing import Iterable
 import numpy as np
 import pandas as pd
+from sklearn.base import ClassifierMixin
 from tuprolog.theory import Theory
-from psyke import get_default_random_seed, PedagogicalExtractor
+from psyke import get_default_random_seed
 from psyke.utils import Target
 from psyke.extraction.hypercubic import HyperCubeExtractor, Grid, HyperCube
@@ -15,102 +15,63 @@ class GridEx(HyperCubeExtractor):
     Explanator implementing GridEx algorithm, doi:10.1007/978-3-030-82017-6_2.
     """
-    def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization=None,
-                 seed=get_default_random_seed()):
-        super().__init__(predictor, Target.CONSTANT, normalization)
+    def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
+                         discretization, normalization)
         self.grid = grid
         self.min_examples = min_examples
         self.threshold = threshold
-        self.__generator = rnd.Random(seed)
+        np.random.seed(seed)
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         self._hypercubes = []
-        if isinstance(np.array(self.predictor.predict(dataframe.iloc[0:1, :-1])).flatten()[0], str):
-            self._output = Target.CLASSIFICATION
-        surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
-        surrounding.init_diversity(2 * self.threshold)
-        self._iterate(surrounding, dataframe)
-        return self._create_theory(dataframe, sort)
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+        self._surrounding.init_diversity(2 * self.threshold)
+        self._iterate(dataframe)
+        return self._create_theory(dataframe)
-    def _ignore_dimensions(self) -> Iterable[str]:
-        cube = self._hypercubes[0]
-        return [d for d in cube.dimensions if all(c[d] == cube[d] for c in self._hypercubes)]
+    def _create_ranges(self, cube, iteration):
+        ranges = {}
+        for (feature, (a, b)) in cube.dimensions.items():
+            n_bins = self.grid.get(feature, iteration)
+            if n_bins == 1:
+                ranges[feature] = [(a, b)]
+                self._dimensions_to_ignore.add(feature)
+            else:
+                size = (b - a) / n_bins
+                ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
+        return ranges
-    def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
+    def _cubes_to_split(self, cube, iteration, dataframe, fake, keep_empty=False):
+        to_split = []
+        for p in product(*self._create_ranges(cube, iteration).values()):
+            cube = self._default_cube()
+            for i, f in enumerate(dataframe.columns[:-1]):
+                cube.update_dimension(f, p[i])
+            n = cube.count(dataframe)
+            if n > 0 or keep_empty:
+                fake = pd.concat([fake, cube.create_samples(self.min_examples - n)])
+                cube.update(fake, self.predictor)
+                to_split.append(cube)
+        return to_split, fake
+    def _iterate(self, dataframe: pd.DataFrame):
         fake = dataframe.copy()
-        prev = [surrounding]
-        next_iteration = []
+        prev = [self._surrounding]
         for iteration in self.grid.iterate():
             next_iteration = []
             for cube in prev:
-                to_split = []
                 if cube.count(dataframe) == 0:
                     continue
                 if cube.diversity < self.threshold:
-                    self._hypercubes += [cube]
+                    self._hypercubes.append(cube)
                     continue
-                ranges = {}
-                for (feature, (a, b)) in cube.dimensions.items():
-                    bins = []
-                    n_bins = self.grid.get(feature, iteration)
-                    size = (b - a) / n_bins
-                    for i in range(n_bins):
-                        bins.append((a + size * i, a + size * (i + 1)))
-                    ranges[feature] = bins
-                for (pn, p) in enumerate(list(product(*ranges.values()))):
-                    cube = self._default_cube()
-                    for i, f in enumerate(dataframe.columns[:-1]):
-                        cube.update_dimension(f, p[i])
-                    n = cube.count(dataframe)
-                    if n > 0:
-                        fake = pd.concat([fake, cube.create_samples(self.min_examples - n, self.__generator)])
-                        cube.update(fake, self.predictor)
-                        to_split += [cube]
-                to_split = self._merge(to_split, fake)
-                next_iteration += [cube for cube in to_split]
-            prev = next_iteration.copy()
-        self._hypercubes += [cube for cube in next_iteration]
-    @staticmethod
-    def _find_couples(to_split: Iterable[HyperCube], not_in_cache: Iterable[HyperCube],
-                      adjacent_cache: dict[tuple[HyperCube, HyperCube], str | None]) -> \
-            Iterable[tuple[HyperCube, HyperCube, str]]:
-        checked = []
-        eligible = []
-        for cube in to_split:
-            checked.append(cube)
-            for other_cube in [c for c in to_split if c not in checked]:
-                if (cube in not_in_cache) or (other_cube in not_in_cache):
-                    adjacent_cache[(cube, other_cube)] = cube.is_adjacent(other_cube)
-                adjacent_feature = adjacent_cache[(cube, other_cube)]
-                eligible.append((cube, other_cube, adjacent_feature))
-        return [couple for couple in eligible if couple[2] is not None]
-    def _evaluate_merge(self, not_in_cache: Iterable[HyperCube],
-                        dataframe: pd.DataFrame, feature: str,
-                        cube: HyperCube, other_cube: HyperCube,
-                        merge_cache: dict[(HyperCube, HyperCube), HyperCube | None]) -> bool:
-        if (cube in not_in_cache) or (other_cube in not_in_cache):
-            merged_cube = cube.merge_along_dimension(other_cube, feature)
-            merged_cube.update(dataframe, self.predictor)
-            merge_cache[(cube, other_cube)] = merged_cube
-        return cube.output == other_cube.output if self._output == Target.CLASSIFICATION else \
-            merge_cache[(cube, other_cube)].diversity < self.threshold
+                to_split, fake = self._cubes_to_split(cube, iteration, dataframe, fake)
+                next_iteration.extend(self._merge(to_split, fake))
+            prev = next_iteration
+        self._hypercubes.extend(prev)
-    def _merge(self, to_split: Iterable[HyperCube], dataframe: pd.DataFrame) -> Iterable[HyperCube]:
-        not_in_cache = [cube for cube in to_split]
-        adjacent_cache = {}
-        merge_cache = {}
-        # TODO: refactor this. A while true with a break is as ugly as hunger.
-        while True:
-            to_merge = [([cube, other_cube], merge_cache[(cube, other_cube)]) for cube, other_cube, feature in
-                        GridEx._find_couples(to_split, not_in_cache, adjacent_cache) if
-                        self._evaluate_merge(not_in_cache, dataframe, feature, cube, other_cube, merge_cache)]
-            if len(to_merge) == 0:
-                break
-            sorted(to_merge, key=lambda c: c[1].diversity)
-            best = to_merge[0]
-            to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
-            not_in_cache = [best[1]]
-        return to_split
+    def make_fair(self, features: Iterable[str]):
+        self.grid.make_fair(features)

psyke/extraction/hypercubic/gridrex/__init__.py CHANGED Viewed

@@ -1,16 +1,16 @@
-from psyke import get_default_random_seed
+from psyke import get_default_random_seed, Target
 from psyke.extraction.hypercubic import Grid, RegressionCube
 from psyke.extraction.hypercubic.gridex import GridEx
 class GridREx(GridEx):
     """
-    Explanator implementing GridREx algorithm.
+    Explanator implementing GridREx algorithm, doi:10.24963/kr.2022/57.
     """
     def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, normalization,
                  seed=get_default_random_seed()):
-        super().__init__(predictor, grid, min_examples, threshold, normalization, seed)
+        super().__init__(predictor, grid, min_examples, threshold, Target.REGRESSION, None, normalization, seed)
-    def _default_cube(self) -> RegressionCube:
+    def _default_cube(self, dimensions=None) -> RegressionCube:
         return RegressionCube()

psyke/extraction/hypercubic/hex/__init__.py ADDED Viewed

@@ -0,0 +1,104 @@
+from __future__ import annotations
+from typing import Iterable
+import numpy as np
+import pandas as pd
+from psyke import get_default_random_seed, Target
+from psyke.extraction.hypercubic import Grid, HyperCube, GenericCube, ClassificationCube
+from psyke.extraction.hypercubic.gridex import GridEx
+class HEx(GridEx):
+    """
+    Explanator implementing HEx algorithm.
+    """
+    class Node:
+        def __init__(self, cube: GenericCube, parent: HEx.Node = None, threshold: float = None):
+            self.cube = cube
+            self.parent = parent
+            self.children: Iterable[HEx.Node] = []
+            self.threshold = threshold
+            self.gain = True if parent is None else self.check()
+        def check(self) -> bool:
+            other = self.parent
+            try:
+                while not other.gain:
+                    other = other.parent
+            except AttributeError:
+                return True
+            if isinstance(other.cube, ClassificationCube):
+                return other.cube.output != self.cube.output
+            return other.cube.error - self.cube.error > self.threshold * .6
+        def indices(self, dataframe: pd.DataFrame):
+            return self.cube.filter_indices(dataframe.iloc[:, :-1])
+        def eligible_children(self, dataframe) -> Iterable[HEx.Node]:
+            return [c for c in self.children if c.cube.count(dataframe) > 0]
+        def permanent_children(self, dataframe) -> Iterable[HEx.Node]:
+            return [c for c in self.eligible_children(dataframe) if c.gain]
+        def permanent_indices(self, dataframe):
+            return np.any([c.cube.filter_indices(dataframe.iloc[:, :-1])
+                           for c in self.eligible_children(dataframe) if c.gain], axis=0)
+        def update(self, dataframe: pd.DataFrame, predictor, recursive=False):
+            if recursive:
+                for node in self.children:
+                    node.update(dataframe, predictor, recursive)
+            cleaned = [(c.cube, c.gain) for c in self.eligible_children(dataframe)]
+            idx = self.permanent_indices(dataframe)
+            if sum(g for _, g in cleaned) > 0 and sum(self.indices(dataframe)) > sum(idx) and self.gain:
+                self.cube.update(dataframe[self.indices(dataframe) & ~idx], predictor)
+            return cleaned
+        def linearize(self, dataframe, depth=1):
+            children = [c.linearize(dataframe, depth + 1) for c in self.permanent_children(dataframe)]
+            return [(cc, dd) for c in children for cc, dd in c if c != []] + \
+                   [(c, depth) for c in self.permanent_children(dataframe)]
+    def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+        self._default_surrounding_cube = True
+    def _gain(self, parent_cube: GenericCube, new_cube: GenericCube) -> float:
+        if isinstance(parent_cube, ClassificationCube):
+            return parent_cube.output != new_cube.output
+        return parent_cube.error - new_cube.error > self.threshold * .6
+    def _iterate(self, dataframe: pd.DataFrame):
+        fake = dataframe.copy()
+        self._surrounding.update(dataframe, self.predictor)
+        root = HEx.Node(self._surrounding, threshold=self.threshold)
+        current = [root]
+        for iteration in self.grid.iterate():
+            next_iteration = []
+            for node in current:
+                if node.cube.diversity < self.threshold:
+                    continue
+                children, fake = self._cubes_to_split(node.cube, iteration, dataframe, fake, True)
+                node.children = [HEx.Node(c, node, threshold=self.threshold) for c in children]
+                cleaned = node.update(fake, self.predictor, False)
+                node.children = [HEx.Node(c, node, threshold=self.threshold) for c in self._merge(
+                    [c for c, _ in cleaned], fake)]
+                next_iteration += [n for n in node.children]
+            current = next_iteration.copy()
+        _ = root.update(fake, self.predictor, True)
+        self._hypercubes = []
+        linearized = root.linearize(fake)
+        for depth in sorted(np.unique([d for (_, d) in linearized]), reverse=True):
+            self._hypercubes += self._merge([c.cube for (c, d) in linearized if d == depth], fake)
+        if len(self._hypercubes) == 0:
+            self._hypercubes = [self._surrounding]
+        elif not min(np.any([c.filter_indices(dataframe.iloc[:, :-1]) for c in self._hypercubes], axis=0)):
+            self._hypercubes = self._hypercubes + [self._surrounding]

psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl