PyPI - psyke - Versions diffs - 0.7.11.dev2__tar.gz → 0.8.0.dev11__tar.gz - Mend

psyke 0.7.11.dev2__tar.gz → 0.8.0.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of psyke might be problematic. Click here for more details.

Files changed (78)

{psyke-0.7.11.dev2/psyke.egg-info → psyke-0.8.0.dev11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.7.11.dev2
+Version: 0.8.0.dev11
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini

psyke-0.8.0.dev11/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.8.0.dev11

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/__init__.py RENAMED Viewed

@@ -48,34 +48,28 @@ class EvaluableModel(object):
         self.discretization = [] if discretization is None else list(discretization)
         self.normalization = normalization
-    def predict(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None) -> Iterable:
+    def predict(self, dataframe: pd.DataFrame) -> Iterable:
         """
         Predicts the output values of every sample in dataset.
         :param dataframe: is the set of instances to predict.
-        :param mapping: for one-hot encoding.
         :return: a list of predictions.
         """
-        return self.__convert(self._predict(dataframe), mapping)
+        return self.__convert(self._predict(dataframe))
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         raise NotImplementedError('predict')
-    def __convert(self, ys: Iterable, mapping: dict[str: int] = None) -> Iterable:
-        if mapping is not None:
-            inverse_mapping = {v: k for k, v in mapping.items()}
-            ys = [inverse_mapping[y] for y in ys]
+    def __convert(self, ys: Iterable) -> Iterable:
         if self.normalization is not None:
             m, s = self.normalization[list(self.normalization.keys())[-1]]
             ys = [prediction if prediction is None else prediction * s + m for prediction in ys]
         return ys
-    def brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
-                      mapping: dict[str: int] = None) -> Iterable:
-        return self.__convert(self._brute_predict(dataframe, criterion, n, mapping), mapping)
+    def brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
+        return self.__convert(self._brute_predict(dataframe, criterion, n))
-    def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
-                       mapping: dict[str: int] = None) -> Iterable:
+    def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
         raise NotImplementedError('brute_predict')
     def unscale(self, values, name):
@@ -151,24 +145,36 @@ class Extractor(EvaluableModel, ABC):
     ----------
     predictor : the underling black box predictor.
     discretization : A collection of sets of discretised features.
-        Each set corresponds to a set of features derived from a single non-discrete feature.
+    Each set corresponds to a set of features derived from a single non-discrete feature.
     """
     def __init__(self, predictor, discretization: Iterable[DiscreteFeature] = None, normalization=None):
         super().__init__(discretization, normalization)
         self.predictor = predictor
-    def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
         """
         Extracts rules from the underlying predictor.
         :param dataframe: is the set of instances to be used for the extraction.
-        :param mapping: for one-hot encoding.
-        :param sort: alphabetically sort the variables of the head of the rules.
         :return: the theory created from the extracted rules.
         """
         raise NotImplementedError('extract')
+    def predict_why(self, data: dict[str, float]):
+        """
+        Provides a prediction and the corresponding explanation.
+        :param data: is the instance to predict.
+        """
+        raise NotImplementedError('predict_why')
+    def predict_counter(self, data: dict[str, float]):
+        """
+        Provides a prediction and counterfactual explanations.
+        :param data: is the instance to predict.
+        """
+        raise NotImplementedError('predict_counter')
     def mae(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
             n: int = 3) -> float:
         """

psyke-0.8.0.dev11/psyke/extraction/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+from abc import ABC
+import pandas as pd
+from tuprolog.theory import Theory
+from psyke import Extractor
+class PedagogicalExtractor(Extractor, ABC):
+    def __init__(self, predictor, discretization=None, normalization=None):
+        Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
+        new_y = pd.DataFrame(self.predictor.predict(dataframe.iloc[:, :-1])).set_index(dataframe.index)
+        data = dataframe.iloc[:, :-1].copy().join(new_y)
+        data.columns = dataframe.columns
+        return self._extract(data)
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        raise NotImplementedError('extract')

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/cart/__init__.py RENAMED Viewed

@@ -47,7 +47,7 @@ class Cart(PedagogicalExtractor):
             simplified.append(nodes.pop(0))
         return simplified
-    def _create_theory(self, data: pd.DataFrame, mapping: dict[str: int], sort: bool = True) -> Theory:
+    def _create_theory(self, data: pd.DataFrame) -> Theory:
         new_theory = mutable_theory()
         nodes = [node for node in self._cart_predictor]
         nodes = Cart._simplify_nodes(nodes) if self._simplify else nodes
@@ -55,12 +55,7 @@ class Cart(PedagogicalExtractor):
             if self.normalization is not None:
                 m, s = self.normalization[data.columns[-1]]
                 prediction = prediction * s + m
-            if mapping is not None and prediction in mapping.values():
-                for k, v in mapping.items():
-                    if v == prediction:
-                        prediction = k
-                        break
-            variables = create_variable_list(self.discretization, data, sort)
+            variables = create_variable_list(self.discretization, data)
             new_theory.assertZ(
                 clause(
                     create_head(data.columns[-1], list(variables.values()), prediction),
@@ -69,15 +64,11 @@ class Cart(PedagogicalExtractor):
             )
         return new_theory
-    def _extract(self, data: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-        self._cart_predictor.predictor = DecisionTreeClassifier(random_state=TREE_SEED) \
-            if isinstance(data.iloc[0, -1], str) or mapping is not None else DecisionTreeRegressor(random_state=TREE_SEED)
-        if mapping is not None:
-            data.iloc[:, -1] = data.iloc[:, -1].apply(lambda x: mapping[x] if x in mapping.keys() else x)
-        self._cart_predictor.predictor.max_depth = self.depth
-        self._cart_predictor.predictor.max_leaf_nodes = self.leaves
+    def _extract(self, data: pd.DataFrame) -> Theory:
+        tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
+        self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth, max_leaf_nodes=self.leaves)
         self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
-        return self._create_theory(data, mapping, sort)
+        return self._create_theory(data)
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return self._cart_predictor.predict(dataframe)

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/__init__.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 from abc import ABC
+from itertools import groupby
 from typing import Iterable
 import numpy as np
 import pandas as pd
@@ -14,7 +15,8 @@ from psyke.extraction import PedagogicalExtractor
 from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
     GenericCube
 from psyke.hypercubepredictor import HyperCubePredictor
-from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier, last_in_body
+from psyke.schema import Between, Outside, Value
+from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier, last_in_body, PRECISION
 from psyke.utils import Target
 from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
@@ -48,12 +50,99 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
             last_cube[dimension] = [-np.inf, np.inf]
         return theory
-    def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-        theory = PedagogicalExtractor.extract(self, dataframe, mapping, sort)
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
+        theory = PedagogicalExtractor.extract(self, dataframe)
         self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
         self._surrounding.update(dataframe, self.predictor)
         return theory
+    def predict_counter(self, data: dict[str, float]):
+        cube = self._find_cube(data)
+        if cube is None:
+            print("The extracted knowledge is not exhaustive; impossible to predict this instance")
+        else:
+            print("The output is", self._predict_from_cubes(data))
+        point = Point(list(data.keys()), list(data.values()))
+        cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
+        cubes = sorted([(cube.surface_distance(point), cube.volume(), cube) for cube in cubes])
+        outputs = []
+        for _, _, c in cubes:
+            if c.output not in outputs:
+                outputs.append(c.output)
+                print("The output may be", c.output, 'if')
+                for d in c.dimensions.keys():
+                    lower, upper = c[d]
+                    p = point[d]
+                    if p < lower:
+                        print('    ', d, '=', round(lower, 1))
+                    elif p > upper:
+                        print('    ', d, '=', round(upper, 1))
+    def __get_local_conditions(self, cube: GenericCube) -> dict[list[Value]]:
+        conditions = {d: [] for d in cube.dimensions}
+        for d in cube.finite_dimensions:
+            conditions[d].append(Between(*cube.dimensions[d]))
+        subcubes = cube.subcubes(self._hypercubes)
+        for c in [c for c in subcubes if sum(c in sc and c != sc for sc in subcubes) == 0]:
+            for d in c.finite_dimensions:
+                conditions[d].append(Outside(*c.dimensions[d]))
+        return conditions
+    def predict_why(self, data: dict[str, float]):
+        cube = self._find_cube(data)
+        if cube is None:
+            print("The extracted knowledge is not exhaustive; impossible to predict this instance")
+        else:
+            output = self._predict_from_cubes(data)
+            print(f"The output is {output} because")
+            conditions = self.__get_local_conditions(cube)
+            for d in conditions:
+                simplified = HyperCubeExtractor.__simplify(conditions[d])
+                for i, condition in enumerate(simplified):
+                    if i == 0:
+                        print('    ', d, 'is', end=' ')
+                    else:
+                        print('and', end=' ')
+                    if isinstance(condition, Outside):
+                        print('not', end=' ')
+                    print('between', round(condition.lower, 1), 'and', round(condition.upper, 1), end=' ')
+                    if i + 1 == len(simplified):
+                        print()
+    @staticmethod
+    def __simplify(conditions):
+        simplified = []
+        for condition in conditions:
+            to_add = True
+            for i, simple in enumerate(simplified):
+                if isinstance(condition, Outside) and isinstance(simple, Outside):
+                    if simple.lower <= condition.lower <= simple.upper or \
+                            simple.lower <= condition.upper <= simple.upper or \
+                            condition.lower <= simple.lower <= simple.upper <= condition.upper:
+                        simplified[i].upper = max(condition.upper, simple.upper)
+                        simplified[i].lower = min(condition.lower, simple.lower)
+                        to_add = False
+                        break
+                elif isinstance(condition, Outside) and isinstance(simple, Between):
+                    if simple.lower >= condition.upper or simple.upper <= condition.lower:
+                        to_add = False
+                        break
+                    elif condition.lower <= simple.lower <= condition.upper <= simple.upper:
+                        simplified[i].lower = condition.upper
+                        to_add = False
+                        break
+                    elif simple.lower <= condition.lower <= simple.upper <= condition.upper:
+                        simplified[i].upper = condition.lower
+                        to_add = False
+                        break
+                    elif condition.lower <= simple.lower <= simple.upper <= condition.upper:
+                        raise ValueError
+            if to_add:
+                simplified.append(condition)
+        return simplified
     @staticmethod
     def _create_head(dataframe: pd.DataFrame, variables: list[Var], output: float | LinearRegression) -> Struct:
         return create_head(dataframe.columns[-1], variables[:-1], output) \
@@ -66,13 +155,13 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
     def __drop(self, dataframe: pd.DataFrame):
         self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
-    def _create_theory(self, dataframe: pd.DataFrame, sort: bool = False) -> Theory:
+    def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
         self.__drop(dataframe)
         new_theory = mutable_theory()
         for cube in self._hypercubes:
             logger.info(cube.output)
             logger.info(cube.dimensions)
-            variables = create_variable_list([], dataframe, sort)
+            variables = create_variable_list([], dataframe)
             variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
             head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),
                                                    self.unscale(cube.output, dataframe.columns[-1]))

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/cosmik/__init__.py RENAMED Viewed

@@ -24,7 +24,7 @@ class COSMiK(HyperCubeExtractor):
         self.close_to_center = close_to_center
         self.seed = seed
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         np.random.seed(self.seed)
         X, y = dataframe.iloc[:, :-1], dataframe.iloc[:, -1]
@@ -44,4 +44,4 @@ class COSMiK(HyperCubeExtractor):
             cube.update(dataframe, self.predictor)
         self._sort_cubes()
-        return self._create_theory(dataframe, sort)
+        return self._create_theory(dataframe)

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/creepy/__init__.py RENAMED Viewed

@@ -28,7 +28,7 @@ class CReEPy(HyperCubeExtractor):
         self.ignore_threshold = ignore_threshold
         self._default_surrounding_cube = True
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         if not isinstance(self.clustering, HyperCubeClustering):
             raise TypeError("clustering must be a HyperCubeClustering")

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/divine/__init__.py RENAMED Viewed

@@ -58,7 +58,7 @@ class DiViNE(HyperCubeExtractor):
         distance, idx = tree.query([list(point.dimensions.values()) for point in cube.corners()], k=1)
         return idx[np.argmin(distance)][-1]
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         np.random.seed(self.seed)
         data = self.__clean(dataframe)
@@ -82,4 +82,4 @@ class DiViNE(HyperCubeExtractor):
             if len(discarded) > 0:
                 data = pd.concat([data] + [d.to_dataframe() for d in discarded]).reset_index(drop=True)
         self._sort_cubes()
-        return self._create_theory(dataframe, sort)
+        return self._create_theory(dataframe)

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/gridex/__init__.py RENAMED Viewed

@@ -25,12 +25,12 @@ class GridEx(HyperCubeExtractor):
         self.threshold = threshold
         self._generator = rnd.Random(seed)
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         self._hypercubes = []
         surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
         surrounding.init_diversity(2 * self.threshold)
         self._iterate(surrounding, dataframe)
-        return self._create_theory(dataframe, sort)
+        return self._create_theory(dataframe)
     def _create_ranges(self, cube, iteration):
         ranges = {}

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/hex/__init__.py RENAMED Viewed

@@ -16,19 +16,23 @@ class HEx(GridEx):
     """
     class Node:
-        def __init__(self, cube: GenericCube, parent: HEx.Node = None, gain: bool = True, threshold: float = None):
+        def __init__(self, cube: GenericCube, parent: HEx.Node = None, threshold: float = None):
             self.cube = cube
             self.parent = parent
             self.children: Iterable[HEx.Node] = []
-            self.gain = gain if not threshold else self.check(threshold)
+            self.threshold = threshold
+            self.gain = True if parent is None else self.check()
-        def check(self, threshold: float) -> bool:
+        def check(self) -> bool:
             other = self.parent
-            while not other.gain:
-                other = other.parent
+            try:
+                while not other.gain:
+                    other = other.parent
+            except AttributeError:
+                return True
             if isinstance(other.cube, ClassificationCube):
                 return other.cube.output != self.cube.output
-            return other.cube.error - self.cube.error > threshold * .6
+            return other.cube.error - self.cube.error > self.threshold * .6
         def indices(self, dataframe: pd.DataFrame):
             return self.cube.filter_indices(dataframe.iloc[:, :-1])
@@ -71,7 +75,7 @@ class HEx(GridEx):
     def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
         fake = dataframe.copy()
         surrounding.update(dataframe, self.predictor)
-        root = HEx.Node(surrounding)
+        root = HEx.Node(surrounding, threshold=self.threshold)
         current = [root]
         for iteration in self.grid.iterate():
@@ -82,7 +86,7 @@ class HEx(GridEx):
                 cleaned = node.update(fake, self.predictor, False)
                 node.children = [HEx.Node(c, node, threshold=self.threshold) for c in self._merge(
                     [c for c, _ in cleaned], fake)]
-                next_iteration += [n for n in node.permanent_children(fake)]
+                next_iteration += [n for n in node.children]
             current = next_iteration.copy()
         _ = root.update(fake, self.predictor, True)

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/hypercube.py RENAMED Viewed

@@ -45,6 +45,16 @@ class Point:
     def __eq__(self, other: Point) -> bool:
         return all([abs(self[dimension] - other[dimension]) < Point.EPSILON for dimension in self._dimensions])
+    def distance(self, other: Point, metric: str='Euclidean') -> float:
+        distances = [abs(self[dimension] - other[dimension]) for dimension in self._dimensions]
+        if metric == 'Euclidean':
+            distance = sum(np.array(distances)**2)**0.5
+        elif metric == 'Manhattan':
+            distance = sum(distances)
+        else:
+            raise ValueError("metric should be 'Euclidean' or 'Manhattan'")
+        return distance
     @property
     def dimensions(self) -> dict[str, float | str]:
         return self._dimensions
@@ -73,19 +83,25 @@ class HyperCube:
         self._error = 0.0
         self._barycenter = Point([], [])
-    def __contains__(self, point: dict[str, float]) -> bool:
+    def __contains__(self, obj: dict[str, float] | HyperCube) -> bool:
         """
-        Note that a point (dict[str, float]) is inside a hypercube if ALL its dimensions' values satisfy:
-            min_dim <= value < max_dim
-        :param point: an N-dimensional point
-        :return: true if the point is inside the hypercube, false otherwise
+        Note that a point is inside a hypercube if ALL its dimensions' values satisfy:
+            min_dim <= object dimension < max_dim
+        :param obj: an N-dimensional object (point or hypercube)
+        :return: true if the object is inside the hypercube, false otherwise
         """
-        return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in point.items()])
+        if isinstance(obj, HyperCube):
+            return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
+                        for k in obj.dimensions])
+        elif isinstance(obj, dict):
+            return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in obj.items()])
+        else:
+            raise TypeError("Invalid type for obj parameter")
     def __eq__(self, other: HyperCube) -> bool:
         return all([(abs(dimension.this_dimension[0] - dimension.other_dimension[0]) < HyperCube.EPSILON)
                     & (abs(dimension.this_dimension[1] - dimension.other_dimension[1]) < HyperCube.EPSILON)
-                    for dimension in self._zip_dimensions(other)])
+                    for dimension in self._zip_dimensions(other, True)])
     def __getitem__(self, feature: str) -> Dimension:
         if feature in self._dimensions.keys():
@@ -104,6 +120,10 @@ class HyperCube:
     def dimensions(self) -> Dimensions:
         return self._dimensions
+    @property
+    def finite_dimensions(self) -> Dimensions:
+        return {k: v for k, v in self._dimensions.items() if np.isfinite(v[0]) and np.isfinite(v[1])}
     @property
     def limit_count(self) -> int:
         return len(self._limits)
@@ -124,6 +144,9 @@ class HyperCube:
     def barycenter(self) -> Point:
         return self._barycenter
+    def subcubes(self, cubes: Iterable[GenericCube]) -> Iterable[GenericCube]:
+        return [c for c in cubes if c in self and c != self]
     def _fit_dimension(self, dimension: dict[str, tuple[float, float]]) -> dict[str, tuple[float, float]]:
         new_dimension: dict[str, tuple[float, float]] = {}
         for key, value in dimension.items():
@@ -144,8 +167,10 @@ class HyperCube:
     def filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
         return dataset[self.filter_indices(dataset)]
-    def _zip_dimensions(self, other: HyperCube) -> list[ZippedDimension]:
-        return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in self._dimensions.keys()]
+    def _zip_dimensions(self, other: HyperCube, check_finite: bool = False) -> list[ZippedDimension]:
+        dimensions = set(self.finite_dimensions).union(set(other.finite_dimensions)) if check_finite else \
+            set(self.dimensions)
+        return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in dimensions]
     def add_limit(self, limit_or_feature: Limit | str, direction: str = None) -> None:
         if isinstance(limit_or_feature, Limit):
@@ -433,8 +458,20 @@ class ClosedCube(HyperCube):
     def __init__(self, dimension: dict[str, tuple] = None):
         super().__init__(dimension=dimension)
-    def __contains__(self, point: dict[str, float]) -> bool:
-        return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in point.items()])
+    def __contains__(self, obj: dict[str, float] | ClosedCube) -> bool:
+        """
+       Note that an object is inside a hypercube if ALL its dimensions' values satisfy:
+           min_dim <= object dimension <= max_dim
+       :param obj: an N-dimensional object (point or hypercube)
+       :return: true if the object is inside the hypercube, false otherwise
+        """
+        if isinstance(obj, ClosedCube):
+            return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
+                        for k in obj.dimensions])
+        elif isinstance(obj, dict):
+            return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in obj.items()])
+        else:
+            raise TypeError("Invalid type for obj parameter")
     def filter_indices(self, dataset: pd.DataFrame) -> ndarray:
         v = np.array([v for _, v in self._dimensions.items()])

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/hypercubic/iter/__init__.py RENAMED Viewed

@@ -170,7 +170,7 @@ class ITER(HyperCubeExtractor):
                               min(overlapping_cube.get_first(feature), b) if direction == '+' else b)
         return cube.overlap(hypercubes)
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         self._hypercubes, domain = self._initialize(dataframe)
         temp_train = dataframe.copy()
         fake = dataframe.copy()
@@ -193,4 +193,4 @@ class ITER(HyperCubeExtractor):
                     ratio *= 2
                 if new_cube.has_volume():
                     self._hypercubes += [new_cube]
-        return self._create_theory(dataframe, sort)
+        return self._create_theory(dataframe)

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/real/__init__.py RENAMED Viewed

@@ -58,10 +58,10 @@ class REAL(PedagogicalExtractor):
                 rules.append(self._create_new_rule(sample))
         return ruleset.optimize()
-    def _create_theory(self, dataset: pd.DataFrame, ruleset: IndexedRuleSet, sort: bool = True) -> MutableTheory:
+    def _create_theory(self, dataset: pd.DataFrame, ruleset: IndexedRuleSet) -> MutableTheory:
         theory = mutable_theory()
         for key, rule in ruleset.flatten():
-            variables = create_variable_list(self.discretization, sort=sort)
+            variables = create_variable_list(self.discretization)
             theory.assertZ(self._create_clause(dataset, variables, key, rule))
         return theory
@@ -111,16 +111,12 @@ class REAL(PedagogicalExtractor):
         samples_all = samples_0.append(samples_1)
         return samples_all, len(set(self.predictor.predict(samples_all))) == 1
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         # Order the dataset by column to preserve reproducibility.
         dataframe = dataframe.sort_values(by=list(dataframe.columns.values), ascending=False)
-        # Always perform output mapping in the same (sorted) way to preserve reproducibility.
-        if mapping is None:
-            self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
-        else:
-            self._output_mapping = {value: index for index, value in enumerate(sorted(set(mapping[dataframe.iloc[:, -1]])))}
+        self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
         self._ruleset = self._get_or_set(HashableDataFrame(dataframe))
-        return self._create_theory(dataframe, self._ruleset, sort)
+        return self._create_theory(dataframe, self._ruleset)
     def _predict(self, dataframe) -> Iterable:
         return np.array([self._internal_predict(data.transpose()) for _, data in dataframe.iterrows()])

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/extraction/trepan/__init__.py RENAMED Viewed

@@ -136,7 +136,7 @@ class Trepan(PedagogicalExtractor):
                 nodes.append(child)
         return len(to_remove)
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         queue = self._init(dataframe)
         while len(queue) > 0:
             node = queue.pop()
@@ -149,7 +149,7 @@ class Trepan(PedagogicalExtractor):
             queue.add_all(best)
             node.children += list(best)
         self._optimize()
-        return self._create_theory(dataframe.columns[-1], sort)
+        return self._create_theory(dataframe.columns[-1])
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array(

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/psyke/hypercubepredictor.py RENAMED Viewed

@@ -20,8 +20,7 @@ class HyperCubePredictor(EvaluableModel):
     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array([self._predict_from_cubes(row.to_dict()) for _, row in dataframe.iterrows()])
-    def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2,
-                       mapping: dict[str: int] = None) -> Iterable:
+    def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
         predictions = np.array(self._predict(dataframe))
         idx = [prediction is None for prediction in predictions]
         if sum(idx) > 0:
@@ -46,10 +45,9 @@ class HyperCubePredictor(EvaluableModel):
         return HyperCubePredictor._get_cube_output(cubes[idx], row)
     def _brute_predict_surface(self, row: dict[str, float]) -> GenericCube:
-        distances = [(
-            cube.surface_distance(Point(list(row.keys()), list(row.values))), cube.volume(), cube
-        ) for cube in self._hypercubes]
-        return min(distances)[-1]
+        return min([(
+            cube.surface_distance(Point(list(row.keys()), list(row.values()))), cube.volume(), cube
+        ) for cube in self._hypercubes])[-1]
     def _create_brute_tree(self, criterion: str = 'center', n: int = 2) -> (BallTree, list[GenericCube]):
         admissible_criteria = ['surface', 'center', 'corner', 'perimeter', 'density', 'default']
@@ -68,12 +66,18 @@ class HyperCubePredictor(EvaluableModel):
             [point[1] for point in points]
     def _predict_from_cubes(self, data: dict[str, float]) -> float | str | None:
+        cube = self._find_cube(data)
+        if cube is None:
+            return None
+        elif self._output == Target.CLASSIFICATION:
+            return HyperCubePredictor._get_cube_output(cube, data)
+        else:
+            return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
+    def _find_cube(self, data: dict[str, float]) -> GenericCube | None:
         for cube in self._hypercubes:
             if data in cube:
-                if self._output == Target.CLASSIFICATION:
-                    return HyperCubePredictor._get_cube_output(cube, data)
-                else:
-                    return round(HyperCubePredictor._get_cube_output(cube, data), get_int_precision())
+                return cube
         return None
     @property

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11/psyke.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.7.11.dev2
+Version: 0.8.0.dev11
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/__init__.py RENAMED Viewed

@@ -66,8 +66,7 @@ def initialize(file: str) -> list[dict[str:Theory]]:
                 params['grid'] = Grid(int(row['grid']), AdaptiveStrategy(ranked, n))
         extractor = get_extractor(row['extractor_type'], params)
-        mapping = None if 'output_mapping' not in row.keys() or row['output_mapping'] == '' else ast.literal_eval(row['output_mapping'])
-        theory = extractor.extract(training_set, mapping) if mapping is not None else extractor.extract(training_set)
+        theory = extractor.extract(training_set)
         # Compute predictions from rules
         index = test_set.shape[1] - 1
@@ -78,12 +77,8 @@ def initialize(file: str) -> list[dict[str:Theory]]:
         solver = prolog_solver(static_kb=mutable_theory(theory).assertZ(get_in_rule()).assertZ(get_not_in_rule()))
         substitutions = [solver.solveOnce(data_to_struct(data)) for _, data in ordered_test_set.iterrows()]
         expected = [cast(query.solved_query.get_arg_at(index)) for query in substitutions if query.is_yes]
-        if mapping is not None:
-            predictions = [prediction for prediction in extractor.predict(test_set_for_predictor.iloc[:, :-1], mapping)
-                          if prediction is not None]
-        else:
-            predictions = [prediction for prediction in extractor.predict(test_set_for_predictor.iloc[:, :-1])
-                           if prediction is not None]
+        predictions = [prediction for prediction in extractor.predict(test_set_for_predictor.iloc[:, :-1])
+                       if prediction is not None]
         yield {
             'extractor': extractor,

{psyke-0.7.11.dev2 → psyke-0.8.0.dev11}/test/psyke/extraction/hypercubic/test_hypercube.py RENAMED Viewed

@@ -246,7 +246,7 @@ class TestHypercube(AbstractTestHypercube):
     def test_zip_dimensions(self):
         cube = HyperCube({'X': self.y, 'Y': self.x})
-        expected = [ZippedDimension(d, self.cube[d], cube[d]) for d in self.dimensions.keys()]
+        expected = [ZippedDimension(d, self.cube[d], cube[d]) for d in set(self.dimensions)]
         self.assertEqual(self.cube._zip_dimensions(cube), expected)
     def test_fit_dimension(self):

psyke-0.7.11.dev2/VERSION DELETED Viewed

	@@ -1 +0,0 @@
1	- 0.7.11.dev2

psyke-0.7.11.dev2/psyke/extraction/__init__.py DELETED Viewed

@@ -1,32 +0,0 @@
-from abc import ABC
-import pandas as pd
-from numpy import argmax
-from tuprolog.theory import Theory
-from psyke import Extractor
-class PedagogicalExtractor(Extractor, ABC):
-    def __init__(self, predictor, discretization=None, normalization=None):
-        Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
-    def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-        from psyke.extraction.hypercubic import HyperCubeExtractor, HyperCube
-        new_y = self.predictor.predict(dataframe.iloc[:, :-1])
-        if mapping is not None:
-            if hasattr(new_y[0], 'shape'):
-                # One-hot encoding for multi-class tasks
-                if len(new_y[0].shape) > 0 and new_y[0].shape[0] > 1:
-                    new_y = [argmax(y, axis=0) for y in new_y]
-                # One-hot encoding for binary class tasks
-                else:
-                    new_y = [round(y[0]) for y in new_y]
-        new_y = pd.DataFrame(new_y).set_index(dataframe.index)
-        data = dataframe.iloc[:, :-1].copy().join(new_y)
-        data.columns = dataframe.columns
-        return self._extract(data, mapping, sort)
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-        raise NotImplementedError('extract')