PyPI - psyke - Versions diffs - 0.8.2.dev18__tar.gz → 0.8.3.dev2__tar.gz - Mend

psyke 0.8.2.dev18__tar.gz → 0.8.3.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of psyke might be problematic. Click here for more details.

Files changed (78) hide show

{psyke-0.8.2.dev18/psyke.egg-info → psyke-0.8.3.dev2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.8.2.dev18
+Version: 0.8.3.dev2
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini

psyke-0.8.3.dev2/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.8.3.dev2

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/clustering/exact/__init__.py RENAMED Viewed

@@ -60,8 +60,8 @@ class ExACT(HyperCubeClustering, ABC):
     def fit(self, dataframe: pd.DataFrame):
         np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
-        self._hypercubes = \
-            self._iterate(Node(dataframe, HyperCube.create_surrounding_cube(dataframe, True, self._output)))
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, True, self._output)
+        self._hypercubes = self._iterate(Node(dataframe, self._surrounding))
     def get_hypercubes(self) -> Iterable[HyperCube]:
         return list(self._hypercubes)

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/__init__.py RENAMED Viewed

@@ -8,7 +8,6 @@ from sklearn.feature_selection import SelectKBest, f_regression, f_classif
 from sklearn.linear_model import LinearRegression
 from tuprolog.core import Var, Struct, clause
 from tuprolog.theory import Theory, mutable_theory
-from psyke import logger
 from psyke.extraction import PedagogicalExtractor
 from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
     GenericCube
@@ -23,7 +22,6 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
     def __init__(self, predictor, output, discretization=None, normalization=None):
         HyperCubePredictor.__init__(self, output=output, normalization=normalization)
         PedagogicalExtractor.__init__(self, predictor, discretization=discretization, normalization=normalization)
-        self._surrounding = None
         self._default_surrounding_cube = False
     def _default_cube(self) -> HyperCube | RegressionCube | ClassificationCube:
@@ -72,7 +70,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
             output += "The extracted knowledge is not exhaustive; impossible to predict this instance"
         else:
             prediction = self._predict_from_cubes(data)
-            output += f"The output is {prediction}\n"
+            output += f"The output is {prediction}"
         point = Point(list(data.keys()), list(data.values()))
         cubes = self._hypercubes if cube is None else [c for c in self._hypercubes if cube.output != c.output]
@@ -82,7 +80,7 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
         for _, _, _, c in cubes:
             if c.output not in outputs:
                 outputs.append(c.output)
-                output += f"The output may be {c.output} if"
+                output += f"\nThe output may be {c.output} if"
                 for d in point.dimensions.keys():
                     lower, upper = c[d]
@@ -98,12 +96,10 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
         return prediction, different_prediction_reasons
     def __get_local_conditions(self, data: dict[str, float], cube: GenericCube) -> dict[list[Value]]:
-        conditions = {d: [] for d in cube.dimensions}
-        for d in cube.finite_dimensions:
-            conditions[d].append(Between(*cube.dimensions[d]))
+        conditions = {d: [Between(*cube.dimensions[d])] for d in cube.dimensions}
         subcubes = cube.subcubes(self._hypercubes)
         for c in [c for c in subcubes if sum(c in sc and c != sc for sc in subcubes) == 0]:
-            for d in [d for d in c.finite_dimensions if d in data]:
+            for d in [d for d in c.dimensions if d in data]:
                 if c.dimensions[d][0] > data[d] or c.dimensions[d][1] < data[d]:
                     conditions[d].append(Outside(*c.dimensions[d]))
         return conditions
@@ -171,14 +167,19 @@ class HyperCubeExtractor(HyperCubePredictor, PedagogicalExtractor, ABC):
         self._hypercubes = [cube for cube in self._hypercubes if cube.count(dataframe) > 1]
     def _create_theory(self, dataframe: pd.DataFrame) -> Theory:
-        self.__drop(dataframe)
+        # self.__drop(dataframe)
+        for cube in self._hypercubes:
+            for dimension in cube.dimensions:
+                if abs(cube[dimension][0] - self._surrounding[dimension][0]) < HyperCube.EPSILON * 2:
+                    cube.set_infinite(dimension, '-')
+                if abs(cube[dimension][1] - self._surrounding[dimension][1]) < HyperCube.EPSILON * 2:
+                    cube.set_infinite(dimension, '+')
         if self._default_surrounding_cube:
             self._hypercubes[-1].set_default()
         new_theory = mutable_theory()
         for cube in self._hypercubes:
-            logger.info(cube.output)
-            logger.info(cube.dimensions)
             variables = create_variable_list([], dataframe)
             variables[dataframe.columns[-1]] = to_var(dataframe.columns[-1])
             head = HyperCubeExtractor._create_head(dataframe, list(variables.values()),

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/creepy/__init__.py RENAMED Viewed

@@ -33,4 +33,5 @@ class CReEPy(HyperCubeExtractor):
         self.clustering.fit(dataframe)
         self._hypercubes = self.clustering.get_hypercubes()
+        self._surrounding = self._hypercubes[-1]
         return self._create_theory(dataframe)

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/divine/__init__.py RENAMED Viewed

@@ -59,6 +59,7 @@ class DiViNE(HyperCubeExtractor):
         return idx[np.argmin(distance)][-1]
     def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=Target.CLASSIFICATION)
         np.random.seed(self.seed)
         data = self.__clean(dataframe)

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/gridex/__init__.py RENAMED Viewed

@@ -1,5 +1,4 @@
 from __future__ import annotations
-import random as rnd
 from itertools import product
 from typing import Iterable
 import numpy as np
@@ -23,13 +22,13 @@ class GridEx(HyperCubeExtractor):
         self.grid = grid
         self.min_examples = min_examples
         self.threshold = threshold
-        self._generator = rnd.Random(seed)
+        np.random.seed(seed)
     def _extract(self, dataframe: pd.DataFrame) -> Theory:
         self._hypercubes = []
-        surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
-        surrounding.init_diversity(2 * self.threshold)
-        self._iterate(surrounding, dataframe)
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+        self._surrounding.init_diversity(2 * self.threshold)
+        self._iterate(dataframe)
         return self._create_theory(dataframe)
     def _create_ranges(self, cube, iteration):
@@ -44,22 +43,22 @@ class GridEx(HyperCubeExtractor):
                 ranges[feature] = [(a + size * i, a + size * (i + 1)) for i in range(n_bins)]
         return ranges
-    def _cubes_to_split(self, cube, surrounding, iteration, dataframe, fake, keep_empty=False):
+    def _cubes_to_split(self, cube, iteration, dataframe, fake, keep_empty=False):
         to_split = []
-        for (pn, p) in enumerate(list(product(*self._create_ranges(cube, iteration).values()))):
+        for p in product(*self._create_ranges(cube, iteration).values()):
             cube = self._default_cube()
             for i, f in enumerate(dataframe.columns[:-1]):
                 cube.update_dimension(f, p[i])
             n = cube.count(dataframe)
             if n > 0 or keep_empty:
-                fake = pd.concat([fake, cube.create_samples(self.min_examples - n, surrounding, self._generator)])
+                fake = pd.concat([fake, cube.create_samples(self.min_examples - n)])
                 cube.update(fake, self.predictor)
                 to_split.append(cube)
         return to_split, fake
-    def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
+    def _iterate(self, dataframe: pd.DataFrame):
         fake = dataframe.copy()
-        prev = [surrounding]
+        prev = [self._surrounding]
         next_iteration = []
         for iteration in self.grid.iterate():
@@ -70,7 +69,7 @@ class GridEx(HyperCubeExtractor):
                 if cube.diversity < self.threshold:
                     self._hypercubes += [cube]
                     continue
-                to_split, fake = self._cubes_to_split(cube, surrounding, iteration, dataframe, fake)
+                to_split, fake = self._cubes_to_split(cube, iteration, dataframe, fake)
                 next_iteration += [c for c in self._merge(to_split, fake)]
             prev = next_iteration.copy()
         self._hypercubes += [cube for cube in next_iteration]

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/hex/__init__.py RENAMED Viewed

@@ -58,9 +58,10 @@ class HEx(GridEx):
                 self.cube.update(dataframe[self.indices(dataframe) & ~idx], predictor)
             return cleaned
-        def linearize(self, dataframe):
-            children = [c.linearize(dataframe) for c in self.permanent_children(dataframe)]
-            return [cc for c in children for cc in c if c != []] + list(self.permanent_children(dataframe))
+        def linearize(self, dataframe, depth=1):
+            children = [c.linearize(dataframe, depth + 1) for c in self.permanent_children(dataframe)]
+            return [(cc, dd) for c in children for cc, dd in c if c != []] + \
+                   [(c, depth) for c in self.permanent_children(dataframe)]
     def __init__(self, predictor, grid: Grid, min_examples: int, threshold: float, output: Target = Target.CONSTANT,
                  discretization=None, normalization=None, seed: int = get_default_random_seed()):
@@ -72,10 +73,10 @@ class HEx(GridEx):
             return parent_cube.output != new_cube.output
         return parent_cube.error - new_cube.error > self.threshold * .6
-    def _iterate(self, surrounding: HyperCube, dataframe: pd.DataFrame):
+    def _iterate(self, dataframe: pd.DataFrame):
         fake = dataframe.copy()
-        surrounding.update(dataframe, self.predictor)
-        root = HEx.Node(surrounding, threshold=self.threshold)
+        self._surrounding.update(dataframe, self.predictor)
+        root = HEx.Node(self._surrounding, threshold=self.threshold)
         current = [root]
         for iteration in self.grid.iterate():
@@ -83,7 +84,7 @@ class HEx(GridEx):
             for node in current:
                 if node.cube.diversity < self.threshold:
                     continue
-                children, fake = self._cubes_to_split(node.cube, surrounding, iteration, dataframe, fake, True)
+                children, fake = self._cubes_to_split(node.cube, iteration, dataframe, fake, True)
                 node.children = [HEx.Node(c, node, threshold=self.threshold) for c in children]
                 cleaned = node.update(fake, self.predictor, False)
                 node.children = [HEx.Node(c, node, threshold=self.threshold) for c in self._merge(
@@ -92,9 +93,12 @@ class HEx(GridEx):
             current = next_iteration.copy()
         _ = root.update(fake, self.predictor, True)
-        self._hypercubes = [c.cube for c in root.linearize(fake)]
+        self._hypercubes = []
+        linearized = root.linearize(fake)
+        for depth in sorted(np.unique([d for (_, d) in linearized]), reverse=True):
+            self._hypercubes += self._merge([c.cube for (c, d) in linearized if d == depth], fake)
         if len(self._hypercubes) == 0:
-            self._hypercubes = [surrounding]
+            self._hypercubes = [self._surrounding]
         elif not min(np.any([c.filter_indices(dataframe.iloc[:, :-1]) for c in self._hypercubes], axis=0)):
-            self._hypercubes = self._hypercubes + [surrounding]
+            self._hypercubes = self._hypercubes + [self._surrounding]

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/hypercube.py RENAMED Viewed

@@ -8,7 +8,7 @@ import pandas as pd
 from numpy import ndarray
 from psyke.extraction.hypercubic.utils import Dimension, Dimensions, MinUpdate, ZippedDimension, Limit, Expansion
-from psyke.schema import Between
+from psyke.schema import Between, GreaterThan, LessThan
 from psyke.utils import get_default_precision, get_int_precision, Target, get_default_random_seed
 from psyke.utils.logic import create_term, to_rounded_real, linear_function_creator
 from sklearn.linear_model import LinearRegression
@@ -68,7 +68,7 @@ class Point:
 class HyperCube:
     """
-    An N-dimensional cube holding a numeric value.
+    An N-dimensional cube holding an output numeric value.
     """
     EPSILON = get_default_precision()  # Precision used when comparing two hypercubes
@@ -83,6 +83,7 @@ class HyperCube:
         self._error = 0.0
         self._barycenter = Point([], [])
         self._default = False
+        self._infinite_dimensions = {}
     def __contains__(self, obj: dict[str, float] | HyperCube) -> bool:
         """
@@ -92,17 +93,35 @@ class HyperCube:
         :return: true if the object is inside the hypercube, false otherwise
         """
         if isinstance(obj, HyperCube):
-            return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
-                        for k in obj.dimensions])
+            for k in obj.dimensions:
+                if k not in self._infinite_dimensions:
+                    if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) < self.get_second(k)):
+                        return False
+                elif len(self._infinite_dimensions[k]) == 2:
+                    continue
+                elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
+                    return False
+                elif '-' in self._infinite_dimensions[k] and obj.get_second(k) >= self.get_second(k):
+                    return False
         elif isinstance(obj, dict):
-            return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in obj.items()])
+            for k, v in obj.items():
+                if k not in self._infinite_dimensions:
+                    if not (self.get_first(k) <= v < self.get_second(k)):
+                        return False
+                elif len(self._infinite_dimensions[k]) == 2:
+                    continue
+                elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
+                    return False
+                elif '-' in self._infinite_dimensions[k] and v >= self.get_second(k):
+                    return False
         else:
             raise TypeError("Invalid type for obj parameter")
+        return True
     def __eq__(self, other: HyperCube) -> bool:
         return all([(abs(dimension.this_dimension[0] - dimension.other_dimension[0]) < HyperCube.EPSILON)
                     & (abs(dimension.this_dimension[1] - dimension.other_dimension[1]) < HyperCube.EPSILON)
-                    for dimension in self._zip_dimensions(other, True)])
+                    for dimension in self._zip_dimensions(other)])
     def __getitem__(self, feature: str) -> Dimension:
         if feature in self._dimensions.keys():
@@ -124,14 +143,16 @@ class HyperCube:
     def set_default(self):
         self._default = True
+    def set_infinite(self, dimension: str, direction: str):
+        if dimension in self._infinite_dimensions:
+            self._infinite_dimensions[dimension].append(direction)
+        else:
+            self._infinite_dimensions[dimension] = [direction]
     @property
     def dimensions(self) -> Dimensions:
         return self._dimensions
-    @property
-    def finite_dimensions(self) -> Dimensions:
-        return {k: v for k, v in self._dimensions.items() if np.isfinite(v[0]) and np.isfinite(v[1])}
     @property
     def limit_count(self) -> int:
         return len(self._limits)
@@ -175,10 +196,8 @@ class HyperCube:
     def filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
         return dataset[self.filter_indices(dataset)]
-    def _zip_dimensions(self, other: HyperCube, check_finite: bool = False) -> list[ZippedDimension]:
-        dimensions = set(self.finite_dimensions).union(set(other.finite_dimensions)) if check_finite else \
-            set(self.dimensions)
-        return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in dimensions]
+    def _zip_dimensions(self, other: HyperCube) -> list[ZippedDimension]:
+        return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in self.dimensions]
     def add_limit(self, limit_or_feature: Limit | str, direction: str = None) -> None:
         if isinstance(limit_or_feature, Limit):
@@ -196,9 +215,8 @@ class HyperCube:
             return '*'
         raise Exception('Too many limits for this feature')
-    def create_samples(self, n: int = 1, surrounding: GenericCube = None,
-                       generator: Random = Random(get_default_random_seed())) -> pd.DataFrame:
-        return pd.DataFrame([self._create_tuple(generator, surrounding) for _ in range(n)])
+    def create_samples(self, n: int = 1) -> pd.DataFrame:
+        return pd.DataFrame([self._create_tuple() for _ in range(n)])
     @staticmethod
     def check_overlap(to_check: Iterable[HyperCube], hypercubes: Iterable[HyperCube]) -> bool:
@@ -218,10 +236,20 @@ class HyperCube:
     def count(self, dataset: pd.DataFrame) -> int:
         return self.filter_dataframe(dataset.iloc[:, :-1]).shape[0]
+    def _interval_to_value(self, dimension, unscale):
+        if dimension not in self._infinite_dimensions:
+            return Between(unscale(self[dimension][0], dimension), unscale(self[dimension][1], dimension))
+        if len(self._infinite_dimensions[dimension]) == 2:
+            return
+        if '+' in self._infinite_dimensions[dimension]:
+            return GreaterThan(unscale(self[dimension][0], dimension))
+        if '-' in self._infinite_dimensions[dimension]:
+            return LessThan(unscale(self[dimension][1], dimension))
     def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
-        dimensions = dict(self.dimensions)
-        return [create_term(variables[name], Between(unscale(values[0], name), unscale(values[1], name)))
-                for name, values in dimensions.items() if name not in ignore and not self.is_default]
+        values = [(dim, self._interval_to_value(dim, unscale)) for dim in self.dimensions if dim not in ignore]
+        return [create_term(variables[name], value) for name, value in values
+                if not self.is_default and value is not None]
     @staticmethod
     def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False,
@@ -243,10 +271,8 @@ class HyperCube:
             return RegressionCube(dimensions)
         return HyperCube(dimensions)
-    def _create_tuple(self, generator: Random, surrounding: GenericCube) -> dict:
-        minmax = {k: (self[k][0] if np.isfinite(self[k][0]) else surrounding[k][0],
-                      self[k][1] if np.isfinite(self[k][1]) else surrounding[k][1]) for k in self._dimensions.keys()}
-        return {k: generator.uniform(minmax[k][0], minmax[k][1]) for k in self._dimensions.keys()}
+    def _create_tuple(self) -> dict:
+        return {k: np.random.uniform(self[k][0], self[k][1]) for k in self._dimensions.keys()}
     @staticmethod
     def cube_from_point(point: dict[str, float], output=None) -> GenericCube:
@@ -286,12 +312,10 @@ class HyperCube:
         return self[feature][1]
     def has_volume(self) -> bool:
-        return all([dimension[1] - dimension[0] > HyperCube.EPSILON for dimension in self._dimensions.values()
-                    if np.isfinite(dimension[0]) and np.isfinite(dimension[1])])
+        return all([dimension[1] - dimension[0] > HyperCube.EPSILON for dimension in self._dimensions.values()])
     def volume(self) -> float:
-        return reduce(lambda a, b: a * b, [dimension[1] - dimension[0] for dimension in self._dimensions.values()
-                                           if np.isfinite(dimension[0]) and np.isfinite(dimension[1])], 1)
+        return reduce(lambda a, b: a * b, [dimension[1] - dimension[0] for dimension in self._dimensions.values()], 1)
     def diagonal(self) -> float:
         return reduce(
@@ -477,13 +501,31 @@ class ClosedCube(HyperCube):
        :param obj: an N-dimensional object (point or hypercube)
        :return: true if the object is inside the hypercube, false otherwise
         """
-        if isinstance(obj, ClosedCube):
-            return all([(self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k))
-                        for k in obj.dimensions])
+        if isinstance(obj, HyperCube):
+            for k in obj.dimensions:
+                if k not in self._infinite_dimensions:
+                    if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k)):
+                        return False
+                elif len(self._infinite_dimensions[k]) == 2:
+                    continue
+                elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
+                    return False
+                elif '-' in self._infinite_dimensions[k] and obj.get_second(k) > self.get_second(k):
+                    return False
         elif isinstance(obj, dict):
-            return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in obj.items()])
+            for k, v in obj.items():
+                if k not in self._infinite_dimensions:
+                    if not (self.get_first(k) <= v <= self.get_second(k)):
+                        return False
+                elif len(self._infinite_dimensions[k]) == 2:
+                    continue
+                elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
+                    return False
+                elif '-' in self._infinite_dimensions[k] and v > self.get_second(k):
+                    return False
         else:
             raise TypeError("Invalid type for obj parameter")
+        return True
     def filter_indices(self, dataset: pd.DataFrame) -> ndarray:
         v = np.array([v for _, v in self._dimensions.items()])

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/extraction/hypercubic/iter/__init__.py RENAMED Viewed

@@ -1,5 +1,4 @@
 from __future__ import annotations
-from random import Random
 from typing import Iterable
 import numpy as np
 import pandas as pd
@@ -10,8 +9,6 @@ from psyke.extraction.hypercubic.hypercube import GenericCube
 from psyke.extraction.hypercubic.utils import MinUpdate, Expansion
 from psyke.utils import get_default_random_seed, Target
-DomainProperties = (Iterable[MinUpdate], GenericCube)
 class ITER(HyperCubeExtractor):
     """
@@ -32,14 +29,13 @@ class ITER(HyperCubeExtractor):
         self.fill_gaps = fill_gaps
         self._output = Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else \
             output if output is not None else Target.CONSTANT
-        self.__generator = Random(seed)
+        self.seed = seed
     def _best_cube(self, dataframe: pd.DataFrame, cube: GenericCube, cubes: Iterable[Expansion]) -> Expansion | None:
         expansions = []
         for limit in cubes:
             count = limit.cube.count(dataframe)
-            dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count,
-                                                                        generator=self.__generator)])
+            dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count)])
             limit.cube.update(dataframe, self.predictor)
             expansions.append(Expansion(
                 limit.cube, limit.feature, limit.direction,
@@ -50,24 +46,21 @@ class ITER(HyperCubeExtractor):
             return sorted(expansions, key=lambda e: e.distance)[0]
         return None
-    def _calculate_min_updates(self, surrounding: GenericCube) -> Iterable[MinUpdate]:
+    def _calculate_min_updates(self) -> Iterable[MinUpdate]:
         return [MinUpdate(name, (interval[1] - interval[0]) * self.min_update) for (name, interval) in
-                surrounding.dimensions.items()]
+                self._surrounding.dimensions.items()]
-    @staticmethod
-    def _create_range(cube: GenericCube, domain: DomainProperties, feature: str, direction: str)\
+    def _create_range(self, cube: GenericCube, min_updates: Iterable[MinUpdate], feature: str, direction: str)\
             -> tuple[GenericCube, tuple[float, float]]:
-        min_updates, surrounding = domain
         a, b = cube[feature]
         size = [min_update for min_update in min_updates if min_update.name == feature][0].value
-        return (cube.copy(), (max(a - size, surrounding.get_first(feature)), a)
-                if direction == '-' else (b, min(b + size, surrounding.get_second(feature))))
+        return (cube.copy(), (max(a - size, self._surrounding.get_first(feature)), a)
+                if direction == '-' else (b, min(b + size, self._surrounding.get_second(feature))))
-    @staticmethod
-    def _create_temp_cube(cube: GenericCube, domain: DomainProperties,
+    def _create_temp_cube(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
                           hypercubes: Iterable[GenericCube], feature: str,
                           direction: str) -> Iterable[Expansion]:
-        temp_cube, values = ITER._create_range(cube, domain, feature, direction)
+        temp_cube, values = self._create_range(cube, min_updates, feature, direction)
         temp_cube.update_dimension(feature, values)
         overlap = temp_cube.overlap(hypercubes)
         while (overlap is not None) & (temp_cube.has_volume()):
@@ -77,23 +70,22 @@ class ITER(HyperCubeExtractor):
         else:
             cube.add_limit(feature, direction)
-    @staticmethod
-    def _create_temp_cubes(cube: GenericCube, domain: DomainProperties,
+    def _create_temp_cubes(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
                            hypercubes: Iterable[GenericCube]) -> Iterable[Expansion]:
         tmp_cubes = []
-        for feature in domain[1].dimensions.keys():
+        for feature in self._surrounding.dimensions.keys():
             limit = cube.check_limits(feature)
             if limit == '*':
                 continue
             for x in {'-', '+'} - {limit}:
-                tmp_cubes += ITER._create_temp_cube(cube, domain, hypercubes, feature, x)
+                tmp_cubes += self._create_temp_cube(cube, min_updates, hypercubes, feature, x)
         return tmp_cubes
     def _cubes_to_update(self, dataframe: pd.DataFrame, to_expand: Iterable[GenericCube],
-                         hypercubes: Iterable[GenericCube], domain: DomainProperties) \
+                         hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate]) \
             -> Iterable[tuple[GenericCube, Expansion]]:
         results = [(hypercube, self._best_cube(dataframe, hypercube, self._create_temp_cubes(
-            hypercube, domain, hypercubes))) for hypercube in to_expand]
+            hypercube, min_updates, hypercubes))) for hypercube in to_expand]
         return sorted([result for result in results if result[1] is not None], key=lambda x: x[1].distance)
     def _expand_or_create(self, cube: GenericCube, expansion: Expansion, hypercubes: Iterable[GenericCube]) -> None:
@@ -103,7 +95,7 @@ class ITER(HyperCubeExtractor):
             cube.expand(expansion, hypercubes)
     @staticmethod
-    def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, tuple]:
+    def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, float]:
         if isinstance(output, str):
             close_sample = dataframe[dataframe.iloc[:, -1] == output].iloc[0].to_dict()
         else:
@@ -126,36 +118,32 @@ class ITER(HyperCubeExtractor):
         return [HyperCube.cube_from_point(ITER._find_closer_sample(dataframe, point), output=self._output)
                 for point in points]
-    def _initialize(self, dataframe: pd.DataFrame) -> tuple[Iterable[GenericCube], DomainProperties]:
+    def _initialize(self, dataframe: pd.DataFrame) -> Iterable[MinUpdate]:
         self._fake_dataframe = dataframe.copy()
-        surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
-        min_updates = self._calculate_min_updates(surrounding)
-        self._hypercubes = self._init_hypercubes(dataframe, min_updates, surrounding)
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+        min_updates = self._calculate_min_updates()
+        self._init_hypercubes(dataframe, min_updates)
         for hypercube in self._hypercubes:
             hypercube.update(dataframe, self.predictor)
-        return self._hypercubes, (min_updates, surrounding)
-    def _init_hypercubes(
-            self,
-            dataframe: pd.DataFrame,
-            min_updates: Iterable[MinUpdate],
-            surrounding: GenericCube
-    ) -> Iterable[GenericCube]:
+        return min_updates
+    def _init_hypercubes(self, dataframe: pd.DataFrame, min_updates: Iterable[MinUpdate]):
         while True:
             hypercubes = self._generate_starting_points(dataframe)
             for hypercube in hypercubes:
-                hypercube.expand_all(min_updates, surrounding)
+                hypercube.expand_all(min_updates, self._surrounding)
             self.n_points = self.n_points - 1
             if not HyperCube.check_overlap(hypercubes, hypercubes):
                 break
-        return hypercubes
+        self._hypercubes = hypercubes
-    def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], domain: DomainProperties,
+    def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate],
                  left_iteration: int) -> int:
+        np.random.seed(self.seed)
         iterations = 0
         to_expand = [cube for cube in hypercubes if cube.limit_count < (len(dataframe.columns) - 1) * 2]
         while (len(to_expand) > 0) and (iterations < left_iteration):
-            updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, domain))
+            updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, min_updates))
             if len(updates) > 0:
                 self._expand_or_create(updates[0][0], updates[0][1], hypercubes)
             iterations += 1
@@ -171,12 +159,12 @@ class ITER(HyperCubeExtractor):
         return cube.overlap(hypercubes)
     def _extract(self, dataframe: pd.DataFrame) -> Theory:
-        self._hypercubes, domain = self._initialize(dataframe)
+        min_updates = self._initialize(dataframe)
         temp_train = dataframe.copy()
         fake = dataframe.copy()
         iterations = 0
         while temp_train.shape[0] > 0:
-            iterations += self._iterate(fake, self._hypercubes, domain, self.max_iterations - iterations)
+            iterations += self._iterate(fake, self._hypercubes, min_updates, self.max_iterations - iterations)
             if (iterations >= self.max_iterations) or (not self.fill_gaps):
                 break
             temp_train = temp_train.iloc[[p is None for p in self.predict(temp_train.iloc[:, :-1])]]
@@ -188,7 +176,7 @@ class ITER(HyperCubeExtractor):
                         if not new_cube.has_volume():
                             break
                     new_cube = HyperCube.cube_from_point(point, self._output)
-                    new_cube.expand_all(domain[0], domain[1], ratio)
+                    new_cube.expand_all(min_updates, self._surrounding, ratio)
                     overlap = new_cube.overlap(self._hypercubes)
                     ratio *= 2
                 if new_cube.has_volume():

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/psyke/hypercubepredictor.py RENAMED Viewed

@@ -78,14 +78,11 @@ class HyperCubePredictor(EvaluableModel):
     def _find_cube(self, data: dict[str, float]) -> GenericCube | None:
         for dimension in self._dimensions_to_ignore:
             del data[dimension]
-        found = None
         for cube in self._hypercubes:
             if data in cube:
-                found = cube.copy()
-                break
-        if found is None and self._hypercubes[-1].is_default:
-            found = self._hypercubes[-1].copy()
-        return found
+                return cube.copy()
+        if self._hypercubes[-1].is_default:
+            return self._hypercubes[-1].copy()
     @property
     def n_rules(self):

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2/psyke.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: psyke
-Version: 0.8.2.dev18
+Version: 0.8.3.dev2
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini

{psyke-0.8.2.dev18 → psyke-0.8.3.dev2}/setup.py RENAMED Viewed

@@ -81,7 +81,6 @@ class CreateTestPredictors(distutils.cmd.Command):
         pass
     def run(self):
-        from test.psyke import Predictor
         from psyke.utils import get_default_random_seed
         from psyke.utils.dataframe import get_discrete_dataset
         from sklearn.model_selection import train_test_split
@@ -90,6 +89,7 @@ class CreateTestPredictors(distutils.cmd.Command):
         import ast
         import pandas as pd
         from tensorflow.keras import Model
+        from test import Predictor
         # Read the required predictors to run the tests:
         #   model | model_options | dataset
@@ -105,7 +105,7 @@ class CreateTestPredictors(distutils.cmd.Command):
                 if row['bins'] > 0:
                     schema = get_schema(dataset)  # int(row['bins'])
                     dataset = get_discrete_dataset(dataset.iloc[:, :-1], schema).join(dataset.iloc[:, -1])
-                model = get_model(row['model'], options)
+                model, _ = get_model(row['model'], options)
                 training_set, test_set = train_test_split(dataset, test_size=0.5,
                                                           random_state=get_default_random_seed())
                 if isinstance(model, Model):

psyke 0.8.2.dev18__tar.gz → 0.8.3.dev2__tar.gz

Potentially problematic release.

psyke 0.8.2.dev18__tar.gz → 0.8.3.dev2__tar.gz