psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psyke/__init__.py +231 -85
- psyke/clustering/__init__.py +9 -4
- psyke/clustering/cream/__init__.py +6 -10
- psyke/clustering/exact/__init__.py +17 -11
- psyke/clustering/utils.py +0 -1
- psyke/extraction/__init__.py +25 -0
- psyke/extraction/cart/CartPredictor.py +128 -0
- psyke/extraction/cart/FairTree.py +205 -0
- psyke/extraction/cart/FairTreePredictor.py +56 -0
- psyke/extraction/cart/__init__.py +48 -62
- psyke/extraction/hypercubic/__init__.py +187 -47
- psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
- psyke/extraction/hypercubic/creepy/__init__.py +24 -29
- psyke/extraction/hypercubic/divine/__init__.py +86 -0
- psyke/extraction/hypercubic/ginger/__init__.py +100 -0
- psyke/extraction/hypercubic/gridex/__init__.py +45 -84
- psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
- psyke/extraction/hypercubic/hex/__init__.py +104 -0
- psyke/extraction/hypercubic/hypercube.py +275 -72
- psyke/extraction/hypercubic/iter/__init__.py +45 -46
- psyke/extraction/hypercubic/strategy.py +13 -9
- psyke/extraction/real/__init__.py +24 -29
- psyke/extraction/real/utils.py +2 -2
- psyke/extraction/trepan/__init__.py +24 -19
- psyke/genetic/__init__.py +0 -0
- psyke/genetic/fgin/__init__.py +74 -0
- psyke/genetic/gin/__init__.py +144 -0
- psyke/hypercubepredictor.py +102 -0
- psyke/schema/__init__.py +230 -36
- psyke/tuning/__init__.py +40 -28
- psyke/tuning/crash/__init__.py +33 -64
- psyke/tuning/orchid/__init__.py +21 -23
- psyke/tuning/pedro/__init__.py +70 -56
- psyke/utils/logic.py +8 -8
- psyke/utils/plot.py +79 -3
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
- psyke-1.0.4.dev10.dist-info/RECORD +46 -0
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
- psyke/extraction/cart/predictor.py +0 -73
- psyke-0.4.9.dev6.dist-info/RECORD +0 -36
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
psyke/extraction/hypercubic/iter/__init__.py
CHANGED

@@ -1,18 +1,14 @@
 from __future__ import annotations
-from random import Random
 from typing import Iterable
 import numpy as np
 import pandas as pd
 from sklearn.base import ClassifierMixin
 from tuprolog.theory import Theory
-from psyke import PedagogicalExtractor
 from psyke.extraction.hypercubic import HyperCube, HyperCubeExtractor
 from psyke.extraction.hypercubic.hypercube import GenericCube
 from psyke.extraction.hypercubic.utils import MinUpdate, Expansion
 from psyke.utils import get_default_random_seed, Target

-DomainProperties = (Iterable[MinUpdate], GenericCube)
-

 class ITER(HyperCubeExtractor):
     """
@@ -20,12 +16,14 @@ class ITER(HyperCubeExtractor):
     """

     def __init__(self, predictor, min_update, n_points, max_iterations, min_examples, threshold, fill_gaps,
-                 normalization, output: Target = Target.CONSTANT,
-
+                 ignore_dimensions: Iterable, normalization, output: Target = Target.CONSTANT,
+                 seed=get_default_random_seed()):
+        super().__init__(predictor, output, normalization=normalization)
         if output is Target.REGRESSION:
             raise NotImplementedError
         self.predictor = predictor
         self.min_update = min_update
+        self._init_points = n_points
         self.n_points = n_points
         self.max_iterations = max_iterations
         self.min_examples = min_examples
@@ -33,13 +31,18 @@ class ITER(HyperCubeExtractor):
         self.fill_gaps = fill_gaps
         self._output = Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else \
             output if output is not None else Target.CONSTANT
-        self.
+        self.seed = seed
+        self.ignore_dimensions = ignore_dimensions if ignore_dimensions is not None else []
+
+    def make_fair(self, features: Iterable[str]):
+        self.n_points = self._init_points
+        self.ignore_dimensions += list(features)

     def _best_cube(self, dataframe: pd.DataFrame, cube: GenericCube, cubes: Iterable[Expansion]) -> Expansion | None:
         expansions = []
         for limit in cubes:
             count = limit.cube.count(dataframe)
-            dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count
+            dataframe = pd.concat([dataframe, limit.cube.create_samples(self.min_examples - count)])
             limit.cube.update(dataframe, self.predictor)
             expansions.append(Expansion(
                 limit.cube, limit.feature, limit.direction,
@@ -50,24 +53,21 @@ class ITER(HyperCubeExtractor):
             return sorted(expansions, key=lambda e: e.distance)[0]
         return None

-    def _calculate_min_updates(self
+    def _calculate_min_updates(self) -> Iterable[MinUpdate]:
         return [MinUpdate(name, (interval[1] - interval[0]) * self.min_update) for (name, interval) in
-
+                self._surrounding.dimensions.items()]

-
-    def _create_range(cube: GenericCube, domain: DomainProperties, feature: str, direction: str)\
+    def _create_range(self, cube: GenericCube, min_updates: Iterable[MinUpdate], feature: str, direction: str)\
             -> tuple[GenericCube, tuple[float, float]]:
-        min_updates, surrounding = domain
         a, b = cube[feature]
         size = [min_update for min_update in min_updates if min_update.name == feature][0].value
-        return (cube.copy(), (max(a - size,
-            if direction == '-' else (b, min(b + size,
+        return (cube.copy(), (max(a - size, self._surrounding.get_first(feature)), a)
+                if direction == '-' else (b, min(b + size, self._surrounding.get_second(feature))))

-
-    def _create_temp_cube(cube: GenericCube, domain: DomainProperties,
+    def _create_temp_cube(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
                           hypercubes: Iterable[GenericCube], feature: str,
                           direction: str) -> Iterable[Expansion]:
-        temp_cube, values =
+        temp_cube, values = self._create_range(cube, min_updates, feature, direction)
         temp_cube.update_dimension(feature, values)
         overlap = temp_cube.overlap(hypercubes)
         while (overlap is not None) & (temp_cube.has_volume()):
@@ -77,23 +77,24 @@ class ITER(HyperCubeExtractor):
         else:
             cube.add_limit(feature, direction)

-
-    def _create_temp_cubes(cube: GenericCube, domain: DomainProperties,
+    def _create_temp_cubes(self, cube: GenericCube, min_updates: Iterable[MinUpdate],
                            hypercubes: Iterable[GenericCube]) -> Iterable[Expansion]:
         tmp_cubes = []
-        for feature in
+        for feature in self._surrounding.dimensions.keys():
+            if feature in self.ignore_dimensions:
+                continue
             limit = cube.check_limits(feature)
             if limit == '*':
                 continue
             for x in {'-', '+'} - {limit}:
-                tmp_cubes +=
+                tmp_cubes += self._create_temp_cube(cube, min_updates, hypercubes, feature, x)
         return tmp_cubes

     def _cubes_to_update(self, dataframe: pd.DataFrame, to_expand: Iterable[GenericCube],
-                         hypercubes: Iterable[GenericCube],
+                         hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate]) \
             -> Iterable[tuple[GenericCube, Expansion]]:
         results = [(hypercube, self._best_cube(dataframe, hypercube, self._create_temp_cubes(
-            hypercube,
+            hypercube, min_updates, hypercubes))) for hypercube in to_expand]
         return sorted([result for result in results if result[1] is not None], key=lambda x: x[1].distance)

     def _expand_or_create(self, cube: GenericCube, expansion: Expansion, hypercubes: Iterable[GenericCube]) -> None:
@@ -103,7 +104,7 @@ class ITER(HyperCubeExtractor):
         cube.expand(expansion, hypercubes)

     @staticmethod
-    def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str,
+    def _find_closer_sample(dataframe: pd.DataFrame, output: float | str) -> dict[str, float]:
         if isinstance(output, str):
             close_sample = dataframe[dataframe.iloc[:, -1] == output].iloc[0].to_dict()
         else:
@@ -126,36 +127,34 @@ class ITER(HyperCubeExtractor):
         return [HyperCube.cube_from_point(ITER._find_closer_sample(dataframe, point), output=self._output)
                 for point in points]

-    def _initialize(self, dataframe: pd.DataFrame) ->
+    def _initialize(self, dataframe: pd.DataFrame) -> Iterable[MinUpdate]:
         self._fake_dataframe = dataframe.copy()
-
-        min_updates = self._calculate_min_updates(
-        self.
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, output=self._output)
+        min_updates = self._calculate_min_updates()
+        self._init_hypercubes(dataframe, min_updates)
         for hypercube in self._hypercubes:
             hypercube.update(dataframe, self.predictor)
-        return
-
-    def _init_hypercubes(
-            self,
-            dataframe: pd.DataFrame,
-            min_updates: Iterable[MinUpdate],
-            surrounding: GenericCube
-    ) -> Iterable[GenericCube]:
+        return min_updates
+
+    def _init_hypercubes(self, dataframe: pd.DataFrame, min_updates: Iterable[MinUpdate]):
         while True:
             hypercubes = self._generate_starting_points(dataframe)
             for hypercube in hypercubes:
-                hypercube.expand_all(min_updates,
+                hypercube.expand_all(min_updates, self._surrounding)
+                for d in self.ignore_dimensions:
+                    hypercube[d] = self._surrounding[d]
             self.n_points = self.n_points - 1
             if not HyperCube.check_overlap(hypercubes, hypercubes):
                 break
-
+        self._hypercubes = hypercubes

-    def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube],
+    def _iterate(self, dataframe: pd.DataFrame, hypercubes: Iterable[GenericCube], min_updates: Iterable[MinUpdate],
                  left_iteration: int) -> int:
+        np.random.seed(self.seed)
         iterations = 0
         to_expand = [cube for cube in hypercubes if cube.limit_count < (len(dataframe.columns) - 1) * 2]
         while (len(to_expand) > 0) and (iterations < left_iteration):
-            updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes,
+            updates = list(self._cubes_to_update(dataframe, to_expand, hypercubes, min_updates))
             if len(updates) > 0:
                 self._expand_or_create(updates[0][0], updates[0][1], hypercubes)
                 iterations += 1
@@ -170,13 +169,13 @@ class ITER(HyperCubeExtractor):
                              min(overlapping_cube.get_first(feature), b) if direction == '+' else b)
         return cube.overlap(hypercubes)

-    def _extract(self, dataframe: pd.DataFrame
-
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        min_updates = self._initialize(dataframe)
         temp_train = dataframe.copy()
         fake = dataframe.copy()
         iterations = 0
         while temp_train.shape[0] > 0:
-            iterations += self._iterate(fake, self._hypercubes,
+            iterations += self._iterate(fake, self._hypercubes, min_updates, self.max_iterations - iterations)
             if (iterations >= self.max_iterations) or (not self.fill_gaps):
                 break
             temp_train = temp_train.iloc[[p is None for p in self.predict(temp_train.iloc[:, :-1])]]
@@ -188,9 +187,9 @@ class ITER(HyperCubeExtractor):
                 if not new_cube.has_volume():
                     break
                 new_cube = HyperCube.cube_from_point(point, self._output)
-                new_cube.expand_all(
+                new_cube.expand_all(min_updates, self._surrounding, ratio)
                 overlap = new_cube.overlap(self._hypercubes)
                 ratio *= 2
             if new_cube.has_volume():
                 self._hypercubes += [new_cube]
-        return self._create_theory(dataframe
+        return self._create_theory(dataframe)
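The headline change in this file is the new fairness hook: make_fair(features) resets n_points and appends the given features to ignore_dimensions, and _init_hypercubes then stretches every starting cube to the full surrounding interval on those dimensions, so no extracted rule can condition on them. A minimal usage sketch (the dataset and hyper-parameter values are illustrative, not defaults taken from this diff):

    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from psyke.extraction.hypercubic.iter import ITER
    from psyke.utils import Target

    iris = load_iris(as_frame=True)
    train = pd.concat([iris.data, iris.target], axis=1)
    predictor = RandomForestClassifier().fit(iris.data, iris.target)
    # Hypothetical hyper-parameters; only the signature comes from this diff.
    extractor = ITER(predictor, min_update=0.1, n_points=1, max_iterations=600,
                     min_examples=100, threshold=0.1, fill_gaps=True,
                     ignore_dimensions=None, normalization=None,
                     output=Target.CLASSIFICATION)
    extractor.make_fair(['sepal width (cm)'])  # this dimension is never split on
    theory = extractor.extract(train)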
psyke/extraction/hypercubic/strategy.py
CHANGED

@@ -1,16 +1,20 @@
 from __future__ import annotations

 from functools import reduce
-from
+from collections.abc import Iterable


 class Strategy:
-    def __init__(self):
-        self._partitions =
+    def __init__(self, partitions = None):
+        self._partitions = partitions
+        self._no_features = []

     def get(self, feature: str) -> int:
         raise NotImplementedError

+    def make_fair(self, features: Iterable[str]):
+        self._no_features = features
+
     def partition_number(self, features: Iterable[str]) -> int:
         return reduce(lambda x, y: x * y, map(self.get, features), 1)

@@ -29,23 +33,23 @@ class Strategy:

 class FixedStrategy(Strategy):
     def __init__(self, partitions: int = 2):
-        super().__init__()
-        self._partitions = partitions
+        super().__init__(partitions)

     def get(self, feature: str) -> int:
-        return self._partitions
+        return 1 if feature in self._no_features else self._partitions

     def __str__(self):
         return "Fixed ({})".format(super().__str__())


 class AdaptiveStrategy(Strategy):
-    def __init__(self, features: Iterable[str], partitions: Iterable[tuple[float, float]] | None = None):
-        super().__init__()
+    def __init__(self, features: Iterable[(str, float)], partitions: Iterable[tuple[float, float]] | None = None):
+        super().__init__(partitions if partitions is not None else [(0.33, 2), (0.67, 3)])
         self.features = features
-        self._partitions = partitions if partitions is not None else [(0.33, 2), (0.67, 3)]

     def get(self, feature: str) -> int:
+        if feature in self._no_features:
+            return 1
         importance = next(filter(lambda t: t[0] == feature, self.features))[1]
         n = 1
         for (imp, part) in self._partitions:
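Because get now returns 1 for excluded features, partition_number (the product of the per-feature partition counts) shrinks accordingly. A small sketch of the intended arithmetic, using the classes above:

    from psyke.extraction.hypercubic.strategy import FixedStrategy

    strategy = FixedStrategy(3)
    strategy.partition_number(['x', 'y'])  # 3 * 3 = 9 grid cells
    strategy.make_fair(['y'])
    strategy.partition_number(['x', 'y'])  # 3 * 1 = 3: 'y' is never partitioned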
psyke/extraction/real/__init__.py
CHANGED

@@ -1,6 +1,6 @@
 from functools import lru_cache
+from psyke.extraction import PedagogicalExtractor
 from psyke.extraction.real.utils import Rule, IndexedRuleSet
-from psyke import PedagogicalExtractor
 from psyke.schema import DiscreteFeature
 from psyke.utils.dataframe import HashableDataFrame
 from psyke.utils.logic import create_term, create_head, create_variable_list
@@ -15,13 +15,12 @@ class REAL(PedagogicalExtractor):
     """
     Explanator implementing Rule Extraction As Learning (REAL) algorithm, doi:10.1016/B978-1-55860-335-6.50013-1.
     The algorithm is sensible to features' order in the provided dataset during extraction.
-    To make it reproducible the features are internally sorted (alphabetically).
     """

     def __init__(self, predictor, discretization: Iterable[DiscreteFeature]):
         super().__init__(predictor, discretization)
+        self._ignore_feature = []
         self._ruleset: IndexedRuleSet = IndexedRuleSet()
-        self._output_mapping = {}

     @property
     def n_rules(self):
@@ -31,7 +30,7 @@ class REAL(PedagogicalExtractor):
         new_rule = self._rule_from_example(sample)
         return any([new_rule in rule for rule in rules])

-    def
+    def _body(self, variables: dict[str, Var], rule: Rule) -> list[Struct]:
         result = []
         for predicates, truth_value in zip(rule.to_lists(), [True, False]):
             for predicate in predicates:
@@ -40,28 +39,25 @@ class REAL(PedagogicalExtractor):
         return result

     def _create_clause(self, dataset: pd.DataFrame, variables: dict[str, Var], key: int, rule: Rule) -> Clause:
-
-            sorted(list(variables.values())),
-            str(sorted(list(set(dataset.iloc[:, -1])))[key]))
-        return clause(head, self._create_body(variables, rule))
+        return clause(create_head(dataset.columns[-1], list(variables.values()), key), self._body(variables, rule))

     def _create_new_rule(self, sample: pd.Series) -> Rule:
         rule = self._rule_from_example(sample)
         return self._generalise(rule, sample)

     def _create_ruleset(self, dataset: pd.DataFrame) -> IndexedRuleSet:
-        ruleset = IndexedRuleSet.create_indexed_ruleset(dataset)
-        for
+        ruleset = IndexedRuleSet.create_indexed_ruleset(sorted(set(dataset.iloc[:, -1])))
+        for _, sample in dataset.iloc[:, :-1].iterrows():
             prediction = list(self.predictor.predict(sample.to_frame().transpose()))[0]
-            rules = ruleset.get(
+            rules = ruleset.get(prediction)
             if not self._covers(sample, rules):
                 rules.append(self._create_new_rule(sample))
         return ruleset.optimize()

-    def _create_theory(self, dataset: pd.DataFrame
+    def _create_theory(self, dataset: pd.DataFrame) -> MutableTheory:
         theory = mutable_theory()
-        for key, rule in
-            variables = create_variable_list(self.discretization
+        for key, rule in self._ruleset.flatten():
+            variables = create_variable_list(self.discretization)
             theory.assertZ(self._create_clause(dataset, variables, key, rule))
         return theory

@@ -92,16 +88,22 @@ class REAL(PedagogicalExtractor):
         return self._create_ruleset(dataset)

     def _internal_predict(self, sample: pd.Series):
-        x = [index for index, rule in self._ruleset.flatten() if
-
-        return reverse_mapping[x[0]] if len(x) > 0 else None
+        x = [index for index, rule in self._ruleset.flatten() if self._rule_from_example(sample) in rule]
+        return x[0] if x else None

-
-
+    def make_fair(self, features: Iterable[str]):
+        self._ignore_feature = [list(i.admissible_values.keys()) for i in self.discretization if i.name in features] \
+            if self.discretization else [features]
+        self._ignore_feature = [feature for features in self._ignore_feature for feature in features]
+        self._get_or_set.cache_clear()
+
+    def _rule_from_example(self, sample: pd.Series) -> Rule:
         true_predicates, false_predicates = [], []
         for feature, value in sample.items():
+            if feature in self._ignore_feature:
+                continue
             true_predicates.append(str(feature)) if value == 1 else false_predicates.append(str(feature))
-        return Rule(
+        return Rule(true_predicates, false_predicates)

     def _subset(self, samples: pd.DataFrame, predicate: str) -> (pd.DataFrame, bool):
         samples_0 = samples.copy()
@@ -111,16 +113,9 @@ class REAL(PedagogicalExtractor):
         samples_all = samples_0.append(samples_1)
         return samples_all, len(set(self.predictor.predict(samples_all))) == 1

-    def _extract(self, dataframe: pd.DataFrame
-        # Order the dataset by column to preserve reproducibility.
-        dataframe = dataframe.sort_values(by=list(dataframe.columns.values), ascending=False)
-        # Always perform output mapping in the same (sorted) way to preserve reproducibility.
-        if mapping is None:
-            self._output_mapping = {value: index for index, value in enumerate(sorted(set(dataframe.iloc[:, -1])))}
-        else:
-            self._output_mapping = {value: index for index, value in enumerate(sorted(set(mapping[dataframe.iloc[:, -1]])))}
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         self._ruleset = self._get_or_set(HashableDataFrame(dataframe))
-        return self._create_theory(dataframe
+        return self._create_theory(dataframe)

     def _predict(self, dataframe) -> Iterable:
         return np.array([self._internal_predict(data.transpose()) for _, data in dataframe.iterrows()])
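REAL's make_fair first translates protected feature names into the one-hot columns produced by discretisation so that _rule_from_example can skip them; the second comprehension flattens the per-feature lists, and the lru_cache behind _get_or_set is cleared so a previously memoised ruleset is not reused. A sketch of the flattening step, with hypothetical column names:

    ignore = [['age_0', 'age_1', 'age_2']]             # hypothetical one-hot columns for 'age'
    ignore = [col for cols in ignore for col in cols]  # -> ['age_0', 'age_1', 'age_2']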
psyke/extraction/real/utils.py
CHANGED

@@ -49,5 +49,5 @@ class IndexedRuleSet(dict[int, list[Rule]]):
         ]

     @staticmethod
-    def create_indexed_ruleset(
-        return IndexedRuleSet({
+    def create_indexed_ruleset(indices: Iterable) -> IndexedRuleSet:
+        return IndexedRuleSet({i: [] for i in indices})
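With the new signature the caller supplies the output labels directly instead of the whole dataset, e.g. (assuming a dataframe df whose last column is the target):

    ruleset = IndexedRuleSet.create_indexed_ruleset(sorted(set(df.iloc[:, -1])))
    # -> one empty rule list per class label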
psyke/extraction/trepan/__init__.py
CHANGED

@@ -1,6 +1,7 @@
 import numpy as np
+from psyke.extraction import PedagogicalExtractor
 from psyke.extraction.trepan.utils import Node, Split, SplitLogic
-from psyke import DiscreteFeature
+from psyke import DiscreteFeature
 from psyke.utils.logic import create_term, create_variable_list, create_head
 from psyke.utils.sorted import SortedList
 from tuprolog.core import Var, Struct, clause
@@ -14,11 +15,17 @@ class Trepan(PedagogicalExtractor):
     def __init__(self, predictor, discretization: Iterable[DiscreteFeature], min_examples: int = 0, max_depth: int = 3,
                  split_logic: SplitLogic = SplitLogic.DEFAULT):
         super().__init__(predictor, discretization)
+        self._ignore_feature = []
         self.min_examples = min_examples
         self.max_depth = max_depth
         self.split_logic = split_logic
         self._root: Node

+    def make_fair(self, features: Iterable[str]):
+        self._ignore_feature = [list(i.admissible_values.keys()) for i in self.discretization if i.name in features] \
+            if self.discretization else [features]
+        self._ignore_feature = [feature for features in self._ignore_feature for feature in features]
+
     @property
     def n_rules(self):
         return sum(1 for _ in self._root)
@@ -28,7 +35,7 @@ class Trepan(PedagogicalExtractor):
             raise NotImplementedError()
         if node.n_classes == 1:
             return None
-        splits =
+        splits = self._create_splits(node, names)
         return None if len(splits) == 0 or splits[0].children[0].depth > self.max_depth else splits[0].children

     def _compact(self):
@@ -54,28 +61,26 @@ class Trepan(PedagogicalExtractor):
     def _create_split(node: Node, column: str) -> Union[Split, None]:
         true_examples = Trepan._create_samples(node, column, 1.0)
         false_examples = Trepan._create_samples(node, column, 0.0)
-
-
-        true_node = Node(true_examples, node.n_examples,
+        true_constraints = list(node.constraints) + [(column, 1.0)]
+        false_constraints = list(node.constraints) + [(column, 0.0)]
+        true_node = Node(true_examples, node.n_examples, true_constraints, depth=node.depth + 1) \
             if true_examples.shape[0] > 0 else None
-        false_node = Node(false_examples, node.n_examples,
+        false_node = Node(false_examples, node.n_examples, false_constraints, depth=node.depth + 1) \
             if false_examples.shape[0] > 0 else None
         return None if true_node is None or false_node is None else Split(node, (true_node, false_node))

-
-
-
-
-        if
-
-        if split is not None:
-            splits.add(split)
+    def _create_splits(self, node: Node, names: Iterable[str]) -> SortedList[Split]:
+        splits, constraints = Trepan._init_splits(node)
+        for column in [column for column in names if column not in list(constraints) + self._ignore_feature]:
+            split = Trepan._create_split(node, column)
+            if split is not None:
+                splits.add(split)
         return splits

-    def _create_theory(self, name: str
+    def _create_theory(self, name: str) -> MutableTheory:
         theory = mutable_theory()
         for node in self._root:
-            variables = create_variable_list(self.discretization
+            variables = create_variable_list(self.discretization)
             theory.assertZ(
                 clause(
                     create_head(name, list(variables.values()), str(node.dominant)),
@@ -116,7 +121,7 @@ class Trepan(PedagogicalExtractor):
                     continue
             if not skip:
                 return Trepan._internal_predict(x, child, categories)
-            return node.dominant
+        return node.dominant

     def _optimize(self) -> None:
         n, nodes = 0, [self._root]
@@ -135,7 +140,7 @@ class Trepan(PedagogicalExtractor):
                 nodes.append(child)
         return len(to_remove)

-    def _extract(self, dataframe: pd.DataFrame
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
         queue = self._init(dataframe)
         while len(queue) > 0:
             node = queue.pop()
@@ -148,7 +153,7 @@ class Trepan(PedagogicalExtractor):
             queue.add_all(best)
             node.children += list(best)
         self._optimize()
-        return self._create_theory(dataframe.columns[-1]
+        return self._create_theory(dataframe.columns[-1])

     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         return np.array(
psyke/genetic/__init__.py
File without changes

psyke/genetic/fgin/__init__.py
ADDED

@@ -0,0 +1,74 @@
+import numpy as np
+import pandas as pd
+
+from psyke import Target
+from psyke.genetic.gin import GIn
+
+import skfuzzy as skf
+
+
+class FGIn(GIn):
+
+    def __init__(self, train, valid, features, sigmas, slices, min_rules=1, poly=1, alpha=0.5, indpb=0.5, tournsize=3,
+                 metric='R2', output=Target.REGRESSION, warm=False):
+        super().__init__(train, valid, features, sigmas, slices, min_rules, poly, alpha, indpb, tournsize,
+                         metric, output, warm)
+        self.feature_to_idx = {f: i for i, f in enumerate(self.X.columns)}
+
+    def _evaluate(self, individual=None):
+        y_pred, valid_regions = self.__predict(individual or self.best, self.X if self.valid is None else self.valid[0])
+        if valid_regions < self.min_rules:
+            return -9999,
+        return self._score(self.y if self.valid is None else self.valid[1], y_pred),
+
+    @staticmethod
+    def __generate_membership(var, domain, thresholds, shape='tri'):
+        th = [var.min()] + [min(max(t, var.min()), var.max()) for t in thresholds] + [var.max()]
+
+        if shape == 'tri':
+            mid = [(x1 + x2) / 2 for x1, x2 in zip(th[:-1], th[1:])]
+            return [skf.trapmf(domain, [domain.min()] * 2 + mid[:2])] + \
+                   [skf.trimf(domain, [x1, x2, x3]) for x1, x2, x3 in zip(mid[:-2], mid[1:-1], mid[2:])] + \
+                   [skf.trapmf(domain, mid[-2:] + [domain.max()] * 2)]
+        if shape == 'trap':
+            beg = [None, domain.min()] + [(3 * x1 + x2) / 4 for x1, x2 in zip(th[1:-1], th[2:])] + [domain.max()]
+            end = [domain.min()] + [(x1 + 3 * x2) / 4 for x1, x2 in zip(th[:-2], th[1:-1])] + [domain.max()]
+            return [skf.trapmf(domain, [end[i - 1], beg[i], end[i], beg[i + 1]]) for i in range(1, len(th))]
+        raise ValueError('Supported shape values are only \'tri\' and \'trap\'')
+
+    @staticmethod
+    def __extend_domain(x, q_low=0.05, q_high=0.95, p=0.05, k_sigma=2.0, abs_min_margin=0.0):
+        ql, qh = np.quantile(x, [q_low, q_high])
+        margin = max(p * (qh - ql), k_sigma * np.std(x), abs_min_margin)
+        return np.array([ql - margin, qh + margin])
+
+    def __get_activations(self, x, functions_domains, valid_masks):
+        levels = [np.array([skf.interp_membership(domain, mf, x[index]) for mf in mfs])
+                  for mfs, domain, index in functions_domains.values()]
+        return np.prod(np.meshgrid(*levels, indexing='ij'), axis=0).ravel()[valid_masks]
+
+    def __fuzzify(self, cuts):
+        cuts = dict(zip(self.features, cuts))
+        doms = {c: FGIn.__extend_domain(self.X[c]) for c in self.features}
+        return {c: (FGIn.__generate_membership(self.X[c], doms[c], cuts[c], 'trap'), doms[c],
+                    self.feature_to_idx[c]) for c in self.features}
+
+    def __predict(self, individual=None, to_pred=None):
+        cuts = self._get_cuts(individual or self.best)
+        masks = np.array([self._region(to_pred, cuts) == r for r in range(np.prod([s + 1 for s in self.slices]))])
+        valid_masks = masks.sum(axis=1) >= 3
+
+        masks = [mask for mask in masks if mask.sum() >= 3]
+        functions_domains = self.__fuzzify(cuts)
+
+        pred = np.array([self._output_estimation(mask, to_pred) for mask in masks]).T
+        activations = np.array([self.__get_activations(x, functions_domains, valid_masks) for x in to_pred.values])
+
+        if self.output == Target.CLASSIFICATION:
+            classes, idx = np.unique(pred, return_inverse=True)
+            pred = classes[np.argmax(np.vstack([activations[:, idx == i].sum(axis=1) for i, c in enumerate(classes)]),
+                                     axis=0)]
+        else:
+            pred = (pred * activations).sum(axis=1)
+
+        return pd.DataFrame(pred, index=to_pred.index), len(masks)
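FGIn softens GIn's hard region assignment: each feature receives a trapezoidal fuzzy partition over a quantile-extended domain, per-sample activations come from skf.interp_membership, and the outer product of the per-feature activations weights each region's estimate. A self-contained sketch of that membership machinery (the domain and breakpoints are illustrative):

    import numpy as np
    import skfuzzy as skf

    domain = np.linspace(0.0, 10.0, 101)               # extended feature domain
    low = skf.trapmf(domain, [0.0, 0.0, 2.0, 4.0])     # 'low' region membership
    high = skf.trapmf(domain, [2.0, 4.0, 10.0, 10.0])  # 'high' region membership
    acts = [skf.interp_membership(domain, mf, 3.0) for mf in (low, high)]
    # acts == [0.5, 0.5]: the sample contributes to both regions; across several
    # features the activations are combined via np.meshgrid + np.prod, as in
    # __get_activations above.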