psyke-0.4.9.dev6-py3-none-any.whl → psyke-1.0.4.dev10-py3-none-any.whl
This diff covers two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- psyke/__init__.py +231 -85
- psyke/clustering/__init__.py +9 -4
- psyke/clustering/cream/__init__.py +6 -10
- psyke/clustering/exact/__init__.py +17 -11
- psyke/clustering/utils.py +0 -1
- psyke/extraction/__init__.py +25 -0
- psyke/extraction/cart/CartPredictor.py +128 -0
- psyke/extraction/cart/FairTree.py +205 -0
- psyke/extraction/cart/FairTreePredictor.py +56 -0
- psyke/extraction/cart/__init__.py +48 -62
- psyke/extraction/hypercubic/__init__.py +187 -47
- psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
- psyke/extraction/hypercubic/creepy/__init__.py +24 -29
- psyke/extraction/hypercubic/divine/__init__.py +86 -0
- psyke/extraction/hypercubic/ginger/__init__.py +100 -0
- psyke/extraction/hypercubic/gridex/__init__.py +45 -84
- psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
- psyke/extraction/hypercubic/hex/__init__.py +104 -0
- psyke/extraction/hypercubic/hypercube.py +275 -72
- psyke/extraction/hypercubic/iter/__init__.py +45 -46
- psyke/extraction/hypercubic/strategy.py +13 -9
- psyke/extraction/real/__init__.py +24 -29
- psyke/extraction/real/utils.py +2 -2
- psyke/extraction/trepan/__init__.py +24 -19
- psyke/genetic/__init__.py +0 -0
- psyke/genetic/fgin/__init__.py +74 -0
- psyke/genetic/gin/__init__.py +144 -0
- psyke/hypercubepredictor.py +102 -0
- psyke/schema/__init__.py +230 -36
- psyke/tuning/__init__.py +40 -28
- psyke/tuning/crash/__init__.py +33 -64
- psyke/tuning/orchid/__init__.py +21 -23
- psyke/tuning/pedro/__init__.py +70 -56
- psyke/utils/logic.py +8 -8
- psyke/utils/plot.py +79 -3
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
- psyke-1.0.4.dev10.dist-info/RECORD +46 -0
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
- psyke/extraction/cart/predictor.py +0 -73
- psyke-0.4.9.dev6.dist-info/RECORD +0 -36
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
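
The most visible API changes are in psyke/__init__.py (hunks below): new Extractor factory methods (DiViNE, COSMiK, HEx, GInGER), a brute_predict() alternative to predict(), and a plot_fairness() helper driven by the extracted theory. A usage sketch follows; the dataset, the choice of black box and the group masks are illustrative only, while the factory signatures mirror the ones shown in the diff.

    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.neighbors import KNeighborsClassifier

    from psyke import Extractor

    # Illustrative data: features first, target in the last column.
    data = load_iris(as_frame=True).frame
    predictor = KNeighborsClassifier().fit(data.iloc[:, :-1], data.iloc[:, -1])

    # DiViNE is one of the factory methods added in this release.
    divine = Extractor.divine(predictor, k=5, patience=15, close_to_center=True)
    theory = divine.extract(data)  # extract() now also stores the result in divine.theory

    # brute_predict() is the new alternative to predict(); 'corner' and n=2 are its defaults,
    # and it is assumed here to be supported by hypercubic extractors such as DiViNE.
    brute_labels = divine.brute_predict(data.iloc[:, :-1], criterion='corner', n=2)

    # plot_fairness() shows how each extracted rule impacts the given groups;
    # the boolean masks below are made up for the sake of the example.
    groups = {'short sepal': data.iloc[:, 0] < 5.8, 'long sepal': data.iloc[:, 0] >= 5.8}
    divine.plot_fairness(data.iloc[:, :-1], groups)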
psyke/__init__.py
CHANGED
@@ -5,17 +5,20 @@ from enum import Enum

 import numpy as np
 import pandas as pd
-from
+from matplotlib import pyplot as plt
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, f1_score, accuracy_score, \
     adjusted_rand_score, adjusted_mutual_info_score, v_measure_score, fowlkes_mallows_score
+from tuprolog.solve.prolog import prolog_solver

 from psyke.schema import DiscreteFeature
 from psyke.utils import get_default_random_seed, Target, get_int_precision
-from tuprolog.theory import Theory
+from tuprolog.theory import Theory, mutable_theory
 from typing import Iterable
 import logging

+from psyke.utils.logic import get_in_rule, data_to_struct, get_not_in_rule
+
 logging.basicConfig(level=logging.WARN)
 logger = logging.getLogger('psyke')

@@ -45,28 +48,36 @@ class EvaluableModel(object):
         V = 3,
         FMI = 4

-    def __init__(self, normalization=None):
+    def __init__(self, discretization=None, normalization=None):
+        self.discretization = [] if discretization is None else list(discretization)
         self.normalization = normalization

-    def predict(self, dataframe: pd.DataFrame
+    def predict(self, dataframe: pd.DataFrame) -> Iterable:
         """
         Predicts the output values of every sample in dataset.

-        :param dataframe:
-        :param mapping: for one-hot encoding.
+        :param dataframe: the set of instances to predict.
         :return: a list of predictions.
         """
-
-        if mapping is not None:
-            inverse_mapping = {v: k for k, v in mapping.items()}
-            ys = [inverse_mapping[y] for y in ys]
-        return ys
+        return self.__convert(self._predict(dataframe))

     def _predict(self, dataframe: pd.DataFrame) -> Iterable:
         raise NotImplementedError('predict')

+    def __convert(self, ys: Iterable) -> Iterable:
+        if self.normalization is not None and len(ys) > 0 and not isinstance([p for p in ys if p is not None][0], str):
+            m, s = self.normalization[list(self.normalization.keys())[-1]]
+            ys = [prediction if prediction is None else prediction * s + m for prediction in ys]
+        return ys
+
+    def brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
+        return self.__convert(self._brute_predict(dataframe, criterion, n))
+
+    def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
+        raise NotImplementedError('brute_predict')
+
     def unscale(self, values, name):
-        if self.normalization is None or isinstance(values, LinearRegression):
+        if self.normalization is None or name not in self.normalization or isinstance(values, LinearRegression):
             return values
         if isinstance(values, Iterable):
             values = [None if value is None else
@@ -76,9 +87,13 @@ class EvaluableModel(object):
         return values

     def score(self, dataframe: pd.DataFrame, predictor=None, fidelity: bool = False, completeness: bool = True,
+              brute: bool = False, criterion: str = 'corners', n: int = 2,
              task: EvaluableModel.Task = Task.CLASSIFICATION,
-              scoring_function: Iterable[EvaluableModel.Score] =
-        extracted = np.array(
+              scoring_function: Iterable[EvaluableModel.Score] = (ClassificationScore.ACCURACY, )):
+        extracted = np.array(
+            self.predict(dataframe.iloc[:, :-1]) if not brute else
+            self.brute_predict(dataframe.iloc[:, :-1], criterion, n)
+        )
         idx = [prediction is not None for prediction in extracted]
         y_extracted = extracted[idx]
         true = [dataframe.iloc[idx, -1]]
@@ -134,110 +149,263 @@ class Extractor(EvaluableModel, ABC):
     ----------
     predictor : the underling black box predictor.
     discretization : A collection of sets of discretised features.
-
+        Each set corresponds to a set of features derived from a single non-discrete feature.
     """

     def __init__(self, predictor, discretization: Iterable[DiscreteFeature] = None, normalization=None):
-        super().__init__(normalization)
+        super().__init__(discretization, normalization)
         self.predictor = predictor
-        self.
+        self.theory = None

-    def extract(self, dataframe: pd.DataFrame
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
         """
         Extracts rules from the underlying predictor.

-        :param dataframe:
-        :param mapping: for one-hot encoding.
-        :param sort: alphabetically sort the variables of the head of the rules.
+        :param dataframe: the set of instances to be used for the extraction.
         :return: the theory created from the extracted rules.
         """
         raise NotImplementedError('extract')

-    def
+    def predict_why(self, data: dict[str, float], verbose: bool = True):
+        """
+        Provides a prediction and the corresponding explanation.
+        :param data: the instance to predict.
+        :param verbose: if True the explanation is printed.
+        """
+        raise NotImplementedError('predict_why')
+
+    def predict_counter(self, data: dict[str, float], verbose: bool = True, only_first: bool = True):
+        """
+        Provides a prediction and counterfactual explanations.
+        :param data: the instance to predict.
+        :param verbose: if True the counterfactual explanation is printed.
+        :param only_first: if True only the closest counterfactual explanation is provided for each distinct class.
+        """
+        raise NotImplementedError('predict_counter')
+
+    def plot_fairness(self, dataframe: pd.DataFrame, groups: dict[str, list], colormap='seismic_r', filename=None,
+                      figsize=(5, 4)):
+        """
+        Provides a visual estimation of the fairness exhibited by an extractor with respect to the specified groups.
+        :param dataframe: the set of instances to be used for the estimation.
+        :param groups: the set of relevant groups to consider.
+        :param colormap: the colormap to use for the plot.
+        :param filename: if not None, name used to save the plot.
+        :param figsize: size of the plot.
+        """
+        counts = {group: len(dataframe[idx_g]) for group, idx_g in groups.items()}
+        output = {'labels': []}
+        for group in groups:
+            output[group] = []
+        for i, clause in enumerate(self.theory.clauses):
+            if len(dataframe) == 0:
+                break
+            solver = prolog_solver(static_kb=mutable_theory(clause).assertZ(get_in_rule()).assertZ(get_not_in_rule()))
+            idx = np.array([query.is_yes for query in
+                            [solver.solveOnce(data_to_struct(data)) for _, data in dataframe.iterrows()]])
+            # print(f'Rule {i + 1}. Outcome {clause.head.args[-1]}. Affecting', end='')
+            output['labels'].append(str(clause.head.args[-1]))
+            for group, idx_g in groups.items():
+                # print(f' {len(dataframe[idx & idx_g]) / counts[group]:.2f}%{group}', end='')
+                output[group].append(len(dataframe[idx & idx_g]) / counts[group])
+            dataframe = dataframe[~idx]
+            groups = {group: indices[~idx] for group, indices in groups.items()}
+            # print(f'. Left {len(dataframe)} instances')
+
+        binary = len(set(output['labels'])) == 2
+        labels = sorted(set(output['labels']))
+        data = np.vstack([output[group] for group in groups]).T * 100
+        if binary:
+            data[np.array(output['labels']) == labels[0]] *= -1
+
+        plt.figure(figsize=figsize)
+        plt.imshow(data, cmap=colormap, vmin=-100 if binary else 0, vmax=100)
+
+        plt.gca().set_xticks(range(len(groups)), labels=groups.keys())
+        plt.gca().set_yticks(range(len(output['labels'])),
+                             labels=[f'Rule {i + 1}\n{l}' for i, l in enumerate(output['labels'])])
+
+        plt.xlabel('Groups')
+        plt.ylabel('Rules')
+        plt.title("Rule set impact on groups")
+
+        for i in range(len(output['labels'])):
+            for j in range(len(groups)):
+                plt.gca().text(j, i, f'{abs(data[i, j]):.2f}%', ha="center", va="center", color="k")
+
+        plt.gca().set_xticks([i + .5 for i in range(len(groups))], minor=True)
+        plt.gca().set_yticks([i + .5 for i in range(len(output['labels']))], minor=True)
+        plt.gca().grid(which='minor', color='k', linestyle='-', linewidth=.8)
+        plt.gca().tick_params(which='minor', bottom=False, left=False)
+        cbarticks = np.linspace(-100 if binary else 0, 100, 9 if binary else 11, dtype=int)
+        cbar = plt.colorbar(fraction=0.046, label='Affected samples (%)', ticks=cbarticks)
+        if binary:
+            ticklabels = [str(-i) if i < 0 else str(i) for i in cbarticks]
+            ticklabels[0] += f' {labels[0]}'
+            ticklabels[-1] += f' {labels[-1]}'
+            cbar.ax.set_yticklabels(ticklabels)
+
+        plt.tight_layout()
+        if filename is not None:
+            plt.savefig(filename, dpi=500)
+        plt.show()
+
+    def make_fair(self, features: Iterable[str]):
+        raise NotImplementedError(f'Fairness for {type(self).__name__} is not supported at the moment')
+
+    def mae(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
+            n: int = 3) -> float:
         """
         Calculates the predictions' MAE w.r.t. the instances given as input.

-        :param dataframe:
+        :param dataframe: the set of instances to be used to calculate the mean absolute error.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
+        :param brute: if True, a brute prediction is executed.
+        :param criterion: criterion for brute prediction.
+        :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the mean absolute error (MAE) of the predictions.
         """
-        return self.score(dataframe, predictor, predictor is not None, False,
-                          [Extractor.RegressionScore.MAE])[Extractor.RegressionScore.MAE][-1]
+        return self.score(dataframe, predictor, predictor is not None, False, brute, criterion, n,
+                          Extractor.Task.REGRESSION, [Extractor.RegressionScore.MAE])[Extractor.RegressionScore.MAE][-1]

-    def mse(self, dataframe: pd.DataFrame, predictor=None
+    def mse(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
+            n: int = 3) -> float:
         """
         Calculates the predictions' MSE w.r.t. the instances given as input.

-        :param dataframe:
+        :param dataframe: the set of instances to be used to calculate the mean squared error.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
+        :param brute: if True, a brute prediction is executed.
+        :param criterion: criterion for brute prediction.
+        :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the mean squared error (MSE) of the predictions.
         """
-        return self.score(dataframe, predictor, predictor is not None, False,
-                          [Extractor.RegressionScore.MSE])[Extractor.RegressionScore.MSE][-1]
+        return self.score(dataframe, predictor, predictor is not None, False, brute, criterion, n,
+                          Extractor.Task.REGRESSION, [Extractor.RegressionScore.MSE])[Extractor.RegressionScore.MSE][-1]

-    def r2(self, dataframe: pd.DataFrame, predictor=None
+    def r2(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
+           n: int = 3) -> float:
         """
         Calculates the predictions' R2 score w.r.t. the instances given as input.

-        :param dataframe:
+        :param dataframe: the set of instances to be used to calculate the R2 score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
+        :param brute: if True, a brute prediction is executed.
+        :param criterion: criterion for brute prediction.
+        :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the R2 score of the predictions.
         """
-        return self.score(dataframe, predictor, predictor is not None, False,
+        return self.score(dataframe, predictor, predictor is not None, False, brute, criterion, n,
                           Extractor.Task.REGRESSION, [Extractor.RegressionScore.R2])[Extractor.RegressionScore.R2][-1]

-    def accuracy(self, dataframe: pd.DataFrame, predictor=None
+    def accuracy(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
+                 n: int = 3) -> float:
         """
         Calculates the predictions' accuracy classification score w.r.t. the instances given as input.

-        :param dataframe:
+        :param dataframe: the set of instances to be used to calculate the accuracy classification score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
+        :param brute: if True, a brute prediction is executed.
+        :param criterion: criterion for brute prediction.
+        :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the accuracy classification score of the predictions.
         """
-        return self.score(dataframe, predictor, predictor is not None, False,
+        return self.score(dataframe, predictor, predictor is not None, False, brute, criterion, n,
+                          Extractor.Task.CLASSIFICATION,
                           [Extractor.ClassificationScore.ACCURACY])[Extractor.ClassificationScore.ACCURACY][-1]

-    def f1(self, dataframe: pd.DataFrame, predictor=None
+    def f1(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
+           n: int = 3) -> float:
         """
         Calculates the predictions' F1 score w.r.t. the instances given as input.

-        :param dataframe:
+        :param dataframe: the set of instances to be used to calculate the F1 score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
+        :param brute: if True, a brute prediction is executed.
+        :param criterion: criterion for brute prediction.
+        :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the F1 score of the predictions.
         """
-        return self.score(dataframe, predictor, predictor is not None, False,
+        return self.score(dataframe, predictor, predictor is not None, False, brute, criterion, n,
+                          Extractor.Task.CLASSIFICATION,
                           [Extractor.ClassificationScore.F1])[Extractor.ClassificationScore.F1][-1]

     @staticmethod
-    def cart(predictor, max_depth: int = 3, max_leaves: int = 3,
+    def cart(predictor, max_depth: int = 3, max_leaves: int = 3, max_features=None,
             discretization: Iterable[DiscreteFeature] = None, normalization=None, simplify: bool = True) -> Extractor:
         """
         Creates a new Cart extractor.
         """
         from psyke.extraction.cart import Cart
-        return Cart(predictor, max_depth, max_leaves,
-                    simplify=simplify)
+        return Cart(predictor, max_depth, max_leaves, max_features,
+                    discretization=discretization, normalization=normalization, simplify=simplify)
+
+    @staticmethod
+    def divine(predictor, k: int = 5, patience: int = 15, close_to_center: bool = True,
+               discretization: Iterable[DiscreteFeature] = None, normalization=None,
+               seed: int = get_default_random_seed()) -> Extractor:
+        """
+        Creates a new DiViNE extractor.
+        """
+        from psyke.extraction.hypercubic.divine import DiViNE
+        return DiViNE(predictor, k=k, patience=patience, close_to_center=close_to_center,
+                      discretization=discretization, normalization=normalization, seed=seed)
+
+    @staticmethod
+    def cosmik(predictor, max_components: int = 4, k: int = 5, patience: int = 15, close_to_center: bool = True,
+               output: Target = Target.CONSTANT, discretization: Iterable[DiscreteFeature] = None, normalization=None,
+               seed: int = get_default_random_seed()) -> Extractor:
+        """
+        Creates a new COSMiK extractor.
+        """
+        from psyke.extraction.hypercubic.cosmik import COSMiK
+        return COSMiK(predictor, max_components=max_components, k=k, patience=patience, close_to_center=close_to_center,
+                      output=output, discretization=discretization, normalization=normalization, seed=seed)

     @staticmethod
     def iter(predictor, min_update: float = 0.1, n_points: int = 1, max_iterations: int = 600, min_examples: int = 250,
-             threshold: float = 0.1, fill_gaps: bool = True,
-
+             threshold: float = 0.1, fill_gaps: bool = True, ignore_dimensions=None,
+             normalization: dict[str, tuple[float, float]] = None, output=None,
+             seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new ITER extractor.
         """
         from psyke.extraction.hypercubic.iter import ITER
         return ITER(predictor, min_update, n_points, max_iterations, min_examples, threshold, fill_gaps,
-                    normalization, output, seed)
+                    ignore_dimensions, normalization, output, seed)

     @staticmethod
-    def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
-               normalization: dict[str, tuple[float, float]] = None,
+    def gridex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output: Target = Target.CONSTANT,
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
               seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new GridEx extractor.
         """
         from psyke.extraction.hypercubic.gridex import GridEx
-        return GridEx(predictor, grid, min_examples, threshold, normalization, seed)
+        return GridEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+
+    @staticmethod
+    def hex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output: Target = Target.CONSTANT,
+            discretization=None, normalization: dict[str, tuple[float, float]] = None,
+            seed: int = get_default_random_seed()) -> Extractor:
+        """
+        Creates a new HEx extractor.
+        """
+        from psyke.extraction.hypercubic.hex import HEx
+        return HEx(predictor, grid, min_examples, threshold, output, discretization, normalization, seed)
+
+    @staticmethod
+    def ginger(predictor, features: Iterable[str], sigmas: Iterable[float], max_slices: int, min_rules: int = 1,
+               max_poly: int = 1, alpha: float = 0.5, indpb: float = 0.5, tournsize: int = 3, metric: str = 'R2',
+               n_gen: int = 50, n_pop: int = 50, threshold=None, valid=None, output=Target.REGRESSION,
+               normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
+        """
+        Creates a new GInGER extractor.
+        """
+        from psyke.extraction.hypercubic.ginger import GInGER
+        return GInGER(predictor, features, sigmas, max_slices, min_rules, max_poly, alpha, indpb, tournsize, metric,
+                      n_gen, n_pop, threshold, valid, output, normalization, seed)

     @staticmethod
     def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
@@ -250,15 +418,16 @@ class Extractor(EvaluableModel, ABC):
         return GridREx(predictor, grid, min_examples, threshold, normalization, seed)

     @staticmethod
-    def creepy(predictor, clustering, depth: int, error_threshold: float, output
-               ranks: [(str, float)] =
-               normalization: dict[str, tuple[float, float]] = None
+    def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
+               gauss_components: int = 2, ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0,
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
+               seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new CReEPy extractor.
         """
         from psyke.extraction.hypercubic.creepy import CReEPy
-        return CReEPy(predictor, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
-                      normalization,
+        return CReEPy(predictor, clustering, depth, error_threshold, output, gauss_components, ranks, ignore_threshold,
+                      discretization, normalization, seed)

     @staticmethod
     def real(predictor, discretization=None) -> Extractor:
@@ -281,52 +450,29 @@ class Extractor(EvaluableModel, ABC):


 class Clustering(EvaluableModel, ABC):
-    def __init__(self, normalization=None):
-        super().__init__(normalization)
+    def __init__(self, discretization=None, normalization=None):
+        super().__init__(discretization, normalization)

     def fit(self, dataframe: pd.DataFrame):
-        raise NotImplementedError('
+        raise NotImplementedError('fit')

     def explain(self):
-        raise NotImplementedError('
+        raise NotImplementedError('explain')

     @staticmethod
-    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-
+    def exact(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new ExACT instance.
         """
         from psyke.clustering.exact import ExACT
-        return ExACT(depth, error_threshold, output, gauss_components)
+        return ExACT(depth, error_threshold, output, gauss_components, discretization, normalization, seed)

     @staticmethod
-    def cream(depth: int, error_threshold: float, output, gauss_components: int = 2
+    def cream(depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT, gauss_components: int = 2,
+              discretization=None, normalization=None, seed: int = get_default_random_seed()) -> Clustering:
         """
         Creates a new CREAM instance.
         """
         from psyke.clustering.cream import CREAM
-        return CREAM(depth, error_threshold, output, gauss_components)
-
-
-class PedagogicalExtractor(Extractor, ABC):
-
-    def __init__(self, predictor, discretization=None, normalization=None):
-        Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
-
-    def extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-        new_y = self.predictor.predict(dataframe.iloc[:, :-1])
-        if mapping is not None:
-            if hasattr(new_y[0], 'shape'):
-                # One-hot encoding for multi-class tasks
-                if len(new_y[0].shape) > 0 and new_y[0].shape[0] > 1:
-                    new_y = [argmax(y, axis=0) for y in new_y]
-                # One-hot encoding for binary class tasks
-                else:
-                    new_y = [round(y[0]) for y in new_y]
-        new_y = pd.DataFrame(new_y).set_index(dataframe.index)
-        data = dataframe.iloc[:, :-1].copy().join(new_y)
-        data.columns = dataframe.columns
-        return self._extract(data, mapping, sort)
-
-    def _extract(self, dataframe: pd.DataFrame, mapping: dict[str: int] = None, sort: bool = True) -> Theory:
-        raise NotImplementedError('extract')
+        return CREAM(depth, error_threshold, output, gauss_components, discretization, normalization, seed)
psyke/clustering/__init__.py
CHANGED
@@ -2,13 +2,18 @@ from abc import ABC
 from typing import Iterable

 from psyke import Clustering, Target
-from psyke.extraction.hypercubic import HyperCube
+from psyke.extraction.hypercubic import HyperCube
+from psyke.hypercubepredictor import HyperCubePredictor


 class HyperCubeClustering(HyperCubePredictor, Clustering, ABC):

-    def __init__(self, output: Target = Target.CONSTANT, normalization=None):
-        HyperCubePredictor.__init__(self, output=output, normalization=normalization)
+    def __init__(self, output: Target = Target.CONSTANT, discretization=None, normalization=None):
+        HyperCubePredictor.__init__(self, output=output, discretization=discretization, normalization=normalization)
+        self._protected_features = []

     def get_hypercubes(self) -> Iterable[HyperCube]:
-        raise NotImplementedError('
+        raise NotImplementedError('get_hypercubes')
+
+    def make_fair(self, features: Iterable[str]):
+        self._protected_features = features
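
HyperCubeClustering now exposes make_fair(), which records the protected features; ExACT and CREAM (below) drop those columns before fitting the Gaussian mixture. A sketch of the intended call order, on made-up data:

    import numpy as np
    import pandas as pd

    from psyke import Clustering
    from psyke.utils import Target

    # Made-up dataset: 'income' is the (last) target column, 'sex' the protected feature.
    rng = np.random.default_rng(0)
    df = pd.DataFrame({'age': rng.uniform(18, 90, 200),
                       'sex': rng.integers(0, 2, 200),
                       'income': rng.uniform(10, 100, 200)})

    clustering = Clustering.exact(depth=2, error_threshold=0.1, output=Target.CONSTANT)
    clustering.make_fair(['sex'])  # stored in _protected_features, excluded from the GMM step
    clustering.fit(df)
    cubes = clustering.get_hypercubes()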
psyke/clustering/cream/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Iterable
 import numpy as np
 import pandas as pd

-
+from psyke.utils import Target, get_default_random_seed
 from psyke.clustering.exact import ExACT
 from psyke.extraction.hypercubic import Node, HyperCube, ClosedCube
 from psyke.clustering.utils import select_gaussian_mixture
@@ -16,9 +16,9 @@ class CREAM(ExACT):
     Explanator implementing CREAM algorithm.
     """

-    def __init__(self, depth: int, error_threshold: float,
-
-        super().__init__(depth, error_threshold, output, gauss_components)
+    def __init__(self, depth: int, error_threshold: float, output: Target = Target.CONSTANT, gauss_components: int = 5,
+                 discretization=None, normalization=None, seed: int = get_default_random_seed()):
+        super().__init__(depth, error_threshold, output, gauss_components, discretization, normalization, seed)

     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
@@ -46,11 +46,7 @@ class CREAM(ExACT):
     def _iterate(self, surrounding: Node) -> Iterable[HyperCube]:
         to_split = [(self.error_threshold * 10, 1, 1, surrounding)]
         while len(to_split) > 0:
-
-            (_, depth, _, node) = to_split.pop()
-            data = ExACT._remove_string_label(node.dataframe)
-            gauss_params = select_gaussian_mixture(data, self.gauss_components)
-            gauss_pred = gauss_params[2].predict(data)
+            node, depth, gauss_pred, gauss_params = self._get_gauss_predictions(to_split)
             cubes = self.__eligible_cubes(gauss_pred, node, gauss_params[1])
             if len(cubes) < 1:
                 continue
@@ -65,4 +61,4 @@ class CREAM(ExACT):
                (error, depth + 1, np.random.uniform(), n) for (n, error) in
                zip(node.children, [right[0].diversity, left[0].diversity]) if error > self.error_threshold
             ]
-        return self._node_to_cubes(surrounding)
+        return self._node_to_cubes(surrounding)
psyke/clustering/exact/__init__.py
CHANGED
@@ -13,7 +13,7 @@ from psyke.clustering import HyperCubeClustering
 from psyke.extraction.hypercubic import Node, ClosedCube, HyperCube
 from psyke.clustering.utils import select_gaussian_mixture, select_dbscan_epsilon
 from psyke.extraction.hypercubic.hypercube import ClosedRegressionCube, ClosedClassificationCube
-from psyke.utils import Target
+from psyke.utils import Target, get_default_random_seed


 class ExACT(HyperCubeClustering, ABC):
@@ -22,13 +22,15 @@ class ExACT(HyperCubeClustering, ABC):
     """

     def __init__(self, depth: int = 2, error_threshold: float = 0.1, output: Target = Target.CONSTANT,
-                 gauss_components: int = 2, normalization=None
-
+                 gauss_components: int = 2, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
+        super().__init__(output, discretization, normalization)
         self.depth = depth
         self.error_threshold = error_threshold
         self.gauss_components = gauss_components
         self._predictor = KNeighborsClassifier() if output == Target.CLASSIFICATION else KNeighborsRegressor()
         self._predictor.n_neighbors = 1
+        self.seed = seed

     def __eligible_cubes(self, gauss_pred: np.ndarray, node: Node, clusters: int):
         cubes = []
@@ -52,13 +54,14 @@ class ExACT(HyperCubeClustering, ABC):
         dbscan_pred = DBSCAN(eps=select_dbscan_epsilon(data, clusters)).fit_predict(data.iloc[:, :-1])
         return HyperCube.create_surrounding_cube(
             dataframe.iloc[np.where(dbscan_pred == Counter(dbscan_pred).most_common(1)[0][0])],
-            True, self._output
+            True, self._output, self._protected_features
         )

     def fit(self, dataframe: pd.DataFrame):
+        np.random.seed(self.seed)
         self._predictor.fit(dataframe.iloc[:, :-1], dataframe.iloc[:, -1])
-        self.
-
+        self._surrounding = HyperCube.create_surrounding_cube(dataframe, True, self._output, self._protected_features)
+        self._hypercubes = self._iterate(Node(dataframe, self._surrounding))

     def get_hypercubes(self) -> Iterable[HyperCube]:
         return list(self._hypercubes)
@@ -76,14 +79,17 @@ class ExACT(HyperCubeClustering, ABC):
             enumerate(dataframe.iloc[:, -1].unique())
         ).items()}}) if isinstance(dataframe.iloc[0, -1], str) else dataframe

+    def _get_gauss_predictions(self, to_split):
+        to_split.sort(reverse=True)
+        (_, depth, _, node) = to_split.pop()
+        data = ExACT._remove_string_label(node.dataframe)
+        gauss_params = select_gaussian_mixture(data.drop(self._protected_features, axis=1), self.gauss_components)
+        return node, depth, gauss_params[2].predict(data.drop(self._protected_features, axis=1)), gauss_params
+
     def _iterate(self, surrounding: Node) -> Iterable[HyperCube]:
         to_split = [(self.error_threshold * 10, 1, 1, surrounding)]
         while len(to_split) > 0:
-
-            (_, depth, _, node) = to_split.pop()
-            data = ExACT._remove_string_label(node.dataframe)
-            gauss_params = select_gaussian_mixture(data, self.gauss_components)
-            gauss_pred = gauss_params[2].predict(data)
+            node, depth, gauss_pred, gauss_params = self._get_gauss_predictions(to_split)
             cubes, indices = self.__eligible_cubes(gauss_pred, node, gauss_params[1])
             cubes = [(c.volume(), len(idx), i, idx, c) for i, (c, idx) in enumerate(zip(cubes, indices))
                      if (idx is not None) and (not node.cube.equal(c))]
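
ExACT now takes a seed and calls np.random.seed(seed) at the start of fit(), and its Gaussian-mixture step ignores the features registered via make_fair(). A small reproducibility sketch on synthetic data; whether the extracted cubes coincide exactly also depends on the downstream clustering steps, so treat the final check as a sanity check rather than a guarantee:

    import numpy as np
    import pandas as pd

    from psyke import Clustering

    rng = np.random.default_rng(0)
    df = pd.DataFrame({'x': rng.uniform(0, 1, 300),
                       'y': rng.uniform(0, 1, 300),
                       'z': rng.uniform(0, 1, 300)})  # 'z' acts as the regression target

    # Two instances built with the same seed start fit() from the same global NumPy state.
    a = Clustering.exact(depth=2, error_threshold=0.1, seed=123)
    b = Clustering.exact(depth=2, error_threshold=0.1, seed=123)
    a.fit(df)
    b.fit(df)
    print(len(a.get_hypercubes()), len(b.get_hypercubes()))  # expected to match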
psyke/clustering/utils.py
CHANGED
@@ -11,7 +11,6 @@ def select_gaussian_mixture(data: pd.DataFrame, max_components) -> tuple[float,
     try:
         models = [GaussianMixture(n_components=n).fit(data) for n in components if n <= len(data)]
     except ValueError:
-        print(data)
         print(len(data))
     return min([(m.bic(data) / (i + 2), (i + 2), m) for i, m in enumerate(models)])

psyke/extraction/__init__.py
CHANGED
@@ -0,0 +1,25 @@
+from abc import ABC
+
+import pandas as pd
+from tuprolog.theory import Theory
+
+from psyke import Extractor
+
+
+class PedagogicalExtractor(Extractor, ABC):
+
+    def __init__(self, predictor, discretization=None, normalization=None):
+        Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
+
+    def _substitute_output(self, dataframe: pd.DataFrame) -> pd.DataFrame:
+        new_y = pd.DataFrame(self.predictor.predict(dataframe.iloc[:, :-1])).set_index(dataframe.index)
+        data = dataframe.iloc[:, :-1].copy().join(new_y)
+        data.columns = dataframe.columns
+        return data
+
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
+        self.theory = self._extract(self._substitute_output(dataframe))
+        return self.theory
+
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        raise NotImplementedError('extract')
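
The new psyke/extraction/__init__.py isolates the pedagogical pattern: extract() first replaces the target column with the black box's own predictions (_substitute_output) and then delegates to _extract(), which concrete extractors implement. The relabelling step can be reproduced in isolation with plain pandas and scikit-learn; the toy dataframe below is invented for illustration.

    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier

    # Toy data: two features and a ground-truth label in the last column.
    df = pd.DataFrame({'x1': [0, 1, 2, 3], 'x2': [1, 0, 1, 0], 'y': ['a', 'a', 'b', 'b']})
    black_box = DecisionTreeClassifier().fit(df.iloc[:, :-1], df.iloc[:, -1])

    # Equivalent of PedagogicalExtractor._substitute_output(): keep the features,
    # replace the target with the predictor's outputs, preserve the column names.
    new_y = pd.DataFrame(black_box.predict(df.iloc[:, :-1])).set_index(df.index)
    relabelled = df.iloc[:, :-1].copy().join(new_y)
    relabelled.columns = df.columns

    # 'relabelled' is what a concrete extractor's _extract() receives: rules are learned
    # against the black box's behaviour, not against the original labels.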