PyPI - antakia - Versions diffs - 0.2.1__py3-none-any.whl - Mend

antakia 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

antakia/__init__.py +5 -0
antakia/antakia.py +160 -0
antakia/assets/logo_ai-vidence.png +0 -0
antakia/assets/logo_antakia.png +0 -0
antakia/assets/logo_antakia_horizontal.png +0 -0
antakia/config.py +17 -0
antakia/explanation/__init__.py +0 -0
antakia/explanation/explanation_method.py +66 -0
antakia/explanation/explanations.py +108 -0
antakia/gui/__init__.py +2 -0
antakia/gui/colorTable.py +52 -0
antakia/gui/data_store.py +8 -0
antakia/gui/explanation_values.py +216 -0
antakia/gui/gui.py +930 -0
antakia/gui/high_dim_exp/__init__.py +0 -0
antakia/gui/high_dim_exp/figure_display.py +565 -0
antakia/gui/high_dim_exp/highdimexplorer.py +140 -0
antakia/gui/high_dim_exp/projected_value_bank.py +12 -0
antakia/gui/high_dim_exp/projected_values_selector.py +314 -0
antakia/gui/progress_bar.py +133 -0
antakia/gui/ruleswidget.py +661 -0
antakia/gui/tabs/model_explorer.py +95 -0
antakia/gui/tabs/tab1.py +390 -0
antakia/gui/widget_utils.py +44 -0
antakia/gui/widgets.py +1363 -0
antakia/utils/__init__.py +0 -0
antakia/utils/checks.py +5 -0
antakia/utils/dummy_datasets.py +133 -0
antakia/utils/logging.py +57 -0
antakia-0.2.1.dist-info/LICENSE +13 -0
antakia-0.2.1.dist-info/METADATA +114 -0
antakia-0.2.1.dist-info/RECORD +33 -0
antakia-0.2.1.dist-info/WHEEL +4 -0

antakia/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+__version__ = "0.2.1"
+__author__ = "AI-vidence "
+from antakia.antakia import AntakIA

antakia/antakia.py ADDED Viewed

@@ -0,0 +1,160 @@
+from __future__ import annotations
+from typing import List, Dict, Any
+import numpy as np
+import pandas as pd
+from dotenv import load_dotenv
+from antakia_core.utils.utils import ProblemCategory
+load_dotenv()
+from antakia.utils.checks import is_valid_model
+from antakia_core.utils.variable import Variable, DataVariables
+from antakia.gui.gui import GUI
+class AntakIA:
+    """
+    AntakIA class.
+
+    Antakia instances provide data and methods to explain a ML model.
+
+    Instance attributes
+    -------------------
+    X : pd.DataFrame the training dataset (column names cast to str)
+    y : pd.Series the target value (cast to float)
+    model : Model
+        the model to explain
+    variables : DataVariables describing the columns of X
+    X_test : pd.DataFrame the test dataset
+    y_test : pd.Series the test target value
+    X_exp : pd.DataFrame | None user-provided explanation values
+    score : reference scoring function, or its name
+    problem_category : ProblemCategory resolved from the constructor argument
+    gui : GUI instance built from the attributes above
+    """
+
+    def __init__(
+            self,
+            X: pd.DataFrame,
+            y: pd.Series,
+            model,
+            variables: DataVariables | List[Dict[str, Any]] | pd.DataFrame | None = None,
+            X_test: pd.DataFrame = None,
+            y_test: pd.Series = None,
+            X_exp: pd.DataFrame | None = None,
+            score: callable | str = 'auto',
+            problem_category: str = 'auto'
+    ):
+        """
+        AntakIA constructor.
+
+        Parameters:
+            X : pd.DataFrame the training dataset
+            y : pd.Series the target value
+            model : Model
+                the model to explain
+            variables : description of the columns of X (list of dicts or
+                DataFrame); guessed from X when None
+            X_test : pd.DataFrame the test dataset
+            y_test : pd.Series the test target value
+            X_exp : pd.DataFrame user-provided explanation values; must share
+                the index of X
+            score : reference scoring function or its name; 'auto' resolves to
+                'mse' for regression and to 'accuracy' otherwise
+            problem_category : name of a ProblemCategory member; 'auto' and
+                'classification' are refined by inspecting the model
+
+        Raises:
+            ValueError: if the model is rejected by is_valid_model, if the
+                problem category is unknown or if `variables` is invalid
+            AssertionError: if X, y and X_exp do not share the same index
+        """
+        load_dotenv()
+        if not is_valid_model(model):
+            raise ValueError(model, " should implement predict and score methods")
+        X, y, X_exp = self._preprocess_data(X, y, X_exp)
+        self.X = X
+        self.X_test = X_test
+        # a single-column 2D target is flattened to a 1D one
+        if y.ndim > 1:
+            y = y.squeeze()
+        self.y = y.astype(float)
+        if y_test is not None and y_test.ndim > 1:
+            y_test = y_test.squeeze()
+        self.y_test = y_test
+        self.model = model
+        self.X_exp = X_exp
+        self.problem_category = self._preprocess_problem_category(problem_category, model, X)
+        self.score = self._preprocess_score(score, self.problem_category)
+        self.set_variables(X, variables)
+        self.gui = GUI(
+            self.X,
+            self.y,
+            self.model,
+            self.variables,
+            self.X_test,
+            self.y_test,
+            self.X_exp,
+            self.score,
+            self.problem_category
+        )
+
+    def set_variables(self, X, variables):
+        """
+        Set self.variables from the user description, or guess it from X.
+
+        Parameters:
+            X : pd.DataFrame the training dataset
+            variables : list or DataFrame describing the columns of X, or None
+
+        Raises:
+            ValueError: if a list does not have one entry per column of X, or
+                if `variables` is neither a list, a DataFrame nor None
+        """
+        # NOTE(review): the constructor annotation also accepts a DataVariables
+        # instance; unless that type is a list or a DataFrame it ends up in
+        # the error branch below - confirm the intended behavior.
+        if variables is not None:
+            if isinstance(variables, list):
+                self.variables: DataVariables = Variable.import_variable_list(variables)
+                if len(self.variables) != len(X.columns):
+                    raise ValueError("Provided variable list must be the same length of the dataframe")
+            elif isinstance(variables, pd.DataFrame):
+                self.variables = Variable.import_variable_df(variables)
+            else:
+                raise ValueError("Provided variable list must be a list or a pandas DataFrame")
+        else:
+            self.variables = Variable.guess_variables(X)
+
+    def start_gui(self) -> GUI:
+        """Display the GUI splash screen and return what the GUI returns."""
+        return self.gui.show_splash_screen()
+
+    def export_regions(self):
+        """Return the region set currently held by the GUI."""
+        return self.gui.region_set
+
+    def _preprocess_data(self, X: pd.DataFrame, y, X_exp: pd.DataFrame):
+        """
+        Normalize the inputs to pandas objects and check that they are aligned.
+
+        numpy inputs are wrapped in DataFrame / Series, the column names of X
+        are cast to str (in place) and X_exp receives the same column names.
+
+        Returns:
+            the (X, y, X_exp) tuple after conversion
+
+        Raises:
+            AssertionError: if the index of X differs from the one of y or X_exp
+            IndexError: if re-indexing X on the index of X_exp creates new
+                missing values
+        """
+        if isinstance(X, np.ndarray):
+            X = pd.DataFrame(X)
+        if isinstance(X_exp, np.ndarray):
+            X_exp = pd.DataFrame(X_exp)
+        if isinstance(y, np.ndarray):
+            y = pd.Series(y)
+        X.columns = [str(col) for col in X.columns]
+        if X_exp is not None:
+            X_exp.columns = X.columns
+            pd.testing.assert_index_equal(X.index, X_exp.index, check_names=False)
+            if X.reindex(X_exp.index).iloc[:, 0].isna().sum() != X.iloc[:, 0].isna().sum():
+                raise IndexError('X and X_exp must share the same index')
+        pd.testing.assert_index_equal(X.index, y.index, check_names=False)
+        return X, y, X_exp
+
+    def _preprocess_problem_category(self, problem_category: str, model, X: pd.DataFrame) -> ProblemCategory:
+        """
+        Resolve the user-provided category name into a ProblemCategory member.
+
+        'auto' and 'classification' are refined by inspecting the model: a
+        `predict_proba` attribute, or a prediction with more than one column on
+        a sample of at most 100 rows of X, selects a probabilistic category.
+        Any other valid name is returned as the matching member.
+
+        Raises:
+            ValueError: if the name is not a ProblemCategory member name
+        """
+        if problem_category not in [e.name for e in ProblemCategory]:
+            raise ValueError('Invalid problem category')
+        if problem_category not in ('auto', 'classification'):
+            return ProblemCategory[problem_category]
+        # both refined categories share the same model inspection; only the
+        # fallback differs
+        if hasattr(model, 'predict_proba'):
+            return ProblemCategory['classification_with_proba']
+        pred = model.predict(X.sample(min(100, len(X))))
+        if len(pred.shape) > 1 and pred.shape[1] > 1:
+            return ProblemCategory['classification_proba']
+        if problem_category == 'auto':
+            return ProblemCategory['regression']
+        return ProblemCategory['classification_label_only']
+
+    def _preprocess_score(self, score, problem_category):
+        """
+        Resolve the reference score.
+
+        A callable or any name other than 'auto' is returned unchanged; 'auto'
+        becomes 'mse' for regression problems and 'accuracy' otherwise.
+        """
+        if callable(score) or score != 'auto':
+            return score
+        if problem_category == ProblemCategory.regression:
+            return 'mse'
+        return 'accuracy'

antakia/assets/logo_ai-vidence.png ADDED Viewed

Binary file

antakia/assets/logo_antakia.png ADDED Viewed

Binary file

antakia/assets/logo_antakia_horizontal.png ADDED Viewed

Binary file

antakia/config.py ADDED Viewed

@@ -0,0 +1,17 @@
+import os
+# Module-level settings; most can be overridden through environment variables.
+# Explanation method used when none is imported (1 is ExplanationMethod.SHAP)
+DEFAULT_EXPLANATION_METHOD = int(os.environ.get('DEFAULT_EXPLANATION_METHOD', 1))
+# NOTE: the environment variable is named DEFAULT_VS_DIMENSION, not DEFAULT_DIMENSION
+DEFAULT_DIMENSION = int(os.environ.get('DEFAULT_VS_DIMENSION', 2))
+# Not overridable through the environment
+DEFAULT_PROJECTION = 'PaCMAP'
+INIT_FIG_WIDTH = int(os.environ.get('INIT_FIG_WIDTH', 1800))
+MAX_DOTS = int(os.environ.get('MAX_DOTS', 5000))
+# Rule format
+# Boolean flags are enabled only by the exact string 'True'
+USE_INTERVALS_FOR_RULES = os.environ.get('USE_INTERVALS_FOR_RULES', 'True') == 'True'
+MAX_RULES_DESCR_LENGTH = int(os.environ.get('MAX_RULES_DESCR_LENGTH', 200))
+SHOW_LOG_MODULE_WIDGET = os.environ.get('SHOW_LOG_MODULE_WIDGET', 'False') == 'True'
+# Auto cluster
+MIN_POINTS_NUMBER = 100

antakia/explanation/__init__.py ADDED Viewed

File without changes

antakia/explanation/explanation_method.py ADDED Viewed

@@ -0,0 +1,66 @@
+import pandas as pd
+from antakia_core.utils.long_task import LongTask
+class ExplanationMethod(LongTask):
+    """
+    Abstract class (see LongTask) to compute explanation values for the
+    Explanation Space (ES).
+
+    Attributes
+    model : the model to explain
+    explanation_method : SHAP or LIME
+    task_type : the kind of problem handled by the model
+    """
+
+    # Identifiers of the supported explanation methods
+    NONE = 0  # no explanation, ie: original values
+    SHAP = 1
+    LIME = 2
+
+    def __init__(
+            self,
+            explanation_method: int,
+            X: pd.DataFrame,
+            model,
+            task_type,
+            progress_updated: callable = None,
+    ):
+        """
+        Parameters
+        ----------
+        explanation_method : one of NONE, SHAP or LIME
+        X : the DataFrame to explain
+        model : the model to explain
+        task_type : the kind of problem handled by the model
+        progress_updated : callback forwarded to LongTask
+
+        Raises
+        ------
+        ValueError if explanation_method is not a known identifier
+        """
+        is_known = ExplanationMethod.is_valid_explanation_method(explanation_method)
+        if not is_known:
+            raise ValueError(explanation_method, " is a bad explanation method")
+        self.explanation_method = explanation_method
+        super().__init__(X, progress_updated)
+        self.task_type = task_type
+        self.model = model
+
+    @staticmethod
+    def is_valid_explanation_method(method: int) -> bool:
+        """
+        Tell whether `method` is one of the SHAP, LIME or NONE identifiers.
+        """
+        known_methods = (
+            ExplanationMethod.SHAP,
+            ExplanationMethod.LIME,
+            ExplanationMethod.NONE,
+        )
+        return method in known_methods
+
+    @staticmethod
+    def explanation_methods_as_list() -> list:
+        """
+        List the identifiers of the computable methods (NONE is excluded).
+        """
+        computable = [ExplanationMethod.SHAP, ExplanationMethod.LIME]
+        return computable
+
+    @staticmethod
+    def explain_method_as_str(method: int) -> str:
+        """
+        Convert a SHAP / LIME identifier to its name; ValueError otherwise.
+        """
+        if method == ExplanationMethod.SHAP:
+            return "SHAP"
+        if method == ExplanationMethod.LIME:
+            return "LIME"
+        raise ValueError(method, " is a bad explanation method")
+
+    @staticmethod
+    def explain_method_as_int(method: str) -> int:
+        """
+        Convert a case-insensitive SHAP / LIME name to its identifier;
+        ValueError otherwise.
+        """
+        name = method.upper()
+        if name == "SHAP":
+            return ExplanationMethod.SHAP
+        if name == "LIME":
+            return ExplanationMethod.LIME
+        raise ValueError(method, " is a bad explanation method")

antakia/explanation/explanations.py ADDED Viewed

@@ -0,0 +1,108 @@
+import lime
+import numpy as np
+import pandas as pd
+import shap
+from antakia_core.utils.utils import ProblemCategory
+from antakia.explanation.explanation_method import ExplanationMethod
+# ===========================================================
+#              Explanations implementations
+# ===========================================================
+class SHAPExplanation(ExplanationMethod):
+    """
+    SHAP computation class.
+    """
+
+    def __init__(self, X: pd.DataFrame, model, task_type, progress_updated: callable = None):
+        super().__init__(ExplanationMethod.SHAP, X, model, task_type, progress_updated)
+
+    @property
+    def link(self):
+        """Name of the link function: "identity" for regression, else "logit"."""
+        if self.task_type == ProblemCategory.regression:
+            return "identity"
+        return "logit"
+
+    def compute(self) -> pd.DataFrame:
+        """
+        Compute the SHAP values of every row of X, by chunks of 200 rows.
+
+        A TreeExplainer is tried first; if it cannot be built for the model, a
+        KernelExplainer is used with a background sample of at most 200 rows.
+
+        Returns
+        -------
+        a DataFrame of SHAP values with the index and columns of X
+        """
+        self.publish_progress(0)
+        try:
+            explainer = shap.TreeExplainer(self.model)
+        except Exception:
+            # `except Exception` rather than a bare `except`, so that
+            # KeyboardInterrupt / SystemExit are not swallowed
+            explainer = shap.KernelExplainer(self.model.predict, self.X.sample(min(200, len(self.X))), link=self.link)
+        chunk_size = 200
+        n_rows = len(self.X)
+        shap_val_list = []
+        for start in range(0, n_rows, chunk_size):
+            chunk = self.X.iloc[start:start + chunk_size]
+            explanations = explainer.shap_values(chunk)
+            shap_val_list.append(
+                pd.DataFrame(explanations, columns=self.X.columns, index=chunk.index))
+            # `start` is already a row offset: progress is the share of rows
+            # processed so far, which never exceeds 100
+            rows_done = min(start + chunk_size, n_rows)
+            self.publish_progress(int(100 * rows_done / n_rows))
+        shap_values = pd.concat(shap_val_list)
+        self.publish_progress(100)
+        return shap_values
+class LIMExplanation(ExplanationMethod):
+    """
+    LIME computation class.
+    """
+
+    def __init__(self, X: pd.DataFrame, model, task_type, progress_updated: callable = None):
+        super().__init__(ExplanationMethod.LIME, X, model, task_type, progress_updated)
+
+    @property
+    def mode(self):
+        """LIME mode: 'regression' for regression problems, else 'classification'."""
+        if self.task_type == ProblemCategory.regression:
+            return 'regression'
+        return 'classification'
+
+    def compute(self) -> pd.DataFrame:
+        """
+        Compute the LIME weights of every row of X, one row at a time.
+
+        Returns
+        -------
+        a DataFrame of LIME weights with the index and columns of X
+        """
+        # make sure the submodule is loaded: `import lime` alone is not
+        # guaranteed to expose `lime.lime_tabular`
+        import lime.lime_tabular
+
+        self.publish_progress(0)
+        mode = self.mode
+        explainer = lime.lime_tabular.LimeTabularExplainer(
+            self.X.sample(min(len(self.X), 500)).values,
+            feature_names=self.X.columns,
+            verbose=False,
+            mode=mode,
+            discretize_continuous=False
+        )
+        values_lime = pd.DataFrame(
+            np.zeros(self.X.shape),
+            index=self.X.index,
+            columns=self.X.columns
+        )
+        progress = 0
+        # `label` is the key read from exp.local_exp (same choice as before)
+        if mode == 'regression':
+            predict_fct = self.model.predict
+            label = 0
+        else:
+            label = 1
+            if hasattr(self.model, 'predict_proba'):
+                predict_fct = self.model.predict_proba
+            else:
+                predict_fct = self.model.predict
+        n_features = self.X.shape[1]
+        for index, row in self.X.iterrows():
+            # request every feature: the default number of features kept by
+            # LIME would truncate the explanation on wide datasets
+            exp = explainer.explain_instance(row.values, predict_fct, num_features=n_features)
+            # local_exp holds (feature position, weight) pairs that are not
+            # ordered like the columns: assign each weight by position
+            weights = dict(exp.local_exp[label])
+            values_lime.loc[index] = [weights.get(pos, 0.0) for pos in range(n_features)]
+            progress += 100 / len(self.X)
+            self.publish_progress(int(progress))
+        self.publish_progress(100)
+        return values_lime
+def compute_explanations(X: pd.DataFrame, model, explanation_method: int, task_type,
+                         callback: callable) -> pd.DataFrame:
+    """
+    Compute the explanations of `model` on X with the requested method.
+
+    `explanation_method` must be ExplanationMethod.SHAP or
+    ExplanationMethod.LIME, any other value raises a ValueError.
+    `callback` is handed to the computation class as its progress callback.
+    """
+    if explanation_method == ExplanationMethod.SHAP:
+        explanation_class = SHAPExplanation
+    elif explanation_method == ExplanationMethod.LIME:
+        explanation_class = LIMExplanation
+    else:
+        raise ValueError(f"This explanation method {explanation_method} is not valid!")
+    task = explanation_class(X, model, task_type, callback)
+    return task.compute()

antakia/gui/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ __version__ = "0.1.1"
2	+ __author__ = "AI-vidence"

antakia/gui/colorTable.py ADDED Viewed

@@ -0,0 +1,52 @@
+from traitlets import traitlets
+import ipyvuetify as v
+from antakia_core.utils.utils import colors
+class ColorTable(v.VuetifyTemplate):
+    """
+    Table to display regions.
+
+    Row selection events are forwarded to the callback registered with
+    `set_callback`.
+    """
+    headers = traitlets.List([]).tag(sync=True, allow_null=True)
+    items = traitlets.List([]).tag(sync=True, allow_null=True)
+    selected = traitlets.List([]).tag(sync=True, allow_null=True)
+    colors = traitlets.List(colors).tag(sync=True)
+    template = traitlets.Unicode('''
+        <template>
+            <v-data-table
+                v-model="selected"
+                :headers="headers"
+                :items="items"
+                item-key="Region"
+                show-select
+                :hide-default-footer="false"
+                @item-selected="tableselect"
+            >
+            <template #header.data-table-select></template>
+            <template v-slot:item.Region="{ item }">
+              <v-chip :color="item.color" >
+                {{ item.Region }}
+              </v-chip>
+            </template>
+            </v-data-table>
+        </template>
+        ''').tag(sync=True)  # type: ignore
+    disable_sort = True
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        # no callback until set_callback is called
+        self.callback = None
+
+    @staticmethod
+    def get_color(item):
+        """Return the `color` attribute of `item`."""
+        return item.color
+
+    # @click:row="tableclick"
+    # def vue_tableclick(self, data):
+    #     raise ValueError(f"click event data = {data}")
+
+    def set_callback(self, callback: callable):  # type: ignore
+        """Register the function called with the data of each selection event."""
+        self.callback = callback
+
+    def vue_tableselect(self, data):
+        """
+        Handler of the `tableselect` event emitted by the template.
+
+        The event is ignored while no callback is registered, instead of
+        failing on a call to None.
+        """
+        if self.callback is not None:
+            self.callback(data)

antakia/gui/data_store.py ADDED Viewed

@@ -0,0 +1,8 @@
+class DataStore:
+    def __init__(
+            self,
+            X,
+            y,
+            X_test,
+            y_test,
+    ):

antakia/gui/explanation_values.py ADDED Viewed

@@ -0,0 +1,216 @@
+import pandas as pd
+import ipyvuetify as v
+from antakia import config
+from antakia.explanation.explanations import compute_explanations, ExplanationMethod
+from antakia.gui.progress_bar import ProgressBar
+class ExplanationValues:
+    """
+    Widget to manage explanation values
+    in charge of computing them when necessary
+    """
+    # position in this list matches the ExplanationMethod identifier
+    # (index 0 holds the user-provided explanations)
+    available_exp = ['Imported', 'SHAP', 'LIME']
+
+    def __init__(self, X: pd.DataFrame, y: pd.Series, model, task_type, on_change_callback: callable,
+                 disable_gui: callable, X_exp=None):
+        """
+        Parameters
+        ----------
+        X: original train DataFrame
+        y: target variable
+        model: customer model
+        task_type: kind of problem, forwarded to the explanation computation
+        on_change_callback: callback to notify explanation change
+        disable_gui: callback called with True / False around a computation
+        X_exp: user provided explanations
+        """
+        self.widget = None
+        self.X = X
+        self.y = y
+        self.model = model
+        self.task_type = task_type
+        self.on_change_callback = on_change_callback
+        self.disable_gui = disable_gui
+        self.initialized = False
+
+        # init dict of explanations
+        self.explanations: dict[str, pd.DataFrame | None] = {
+            exp: None for exp in self.available_exp
+        }
+
+        # init selected explanation: the imported one when provided, else SHAP
+        if X_exp is not None:
+            self.explanations[self.available_exp[0]] = X_exp
+            self.current_exp = self.available_exp[0]
+        else:
+            self.current_exp = self.available_exp[1]
+
+        self.build_widget()
+
+    def build_widget(self):
+        """
+        build the row holding the explanation select menu and its progress bar
+        """
+        self.widget = v.Row(children=[
+            v.Select(  # Select of explanation method
+                label="Explanation method",
+                items=[
+                    {"text": "Imported", "disabled": True},
+                    {"text": "SHAP", "disabled": True},
+                    {"text": "LIME", "disabled": True},
+                ],
+                class_="ml-2 mr-2",
+                style_="width: 15%",
+                disabled=False,
+            ),
+            v.ProgressCircular(  # exp menu progress bar
+                class_="ml-2 mr-2 mt-2",
+                indeterminate=False,
+                color="grey",
+                width="6",
+                size="35",
+            )
+        ])
+        # refresh select menu
+        self.update_explanation_select()
+        self.get_explanation_select().on_event("change", self.explanation_select_changed)
+        # reset the progress bar
+        self.get_progress_bar().reset_progress_bar()
+
+    def initialize(self, progress_callback):
+        """
+        initialize class (compute explanation if necessary)
+
+        Parameters
+        ----------
+        progress_callback : callback to notify progress
+        """
+        if not self.has_user_exp:
+            # compute explanation if not provided
+            self.compute_explanation(config.DEFAULT_EXPLANATION_METHOD, progress_callback)
+        # ensure progress is at 100%
+        progress_callback(100, 0)
+        self.initialized = True
+
+    @property
+    def current_exp_df(self) -> pd.DataFrame:
+        """
+        explanation values of the currently selected explanation
+        """
+        return self.explanations[self.current_exp]
+
+    @property
+    def has_user_exp(self) -> bool:
+        """
+        has the user provided an explanation
+        """
+        return self.explanations[self.available_exp[0]] is not None
+
+    def update_explanation_select(self):
+        """
+        refresh explanation select menu
+
+        'Imported' is disabled when no explanation was provided; the other
+        entries get a ' (compute)' suffix until they have been computed
+        """
+        exp_values = []
+        for exp in self.available_exp:
+            if exp == 'Imported':
+                exp_values.append({
+                    "text": exp,
+                    'disabled': self.explanations[exp] is None
+                })
+            else:
+                exp_values.append({
+                    "text": exp + (' (compute)' if self.explanations[exp] is None else ''),
+                    'disabled': False
+                })
+        self.get_explanation_select().items = exp_values
+        self.get_explanation_select().v_model = self.current_exp
+
+    def get_progress_bar(self):
+        """
+        returns a new ProgressBar wrapping the progress widget
+        """
+        progress_widget = self.widget.children[1]
+        progress_bar = ProgressBar(progress_widget)
+        return progress_bar
+
+    def get_explanation_select(self):
+        """
+        returns the explanation select menu
+        """
+        return self.widget.children[0]
+
+    def compute_explanation(self, explanation_method: int, progress_bar: callable):
+        """
+        compute explanation and refresh widgets (select the new explanation method)
+
+        Parameters
+        ----------
+        explanation_method: desired explanation
+        progress_bar : progress bar to notify progress to
+
+        Raises
+        ------
+        AssertionError if the computed explanations do not have the columns of X
+        """
+        self.disable_gui(True)
+        try:
+            self.current_exp = self.available_exp[explanation_method]
+            # compute the explanation values for the selected method
+            x_exp = compute_explanations(self.X, self.model, explanation_method, self.task_type, progress_bar)
+            pd.testing.assert_index_equal(x_exp.columns, self.X.columns)
+
+            # update explanation
+            self.explanations[self.current_exp] = x_exp
+            # refresh front
+            self.update_explanation_select()
+        finally:
+            # always give the GUI back, even when the computation fails
+            self.disable_gui(False)
+
+    def disable_selection(self, is_disabled: bool):
+        """
+        disable widgets
+
+        Parameters
+        ----------
+        is_disabled = should disable ?
+        """
+        self.get_explanation_select().disabled = is_disabled
+
+    def explanation_select_changed(self, widget, event, data):
+        """
+        triggered on selection of new explanation by user
+        the explanation is computed first when it is not available yet
+
+        Parameters
+        ----------
+        widget
+        event
+        data: explanation name, possibly with the ' (compute)' suffix
+
+        Raises
+        ------
+        KeyError if data is not a string or not a known explanation name
+        """
+        if not isinstance(data, str):
+            raise KeyError('invalid explanation')
+        # strip the ' (compute)' suffix added by update_explanation_select
+        data = data.replace(' ', '').replace('(compute)', '')
+        self.current_exp = data
+
+        if self.explanations[self.current_exp] is None:
+            exp_method = ExplanationMethod.explain_method_as_int(self.current_exp)
+            progress_bar = self.get_progress_bar()
+            self.compute_explanation(exp_method, progress_bar)
+
+        self.on_change_callback(self.current_exp_df)