PyPI - arize - Versions diffs - 8.0.0a9__py3-none-any.whl → 8.0.0a10__py3-none-any.whl - Mend

arize 8.0.0a9-py3-none-any.whl → 8.0.0a10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

arize/models/client.py CHANGED Viewed

@@ -80,6 +80,11 @@ _BATCH_DEPS = (
     "tqdm",
 )
 _BATCH_EXTRA = "ml-batch"
+_MIMIC_DEPS = (
+    "interpret_community.mimic",
+    "sklearn.preprocessing",
+)
+_MIMIC_EXTRA = "mimic-explainer"
 class MLModelsClient:
@@ -116,7 +121,6 @@ class MLModelsClient:
         timeout: float | None = None,
     ) -> cf.Future:
         require(_STREAM_EXTRA, _STREAM_DEPS)
         from arize._generated.protocol.rec import public_pb2 as pb2
         from arize.utils.proto import (
             get_pb_dictionary,
@@ -545,17 +549,10 @@ class MLModelsClient:
             dataframe = dataframe.astype(cat_str_map)
         if surrogate_explainability:
-            logger.debug("Running surrogate_explainability.")
+            require(_MIMIC_EXTRA, _MIMIC_DEPS)
+            from arize.models.surrogate_explainer.mimic import Mimic
-            try:
-                # WARNING: MIMIC EXPLAINER IS NOT DONE
-                from arize.pandas.surrogate_explainer.mimic import Mimic
-            except ImportError:
-                raise ImportError(
-                    "To enable surrogate explainability, "
-                    "the arize module must be installed with the MimicExplainer option: pip "
-                    "install 'arize[MimicExplainer]'."
-                ) from None
+            logger.debug("Running surrogate_explainability.")
             if schema.shap_values_column_names:
                 logger.info(
                     "surrogate_explainability=True has no effect "

arize/models/surrogate_explainer/__init__.py ADDED Viewed

File without changes

arize/models/surrogate_explainer/mimic.py ADDED Viewed

@@ -0,0 +1,164 @@
+from __future__ import annotations
+import random
+import string
+from dataclasses import replace
+from typing import TYPE_CHECKING, Callable, Tuple
+import numpy as np
+import pandas as pd
+from interpret_community.mimic.mimic_explainer import (
+    LGBMExplainableModel,
+    MimicExplainer,
+)
+from sklearn.preprocessing import LabelEncoder
+from arize.types import (
+    CATEGORICAL_MODEL_TYPES,
+    NUMERIC_MODEL_TYPES,
+    ModelTypes,
+)
+if TYPE_CHECKING:
+    from arize.types import Schema
+class Mimic:
+    _testing = False
+    def __init__(self, X: pd.DataFrame, model_func: Callable):
+        self.explainer = MimicExplainer(
+            model_func,
+            X,
+            LGBMExplainableModel,
+            augment_data=False,
+            is_function=True,
+        )
+    def explain(self, X: pd.DataFrame) -> pd.DataFrame:
+        return pd.DataFrame(
+            self.explainer.explain_local(X).local_importance_values,
+            columns=X.columns,
+            index=X.index,
+        )
+    @staticmethod
+    def augment(
+        df: pd.DataFrame, schema: Schema, model_type: ModelTypes
+    ) -> Tuple[pd.DataFrame, Schema]:
+        features = schema.feature_column_names
+        X = df[features]
+        if X.shape[1] == 0:
+            return df, schema
+        if model_type in CATEGORICAL_MODEL_TYPES:
+            if not schema.prediction_score_column_name:
+                raise ValueError(
+                    "To calculate surrogate explainability, "
+                    f"prediction_score_column_name must be specified in schema for {model_type}."
+                )
+            y_col_name = schema.prediction_score_column_name
+            y = df[y_col_name].to_numpy()
+            _min, _max = np.min(y), np.max(y)
+            if not 0 <= _min <= 1 or not 0 <= _max <= 1:
+                raise ValueError(
+                    f"To calculate surrogate explainability for {model_type}, "
+                    f"prediction scores must be between 0 and 1, but current "
+                    f"prediction scores range from {_min} to {_max}."
+                )
+            # model func requires 1 positional argument
+            def model_func(_):  # type: ignore
+                return np.column_stack((1 - y, y))
+        elif model_type in NUMERIC_MODEL_TYPES:
+            y_col_name = schema.prediction_label_column_name
+            if schema.prediction_score_column_name is not None:
+                y_col_name = schema.prediction_score_column_name
+            y = df[y_col_name].to_numpy()
+            _finite_count = np.isfinite(y).sum()
+            if len(y) - _finite_count:
+                raise ValueError(
+                    f"To calculate surrogate explainability for {model_type}, "
+                    f"predictions must not contain NaN or infinite values, but "
+                    f"{len(y) - _finite_count} NaN or infinite value(s) are found in {y_col_name}."
+                )
+            # model func requires 1 positional argument
+            def model_func(_):  # type: ignore
+                return y
+        else:
+            raise ValueError(
+                "Surrogate explainability is not supported for the specified "
+                f"model type {model_type}."
+            )
+        # Column name mapping between features and feature importance values.
+        # This is used to augment the schema.
+        col_map = {
+            ft: f"{''.join(random.choices(string.ascii_letters, k=8))}"
+            for ft in features
+        }
+        aug_schema = replace(schema, shap_values_column_names=col_map)
+        # Limit the total number of "cells" to 20M, unless it results in too few or
+        # too many rows. This is done to keep the runtime low. Records not sampled
+        # have feature importance values set to 0.
+        samp_size = min(
+            len(X), min(100_000, max(1_000, 20_000_000 // X.shape[1]))
+        )
+        if samp_size < len(X):
+            _mask = np.zeros(len(X), dtype=int)
+            _mask[:samp_size] = 1
+            np.random.shuffle(_mask)
+            _mask = _mask.astype(bool)
+            X = X[_mask]
+            y = y[_mask]
+        # Replace all pd.NA values with np.nan values
+        for col in X.columns:
+            if X[col].isna().any():
+                X[col] = X[col].astype(object).where(~X[col].isna(), np.nan)
+        # Apply integer encoding to non-numeric columns.
+        # Currently training and explaining detasets are the same, but
+        # this can be changed in the future. The student model can be
+        # fitted on a much larger dataset since it takes a lot less time.
+        X = pd.concat(
+            [
+                X.select_dtypes(exclude=[object, "string"]),
+                pd.DataFrame(
+                    {
+                        name: LabelEncoder().fit_transform(data)
+                        for name, data in X.select_dtypes(
+                            include=[object, "string"]
+                        ).items()
+                    },
+                    index=X.index,
+                ),
+            ],
+            axis=1,
+        )
+        aug_df = pd.concat(
+            [
+                df,
+                Mimic(X, model_func).explain(X).rename(col_map, axis=1),
+            ],
+            axis=1,
+        )
+        # Fill null with zero so they're not counted as missing records by server
+        if not Mimic._testing:
+            aug_df.fillna({c: 0 for c in col_map.values()}, inplace=True)
+        return (
+            aug_df,
+            aug_schema,
+        )

arize/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "8.0.0a9"
1	+ __version__ = "8.0.0a10"

{arize-8.0.0a9.dist-info → arize-8.0.0a10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0a9
+Version: 8.0.0a10
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -30,6 +30,8 @@ Requires-Dist: numpy>=2.0.0
 Provides-Extra: dev
 Requires-Dist: pytest==8.4.2; extra == 'dev'
 Requires-Dist: ruff==0.13.2; extra == 'dev'
+Provides-Extra: mimic-explainer
+Requires-Dist: interpret-community[mimic]<1,>=0.22.0; extra == 'mimic-explainer'
 Provides-Extra: ml-batch
 Requires-Dist: pandas<3,>=1.0.0; extra == 'ml-batch'
 Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-batch'

{arize-8.0.0a9.dist-info → arize-8.0.0a10.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ arize/client.py,sha256=0LtZU3WeEatGd1QgQsMrJOuI-tFmzM3y1AfO74BLJys,5716
 arize/config.py,sha256=iynVEZhrOPdTNJTQ_KQmwKOPiwL0LfEP8AUIDYW86Xw,5801
 arize/logging.py,sha256=2vwdta2-kR78GeBFGK2vpk51rQ2d06HoKzuARI9qFQk,7317
 arize/types.py,sha256=z1yg5-brmTD4kVHDmmTVkYke53JpusXXeOOpdQw7rYg,69508
-arize/version.py,sha256=BDT_fj8aJ4pvQMjTUrdbEd6vY71GkAguJScCzty97gA,24
+arize/version.py,sha256=Wv8B6KxzS2ThGtkzs_13OkvwSugf5HITHYMQsGk1gjg,25
 arize/_exporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/_exporter/client.py,sha256=eAxJX1sUfdpLrtaQ0ynMTd5jI37JOp9fbl3NWp4WFEA,15216
 arize/_exporter/validation.py,sha256=6ROu5p7uaolxQ93lO_Eiwv9NVw_uyi3E5T--C5Klo5Q,1021
@@ -71,11 +71,13 @@ arize/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 arize/experiments/client.py,sha256=2fDq0fr_h6Knn_9zgDAlAhSUCKUrKozGLOQRTInCr4c,344
 arize/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/models/bounded_executor.py,sha256=o-PJsDAXQdiJ9dc-jzGCHMhT0-QBY9bvl4Ckn1017Eo,1131
-arize/models/client.py,sha256=aYXPv5Pq2Va2_aEEptw6-iD5zDEFV4UJz2bPnXvvIHw,31419
+arize/models/client.py,sha256=ZHxGYmCKP5ZX001qVNQc96QoclP4jvYVkLW11Xfqo2M,31199
 arize/models/stream_validation.py,sha256=PtmqWgRdCxVtTNkHHEHIM1S6ECbYLA1vuQQFBw_t3Lw,7118
 arize/models/batch_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/models/batch_validation/errors.py,sha256=__I8l25zf4kGv6qgiwEm9LzGNgqmMSM8Fb88pBtyMxE,39990
 arize/models/batch_validation/validator.py,sha256=acnGcMt-pETmPJUfYj5tIzIBvmBhWoXoWmDYi_Gkq6Y,146910
+arize/models/surrogate_explainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+arize/models/surrogate_explainer/mimic.py,sha256=MsMfhU9IhQJWm0kK6jpFkcTW6kw5IGJE3Kv94oOzMo0,5517
 arize/spans/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/spans/client.py,sha256=5yODUaSqxH-dLAenjRZBKbpsK7XgewZKwJpXzHWPNf0,47248
 arize/spans/columns.py,sha256=BbB11jF4YHYfjrKbSd1r3K2F0AGA8KULTj1W3e2rwhM,12912
@@ -107,7 +109,7 @@ arize/utils/arrow.py,sha256=4In1gQc0i4Rb8zuwI0w-Hv-10wiItu5opqqGrJ8tSzo,5277
 arize/utils/casting.py,sha256=KUrPUQN6qJEVe39nxbr0T-0GjAJLHjf4xWuzV71QezI,12468
 arize/utils/dataframe.py,sha256=I0FloPgNiqlKga32tMOvTE70598QA8Hhrgf-6zjYMAM,1120
 arize/utils/proto.py,sha256=9vLo53INYjdF78ffjm3E48jFwK6LbPD2FfKei7VaDy8,35477
-arize-8.0.0a9.dist-info/METADATA,sha256=WGsj9jvMjlNY9Xy4c9GpIT6mkJ1551ITCUYEhhQESck,12453
-arize-8.0.0a9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-arize-8.0.0a9.dist-info/licenses/LICENSE.md,sha256=8vLN8Gms62NCBorxIv9MUvuK7myueb6_-dhXHPmm4H0,1479
-arize-8.0.0a9.dist-info/RECORD,,
+arize-8.0.0a10.dist-info/METADATA,sha256=9u9UPm9jOeZp9pxLo9R5mDYvrACrOzbPET51mNyyXQU,12567
+arize-8.0.0a10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+arize-8.0.0a10.dist-info/licenses/LICENSE.md,sha256=8vLN8Gms62NCBorxIv9MUvuK7myueb6_-dhXHPmm4H0,1479
+arize-8.0.0a10.dist-info/RECORD,,

{arize-8.0.0a9.dist-info → arize-8.0.0a10.dist-info}/WHEEL RENAMED Viewed

File without changes

{arize-8.0.0a9.dist-info → arize-8.0.0a10.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

arize 8.0.0a9__py3-none-any.whl → 8.0.0a10__py3-none-any.whl

arize 8.0.0a9-py3-none-any.whl → 8.0.0a10-py3-none-any.whl