PyPI - explainiverse - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

explainiverse 0.2.0 (tar.gz) → 0.2.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

{explainiverse-0.2.0 → explainiverse-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: explainiverse
-Version: 0.2.0
+Version: 0.2.1
 Summary: Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more
 Home-page: https://github.com/jemsbhai/explainiverse
 License: MIT

{explainiverse-0.2.0 → explainiverse-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "explainiverse"
-version = "0.2.0"
+version = "0.2.1"
 description = "Unified, extensible explainability framework supporting LIME, SHAP, Anchors, Counterfactuals, PDP, ALE, SAGE, and more"
 authors = ["Muntaser Syed <jemsbhai@gmail.com>"]
 license = "MIT"

{explainiverse-0.2.0 → explainiverse-0.2.1}/src/explainiverse/__init__.py RENAMED Viewed

@@ -27,7 +27,7 @@ from explainiverse.core.registry import (
 from explainiverse.adapters.sklearn_adapter import SklearnAdapter
 from explainiverse.engine.suite import ExplanationSuite
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 __all__ = [
     # Core

{explainiverse-0.2.0 → explainiverse-0.2.1}/src/explainiverse/core/registry.py RENAMED Viewed

@@ -362,6 +362,7 @@ def _create_default_registry() -> ExplainerRegistry:
     """Create and populate the default global registry."""
     from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
     from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+    from explainiverse.explainers.attribution.treeshap_wrapper import TreeShapExplainer
     from explainiverse.explainers.rule_based.anchors_wrapper import AnchorsExplainer
     from explainiverse.explainers.global_explainers.permutation_importance import PermutationImportanceExplainer
     from explainiverse.explainers.global_explainers.partial_dependence import PartialDependenceExplainer
@@ -409,6 +410,23 @@ def _create_default_registry() -> ExplainerRegistry:
         )
     )
+    # Register TreeSHAP (optimized for tree models)
+    registry.register(
+        name="treeshap",
+        explainer_class=TreeShapExplainer,
+        meta=ExplainerMeta(
+            scope="local",
+            model_types=["tree", "ensemble"],
+            data_types=["tabular"],
+            task_types=["classification", "regression"],
+            description="TreeSHAP - exact SHAP values for tree-based models (RandomForest, XGBoost, etc.)",
+            paper_reference="Lundberg et al., 2018 - 'Consistent Individualized Feature Attribution for Tree Ensembles'",
+            complexity="O(TLD^2) - polynomial in tree depth",
+            requires_training_data=False,
+            supports_batching=True
+        )
+    )
     # Register Anchors
     registry.register(
         name="anchors",

{explainiverse-0.2.0 → explainiverse-0.2.1}/src/explainiverse/explainers/__init__.py RENAMED Viewed

@@ -4,7 +4,8 @@ Explainiverse Explainers - comprehensive XAI method implementations.
 Local Explainers (instance-level):
 - LIME: Local Interpretable Model-agnostic Explanations
-- SHAP: SHapley Additive exPlanations
+- SHAP: SHapley Additive exPlanations (KernelSHAP - model-agnostic)
+- TreeSHAP: Optimized exact SHAP for tree-based models
 - Anchors: High-precision rule-based explanations
 - Counterfactual: Diverse counterfactual explanations
@@ -17,6 +18,7 @@ Global Explainers (model-level):
 from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
 from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+from explainiverse.explainers.attribution.treeshap_wrapper import TreeShapExplainer
 from explainiverse.explainers.rule_based.anchors_wrapper import AnchorsExplainer
 from explainiverse.explainers.counterfactual.dice_wrapper import CounterfactualExplainer
 from explainiverse.explainers.global_explainers.permutation_importance import PermutationImportanceExplainer
@@ -28,6 +30,7 @@ __all__ = [
     # Local explainers
     "LimeExplainer",
     "ShapExplainer",
+    "TreeShapExplainer",
     "AnchorsExplainer",
     "CounterfactualExplainer",
     # Global explainers

{explainiverse-0.2.0 → explainiverse-0.2.1}/src/explainiverse/explainers/attribution/__init__.py RENAMED Viewed

@@ -5,5 +5,6 @@ Attribution-based explainers - feature importance explanations.
 from explainiverse.explainers.attribution.lime_wrapper import LimeExplainer
 from explainiverse.explainers.attribution.shap_wrapper import ShapExplainer
+from explainiverse.explainers.attribution.treeshap_wrapper import TreeShapExplainer
-__all__ = ["LimeExplainer", "ShapExplainer"]
+__all__ = ["LimeExplainer", "ShapExplainer", "TreeShapExplainer"]

explainiverse-0.2.1/src/explainiverse/explainers/attribution/treeshap_wrapper.py ADDED Viewed

@@ -0,0 +1,434 @@
+# src/explainiverse/explainers/attribution/treeshap_wrapper.py
+"""
+TreeSHAP Explainer - Optimized SHAP for Tree-based Models.
+TreeSHAP computes exact SHAP values in polynomial time for tree-based models,
+making it significantly faster than KernelSHAP while providing exact (not
+approximate) Shapley values.
+Reference:
+    Lundberg, S.M., Erion, G.G., & Lee, S.I. (2018). Consistent Individualized
+    Feature Attribution for Tree Ensembles. arXiv:1802.03888.
+Supported Models:
+    - scikit-learn: RandomForest, GradientBoosting, DecisionTree, ExtraTrees
+    - XGBoost: XGBClassifier, XGBRegressor
+    - LightGBM: LGBMClassifier, LGBMRegressor (if installed)
+    - CatBoost: CatBoostClassifier, CatBoostRegressor (if installed)
+"""
+import numpy as np
+import shap
+from typing import List, Optional, Union
+from explainiverse.core.explainer import BaseExplainer
+from explainiverse.core.explanation import Explanation
+# Class names of the tree-based estimators that TreeSHAP accepts.
+# Order is significant: the first six entries are quoted in the error
+# message raised by TreeShapExplainer.__init__.
+SUPPORTED_TREE_MODELS = (
+    # scikit-learn forests, boosting and single trees
+    ("RandomForestClassifier", "RandomForestRegressor")
+    + ("GradientBoostingClassifier", "GradientBoostingRegressor")
+    + ("DecisionTreeClassifier", "DecisionTreeRegressor")
+    + ("ExtraTreesClassifier", "ExtraTreesRegressor")
+    # XGBoost
+    + ("XGBClassifier", "XGBRegressor", "XGBRFClassifier", "XGBRFRegressor")
+    # LightGBM
+    + ("LGBMClassifier", "LGBMRegressor")
+    # CatBoost
+    + ("CatBoostClassifier", "CatBoostRegressor")
+    # scikit-learn histogram-based boosting
+    + ("HistGradientBoostingClassifier", "HistGradientBoostingRegressor")
+)
+def _is_tree_model(model) -> bool:
+    """
+    Check if a model is a supported tree-based model.
+    The model's class and every class it inherits from are compared by name
+    against SUPPORTED_TREE_MODELS, so a user-defined subclass of a supported
+    estimator (e.g. ``class MyForest(RandomForestClassifier)``) is accepted
+    as well as the estimator itself.
+    Args:
+        model: Any object; typically an already unwrapped estimator.
+    Returns:
+        True if the name of the model's class, or of any of its base classes,
+        is listed in SUPPORTED_TREE_MODELS; False otherwise.
+    """
+    # Walk the MRO instead of looking only at type(model).__name__, which
+    # would reject thin subclasses of otherwise supported estimators.
+    return any(klass.__name__ in SUPPORTED_TREE_MODELS for klass in type(model).__mro__)
+def _get_raw_model(model):
+    """
+    Return the object that should be handed to SHAP's TreeExplainer.
+    Anything exposing a ``model`` attribute is treated as an adapter and is
+    unwrapped one level; every other object is returned unchanged.
+    """
+    # getattr's default covers the "not an adapter" case in a single lookup.
+    return getattr(model, 'model', model)
+class TreeShapExplainer(BaseExplainer):
+    """
+    TreeSHAP explainer for tree-based models.
+    Uses SHAP's TreeExplainer to compute exact SHAP values in polynomial time.
+    This is significantly faster than KernelSHAP for supported tree models
+    and provides exact Shapley values rather than approximations.
+    Key advantages over KernelSHAP:
+    - Exact SHAP values (not approximations)
+    - O(TLD²) complexity vs O(TL2^M) for KernelSHAP
+    - Can compute interaction values
+    - No background data sampling needed
+    Attributes:
+        model: The object passed to the constructor (possibly an adapter);
+               presumably stored by BaseExplainer.__init__ - confirm there.
+        raw_model: The unwrapped tree-based model (sklearn, XGBoost, LightGBM,
+                   or CatBoost) that is handed to SHAP.
+        feature_names: List of feature names
+        class_names: List of class names for classification, or None
+        explainer: The underlying SHAP TreeExplainer
+        task: "classification" or "regression"
+        model_output: The model_output option given at construction
+        feature_perturbation: Feature perturbation mode
+                              ("tree_path_dependent" or "interventional")
+        background_data: Background dataset given at construction, or None
+    """
+    def __init__(
+        self,
+        model,
+        feature_names: List[str],
+        class_names: Optional[List[str]] = None,
+        background_data: Optional[np.ndarray] = None,
+        task: str = "classification",
+        model_output: str = "auto",
+        feature_perturbation: str = "tree_path_dependent"
+    ):
+        """
+        Initialize the TreeSHAP explainer.
+        Args:
+            model: A tree-based model or adapter containing one.
+                   Supported: RandomForest, GradientBoosting, XGBoost,
+                   LightGBM, CatBoost, DecisionTree, ExtraTrees.
+            feature_names: List of feature names.
+            class_names: List of class names (for classification). Any
+                         sequence is accepted, including a numpy array;
+                         None or an empty sequence is stored as None.
+            background_data: Optional background dataset. It is only passed
+                            to SHAP when feature_perturbation is
+                            "interventional".
+            task: "classification" or "regression".
+            model_output: How to transform model output. Options:
+                         - "auto": Automatically detect (nothing is passed to SHAP)
+                         - "raw": Raw model output
+                         - "probability": Probability output (classification)
+                         - "log_loss": Log loss output
+            feature_perturbation: Method for handling feature perturbation:
+                                 - "tree_path_dependent": Fast, uses tree structure
+                                 - "interventional": Slower, requires background data.
+                                   Without background_data a UserWarning is
+                                   emitted and "tree_path_dependent" is used.
+        Raises:
+            ValueError: If the (unwrapped) model is not a supported tree model.
+        """
+        import warnings
+        # Extract raw model if wrapped in adapter
+        raw_model = _get_raw_model(model)
+        # Validate that it's a supported tree model
+        if not _is_tree_model(raw_model):
+            model_type = type(raw_model).__name__
+            raise ValueError(
+                f"TreeSHAP requires a tree-based model. Got {model_type}. "
+                f"Supported models: {', '.join(SUPPORTED_TREE_MODELS[:6])}..."
+            )
+        super().__init__(model)
+        self.raw_model = raw_model
+        self.feature_names = list(feature_names)
+        # Explicit None/length test: a bare ``if class_names`` raises
+        # "truth value of an array is ambiguous" for numpy arrays.
+        has_class_names = class_names is not None and len(class_names) > 0
+        self.class_names = list(class_names) if has_class_names else None
+        self.task = task
+        self.model_output = model_output
+        # Create TreeExplainer
+        explainer_kwargs = {}
+        if feature_perturbation == "interventional":
+            if background_data is not None:
+                explainer_kwargs["data"] = background_data
+                explainer_kwargs["feature_perturbation"] = "interventional"
+            else:
+                # Nothing is forwarded to SHAP in this case, so the explainer
+                # is not interventional. Say so instead of silently recording
+                # a mode that is not in effect.
+                warnings.warn(
+                    "feature_perturbation='interventional' requires background_data; "
+                    "falling back to 'tree_path_dependent'.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                feature_perturbation = "tree_path_dependent"
+        self.feature_perturbation = feature_perturbation
+        if model_output != "auto":
+            explainer_kwargs["model_output"] = model_output
+        self.explainer = shap.TreeExplainer(raw_model, **explainer_kwargs)
+        self.background_data = background_data
+    def explain(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None,
+        check_additivity: bool = False
+    ) -> Explanation:
+        """
+        Generate TreeSHAP explanation for a single instance.
+        Both multi-output layouts that SHAP may return are handled: a list
+        with one (n_samples, n_features) array per class, and a single
+        (n_samples, n_features, n_classes) array.
+        Args:
+            instance: 1D numpy array of input features.
+            target_class: For multi-class, index of the class to explain.
+                         If None, the predicted class is used; the predicted
+                         label is translated to an index through the model's
+                         ``classes_`` when available. Indices beyond the last
+                         class are clamped to the last class.
+            check_additivity: Whether to verify SHAP values sum to
+                             prediction - expected_value.
+        Returns:
+            Explanation object whose explanation_data holds
+            "feature_attributions", "base_value", "shap_values_raw" and, for
+            multi-class output, "all_class_shap_values".
+        """
+        def _label(idx: int) -> str:
+            # Fall back to a synthetic name when no class name covers idx.
+            if self.class_names and idx < len(self.class_names):
+                return self.class_names[idx]
+            return f"class_{idx}"
+        instance = np.array(instance).flatten()
+        instance_2d = instance.reshape(1, -1)
+        # Compute SHAP values
+        shap_values = self.explainer.shap_values(
+            instance_2d,
+            check_additivity=check_additivity
+        )
+        # Normalise every multi-output layout to a list of per-class vectors.
+        if isinstance(shap_values, list):
+            per_class = [np.asarray(values)[0] for values in shap_values]
+        else:
+            shap_values = np.asarray(shap_values)
+            if shap_values.ndim == 3:
+                # (n_samples, n_features, n_classes): slice, never flatten,
+                # otherwise values of different classes get interleaved.
+                per_class = [shap_values[0, :, k] for k in range(shap_values.shape[2])]
+            else:
+                per_class = None
+        if per_class is not None:
+            n_classes = len(per_class)
+            if target_class is None:
+                target_class = 0
+                if hasattr(self.raw_model, 'predict'):
+                    pred = self.raw_model.predict(instance_2d)[0]
+                    known = getattr(self.raw_model, 'classes_', None)
+                    try:
+                        # predict() yields a label, not an index: look the
+                        # label up in classes_ before falling back to int().
+                        hits = [] if known is None else np.flatnonzero(np.asarray(known) == pred)
+                        target_class = int(hits[0]) if len(hits) > 0 else int(pred)
+                    except (TypeError, ValueError):
+                        target_class = 0
+            # Ensure target_class is valid
+            target_class = min(target_class, n_classes - 1)
+            class_shap = per_class[target_class]
+            label_name = _label(target_class)
+            # Store all class SHAP values for reference
+            all_class_shap = {
+                _label(i): per_class[i].tolist()
+                for i in range(n_classes)
+            }
+        else:
+            # Single output: binary classification or regression
+            class_shap = shap_values[0] if shap_values.ndim > 1 else shap_values.flatten()
+            label_name = self.class_names[1] if self.class_names and len(self.class_names) > 1 else "output"
+            all_class_shap = None
+        # Build attributions dict
+        flat_shap = np.array(class_shap).flatten()
+        attributions = {
+            fname: float(flat_shap[i])
+            for i, fname in enumerate(self.feature_names)
+        }
+        # Get expected value (base value)
+        expected_value = self.explainer.expected_value
+        if np.ndim(expected_value) > 0:
+            per_class_base = np.asarray(expected_value).ravel()
+            if target_class is not None and target_class < per_class_base.size:
+                base_value = float(per_class_base[target_class])
+            else:
+                base_value = float(per_class_base[0])
+        else:
+            base_value = float(expected_value)
+        explanation_data = {
+            "feature_attributions": attributions,
+            "base_value": base_value,
+            "shap_values_raw": flat_shap.tolist(),
+        }
+        if all_class_shap is not None:
+            explanation_data["all_class_shap_values"] = all_class_shap
+        return Explanation(
+            explainer_name="TreeSHAP",
+            target_class=label_name,
+            explanation_data=explanation_data
+        )
+    def explain_batch(
+        self,
+        X: np.ndarray,
+        target_class: Optional[int] = None,
+        check_additivity: bool = False
+    ) -> List[Explanation]:
+        """
+        Generate TreeSHAP explanations for multiple instances efficiently.
+        SHAP values are computed for the whole batch in one call. Both
+        multi-output layouts are handled: a list with one
+        (n_samples, n_features) array per class, and a single
+        (n_samples, n_features, n_classes) array.
+        Args:
+            X: 2D numpy array of instances (n_samples, n_features). A 1D
+               array is treated as a single instance.
+            target_class: For multi-class, index of the class to explain.
+                         Defaults to class 0 (not the predicted class) and is
+                         clamped to the last class.
+            check_additivity: Whether to verify SHAP value additivity.
+        Returns:
+            List of Explanation objects, one per row of X.
+        """
+        X = np.array(X)
+        if X.ndim == 1:
+            X = X.reshape(1, -1)
+        # Compute SHAP values for all instances at once
+        shap_values = self.explainer.shap_values(X, check_additivity=check_additivity)
+        requested = target_class if target_class is not None else 0
+        # Class, label and base value do not depend on the row, so they are
+        # resolved once here instead of on every loop iteration.
+        if isinstance(shap_values, list):
+            tc = min(requested, len(shap_values) - 1)
+            rows = np.asarray(shap_values[tc])
+        else:
+            shap_values = np.asarray(shap_values)
+            if shap_values.ndim == 3:
+                # (n_samples, n_features, n_classes): pick the class slice
+                # rather than flattening features and classes together.
+                tc = min(requested, shap_values.shape[2] - 1)
+                rows = shap_values[:, :, tc]
+            else:
+                tc = None
+                rows = shap_values
+        if tc is None:
+            label_name = self.class_names[1] if self.class_names and len(self.class_names) > 1 else "output"
+        elif self.class_names and tc < len(self.class_names):
+            label_name = self.class_names[tc]
+        else:
+            label_name = f"class_{tc}"
+        expected_value = self.explainer.expected_value
+        if np.ndim(expected_value) > 0:
+            per_class_base = np.asarray(expected_value).ravel()
+            base_value = float(per_class_base[min(requested, per_class_base.size - 1)])
+        else:
+            base_value = float(expected_value)
+        explanations = []
+        for i in range(X.shape[0]):
+            flat_shap = np.array(rows[i]).flatten()
+            attributions = {
+                fname: float(flat_shap[j])
+                for j, fname in enumerate(self.feature_names)
+            }
+            explanations.append(Explanation(
+                explainer_name="TreeSHAP",
+                target_class=label_name,
+                explanation_data={
+                    "feature_attributions": attributions,
+                    "base_value": base_value,
+                    "shap_values_raw": flat_shap.tolist(),
+                }
+            ))
+        return explanations
+    def explain_interactions(
+        self,
+        instance: np.ndarray,
+        target_class: Optional[int] = None
+    ) -> Explanation:
+        """
+        Compute SHAP interaction values for an instance.
+        Interaction values show how pairs of features jointly contribute
+        to the prediction. The diagonal contains main effects.
+        Args:
+            instance: 1D numpy array of input features.
+            target_class: For multi-class, index of the class to explain.
+                         If None, the predicted class is used; the predicted
+                         label is translated to an index through the model's
+                         ``classes_`` when available. Ignored for
+                         single-output models.
+        Returns:
+            Explanation object whose explanation_data holds
+            "feature_attributions" (diagonal main effects), "interactions"
+            (pairwise totals sorted by absolute value), "interaction_matrix"
+            and "feature_names".
+        """
+        instance = np.array(instance).flatten()
+        instance_2d = instance.reshape(1, -1)
+        # Compute interaction values
+        interaction_values = self.explainer.shap_interaction_values(instance_2d)
+        # Normalise multi-class layouts to a list of per-class matrices.
+        if isinstance(interaction_values, list):
+            # List of (n_samples, n_features, n_features) arrays, one per class
+            per_class = [np.asarray(values)[0] for values in interaction_values]
+        else:
+            interaction_values = np.asarray(interaction_values)
+            if interaction_values.ndim == 4:
+                # Shape: (n_samples, n_features, n_features, n_classes)
+                per_class = [
+                    interaction_values[0, :, :, k]
+                    for k in range(interaction_values.shape[3])
+                ]
+            else:
+                per_class = None
+        if per_class is not None:
+            # The class index is resolved only here, where it is needed, so
+            # single-output models never trip over non-numeric predictions.
+            if target_class is None:
+                target_class = 0
+                if hasattr(self.raw_model, 'predict'):
+                    pred = self.raw_model.predict(instance_2d)[0]
+                    known = getattr(self.raw_model, 'classes_', None)
+                    try:
+                        # predict() yields a label, not an index.
+                        hits = [] if known is None else np.flatnonzero(np.asarray(known) == pred)
+                        target_class = int(hits[0]) if len(hits) > 0 else int(pred)
+                    except (TypeError, ValueError):
+                        target_class = 0
+            tc = min(target_class, len(per_class) - 1)
+            interactions = np.array(per_class[tc])
+            if self.class_names and tc < len(self.class_names):
+                label_name = self.class_names[tc]
+            else:
+                label_name = f"class_{tc}"
+        else:
+            # Binary or regression: (n_samples, n_features, n_features)
+            interactions = np.array(interaction_values[0])
+            label_name = self.class_names[1] if self.class_names and len(self.class_names) > 1 else "output"
+        # Ensure interactions is 2D (n_features x n_features)
+        if interactions.ndim == 3:
+            # A trailing axis is still present: keep its first slice
+            interactions = interactions[:, :, 0]
+        # Build interaction dict with feature name pairs
+        interaction_dict = {}
+        main_effects = {}
+        for i, fname_i in enumerate(self.feature_names):
+            main_effects[fname_i] = float(interactions[i, i])
+            for j in range(i + 1, len(self.feature_names)):
+                fname_j = self.feature_names[j]
+                # The effect of a pair is split across [i, j] and [j, i];
+                # report their sum as the total pairwise interaction.
+                interaction_dict[f"{fname_i} x {fname_j}"] = (
+                    float(interactions[i, j]) + float(interactions[j, i])
+                )
+        # Sort interactions by absolute value
+        sorted_interactions = dict(sorted(
+            interaction_dict.items(),
+            key=lambda item: abs(item[1]),
+            reverse=True
+        ))
+        return Explanation(
+            explainer_name="TreeSHAP_Interactions",
+            target_class=label_name,
+            explanation_data={
+                "feature_attributions": main_effects,
+                "interactions": sorted_interactions,
+                "interaction_matrix": interactions.tolist(),
+                # Copy so callers cannot mutate the explainer's own list
+                "feature_names": list(self.feature_names)
+            }
+        )
+    def get_expected_value(self, target_class: Optional[int] = None) -> float:
+        """
+        Return the expected (base) value reported by the SHAP explainer.
+        Args:
+            target_class: Index of the class whose base value is wanted when
+                          the explainer reports one value per class. None
+                          selects index 0; an index past the end is clamped
+                          to the last entry.
+        Returns:
+            The selected expected value as a float.
+        """
+        base = self.explainer.expected_value
+        # A scalar base value needs no class selection.
+        if not isinstance(base, (list, np.ndarray)):
+            return float(base)
+        wanted = 0 if target_class is None else target_class
+        last = len(base) - 1
+        return float(base[min(wanted, last)])