PyPI - explainiverse - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

explainiverse 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

explainiverse/__init__.py +5 -4
explainiverse/adapters/pytorch_adapter.py +88 -25
explainiverse/core/explanation.py +165 -10
explainiverse/core/registry.py +18 -0
explainiverse/engine/suite.py +187 -78
explainiverse/evaluation/metrics.py +189 -108
explainiverse/explainers/attribution/lime_wrapper.py +90 -7
explainiverse/explainers/attribution/shap_wrapper.py +104 -8
explainiverse/explainers/gradient/__init__.py +3 -0
explainiverse/explainers/gradient/integrated_gradients.py +189 -76
explainiverse/explainers/gradient/lrp.py +1206 -0
{explainiverse-0.7.0.dist-info → explainiverse-0.8.0.dist-info}/METADATA +76 -13
{explainiverse-0.7.0.dist-info → explainiverse-0.8.0.dist-info}/RECORD +15 -14
{explainiverse-0.7.0.dist-info → explainiverse-0.8.0.dist-info}/LICENSE +0 -0
{explainiverse-0.7.0.dist-info → explainiverse-0.8.0.dist-info}/WHEEL +0 -0

explainiverse/__init__.py CHANGED Viewed

@@ -2,9 +2,10 @@
 """
 Explainiverse - A unified, extensible explainability framework.
-Supports multiple XAI methods including LIME, SHAP, TreeSHAP, Anchors,
-Counterfactuals, Permutation Importance, PDP, ALE, and SAGE through a
-consistent interface.
+Supports 18 state-of-the-art XAI methods including LIME, SHAP, TreeSHAP,
+Integrated Gradients, DeepLIFT, DeepSHAP, LRP, GradCAM, TCAV, Anchors,
+Counterfactuals, Permutation Importance, PDP, ALE, SAGE, and ProtoDash
+through a consistent interface.
 Quick Start:
     from explainiverse import default_registry
@@ -33,7 +34,7 @@ from explainiverse.adapters.sklearn_adapter import SklearnAdapter
 from explainiverse.adapters import TORCH_AVAILABLE
 from explainiverse.engine.suite import ExplanationSuite
-__version__ = "0.7.0"
+__version__ = "0.8.0"
 __all__ = [
     # Core

explainiverse/adapters/pytorch_adapter.py CHANGED Viewed

@@ -25,7 +25,7 @@ Example:
 """
 import numpy as np
-from typing import List, Optional, Union, Callable
+from typing import List, Optional, Union, Tuple
 from .base_adapter import BaseModelAdapter
@@ -57,6 +57,11 @@ class PyTorchAdapter(BaseModelAdapter):
     explainability methods. Handles device management, tensor/numpy
     conversions, and supports both classification and regression tasks.
+    Supports:
+        - Multi-class classification (output shape: [batch, n_classes])
+        - Binary classification (output shape: [batch, 1] or [batch])
+        - Regression (output shape: [batch, n_outputs] or [batch])
     Attributes:
         model: The PyTorch model (nn.Module)
         task: "classification" or "regression"
@@ -150,11 +155,27 @@ class PyTorchAdapter(BaseModelAdapter):
     def _apply_activation(self, output: "torch.Tensor") -> "torch.Tensor":
         """Apply output activation function."""
         if self.output_activation == "softmax":
+            # Handle different output shapes
+            if output.dim() == 1 or (output.dim() == 2 and output.shape[1] == 1):
+                # Binary: apply sigmoid instead of softmax
+                return torch.sigmoid(output)
             return torch.softmax(output, dim=-1)
         elif self.output_activation == "sigmoid":
             return torch.sigmoid(output)
         return output
+    def _normalize_output_shape(self, output: "torch.Tensor") -> "torch.Tensor":
+        """
+        Normalize output to consistent 2D shape (batch, outputs).
+        Handles:
+            - (batch,) -> (batch, 1)
+            - (batch, n) -> (batch, n)
+        """
+        if output.dim() == 1:
+            return output.unsqueeze(-1)
+        return output
     def predict(self, data: np.ndarray) -> np.ndarray:
         """
         Generate predictions for input data.
@@ -183,16 +204,66 @@ class PyTorchAdapter(BaseModelAdapter):
                 tensor_batch = self._to_tensor(batch)
                 output = self.model(tensor_batch)
+                output = self._normalize_output_shape(output)
                 output = self._apply_activation(output)
                 outputs.append(self._to_numpy(output))
         return np.vstack(outputs)
+    def _get_target_scores(
+        self,
+        output: "torch.Tensor",
+        target_class: Optional[Union[int, "torch.Tensor"]] = None
+    ) -> "torch.Tensor":
+        """
+        Extract target scores for gradient computation.
+        Handles both multi-class and binary classification outputs.
+        Args:
+            output: Raw model output (logits)
+            target_class: Target class index or tensor of indices
+        Returns:
+            Target scores tensor for backpropagation
+        """
+        batch_size = output.shape[0]
+        # Normalize to 2D
+        if output.dim() == 1:
+            output = output.unsqueeze(-1)
+        n_outputs = output.shape[1]
+        if self.task == "classification":
+            if n_outputs == 1:
+                # Binary classification with single logit
+                # Score is the logit itself (positive class score)
+                return output.squeeze(-1)
+            else:
+                # Multi-class classification
+                if target_class is None:
+                    target_class = output.argmax(dim=-1)
+                elif isinstance(target_class, int):
+                    target_class = torch.tensor(
+                        [target_class] * batch_size,
+                        device=self.device
+                    )
+                # Gather scores for target class
+                return output.gather(1, target_class.view(-1, 1)).squeeze(-1)
+        else:
+            # Regression: use first output or sum of outputs
+            if n_outputs == 1:
+                return output.squeeze(-1)
+            else:
+                return output.sum(dim=-1)
     def predict_with_gradients(
         self,
         data: np.ndarray,
         target_class: Optional[int] = None
-    ) -> tuple:
+    ) -> Tuple[np.ndarray, np.ndarray]:
         """
         Generate predictions and compute gradients w.r.t. inputs.
@@ -203,11 +274,17 @@ class PyTorchAdapter(BaseModelAdapter):
             data: Input data as numpy array.
             target_class: Class index for gradient computation.
                          If None, uses the predicted class.
+                         For binary classification with single output,
+                         this is ignored (gradient w.r.t. the single logit).
         Returns:
             Tuple of (predictions, gradients) as numpy arrays.
+            - predictions: (batch, n_classes) probabilities
+            - gradients: same shape as input data
         """
         data = np.array(data)
+        original_shape = data.shape
         if data.ndim == 1:
             data = data.reshape(1, -1)
@@ -217,20 +294,13 @@ class PyTorchAdapter(BaseModelAdapter):
         # Forward pass
         output = self.model(tensor_data)
-        activated_output = self._apply_activation(output)
-        # Determine target for gradient
-        if self.task == "classification":
-            if target_class is None:
-                target_class = output.argmax(dim=-1)
-            elif isinstance(target_class, int):
-                target_class = torch.tensor([target_class] * data.shape[0], device=self.device)
-            # Select target class scores for gradient
-            target_scores = output.gather(1, target_class.view(-1, 1)).squeeze()
-        else:
-            # Regression: gradient w.r.t. output
-            target_scores = output.squeeze()
+        # Get activated output for return
+        output_normalized = self._normalize_output_shape(output)
+        activated_output = self._apply_activation(output_normalized)
+        # Get target scores for gradient computation
+        target_scores = self._get_target_scores(output, target_class)
         # Backward pass
         if target_scores.dim() == 0:
@@ -295,7 +365,7 @@ class PyTorchAdapter(BaseModelAdapter):
         data: np.ndarray,
         layer_name: str,
         target_class: Optional[int] = None
-    ) -> tuple:
+    ) -> Tuple[np.ndarray, np.ndarray]:
         """
         Get gradients of output w.r.t. a specific layer's activations.
@@ -339,15 +409,8 @@ class PyTorchAdapter(BaseModelAdapter):
             output = self.model(tensor_data)
-            if self.task == "classification":
-                if target_class is None:
-                    target_class = output.argmax(dim=-1)
-                elif isinstance(target_class, int):
-                    target_class = torch.tensor([target_class] * data.shape[0], device=self.device)
-                target_scores = output.gather(1, target_class.view(-1, 1)).squeeze()
-            else:
-                target_scores = output.squeeze()
+            # Get target scores using the new method
+            target_scores = self._get_target_scores(output, target_class)
             if target_scores.dim() == 0:
                 target_scores.backward()

explainiverse/core/explanation.py CHANGED Viewed

@@ -1,24 +1,179 @@
 # src/explainiverse/core/explanation.py
+"""
+Unified container for explanation results.
+The Explanation class provides a standardized format for all explainer outputs,
+enabling consistent handling across different explanation methods.
+"""
+from typing import Dict, List, Optional, Any
 class Explanation:
     """
     Unified container for explanation results.
+    Attributes:
+        explainer_name: Name of the explainer that generated this explanation
+        target_class: The class/output being explained
+        explanation_data: Dictionary containing explanation details
+            (e.g., feature_attributions, heatmaps, rules)
+        feature_names: Optional list of feature names for index resolution
+        metadata: Optional additional metadata about the explanation
+    Example:
+        >>> explanation = Explanation(
+        ...     explainer_name="LIME",
+        ...     target_class="cat",
+        ...     explanation_data={"feature_attributions": {"fur": 0.8, "whiskers": 0.6}},
+        ...     feature_names=["fur", "whiskers", "tail", "ears"]
+        ... )
+        >>> print(explanation.get_top_features(k=2))
+        [('fur', 0.8), ('whiskers', 0.6)]
     """
-    def __init__(self, explainer_name: str, target_class: str, explanation_data: dict):
+    def __init__(
+        self,
+        explainer_name: str,
+        target_class: str,
+        explanation_data: Dict[str, Any],
+        feature_names: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ):
+        """
+        Initialize an Explanation object.
+        Args:
+            explainer_name: Name of the explainer (e.g., "LIME", "SHAP")
+            target_class: The target class or output being explained
+            explanation_data: Dictionary containing the explanation details.
+                Common keys include:
+                - "feature_attributions": Dict[str, float] mapping feature names to importance
+                - "attributions_raw": List[float] of raw attribution values
+                - "heatmap": np.ndarray for image explanations
+                - "rules": List of rule strings for rule-based explanations
+            feature_names: Optional list of feature names. If provided, enables
+                index-based lookup in evaluation metrics.
+            metadata: Optional additional metadata (e.g., computation time, parameters)
+        """
         self.explainer_name = explainer_name
         self.target_class = target_class
-        self.explanation_data = explanation_data  # e.g., {'feature_attributions': {...}}
+        self.explanation_data = explanation_data
+        self.feature_names = list(feature_names) if feature_names is not None else None
+        self.metadata = metadata or {}
     def __repr__(self):
-        return (f"Explanation(explainer='{self.explainer_name}', "
-                f"target='{self.target_class}', "
-                f"keys={list(self.explanation_data.keys())})")
+        n_features = len(self.feature_names) if self.feature_names else "N/A"
+        return (
+            f"Explanation(explainer='{self.explainer_name}', "
+            f"target='{self.target_class}', "
+            f"keys={list(self.explanation_data.keys())}, "
+            f"n_features={n_features})"
+        )
+    def get_attributions(self) -> Optional[Dict[str, float]]:
+        """
+        Get feature attributions if available.
+        Returns:
+            Dictionary mapping feature names to attribution values,
+            or None if not available.
+        """
+        return self.explanation_data.get("feature_attributions")
+    def get_top_features(self, k: int = 5, absolute: bool = True) -> List[tuple]:
+        """
+        Get the top-k most important features.
+        Args:
+            k: Number of top features to return
+            absolute: If True, rank by absolute value of attribution
+        Returns:
+            List of (feature_name, attribution_value) tuples sorted by importance
+        """
+        attributions = self.get_attributions()
+        if not attributions:
+            return []
+        if absolute:
+            sorted_items = sorted(
+                attributions.items(),
+                key=lambda x: abs(x[1]),
+                reverse=True
+            )
+        else:
+            sorted_items = sorted(
+                attributions.items(),
+                key=lambda x: x[1],
+                reverse=True
+            )
+        return sorted_items[:k]
+    def get_feature_index(self, feature_name: str) -> Optional[int]:
+        """
+        Get the index of a feature by name.
+        Args:
+            feature_name: Name of the feature
+        Returns:
+            Index of the feature, or None if not found or feature_names not set
+        """
+        if self.feature_names is None:
+            return None
+        try:
+            return self.feature_names.index(feature_name)
+        except ValueError:
+            return None
-    def plot(self, type='bar'):
+    def plot(self, plot_type: str = 'bar', **kwargs):
+        """
+        Visualize the explanation.
+        Args:
+            plot_type: Type of plot ('bar', 'waterfall', 'heatmap')
+            **kwargs: Additional arguments passed to the plotting function
+        Note:
+            This is a placeholder for future visualization integration.
+        """
+        print(
+            f"[plot: {plot_type}] Plotting explanation for {self.target_class} "
+            f"from {self.explainer_name}."
+        )
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert explanation to a dictionary for serialization.
+        Returns:
+            Dictionary representation of the explanation
+        """
+        return {
+            "explainer_name": self.explainer_name,
+            "target_class": self.target_class,
+            "explanation_data": self.explanation_data,
+            "feature_names": self.feature_names,
+            "metadata": self.metadata
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "Explanation":
         """
-        Visualizes the explanation.
-        This will later integrate with a proper visualization backend.
+        Create an Explanation from a dictionary.
+        Args:
+            data: Dictionary with explanation data
+        Returns:
+            Explanation instance
         """
-        print(f"[plot: {type}] Plotting explanation for {self.target_class} "
-              f"from {self.explainer_name}.")
+        return cls(
+            explainer_name=data["explainer_name"],
+            target_class=data["target_class"],
+            explanation_data=data["explanation_data"],
+            feature_names=data.get("feature_names"),
+            metadata=data.get("metadata", {})
+        )

explainiverse/core/registry.py CHANGED Viewed

@@ -375,6 +375,7 @@ def _create_default_registry() -> ExplainerRegistry:
     from explainiverse.explainers.gradient.smoothgrad import SmoothGradExplainer
     from explainiverse.explainers.gradient.saliency import SaliencyExplainer
     from explainiverse.explainers.gradient.tcav import TCAVExplainer
+    from explainiverse.explainers.gradient.lrp import LRPExplainer
     from explainiverse.explainers.example_based.protodash import ProtoDashExplainer
     registry = ExplainerRegistry()
@@ -587,6 +588,23 @@ def _create_default_registry() -> ExplainerRegistry:
         )
     )
+    # Register LRP (Layer-wise Relevance Propagation)
+    registry.register(
+        name="lrp",
+        explainer_class=LRPExplainer,
+        meta=ExplainerMeta(
+            scope="local",
+            model_types=["neural"],
+            data_types=["tabular", "image"],
+            task_types=["classification", "regression"],
+            description="LRP - Layer-wise Relevance Propagation for decomposition-based attributions (requires PyTorch)",
+            paper_reference="Bach et al., 2015 - 'On Pixel-wise Explanations for Non-Linear Classifier Decisions by Layer-wise Relevance Propagation' (PLOS ONE)",
+            complexity="O(n_layers * forward_pass)",
+            requires_training_data=False,
+            supports_batching=True
+        )
+    )
     # =========================================================================
     # Global Explainers (model-level)
     # =========================================================================

explainiverse 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

explainiverse 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl