PyPI - sae-lens - Versions diffs - 6.25.1__py3-none-any.whl → 6.26.1__py3-none-any.whl - Mend

sae-lens 6.25.1__py3-none-any.whl → 6.26.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

sae_lens/__init__.py +13 -1
sae_lens/config.py +5 -0
sae_lens/pretrained_saes.yaml +144 -144
sae_lens/saes/__init__.py +10 -0
sae_lens/saes/matching_pursuit_sae.py +334 -0
{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/METADATA +1 -1
{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/RECORD +9 -8
{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/WHEEL +0 -0
{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/licenses/LICENSE +0 -0

sae_lens/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.25.1"
+__version__ = "6.26.1"
 import logging
@@ -21,6 +21,10 @@ from sae_lens.saes import (
     JumpReLUTrainingSAEConfig,
     JumpReLUTranscoder,
     JumpReLUTranscoderConfig,
+    MatchingPursuitSAE,
+    MatchingPursuitSAEConfig,
+    MatchingPursuitTrainingSAE,
+    MatchingPursuitTrainingSAEConfig,
     MatryoshkaBatchTopKTrainingSAE,
     MatryoshkaBatchTopKTrainingSAEConfig,
     SAEConfig,
@@ -113,6 +117,10 @@ __all__ = [
     "MatryoshkaBatchTopKTrainingSAEConfig",
     "TemporalSAE",
     "TemporalSAEConfig",
+    "MatchingPursuitSAE",
+    "MatchingPursuitTrainingSAE",
+    "MatchingPursuitSAEConfig",
+    "MatchingPursuitTrainingSAEConfig",
 ]
@@ -139,3 +147,7 @@ register_sae_class(
     "jumprelu_skip_transcoder", JumpReLUSkipTranscoder, JumpReLUSkipTranscoderConfig
 )
 register_sae_class("temporal", TemporalSAE, TemporalSAEConfig)
+register_sae_class("matching_pursuit", MatchingPursuitSAE, MatchingPursuitSAEConfig)
+register_sae_training_class(
+    "matching_pursuit", MatchingPursuitTrainingSAE, MatchingPursuitTrainingSAEConfig
+)

sae_lens/config.py CHANGED Viewed

@@ -17,6 +17,11 @@ from datasets import (
 )
 from sae_lens import __version__, logger
+# keeping this unused import since some SAELens deps import DTYPE_MAP from config
+from sae_lens.constants import (
+    DTYPE_MAP,  # noqa: F401  # pyright: ignore[reportUnusedImport]
+)
 from sae_lens.registry import get_sae_training_class
 from sae_lens.saes.sae import TrainingSAEConfig
 from sae_lens.util import str_to_dtype

sae_lens/pretrained_saes.yaml CHANGED Viewed

@@ -9072,150 +9072,150 @@ gemma-scope-2-27b-it-transcoders-all:
   - id: layer_5_width_262k_l0_small_affine
     path: transcoder_all/layer_5_width_262k_l0_small_affine
     l0: 12
-  # - id: layer_60_width_16k_l0_big
-  #   path: transcoder_all/layer_60_width_16k_l0_big
-  #   l0: 120
-  # - id: layer_60_width_16k_l0_big_affine
-  #   path: transcoder_all/layer_60_width_16k_l0_big_affine
-  #   l0: 120
-  # - id: layer_60_width_16k_l0_small
-  #   path: transcoder_all/layer_60_width_16k_l0_small
-  #   l0: 20
-  # - id: layer_60_width_16k_l0_small_affine
-  #   path: transcoder_all/layer_60_width_16k_l0_small_affine
-  #   l0: 20
-  # - id: layer_60_width_262k_l0_big
-  #   path: transcoder_all/layer_60_width_262k_l0_big
-  #   l0: 120
-  # - id: layer_60_width_262k_l0_big_affine
-  #   path: transcoder_all/layer_60_width_262k_l0_big_affine
-  #   l0: 120
-  # - id: layer_60_width_262k_l0_small
-  #   path: transcoder_all/layer_60_width_262k_l0_small
-  #   l0: 20
-  # - id: layer_60_width_262k_l0_small_affine
-  #   path: transcoder_all/layer_60_width_262k_l0_small_affine
-  #   l0: 20
-  # - id: layer_61_width_16k_l0_big
-  #   path: transcoder_all/layer_61_width_16k_l0_big
-  #   l0: 120
-  # - id: layer_61_width_16k_l0_big_affine
-  #   path: transcoder_all/layer_61_width_16k_l0_big_affine
-  #   l0: 120
-  # - id: layer_61_width_16k_l0_small
-  #   path: transcoder_all/layer_61_width_16k_l0_small
-  #   l0: 20
-  # - id: layer_61_width_16k_l0_small_affine
-  #   path: transcoder_all/layer_61_width_16k_l0_small_affine
-  #   l0: 20
-  # - id: layer_61_width_262k_l0_big
-  #   path: transcoder_all/layer_61_width_262k_l0_big
-  #   l0: 120
-  # - id: layer_61_width_262k_l0_big_affine
-  #   path: transcoder_all/layer_61_width_262k_l0_big_affine
-  #   l0: 120
-  # - id: layer_61_width_262k_l0_small
-  #   path: transcoder_all/layer_61_width_262k_l0_small
-  #   l0: 20
-  # - id: layer_61_width_262k_l0_small_affine
-  #   path: transcoder_all/layer_61_width_262k_l0_small_affine
-  #   l0: 20
-  # - id: layer_6_width_16k_l0_big
-  #   path: transcoder_all/layer_6_width_16k_l0_big
-  #   l0: 77
-  # - id: layer_6_width_16k_l0_big_affine
-  #   path: transcoder_all/layer_6_width_16k_l0_big_affine
-  #   l0: 77
-  # - id: layer_6_width_16k_l0_small
-  #   path: transcoder_all/layer_6_width_16k_l0_small
-  #   l0: 12
-  # - id: layer_6_width_16k_l0_small_affine
-  #   path: transcoder_all/layer_6_width_16k_l0_small_affine
-  #   l0: 12
-  # - id: layer_6_width_262k_l0_big
-  #   path: transcoder_all/layer_6_width_262k_l0_big
-  #   l0: 77
-  # - id: layer_6_width_262k_l0_big_affine
-  #   path: transcoder_all/layer_6_width_262k_l0_big_affine
-  #   l0: 77
-  # - id: layer_6_width_262k_l0_small
-  #   path: transcoder_all/layer_6_width_262k_l0_small
-  #   l0: 12
-  # - id: layer_6_width_262k_l0_small_affine
-  #   path: transcoder_all/layer_6_width_262k_l0_small_affine
-  #   l0: 12
-  # - id: layer_7_width_16k_l0_big
-  #   path: transcoder_all/layer_7_width_16k_l0_big
-  #   l0: 80
-  # - id: layer_7_width_16k_l0_big_affine
-  #   path: transcoder_all/layer_7_width_16k_l0_big_affine
-  #   l0: 80
-  # - id: layer_7_width_16k_l0_small
-  #   path: transcoder_all/layer_7_width_16k_l0_small
-  #   l0: 13
-  # - id: layer_7_width_16k_l0_small_affine
-  #   path: transcoder_all/layer_7_width_16k_l0_small_affine
-  #   l0: 13
-  # - id: layer_7_width_262k_l0_big
-  #   path: transcoder_all/layer_7_width_262k_l0_big
-  #   l0: 80
-  # - id: layer_7_width_262k_l0_big_affine
-  #   path: transcoder_all/layer_7_width_262k_l0_big_affine
-  #   l0: 80
-  # - id: layer_7_width_262k_l0_small
-  #   path: transcoder_all/layer_7_width_262k_l0_small
-  #   l0: 13
-  # - id: layer_7_width_262k_l0_small_affine
-  #   path: transcoder_all/layer_7_width_262k_l0_small_affine
-  #   l0: 13
-  # - id: layer_8_width_16k_l0_big
-  #   path: transcoder_all/layer_8_width_16k_l0_big
-  #   l0: 83
-  # - id: layer_8_width_16k_l0_big_affine
-  #   path: transcoder_all/layer_8_width_16k_l0_big_affine
-  #   l0: 83
-  # - id: layer_8_width_16k_l0_small
-  #   path: transcoder_all/layer_8_width_16k_l0_small
-  #   l0: 13
-  # - id: layer_8_width_16k_l0_small_affine
-  #   path: transcoder_all/layer_8_width_16k_l0_small_affine
-  #   l0: 13
-  # - id: layer_8_width_262k_l0_big
-  #   path: transcoder_all/layer_8_width_262k_l0_big
-  #   l0: 83
-  # - id: layer_8_width_262k_l0_big_affine
-  #   path: transcoder_all/layer_8_width_262k_l0_big_affine
-  #   l0: 83
-  # - id: layer_8_width_262k_l0_small
-  #   path: transcoder_all/layer_8_width_262k_l0_small
-  #   l0: 13
-  # - id: layer_8_width_262k_l0_small_affine
-  #   path: transcoder_all/layer_8_width_262k_l0_small_affine
-  #   l0: 13
-  # - id: layer_9_width_16k_l0_big
-  #   path: transcoder_all/layer_9_width_16k_l0_big
-  #   l0: 86
-  # - id: layer_9_width_16k_l0_big_affine
-  #   path: transcoder_all/layer_9_width_16k_l0_big_affine
-  #   l0: 86
-  # - id: layer_9_width_16k_l0_small
-  #   path: transcoder_all/layer_9_width_16k_l0_small
-  #   l0: 14
-  # - id: layer_9_width_16k_l0_small_affine
-  #   path: transcoder_all/layer_9_width_16k_l0_small_affine
-  #   l0: 14
-  # - id: layer_9_width_262k_l0_big
-  #   path: transcoder_all/layer_9_width_262k_l0_big
-  #   l0: 86
-  # - id: layer_9_width_262k_l0_big_affine
-  #   path: transcoder_all/layer_9_width_262k_l0_big_affine
-  #   l0: 86
-  # - id: layer_9_width_262k_l0_small
-  #   path: transcoder_all/layer_9_width_262k_l0_small
-  #   l0: 14
-  # - id: layer_9_width_262k_l0_small_affine
-  #   path: transcoder_all/layer_9_width_262k_l0_small_affine
-  #   l0: 14
+  - id: layer_60_width_16k_l0_big
+    path: transcoder_all/layer_60_width_16k_l0_big
+    l0: 120
+  - id: layer_60_width_16k_l0_big_affine
+    path: transcoder_all/layer_60_width_16k_l0_big_affine
+    l0: 120
+  - id: layer_60_width_16k_l0_small
+    path: transcoder_all/layer_60_width_16k_l0_small
+    l0: 20
+  - id: layer_60_width_16k_l0_small_affine
+    path: transcoder_all/layer_60_width_16k_l0_small_affine
+    l0: 20
+  - id: layer_60_width_262k_l0_big
+    path: transcoder_all/layer_60_width_262k_l0_big
+    l0: 120
+  - id: layer_60_width_262k_l0_big_affine
+    path: transcoder_all/layer_60_width_262k_l0_big_affine
+    l0: 120
+  - id: layer_60_width_262k_l0_small
+    path: transcoder_all/layer_60_width_262k_l0_small
+    l0: 20
+  - id: layer_60_width_262k_l0_small_affine
+    path: transcoder_all/layer_60_width_262k_l0_small_affine
+    l0: 20
+  - id: layer_61_width_16k_l0_big
+    path: transcoder_all/layer_61_width_16k_l0_big
+    l0: 120
+  - id: layer_61_width_16k_l0_big_affine
+    path: transcoder_all/layer_61_width_16k_l0_big_affine
+    l0: 120
+  - id: layer_61_width_16k_l0_small
+    path: transcoder_all/layer_61_width_16k_l0_small
+    l0: 20
+  - id: layer_61_width_16k_l0_small_affine
+    path: transcoder_all/layer_61_width_16k_l0_small_affine
+    l0: 20
+  - id: layer_61_width_262k_l0_big
+    path: transcoder_all/layer_61_width_262k_l0_big
+    l0: 120
+  - id: layer_61_width_262k_l0_big_affine
+    path: transcoder_all/layer_61_width_262k_l0_big_affine
+    l0: 120
+  - id: layer_61_width_262k_l0_small
+    path: transcoder_all/layer_61_width_262k_l0_small
+    l0: 20
+  - id: layer_61_width_262k_l0_small_affine
+    path: transcoder_all/layer_61_width_262k_l0_small_affine
+    l0: 20
+  - id: layer_6_width_16k_l0_big
+    path: transcoder_all/layer_6_width_16k_l0_big
+    l0: 77
+  - id: layer_6_width_16k_l0_big_affine
+    path: transcoder_all/layer_6_width_16k_l0_big_affine
+    l0: 77
+  - id: layer_6_width_16k_l0_small
+    path: transcoder_all/layer_6_width_16k_l0_small
+    l0: 12
+  - id: layer_6_width_16k_l0_small_affine
+    path: transcoder_all/layer_6_width_16k_l0_small_affine
+    l0: 12
+  - id: layer_6_width_262k_l0_big
+    path: transcoder_all/layer_6_width_262k_l0_big
+    l0: 77
+  - id: layer_6_width_262k_l0_big_affine
+    path: transcoder_all/layer_6_width_262k_l0_big_affine
+    l0: 77
+  - id: layer_6_width_262k_l0_small
+    path: transcoder_all/layer_6_width_262k_l0_small
+    l0: 12
+  - id: layer_6_width_262k_l0_small_affine
+    path: transcoder_all/layer_6_width_262k_l0_small_affine
+    l0: 12
+  - id: layer_7_width_16k_l0_big
+    path: transcoder_all/layer_7_width_16k_l0_big
+    l0: 80
+  - id: layer_7_width_16k_l0_big_affine
+    path: transcoder_all/layer_7_width_16k_l0_big_affine
+    l0: 80
+  - id: layer_7_width_16k_l0_small
+    path: transcoder_all/layer_7_width_16k_l0_small
+    l0: 13
+  - id: layer_7_width_16k_l0_small_affine
+    path: transcoder_all/layer_7_width_16k_l0_small_affine
+    l0: 13
+  - id: layer_7_width_262k_l0_big
+    path: transcoder_all/layer_7_width_262k_l0_big
+    l0: 80
+  - id: layer_7_width_262k_l0_big_affine
+    path: transcoder_all/layer_7_width_262k_l0_big_affine
+    l0: 80
+  - id: layer_7_width_262k_l0_small
+    path: transcoder_all/layer_7_width_262k_l0_small
+    l0: 13
+  - id: layer_7_width_262k_l0_small_affine
+    path: transcoder_all/layer_7_width_262k_l0_small_affine
+    l0: 13
+  - id: layer_8_width_16k_l0_big
+    path: transcoder_all/layer_8_width_16k_l0_big
+    l0: 83
+  - id: layer_8_width_16k_l0_big_affine
+    path: transcoder_all/layer_8_width_16k_l0_big_affine
+    l0: 83
+  - id: layer_8_width_16k_l0_small
+    path: transcoder_all/layer_8_width_16k_l0_small
+    l0: 13
+  - id: layer_8_width_16k_l0_small_affine
+    path: transcoder_all/layer_8_width_16k_l0_small_affine
+    l0: 13
+  - id: layer_8_width_262k_l0_big
+    path: transcoder_all/layer_8_width_262k_l0_big
+    l0: 83
+  - id: layer_8_width_262k_l0_big_affine
+    path: transcoder_all/layer_8_width_262k_l0_big_affine
+    l0: 83
+  - id: layer_8_width_262k_l0_small
+    path: transcoder_all/layer_8_width_262k_l0_small
+    l0: 13
+  - id: layer_8_width_262k_l0_small_affine
+    path: transcoder_all/layer_8_width_262k_l0_small_affine
+    l0: 13
+  - id: layer_9_width_16k_l0_big
+    path: transcoder_all/layer_9_width_16k_l0_big
+    l0: 86
+  - id: layer_9_width_16k_l0_big_affine
+    path: transcoder_all/layer_9_width_16k_l0_big_affine
+    l0: 86
+  - id: layer_9_width_16k_l0_small
+    path: transcoder_all/layer_9_width_16k_l0_small
+    l0: 14
+  - id: layer_9_width_16k_l0_small_affine
+    path: transcoder_all/layer_9_width_16k_l0_small_affine
+    l0: 14
+  - id: layer_9_width_262k_l0_big
+    path: transcoder_all/layer_9_width_262k_l0_big
+    l0: 86
+  - id: layer_9_width_262k_l0_big_affine
+    path: transcoder_all/layer_9_width_262k_l0_big_affine
+    l0: 86
+  - id: layer_9_width_262k_l0_small
+    path: transcoder_all/layer_9_width_262k_l0_small
+    l0: 14
+  - id: layer_9_width_262k_l0_small_affine
+    path: transcoder_all/layer_9_width_262k_l0_small_affine
+    l0: 14
 gemma-scope-2-27b-it-transcoders:
   conversion_func: gemma_3
   model: google/gemma-3-27b-it

sae_lens/saes/__init__.py CHANGED Viewed

@@ -14,6 +14,12 @@ from .jumprelu_sae import (
     JumpReLUTrainingSAE,
     JumpReLUTrainingSAEConfig,
 )
+from .matching_pursuit_sae import (
+    MatchingPursuitSAE,
+    MatchingPursuitSAEConfig,
+    MatchingPursuitTrainingSAE,
+    MatchingPursuitTrainingSAEConfig,
+)
 from .matryoshka_batchtopk_sae import (
     MatryoshkaBatchTopKTrainingSAE,
     MatryoshkaBatchTopKTrainingSAEConfig,
@@ -78,4 +84,8 @@ __all__ = [
     "MatryoshkaBatchTopKTrainingSAEConfig",
     "TemporalSAE",
     "TemporalSAEConfig",
+    "MatchingPursuitSAE",
+    "MatchingPursuitTrainingSAE",
+    "MatchingPursuitSAEConfig",
+    "MatchingPursuitTrainingSAEConfig",
 ]

sae_lens/saes/matching_pursuit_sae.py ADDED Viewed

@@ -0,0 +1,334 @@
+"""Matching Pursuit SAE"""
+import warnings
+from dataclasses import dataclass
+from typing import Any
+import torch
+from typing_extensions import override
+from sae_lens.saes.sae import (
+    SAE,
+    SAEConfig,
+    TrainCoefficientConfig,
+    TrainingSAE,
+    TrainingSAEConfig,
+    TrainStepInput,
+    TrainStepOutput,
+)
+# --- inference ---
+@dataclass
+class MatchingPursuitSAEConfig(SAEConfig):
+    """
+    Configuration class for MatchingPursuitSAE inference.
+    Args:
+        residual_threshold (float): residual error at which to stop selecting latents. Default 1e-2.
+        max_iterations (int | None): Maximum iterations (default: d_in if set to None).
+            Defaults to None.
+        stop_on_duplicate_support (bool): Whether to stop selecting latents if the support set has not changed from the previous iteration. Defaults to True.
+        d_in (int): Input dimension (dimensionality of the activations being encoded).
+            Inherited from SAEConfig.
+        d_sae (int): SAE latent dimension (number of features in the SAE).
+            Inherited from SAEConfig.
+        dtype (str): Data type for the SAE parameters. Inherited from SAEConfig.
+            Defaults to "float32".
+        device (str): Device to place the SAE on. Inherited from SAEConfig.
+            Defaults to "cpu".
+        apply_b_dec_to_input (bool): Whether to apply decoder bias to the input
+            before encoding. Inherited from SAEConfig. Defaults to True.
+        normalize_activations (Literal["none", "expected_average_only_in", "constant_norm_rescale", "layer_norm"]):
+            Normalization strategy for input activations. Inherited from SAEConfig.
+            Defaults to "none".
+        reshape_activations (Literal["none", "hook_z"]): How to reshape activations
+            (useful for attention head outputs). Inherited from SAEConfig.
+            Defaults to "none".
+        metadata (SAEMetadata): Metadata about the SAE (model name, hook name, etc.).
+            Inherited from SAEConfig.
+    """
+    residual_threshold: float = 1e-2  # a value <= 0 turns off the residual-norm stopping test in _encode_matching_pursuit
+    max_iterations: int | None = None  # None makes _encode_matching_pursuit fall back to d_in iterations
+    stop_on_duplicate_support: bool = True  # stop a sample once its set of non-zero latents no longer changes
+    @override
+    @classmethod
+    def architecture(cls) -> str:  # architecture key; the training config below returns the same string
+        return "matching_pursuit"
+class MatchingPursuitSAE(SAE[MatchingPursuitSAEConfig]):
+    """
+    An inference-only sparse autoencoder using a "matching pursuit" activation function.
+    """
+    # Matching pursuit is a tied SAE, so we use W_enc as the decoder transposed
+    @property
+    def W_enc(self) -> torch.Tensor:  # pyright: ignore[reportIncompatibleVariableOverride]
+        return self.W_dec.T  # derived from W_dec on every access; no separate encoder weight is kept
+    # hacky way to get around the base class having W_enc.
+    # TODO: harmonize with the base class in next major release
+    @override
+    def __setattr__(self, name: str, value: Any):
+        if name == "W_enc":  # silently drop assignments to W_enc (presumably made by base-class setup) so the property above stays in effect
+            return
+        super().__setattr__(name, value)  # every other attribute is stored normally
+    @override
+    def encode(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Converts input x into feature activations.
+        """
+        sae_in = self.process_sae_in(x)  # base-class preprocessing; presumably centers by b_dec when apply_b_dec_to_input is set - confirm in SAE.process_sae_in
+        return _encode_matching_pursuit(
+            sae_in,
+            self.W_dec,  # the decoder rows are the dictionary the pursuit selects from
+            self.cfg.residual_threshold,
+            max_iterations=self.cfg.max_iterations,
+            stop_on_duplicate_support=self.cfg.stop_on_duplicate_support,
+        )
+    @override
+    @torch.no_grad()
+    def fold_W_dec_norm(self) -> None:  # deliberately unsupported: always raises
+        raise NotImplementedError(
+            "Folding W_dec_norm is not safe for MatchingPursuit SAEs, as this may change the resulting activations"
+        )
+    @override
+    def decode(self, feature_acts: torch.Tensor) -> torch.Tensor:
+        """
+        Decode the feature activations back to the input space.
+        Now, if hook_z reshaping is turned on, we reverse the flattening.
+        """
+        sae_out_pre = feature_acts @ self.W_dec  # linear reconstruction: activations times decoder rows
+        # since this is a tied SAE, we need to make sure b_dec is only applied if applied at input
+        if self.cfg.apply_b_dec_to_input:
+            sae_out_pre = sae_out_pre + self.b_dec
+        sae_out_pre = self.hook_sae_recons(sae_out_pre)  # hook point on the reconstruction, before output normalization
+        sae_out_pre = self.run_time_activation_norm_fn_out(sae_out_pre)
+        return self.reshape_fn_out(sae_out_pre, self.d_head)  # per the docstring, reverses hook_z flattening when enabled
+# --- training ---
+@dataclass
+class MatchingPursuitTrainingSAEConfig(TrainingSAEConfig):
+    """
+    Configuration class for training a MatchingPursuitTrainingSAE.
+    Args:
+        residual_threshold (float): residual error at which to stop selecting latents. Default 1e-2.
+        max_iterations (int | None): Maximum iterations (default: d_in if set to None).
+            Defaults to None.
+        stop_on_duplicate_support (bool): Whether to stop selecting latents if the support set has not changed from the previous iteration. Defaults to True.
+        decoder_init_norm (float | None): Norm to initialize decoder weights to.
+            0.1 corresponds to the "heuristic" initialization from Anthropic's April update.
+            Use None to disable. Inherited from TrainingSAEConfig. Defaults to 0.1.
+        d_in (int): Input dimension (dimensionality of the activations being encoded).
+            Inherited from SAEConfig.
+        d_sae (int): SAE latent dimension (number of features in the SAE).
+            Inherited from SAEConfig.
+        dtype (str): Data type for the SAE parameters. Inherited from SAEConfig.
+            Defaults to "float32".
+        device (str): Device to place the SAE on. Inherited from SAEConfig.
+            Defaults to "cpu".
+        apply_b_dec_to_input (bool): Whether to apply decoder bias to the input
+            before encoding. Inherited from SAEConfig. Defaults to True.
+        normalize_activations (Literal["none", "expected_average_only_in", "constant_norm_rescale", "layer_norm"]):
+            Normalization strategy for input activations. Inherited from SAEConfig.
+            Defaults to "none".
+        reshape_activations (Literal["none", "hook_z"]): How to reshape activations
+            (useful for attention head outputs). Inherited from SAEConfig.
+            Defaults to "none".
+        metadata (SAEMetadata): Metadata about the SAE training (model name, hook name, etc.).
+            Inherited from SAEConfig.
+    """
+    residual_threshold: float = 1e-2  # a value <= 0 turns off the residual-norm stopping test in _encode_matching_pursuit
+    max_iterations: int | None = None  # None makes _encode_matching_pursuit fall back to d_in iterations
+    stop_on_duplicate_support: bool = True  # stop a sample once its set of non-zero latents no longer changes
+    @override
+    @classmethod
+    def architecture(cls) -> str:  # same key as the inference config above
+        return "matching_pursuit"
+    @override
+    def __post_init__(self):  # runs the parent checks, then forces decoder_init_norm to 1.0
+        super().__post_init__()
+        if self.decoder_init_norm != 1.0:  # NOTE(review): the docstring lists the inherited default as 0.1, so this presumably fires (and warns) unless the caller passes 1.0 - confirm against TrainingSAEConfig
+            self.decoder_init_norm = 1.0  # any other value, including None, is overwritten
+            warnings.warn(  # no category is passed, so this is emitted as a UserWarning
+                "decoder_init_norm must be set to 1.0 for MatchingPursuitTrainingSAE, setting to 1.0"
+            )
+class MatchingPursuitTrainingSAE(TrainingSAE[MatchingPursuitTrainingSAEConfig]):  # training counterpart of MatchingPursuitSAE; shares _encode_matching_pursuit
+    # Matching pursuit is a tied SAE, so we use W_enc as the decoder transposed
+    @property
+    def W_enc(self) -> torch.Tensor:  # pyright: ignore[reportIncompatibleVariableOverride]
+        return self.W_dec.T  # derived from W_dec on every access; no separate encoder weight is kept
+    # hacky way to get around the base class having W_enc.
+    # TODO: harmonize with the base class in next major release
+    @override
+    def __setattr__(self, name: str, value: Any):
+        if name == "W_enc":  # silently drop assignments to W_enc (presumably made by base-class setup) so the property above stays in effect
+            return
+        super().__setattr__(name, value)  # every other attribute is stored normally
+    @override
+    def encode_with_hidden_pre(
+        self, x: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        hidden_pre doesn't make sense for matching pursuit, since there is not a single pre-activation.
+        We just return zeros for the hidden_pre.
+        """
+        sae_in = self.process_sae_in(x)  # base-class preprocessing; presumably centers by b_dec when apply_b_dec_to_input is set - confirm in the base class
+        acts = _encode_matching_pursuit(
+            sae_in,
+            self.W_dec,  # the decoder rows are the dictionary the pursuit selects from
+            self.cfg.residual_threshold,
+            max_iterations=self.cfg.max_iterations,
+            stop_on_duplicate_support=self.cfg.stop_on_duplicate_support,
+        )
+        return acts, torch.zeros_like(acts)  # second element is a zero placeholder with the same shape, dtype and device as acts
+    @override
+    @torch.no_grad()
+    def fold_W_dec_norm(self) -> None:  # deliberately unsupported: always raises
+        raise NotImplementedError(
+            "Folding W_dec_norm is not safe for MatchingPursuit SAEs, as this may change the resulting activations"
+        )
+    @override
+    def get_coefficients(self) -> dict[str, float | TrainCoefficientConfig]:
+        return {}  # this architecture declares no loss coefficients
+    @override
+    def calculate_aux_loss(
+        self,
+        step_input: TrainStepInput,
+        feature_acts: torch.Tensor,
+        hidden_pre: torch.Tensor,
+        sae_out: torch.Tensor,
+    ) -> dict[str, torch.Tensor]:
+        return {}  # no auxiliary loss terms; all arguments are ignored
+    @override
+    def training_forward_pass(self, step_input: TrainStepInput) -> TrainStepOutput:
+        output = super().training_forward_pass(step_input)  # base-class step; the lines below only add entries to output.metrics
+        l0 = output.feature_acts.bool().float().sum(-1).to_dense()  # count of non-zero latents per sample; to_dense() presumably guards against a sparse result - confirm
+        residual_norm = (step_input.sae_in - output.sae_out).norm(dim=-1)  # norm of (input - reconstruction) along the last dimension
+        output.metrics["max_l0"] = l0.max()
+        output.metrics["min_l0"] = l0.min()
+        output.metrics["residual_norm"] = residual_norm.mean()
+        output.metrics["residual_threshold_converged_portion"] = (
+            (residual_norm < self.cfg.residual_threshold).float().mean()  # fraction of samples whose residual norm is under the threshold
+        )
+        return output
+    @override
+    def decode(self, feature_acts: torch.Tensor) -> torch.Tensor:
+        """
+        Decode the feature activations back to the input space.
+        Now, if hook_z reshaping is turned on, we reverse the flattening.
+        """
+        sae_out_pre = feature_acts @ self.W_dec  # linear reconstruction: activations times decoder rows
+        # since this is a tied SAE, we need to make sure b_dec is only applied if applied at input
+        if self.cfg.apply_b_dec_to_input:
+            sae_out_pre = sae_out_pre + self.b_dec
+        sae_out_pre = self.hook_sae_recons(sae_out_pre)  # hook point on the reconstruction, before output normalization
+        sae_out_pre = self.run_time_activation_norm_fn_out(sae_out_pre)
+        return self.reshape_fn_out(sae_out_pre, self.d_head)  # per the docstring, reverses hook_z flattening when enabled
+# --- shared ---
+def _encode_matching_pursuit(
+    sae_in_centered: torch.Tensor,
+    W_dec: torch.Tensor,
+    residual_threshold: float,
+    max_iterations: int | None,
+    stop_on_duplicate_support: bool,
+) -> torch.Tensor:
+    """
+    Matching pursuit encoding.
+    Args:
+        sae_in_centered: Input activations, centered by b_dec. Shape [..., d_in].
+        W_dec: Decoder weight matrix. Shape [d_sae, d_in].
+        residual_threshold: Stop when residual norm falls below this.
+        max_iterations: Maximum iterations (default: d_in). Prevents infinite loops.
+        stop_on_duplicate_support: Whether to stop selecting latents if the support set has not changed from the previous iteration.
+    """
+    residual = sae_in_centered.clone()  # work on a copy; the residual is only ever rebound, never written in place
+    stop_on_residual_threshold = residual_threshold > 0  # a non-positive threshold disables the residual-norm test
+    # Handle multi-dimensional inputs by flattening all but the last dimension
+    original_shape = residual.shape
+    if residual.ndim > 2:
+        residual = residual.reshape(-1, residual.shape[-1])
+    batch_size = residual.shape[0]  # NOTE(review): 1-D input is not handled - shape[0] would be d_in and max(dim=1) below needs a 2-D tensor
+    d_sae, d_in = W_dec.shape
+    if max_iterations is None:
+        max_iterations = d_in  # Sensible upper bound
+    acts = torch.zeros(batch_size, d_sae, device=W_dec.device, dtype=residual.dtype)  # accumulated coefficient per (sample, latent)
+    prev_support = torch.zeros(batch_size, d_sae, dtype=torch.bool, device=W_dec.device)  # non-zero pattern of acts after the previous iteration
+    done = torch.zeros(batch_size, dtype=torch.bool, device=W_dec.device)  # per-sample flag; once set, that sample's updates are masked to zero
+    for _ in range(max_iterations):  # each iteration picks at most one latent per sample
+        # Find indices without gradients - the full [batch, d_sae] matmul result
+        # doesn't need to be saved for backward since max indices don't need gradients
+        with torch.no_grad():
+            indices = (residual @ W_dec.T).relu().max(dim=1, keepdim=True).indices  # decoder row with the largest projection, after clamping negatives to zero
+            indices_flat = indices.squeeze(1)  # [batch_size]
+        # Compute values with gradients using only the selected decoder rows.
+        # This stores [batch, d_in] for backward instead of [batch, d_sae].
+        selected_dec = W_dec[indices_flat]  # [batch_size, d_in]
+        values = (residual * selected_dec).sum(dim=-1, keepdim=True).relu()  # same projection recomputed under autograd; relu keeps it non-negative
+        # Mask values for samples that are already done
+        active_mask = (~done).unsqueeze(1)
+        masked_values = (values * active_mask.to(values.dtype)).to(acts.dtype)
+        acts.scatter_add_(1, indices, masked_values)  # additive, so a latent selected again accumulates its coefficient
+        # Update residual
+        residual = residual - masked_values * selected_dec  # subtract the selected row scaled by its coefficient; no change for done samples
+        if stop_on_duplicate_support or stop_on_residual_threshold:  # with both tests off, the loop always runs max_iterations times
+            with torch.no_grad():
+                support = acts != 0
+                # A sample is considered converged if:
+                # (1) the support set hasn't changed from the previous iteration (stability), or
+                # (2) the residual norm is below a given threshold (good enough reconstruction)
+                if stop_on_duplicate_support:
+                    done = done | (support == prev_support).all(dim=1)  # done is sticky: OR-ed in, never cleared
+                    prev_support = support
+                if stop_on_residual_threshold:
+                    done = done | (residual.norm(dim=-1) < residual_threshold)
+                if done.all():  # early exit once every sample has converged
+                    break
+    # Reshape acts back to original shape (replacing last dimension with d_sae)
+    if len(original_shape) > 2:
+        acts = acts.reshape(*original_shape[:-1], acts.shape[-1])
+    return acts

{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sae-lens
-Version: 6.25.1
+Version: 6.26.1
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 License-File: LICENSE

{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-sae_lens/__init__.py,sha256=vWuA8EbynIJadj666RoFNCTIvoH9-HFpUxuHwoYt8Ks,4268
+sae_lens/__init__.py,sha256=zRp1nmb41W1Pt1rvlKvRWw73UxjGyz1iHAzH9_X6_WQ,4725
 sae_lens/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sae_lens/analysis/hooked_sae_transformer.py,sha256=dQRgGVwce8XwylL2AzJE7l9elhtMRFCs2hdUj-Qyy4g,14038
 sae_lens/analysis/neuronpedia_integration.py,sha256=Gx1W7hUBEuMoasNcnOnZ1wmqbXDd1pSZ1nqKEya1HQc,4962
 sae_lens/cache_activations_runner.py,sha256=Lvlz-k5-3XxVRtUdC4b1CiKyx5s0ckLa8GDGv9_kcxs,12566
-sae_lens/config.py,sha256=JmcrXT4orJV2OulbEZAciz8RQmYv7DrtUtRbOLsNQ2Y,30330
+sae_lens/config.py,sha256=C982bUELhGHcfTwzeMTtXIf2hPtc946thYpUyctLiBo,30516
 sae_lens/constants.py,sha256=CM-h9AjZNAl2aP7hVpKk7YsFHpu-_Lfhhmq2d5qPEVc,887
 sae_lens/evals.py,sha256=P0NUsJeGzYxFBiVKhbPzd72IFKY4gH40HHlEZ3jEAmg,39598
 sae_lens/llm_sae_training_runner.py,sha256=M7BK55gSFYu2qFQKABHX3c8i46P1LfODCeyHFzGGuqU,15196
@@ -12,12 +12,13 @@ sae_lens/loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 sae_lens/loading/pretrained_sae_loaders.py,sha256=hq-dhxsEdUmlAnZEiZBqX7lNyQQwZ6KXmXZWpzAc5FY,63638
 sae_lens/loading/pretrained_saes_directory.py,sha256=hejNfLUepYCSGPalRfQwxxCEUqMMUPsn1tufwvwct5k,3820
 sae_lens/pretokenize_runner.py,sha256=amJwIz3CKi2s2wNQn-10E7eAV7VFhNqtFDNTeTkwEI8,7133
-sae_lens/pretrained_saes.yaml,sha256=Hy9mk4Liy50B0CIBD4ER1ETcho2drFFiIy-bPVCN_lc,1510210
+sae_lens/pretrained_saes.yaml,sha256=Hn8jXwZ7V6QQxzgu41LFEP-LAzuDxwYL5vhoar-pPX8,1509922
 sae_lens/registry.py,sha256=nhy7BPSudSATqW4lo9H_k3Na7sfGHmAf9v-3wpnLL_o,1490
-sae_lens/saes/__init__.py,sha256=fYVujOzNnUgpzLL0MBLBt_DNX2CPcTaheukzCd2bEPo,1906
+sae_lens/saes/__init__.py,sha256=SBqPaP6Gl5uPFwHlumAZATC4Wd26xKIYLAAAo4MSa5Q,2200
 sae_lens/saes/batchtopk_sae.py,sha256=x4EbgZl0GUickRPcCmtKNGS2Ra3Uy1Z1OtF2FnrSabQ,5422
 sae_lens/saes/gated_sae.py,sha256=mHnmw-RD7hqIbP9_EBj3p2SK0OqQIkZivdOKRygeRgw,8825
 sae_lens/saes/jumprelu_sae.py,sha256=udjGHp3WTABQSL2Qq57j-bINWX61GCmo68EmdjMOXoo,13310
+sae_lens/saes/matching_pursuit_sae.py,sha256=08_G9p1YMLnE5qZVCPp6gll-iG6nHRbMMASf4_bkFt8,13207
 sae_lens/saes/matryoshka_batchtopk_sae.py,sha256=Qr6htt1HHOuO9FXI9hyaPSnGFIiJG-v7y1t1CEmkFzM,5995
 sae_lens/saes/sae.py,sha256=fzXv8lwHskSxsf8hm_wlKPkpq50iafmBjBNQzwZ6a00,40050
 sae_lens/saes/standard_sae.py,sha256=nEVETwAmRD2tyX7ESIic1fij48gAq1Dh7s_GQ2fqCZ4,5747
@@ -35,7 +36,7 @@ sae_lens/training/types.py,sha256=1FpLx_Doda9vZpmfm-x1e8wGBYpyhe9Kpb_JuM5nIFM,90
 sae_lens/training/upload_saes_to_huggingface.py,sha256=r_WzI1zLtGZ5TzAxuG3xa_8T09j3zXJrWd_vzPsPGkQ,4469
 sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
 sae_lens/util.py,sha256=spkcmQUsjVYFn5H2032nQYr1CKGVnv3tAdfIpY59-Mg,3919
-sae_lens-6.25.1.dist-info/METADATA,sha256=gClFVWzEWNNjrXsGqvCY6ry6ehXIFwp8PB0jIOhmQvc,5361
-sae_lens-6.25.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-sae_lens-6.25.1.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
-sae_lens-6.25.1.dist-info/RECORD,,
+sae_lens-6.26.1.dist-info/METADATA,sha256=yoE6CFgQ9L5SLzI3Zgr8H8CfUBgSimihGyEIvKd8TW8,5361
+sae_lens-6.26.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+sae_lens-6.26.1.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
+sae_lens-6.26.1.dist-info/RECORD,,

{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{sae_lens-6.25.1.dist-info → sae_lens-6.26.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sae-lens 6.25.1__py3-none-any.whl → 6.26.1__py3-none-any.whl

sae-lens 6.25.1__py3-none-any.whl → 6.26.1__py3-none-any.whl