sae-lens 6.29.1-py3-none-any.whl → 6.33.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +14 -1
- sae_lens/analysis/__init__.py +15 -0
- sae_lens/analysis/compat.py +16 -0
- sae_lens/analysis/hooked_sae_transformer.py +1 -1
- sae_lens/analysis/sae_transformer_bridge.py +348 -0
- sae_lens/config.py +9 -1
- sae_lens/evals.py +2 -2
- sae_lens/loading/pretrained_sae_loaders.py +11 -4
- sae_lens/loading/pretrained_saes_directory.py +0 -22
- sae_lens/pretrained_saes.yaml +36 -0
- sae_lens/saes/sae.py +0 -31
- sae_lens/saes/temporal_sae.py +1 -1
- sae_lens/synthetic/__init__.py +13 -0
- sae_lens/synthetic/correlation.py +12 -14
- sae_lens/synthetic/stats.py +205 -0
- sae_lens/training/activation_scaler.py +3 -1
- {sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/METADATA +2 -2
- {sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/RECORD +20 -17
- {sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/WHEEL +1 -1
- {sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/licenses/LICENSE +0 -0
sae_lens/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.29.1"
+__version__ = "6.33.0"
 
 import logging
 
@@ -125,6 +125,19 @@ __all__ = [
     "MatchingPursuitTrainingSAEConfig",
 ]
 
+# Conditional export for SAETransformerBridge (requires transformer-lens v3+)
+try:
+    from sae_lens.analysis.compat import has_transformer_bridge
+
+    if has_transformer_bridge():
+        from sae_lens.analysis.sae_transformer_bridge import (  # noqa: F401
+            SAETransformerBridge,
+        )
+
+        __all__.append("SAETransformerBridge")
+except ImportError:
+    pass
+
 
 register_sae_class("standard", StandardSAE, StandardSAEConfig)
 register_sae_training_class("standard", StandardTrainingSAE, StandardTrainingSAEConfig)
sae_lens/analysis/__init__.py
CHANGED
@@ -0,0 +1,15 @@
+from sae_lens.analysis.hooked_sae_transformer import HookedSAETransformer
+
+__all__ = ["HookedSAETransformer"]
+
+try:
+    from sae_lens.analysis.compat import has_transformer_bridge
+
+    if has_transformer_bridge():
+        from sae_lens.analysis.sae_transformer_bridge import (  # noqa: F401
+            SAETransformerBridge,
+        )
+
+        __all__.append("SAETransformerBridge")
+except ImportError:
+    pass
sae_lens/analysis/compat.py
ADDED
@@ -0,0 +1,16 @@
+import importlib.metadata
+
+from packaging.version import parse as parse_version
+
+
+def get_transformer_lens_version() -> tuple[int, int, int]:
+    """Get transformer-lens version as (major, minor, patch)."""
+    version_str = importlib.metadata.version("transformer-lens")
+    version = parse_version(version_str)
+    return (version.major, version.minor, version.micro)
+
+
+def has_transformer_bridge() -> bool:
+    """Check if TransformerBridge is available (v3+)."""
+    major, _, _ = get_transformer_lens_version()
+    return major >= 3
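For reference, a minimal sketch of how the new compat helpers can gate TransformerBridge-dependent imports in downstream code. The usage below is illustrative, not part of the diff; it only calls the two functions defined in compat.py above.

# Sketch: guard v3-only code on the installed transformer-lens version.
from sae_lens.analysis.compat import get_transformer_lens_version, has_transformer_bridge

major, minor, patch = get_transformer_lens_version()
print(f"transformer-lens {major}.{minor}.{patch}")

if has_transformer_bridge():
    # Only importable when transformer-lens v3+ is installed.
    from sae_lens.analysis.sae_transformer_bridge import SAETransformerBridge  # noqa: F401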
sae_lens/analysis/hooked_sae_transformer.py
CHANGED
@@ -126,7 +126,7 @@ class HookedSAETransformer(HookedTransformer):
             current_sae.use_error_term = current_sae._original_use_error_term  # type: ignore
             delattr(current_sae, "_original_use_error_term")
 
-        if prev_sae:
+        if prev_sae is not None:
            set_deep_attr(self, act_name, prev_sae)
            self.acts_to_saes[act_name] = prev_sae
        else:
sae_lens/analysis/sae_transformer_bridge.py
ADDED
@@ -0,0 +1,348 @@
+from collections.abc import Callable
+from contextlib import contextmanager
+from typing import Any
+
+import torch
+from transformer_lens.ActivationCache import ActivationCache
+from transformer_lens.hook_points import HookPoint
+from transformer_lens.model_bridge import TransformerBridge
+
+from sae_lens import logger
+from sae_lens.analysis.hooked_sae_transformer import set_deep_attr
+from sae_lens.saes.sae import SAE
+
+SingleLoss = torch.Tensor  # Type alias for a single element tensor
+LossPerToken = torch.Tensor
+Loss = SingleLoss | LossPerToken
+
+
+class SAETransformerBridge(TransformerBridge):  # type: ignore[misc,no-untyped-call]
+    """A TransformerBridge subclass that supports attaching SAEs.
+
+    .. warning::
+        This class is in **beta**. The API may change in future versions.
+
+    This class provides the same SAE attachment functionality as HookedSAETransformer,
+    but for transformer-lens v3's TransformerBridge instead of HookedTransformer.
+
+    TransformerBridge is a lightweight wrapper around HuggingFace models that provides
+    hook points without the overhead of HookedTransformer's weight processing. This is
+    useful for models not natively supported by HookedTransformer, such as Gemma 3.
+    """
+
+    acts_to_saes: dict[str, SAE[Any]]
+
+    def __init__(self, *args: Any, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+        self.acts_to_saes = {}
+
+    @classmethod
+    def boot_transformers(  # type: ignore[override]
+        cls,
+        model_name: str,
+        **kwargs: Any,
+    ) -> "SAETransformerBridge":
+        """Factory method to boot a model and return SAETransformerBridge instance.
+
+        Args:
+            model_name: The name of the model to load (e.g., "gpt2", "gemma-2-2b")
+            **kwargs: Additional arguments passed to TransformerBridge.boot_transformers
+
+        Returns:
+            SAETransformerBridge instance with the loaded model
+        """
+        # Boot parent TransformerBridge
+        bridge = TransformerBridge.boot_transformers(model_name, **kwargs)
+        # Convert to our class
+        # NOTE: this is super hacky and scary, but I don't know how else to achieve this given TLens' internal code
+        bridge.__class__ = cls
+        bridge.acts_to_saes = {}  # type: ignore[attr-defined]
+        return bridge  # type: ignore[return-value]
+
+    def _resolve_hook_name(self, hook_name: str) -> str:
+        """Resolve alias to actual hook name.
+
+        TransformerBridge supports hook aliases like 'blocks.0.hook_mlp_out'
+        that map to actual paths like 'blocks.0.mlp.hook_out'.
+        """
+        # Combine static and dynamic aliases
+        aliases: dict[str, Any] = {
+            **self.hook_aliases,
+            **self._collect_hook_aliases_from_registry(),
+        }
+        resolved = aliases.get(hook_name, hook_name)
+        # aliases values are always strings, but type checker doesn't know this
+        return resolved if isinstance(resolved, str) else hook_name
+
+    def add_sae(self, sae: SAE[Any], use_error_term: bool | None = None) -> None:
+        """Attaches an SAE to the model.
+
+        WARNING: This SAE will be permanently attached until you remove it with
+        reset_saes. This function will also overwrite any existing SAE attached
+        to the same hook point.
+
+        Args:
+            sae: The SAE to attach to the model
+            use_error_term: If provided, will set the use_error_term attribute of
+                the SAE to this value. Determines whether the SAE returns input
+                or reconstruction. Defaults to None.
+        """
+        alias_name = sae.cfg.metadata.hook_name
+        actual_name = self._resolve_hook_name(alias_name)
+
+        # Check if hook exists (either as alias or actual name)
+        if (alias_name not in self.acts_to_saes) and (
+            actual_name not in self._hook_registry
+        ):
+            logger.warning(
+                f"No hook found for {alias_name}. Skipping. "
+                f"Check model._hook_registry for available hooks."
+            )
+            return
+
+        if use_error_term is not None:
+            if not hasattr(sae, "_original_use_error_term"):
+                sae._original_use_error_term = sae.use_error_term  # type: ignore[attr-defined]
+            sae.use_error_term = use_error_term
+
+        # Replace hook and update registry
+        set_deep_attr(self, actual_name, sae)
+        self._hook_registry[actual_name] = sae  # type: ignore[assignment]
+        self.acts_to_saes[alias_name] = sae
+
+    def _reset_sae(self, act_name: str, prev_sae: SAE[Any] | None = None) -> None:
+        """Resets an SAE that was attached to the model.
+
+        By default will remove the SAE from that hook_point.
+        If prev_sae is provided, will replace the current SAE with the provided one.
+        This is mainly used to restore previously attached SAEs after temporarily
+        running with different SAEs (e.g., with run_with_saes).
+
+        Args:
+            act_name: The hook_name of the SAE to reset
+            prev_sae: The SAE to replace the current one with. If None, will just
+                remove the SAE from this hook point. Defaults to None.
+        """
+        if act_name not in self.acts_to_saes:
+            logger.warning(
+                f"No SAE is attached to {act_name}. There's nothing to reset."
+            )
+            return
+
+        actual_name = self._resolve_hook_name(act_name)
+        current_sae = self.acts_to_saes[act_name]
+
+        if hasattr(current_sae, "_original_use_error_term"):
+            current_sae.use_error_term = current_sae._original_use_error_term  # type: ignore[attr-defined]
+            delattr(current_sae, "_original_use_error_term")
+
+        if prev_sae is not None:
+            set_deep_attr(self, actual_name, prev_sae)
+            self._hook_registry[actual_name] = prev_sae  # type: ignore[assignment]
+            self.acts_to_saes[act_name] = prev_sae
+        else:
+            new_hook = HookPoint()
+            new_hook.name = actual_name
+            set_deep_attr(self, actual_name, new_hook)
+            self._hook_registry[actual_name] = new_hook
+            del self.acts_to_saes[act_name]
+
+    def reset_saes(
+        self,
+        act_names: str | list[str] | None = None,
+        prev_saes: list[SAE[Any] | None] | None = None,
+    ) -> None:
+        """Reset the SAEs attached to the model.
+
+        If act_names are provided will just reset SAEs attached to those hooks.
+        Otherwise will reset all SAEs attached to the model.
+        Optionally can provide a list of prev_saes to reset to. This is mainly
+        used to restore previously attached SAEs after temporarily running with
+        different SAEs (e.g., with run_with_saes).
+
+        Args:
+            act_names: The act_names of the SAEs to reset. If None, will reset all
+                SAEs attached to the model. Defaults to None.
+            prev_saes: List of SAEs to replace the current ones with. If None, will
+                just remove the SAEs. Defaults to None.
+        """
+        if isinstance(act_names, str):
+            act_names = [act_names]
+        elif act_names is None:
+            act_names = list(self.acts_to_saes.keys())
+
+        if prev_saes:
+            if len(act_names) != len(prev_saes):
+                raise ValueError("act_names and prev_saes must have the same length")
+        else:
+            prev_saes = [None] * len(act_names)  # type: ignore[assignment]
+
+        for act_name, prev_sae in zip(act_names, prev_saes):  # type: ignore[arg-type]
+            self._reset_sae(act_name, prev_sae)
+
+    def run_with_saes(
+        self,
+        *model_args: Any,
+        saes: SAE[Any] | list[SAE[Any]] = [],
+        reset_saes_end: bool = True,
+        use_error_term: bool | None = None,
+        **model_kwargs: Any,
+    ) -> torch.Tensor | Loss | tuple[torch.Tensor, Loss] | None:
+        """Wrapper around forward pass.
+
+        Runs the model with the given SAEs attached for one forward pass, then
+        removes them. By default, will reset all SAEs to original state after.
+
+        Args:
+            *model_args: Positional arguments for the model forward pass
+            saes: The SAEs to be attached for this forward pass
+            reset_saes_end: If True, all SAEs added during this run are removed
+                at the end, and previously attached SAEs are restored to their
+                original state. Default is True.
+            use_error_term: If provided, will set the use_error_term attribute
+                of all SAEs attached during this run to this value. Defaults to None.
+            **model_kwargs: Keyword arguments for the model forward pass
+        """
+        with self.saes(
+            saes=saes, reset_saes_end=reset_saes_end, use_error_term=use_error_term
+        ):
+            return self(*model_args, **model_kwargs)
+
+    def run_with_cache_with_saes(
+        self,
+        *model_args: Any,
+        saes: SAE[Any] | list[SAE[Any]] = [],
+        reset_saes_end: bool = True,
+        use_error_term: bool | None = None,
+        return_cache_object: bool = True,
+        remove_batch_dim: bool = False,
+        **kwargs: Any,
+    ) -> tuple[
+        torch.Tensor | Loss | tuple[torch.Tensor, Loss] | None,
+        ActivationCache | dict[str, torch.Tensor],
+    ]:
+        """Wrapper around 'run_with_cache'.
+
+        Attaches given SAEs before running the model with cache and then removes them.
+        By default, will reset all SAEs to original state after.
+
+        Args:
+            *model_args: Positional arguments for the model forward pass
+            saes: The SAEs to be attached for this forward pass
+            reset_saes_end: If True, all SAEs added during this run are removed
+                at the end, and previously attached SAEs are restored to their
+                original state. Default is True.
+            use_error_term: If provided, will set the use_error_term attribute
+                of all SAEs attached during this run to this value. Defaults to None.
+            return_cache_object: If True, returns an ActivationCache object with
+                useful methods, otherwise returns a dictionary of activations.
+            remove_batch_dim: Whether to remove the batch dimension
+                (only works for batch_size==1). Defaults to False.
+            **kwargs: Keyword arguments for the model forward pass
+        """
+        with self.saes(
+            saes=saes, reset_saes_end=reset_saes_end, use_error_term=use_error_term
+        ):
+            return self.run_with_cache(
+                *model_args,
+                return_cache_object=return_cache_object,  # type: ignore[arg-type]
+                remove_batch_dim=remove_batch_dim,
+                **kwargs,
+            )  # type: ignore[return-value]
+
+    def run_with_hooks_with_saes(
+        self,
+        *model_args: Any,
+        saes: SAE[Any] | list[SAE[Any]] = [],
+        reset_saes_end: bool = True,
+        fwd_hooks: list[tuple[str | Callable[..., Any], Callable[..., Any]]] = [],
+        bwd_hooks: list[tuple[str | Callable[..., Any], Callable[..., Any]]] = [],
+        reset_hooks_end: bool = True,
+        clear_contexts: bool = False,
+        **model_kwargs: Any,
+    ) -> Any:
+        """Wrapper around 'run_with_hooks'.
+
+        Attaches the given SAEs to the model before running the model with hooks
+        and then removes them. By default, will reset all SAEs to original state after.
+
+        Args:
+            *model_args: Positional arguments for the model forward pass
+            saes: The SAEs to be attached for this forward pass
+            reset_saes_end: If True, all SAEs added during this run are removed
+                at the end, and previously attached SAEs are restored to their
+                original state. Default is True.
+            fwd_hooks: List of forward hooks to apply
+            bwd_hooks: List of backward hooks to apply
+            reset_hooks_end: Whether to reset the hooks at the end of the forward
+                pass. Default is True.
+            clear_contexts: Whether to clear the contexts at the end of the forward
+                pass. Default is False.
+            **model_kwargs: Keyword arguments for the model forward pass
+        """
+        with self.saes(saes=saes, reset_saes_end=reset_saes_end):
+            return self.run_with_hooks(
+                *model_args,
+                fwd_hooks=fwd_hooks,
+                bwd_hooks=bwd_hooks,
+                reset_hooks_end=reset_hooks_end,
+                clear_contexts=clear_contexts,
+                **model_kwargs,
+            )
+
+    @contextmanager
+    def saes(
+        self,
+        saes: SAE[Any] | list[SAE[Any]] = [],
+        reset_saes_end: bool = True,
+        use_error_term: bool | None = None,
+    ):  # type: ignore[no-untyped-def]
+        """A context manager for adding temporary SAEs to the model.
+
+        By default will keep track of previously attached SAEs, and restore them
+        when the context manager exits.
+
+        Args:
+            saes: SAEs to be attached.
+            reset_saes_end: If True, removes all SAEs added by this context manager
+                when the context manager exits, returning previously attached SAEs
+                to their original state.
+            use_error_term: If provided, will set the use_error_term attribute of
+                all SAEs attached during this run to this value. Defaults to None.
+        """
+        act_names_to_reset: list[str] = []
+        prev_saes: list[SAE[Any] | None] = []
+        if isinstance(saes, SAE):
+            saes = [saes]
+        try:
+            for sae in saes:
+                act_names_to_reset.append(sae.cfg.metadata.hook_name)
+                prev_sae = self.acts_to_saes.get(sae.cfg.metadata.hook_name, None)
+                prev_saes.append(prev_sae)
+                self.add_sae(sae, use_error_term=use_error_term)
+            yield self
+        finally:
+            if reset_saes_end:
+                self.reset_saes(act_names_to_reset, prev_saes)
+
+    @property
+    def hook_dict(self) -> dict[str, HookPoint]:
+        """Return combined hook registry including SAE internal hooks.
+
+        When SAEs are attached, they replace HookPoint entries in the registry.
+        This property returns both the base hooks and any internal hooks from
+        attached SAEs (like hook_sae_acts_post, hook_sae_input, etc.) with
+        their full path names.
+        """
+        hooks: dict[str, HookPoint] = {}
+
+        for name, hook_or_sae in self._hook_registry.items():
+            if isinstance(hook_or_sae, SAE):
+                # Include SAE's internal hooks with full path names
+                for sae_hook_name, sae_hook in hook_or_sae.hook_dict.items():
+                    full_name = f"{name}.{sae_hook_name}"
+                    hooks[full_name] = sae_hook
+            else:
+                hooks[name] = hook_or_sae
+
+        return hooks
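A minimal usage sketch for the new class. Assumptions not taken from this diff: transformer-lens v3+ is installed, SAE.from_pretrained(release, sae_id) returns the SAE object as in recent SAELens versions, and the "gpt2-small-res-jb" / "blocks.8.hook_resid_pre" strings name an existing pretrained SAE used purely for illustration.

import torch
from sae_lens import SAE
from sae_lens.analysis.sae_transformer_bridge import SAETransformerBridge

model = SAETransformerBridge.boot_transformers("gpt2")
sae = SAE.from_pretrained("gpt2-small-res-jb", "blocks.8.hook_resid_pre")

tokens = torch.tensor([[464, 3290, 318, 257, 922, 3290]])  # arbitrary GPT-2 token ids
# Splice the SAE in at its configured hook point for one forward pass; it is detached
# again when the call returns (reset_saes_end=True by default). The returned cache also
# contains the SAE's internal hooks (hook_sae_input, hook_sae_acts_post, ...).
logits, cache = model.run_with_cache_with_saes(tokens, saes=[sae])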
sae_lens/config.py
CHANGED
@@ -82,6 +82,7 @@ class LoggingConfig:
     log_to_wandb: bool = True
     log_activations_store_to_wandb: bool = False
     log_optimizer_state_to_wandb: bool = False
+    log_weights_to_wandb: bool = True
     wandb_project: str = "sae_lens_training"
     wandb_id: str | None = None
     run_name: str | None = None
@@ -107,7 +108,8 @@ class LoggingConfig:
            type="model",
            metadata=dict(trainer.cfg.__dict__),
        )
-        model_artifact.add_file(str(weights_path))
+        if self.log_weights_to_wandb:
+            model_artifact.add_file(str(weights_path))
        model_artifact.add_file(str(cfg_path))
        wandb.log_artifact(model_artifact, aliases=wandb_aliases)
 
@@ -557,6 +559,12 @@ class CacheActivationsRunnerConfig:
            context_size=self.context_size,
        )
 
+        if self.context_size > self.training_tokens:
+            raise ValueError(
+                f"context_size ({self.context_size}) is greater than training_tokens "
+                f"({self.training_tokens}). Please reduce context_size or increase training_tokens."
+            )
+
        if self.new_cached_activations_path is None:
            self.new_cached_activations_path = _default_cached_activations_path(  # type: ignore
                self.dataset_path, self.model_name, self.hook_name, None
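The new log_weights_to_wandb flag lets training runs log the config artifact without uploading the (potentially large) weights file. A sketch of how it might be set, assuming LoggingConfig is instantiated with keyword arguments like the other dataclass configs in config.py; the field names come from the diff above.

from sae_lens.config import LoggingConfig

logging_cfg = LoggingConfig(
    log_to_wandb=True,
    log_weights_to_wandb=False,  # skip model_artifact.add_file(weights_path)
    wandb_project="sae_lens_training",
)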
sae_lens/evals.py
CHANGED
@@ -335,7 +335,7 @@ def get_downstream_reconstruction_metrics(
 
     batch_iter = range(n_batches)
     if verbose:
-        batch_iter = tqdm(batch_iter, desc="Reconstruction Batches")
+        batch_iter = tqdm(batch_iter, desc="Reconstruction Batches", leave=False)
 
     for _ in batch_iter:
         batch_tokens = activation_store.get_batch_tokens(eval_batch_size_prompts)
@@ -430,7 +430,7 @@ def get_sparsity_and_variance_metrics(
 
     batch_iter = range(n_batches)
     if verbose:
-        batch_iter = tqdm(batch_iter, desc="Sparsity and Variance Batches")
+        batch_iter = tqdm(batch_iter, desc="Sparsity and Variance Batches", leave=False)
 
     for _ in batch_iter:
         batch_tokens = activation_store.get_batch_tokens(eval_batch_size_prompts)
sae_lens/loading/pretrained_sae_loaders.py
CHANGED
@@ -575,6 +575,8 @@ def _infer_gemma_3_raw_cfg_dict(repo_id: str, folder_name: str) -> dict[str, Any
        "model_name": model_name,
        "hf_hook_point_in": hf_hook_point_in,
    }
+    if "transcoder" in folder_name or "clt" in folder_name:
+        cfg["affine_connection"] = "affine" in folder_name
    if hf_hook_point_out is not None:
        cfg["hf_hook_point_out"] = hf_hook_point_out
 
@@ -614,11 +616,11 @@ def get_gemma_3_config_from_hf(
    if "resid_post" in folder_name:
        hook_name = f"blocks.{layer}.hook_resid_post"
    elif "attn_out" in folder_name:
-        hook_name = f"blocks.{layer}.
+        hook_name = f"blocks.{layer}.attn.hook_z"
    elif "mlp_out" in folder_name:
        hook_name = f"blocks.{layer}.hook_mlp_out"
    elif "transcoder" in folder_name or "clt" in folder_name:
-        hook_name = f"blocks.{layer}.
+        hook_name = f"blocks.{layer}.hook_mlp_in"
        hook_name_out = f"blocks.{layer}.hook_mlp_out"
    else:
        raise ValueError("Hook name not found in folder_name.")
@@ -643,7 +645,11 @@
 
    architecture = "jumprelu"
    if "transcoder" in folder_name or "clt" in folder_name:
-        architecture =
+        architecture = (
+            "jumprelu_skip_transcoder"
+            if raw_cfg_dict.get("affine_connection", False)
+            else "jumprelu_transcoder"
+        )
    d_out = shapes_dict["w_dec"][-1]
 
    cfg = {
@@ -660,7 +666,8 @@
        "dataset_path": "monology/pile-uncopyrighted",
        "context_size": 1024,
        "apply_b_dec_to_input": False,
-        "normalize_activations":
+        "normalize_activations": "none",
+        "reshape_activations": "none",
        "hf_hook_name": raw_cfg_dict.get("hf_hook_point_in"),
    }
    if hook_name_out is not None:
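The Gemma 3 loader keys entirely on folder-name conventions: "transcoder"/"clt" selects a transcoder architecture, and "affine" in the folder name flips affine_connection, which in turn selects the skip-transcoder variant. The standalone helper below is illustrative only (it is not a function in the library) and just mirrors the branching shown in the hunks above.

def _pick_architecture(folder_name: str) -> str:
    # Mirrors _infer_gemma_3_raw_cfg_dict + get_gemma_3_config_from_hf architecture selection.
    if "transcoder" in folder_name or "clt" in folder_name:
        return "jumprelu_skip_transcoder" if "affine" in folder_name else "jumprelu_transcoder"
    return "jumprelu"

assert _pick_architecture("transcoder_affine/layer_9") == "jumprelu_skip_transcoder"
assert _pick_architecture("resid_post/layer_9_width_16k") == "jumprelu"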
sae_lens/loading/pretrained_saes_directory.py
CHANGED
@@ -57,28 +57,6 @@ def get_pretrained_saes_directory() -> dict[str, PretrainedSAELookup]:
     return directory
 
 
-def get_norm_scaling_factor(release: str, sae_id: str) -> float | None:
-    """
-    Retrieve the norm_scaling_factor for a specific SAE if it exists.
-
-    Args:
-        release (str): The release name of the SAE.
-        sae_id (str): The ID of the specific SAE.
-
-    Returns:
-        float | None: The norm_scaling_factor if it exists, None otherwise.
-    """
-    package = "sae_lens"
-    yaml_file = files(package).joinpath("pretrained_saes.yaml")
-    with yaml_file.open("r") as file:
-        data = yaml.safe_load(file)
-    if release in data:
-        for sae_info in data[release]["saes"]:
-            if sae_info["id"] == sae_id:
-                return sae_info.get("norm_scaling_factor")
-    return None
-
-
 def get_repo_id_and_folder_name(release: str, sae_id: str) -> tuple[str, str]:
     saes_directory = get_pretrained_saes_directory()
     sae_info = saes_directory.get(release, None)
sae_lens/pretrained_saes.yaml
CHANGED
@@ -4148,6 +4148,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_17_width_16k_l0_medium
     path: resid_post/layer_17_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/17-gemmascope-2-res-16k
   - id: layer_17_width_16k_l0_small
     path: resid_post/layer_17_width_16k_l0_small
     l0: 20
@@ -4166,6 +4167,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_17_width_262k_l0_medium
     path: resid_post/layer_17_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/17-gemmascope-2-res-262k
   - id: layer_17_width_262k_l0_medium_seed_1
     path: resid_post/layer_17_width_262k_l0_medium_seed_1
     l0: 60
@@ -4178,6 +4180,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_17_width_65k_l0_medium
     path: resid_post/layer_17_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/17-gemmascope-2-res-65k
   - id: layer_17_width_65k_l0_small
     path: resid_post/layer_17_width_65k_l0_small
     l0: 20
@@ -4187,6 +4190,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_22_width_16k_l0_medium
     path: resid_post/layer_22_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/22-gemmascope-2-res-16k
   - id: layer_22_width_16k_l0_small
     path: resid_post/layer_22_width_16k_l0_small
     l0: 20
@@ -4205,6 +4209,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_22_width_262k_l0_medium
     path: resid_post/layer_22_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/22-gemmascope-2-res-262k
   - id: layer_22_width_262k_l0_medium_seed_1
     path: resid_post/layer_22_width_262k_l0_medium_seed_1
     l0: 60
@@ -4217,6 +4222,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_22_width_65k_l0_medium
     path: resid_post/layer_22_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/22-gemmascope-2-res-65k
   - id: layer_22_width_65k_l0_small
     path: resid_post/layer_22_width_65k_l0_small
     l0: 20
@@ -4226,6 +4232,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_29_width_16k_l0_medium
     path: resid_post/layer_29_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/29-gemmascope-2-res-16k
   - id: layer_29_width_16k_l0_small
     path: resid_post/layer_29_width_16k_l0_small
     l0: 20
@@ -4244,6 +4251,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_29_width_262k_l0_medium
     path: resid_post/layer_29_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/29-gemmascope-2-res-262k
   - id: layer_29_width_262k_l0_medium_seed_1
     path: resid_post/layer_29_width_262k_l0_medium_seed_1
     l0: 60
@@ -4256,6 +4264,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_29_width_65k_l0_medium
     path: resid_post/layer_29_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-4b-it/29-gemmascope-2-res-65k
   - id: layer_29_width_65k_l0_small
     path: resid_post/layer_29_width_65k_l0_small
     l0: 20
@@ -4265,6 +4274,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_9_width_16k_l0_medium
     path: resid_post/layer_9_width_16k_l0_medium
     l0: 53
+    neuronpedia: gemma-3-4b-it/9-gemmascope-2-res-16k
   - id: layer_9_width_16k_l0_small
     path: resid_post/layer_9_width_16k_l0_small
     l0: 17
@@ -4283,6 +4293,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_9_width_262k_l0_medium
     path: resid_post/layer_9_width_262k_l0_medium
     l0: 53
+    neuronpedia: gemma-3-4b-it/9-gemmascope-2-res-262k
   - id: layer_9_width_262k_l0_medium_seed_1
     path: resid_post/layer_9_width_262k_l0_medium_seed_1
     l0: 53
@@ -4295,6 +4306,7 @@ gemma-scope-2-4b-it-res:
   - id: layer_9_width_65k_l0_medium
     path: resid_post/layer_9_width_65k_l0_medium
     l0: 53
+    neuronpedia: gemma-3-4b-it/9-gemmascope-2-res-65k
   - id: layer_9_width_65k_l0_small
     path: resid_post/layer_9_width_65k_l0_small
     l0: 17
@@ -14491,6 +14503,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_12_width_16k_l0_medium
     path: resid_post/layer_12_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/12-gemmascope-2-res-16k
   - id: layer_12_width_16k_l0_small
     path: resid_post/layer_12_width_16k_l0_small
     l0: 20
@@ -14509,6 +14522,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_12_width_262k_l0_medium
     path: resid_post/layer_12_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/12-gemmascope-2-res-262k
   - id: layer_12_width_262k_l0_medium_seed_1
     path: resid_post/layer_12_width_262k_l0_medium_seed_1
     l0: 60
@@ -14521,6 +14535,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_12_width_65k_l0_medium
     path: resid_post/layer_12_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/12-gemmascope-2-res-65k
   - id: layer_12_width_65k_l0_small
     path: resid_post/layer_12_width_65k_l0_small
     l0: 20
@@ -14530,6 +14545,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_15_width_16k_l0_medium
     path: resid_post/layer_15_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/15-gemmascope-2-res-16k
   - id: layer_15_width_16k_l0_small
     path: resid_post/layer_15_width_16k_l0_small
     l0: 20
@@ -14548,6 +14564,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_15_width_262k_l0_medium
     path: resid_post/layer_15_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/15-gemmascope-2-res-262k
   - id: layer_15_width_262k_l0_medium_seed_1
     path: resid_post/layer_15_width_262k_l0_medium_seed_1
     l0: 60
@@ -14560,6 +14577,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_15_width_65k_l0_medium
     path: resid_post/layer_15_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/15-gemmascope-2-res-65k
   - id: layer_15_width_65k_l0_small
     path: resid_post/layer_15_width_65k_l0_small
     l0: 20
@@ -14569,6 +14587,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_5_width_16k_l0_medium
     path: resid_post/layer_5_width_16k_l0_medium
     l0: 55
+    neuronpedia: gemma-3-270m-it/5-gemmascope-2-res-16k
   - id: layer_5_width_16k_l0_small
     path: resid_post/layer_5_width_16k_l0_small
     l0: 18
@@ -14587,6 +14606,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_5_width_262k_l0_medium
     path: resid_post/layer_5_width_262k_l0_medium
     l0: 55
+    neuronpedia: gemma-3-270m-it/5-gemmascope-2-res-262k
   - id: layer_5_width_262k_l0_medium_seed_1
     path: resid_post/layer_5_width_262k_l0_medium_seed_1
     l0: 55
@@ -14599,6 +14619,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_5_width_65k_l0_medium
     path: resid_post/layer_5_width_65k_l0_medium
     l0: 55
+    neuronpedia: gemma-3-270m-it/5-gemmascope-2-res-65k
   - id: layer_5_width_65k_l0_small
     path: resid_post/layer_5_width_65k_l0_small
     l0: 18
@@ -14608,6 +14629,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_9_width_16k_l0_medium
     path: resid_post/layer_9_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/9-gemmascope-2-res-16k
   - id: layer_9_width_16k_l0_small
     path: resid_post/layer_9_width_16k_l0_small
     l0: 20
@@ -14626,6 +14648,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_9_width_262k_l0_medium
     path: resid_post/layer_9_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/9-gemmascope-2-res-262k
   - id: layer_9_width_262k_l0_medium_seed_1
     path: resid_post/layer_9_width_262k_l0_medium_seed_1
     l0: 60
@@ -14638,6 +14661,7 @@ gemma-scope-2-270m-it-res:
   - id: layer_9_width_65k_l0_medium
     path: resid_post/layer_9_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-270m-it/9-gemmascope-2-res-65k
   - id: layer_9_width_65k_l0_small
     path: resid_post/layer_9_width_65k_l0_small
     l0: 20
@@ -18727,6 +18751,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_13_width_16k_l0_medium
     path: resid_post/layer_13_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/13-gemmascope-2-res-16k
   - id: layer_13_width_16k_l0_small
     path: resid_post/layer_13_width_16k_l0_small
     l0: 20
@@ -18745,6 +18770,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_13_width_262k_l0_medium
     path: resid_post/layer_13_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/13-gemmascope-2-res-262k
   - id: layer_13_width_262k_l0_medium_seed_1
     path: resid_post/layer_13_width_262k_l0_medium_seed_1
     l0: 60
@@ -18757,6 +18783,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_13_width_65k_l0_medium
     path: resid_post/layer_13_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/13-gemmascope-2-res-65k
   - id: layer_13_width_65k_l0_small
     path: resid_post/layer_13_width_65k_l0_small
     l0: 20
@@ -18766,6 +18793,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_17_width_16k_l0_medium
     path: resid_post/layer_17_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/17-gemmascope-2-res-16k
   - id: layer_17_width_16k_l0_small
     path: resid_post/layer_17_width_16k_l0_small
     l0: 20
@@ -18784,6 +18812,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_17_width_262k_l0_medium
     path: resid_post/layer_17_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/17-gemmascope-2-res-262k
   - id: layer_17_width_262k_l0_medium_seed_1
     path: resid_post/layer_17_width_262k_l0_medium_seed_1
     l0: 60
@@ -18796,6 +18825,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_17_width_65k_l0_medium
     path: resid_post/layer_17_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/17-gemmascope-2-res-65k
   - id: layer_17_width_65k_l0_small
     path: resid_post/layer_17_width_65k_l0_small
     l0: 20
@@ -18805,6 +18835,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_22_width_16k_l0_medium
     path: resid_post/layer_22_width_16k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/22-gemmascope-2-res-16k
   - id: layer_22_width_16k_l0_small
     path: resid_post/layer_22_width_16k_l0_small
     l0: 20
@@ -18823,6 +18854,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_22_width_262k_l0_medium
     path: resid_post/layer_22_width_262k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/22-gemmascope-2-res-262k
   - id: layer_22_width_262k_l0_medium_seed_1
     path: resid_post/layer_22_width_262k_l0_medium_seed_1
     l0: 60
@@ -18835,6 +18867,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_22_width_65k_l0_medium
     path: resid_post/layer_22_width_65k_l0_medium
     l0: 60
+    neuronpedia: gemma-3-1b-it/22-gemmascope-2-res-65k
   - id: layer_22_width_65k_l0_small
     path: resid_post/layer_22_width_65k_l0_small
     l0: 20
@@ -18844,6 +18877,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_7_width_16k_l0_medium
     path: resid_post/layer_7_width_16k_l0_medium
     l0: 54
+    neuronpedia: gemma-3-1b-it/7-gemmascope-2-res-16k
   - id: layer_7_width_16k_l0_small
     path: resid_post/layer_7_width_16k_l0_small
     l0: 18
@@ -18862,6 +18896,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_7_width_262k_l0_medium
     path: resid_post/layer_7_width_262k_l0_medium
     l0: 54
+    neuronpedia: gemma-3-1b-it/7-gemmascope-2-res-262k
   - id: layer_7_width_262k_l0_medium_seed_1
     path: resid_post/layer_7_width_262k_l0_medium_seed_1
     l0: 54
@@ -18874,6 +18909,7 @@ gemma-scope-2-1b-it-res:
   - id: layer_7_width_65k_l0_medium
     path: resid_post/layer_7_width_65k_l0_medium
     l0: 54
+    neuronpedia: gemma-3-1b-it/7-gemmascope-2-res-65k
   - id: layer_7_width_65k_l0_small
     path: resid_post/layer_7_width_65k_l0_small
     l0: 18
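These YAML entries are what the loading helpers expose at runtime. A small sketch, assuming the directory layout follows existing releases (saes_map maps SAE id to its path within the release, as referenced in the sae.py diff below); the release name is one of those listed above.

from sae_lens.loading.pretrained_saes_directory import get_pretrained_saes_directory

directory = get_pretrained_saes_directory()
release = directory["gemma-scope-2-4b-it-res"]
print(list(release.saes_map)[:3])  # e.g. ["layer_17_width_16k_l0_medium", ...]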
sae_lens/saes/sae.py
CHANGED
@@ -45,7 +45,6 @@ from sae_lens.loading.pretrained_sae_loaders import (
 )
 from sae_lens.loading.pretrained_saes_directory import (
     get_config_overrides,
-    get_norm_scaling_factor,
     get_pretrained_saes_directory,
     get_releases_for_repo_id,
     get_repo_id_and_folder_name,
@@ -638,24 +637,6 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
                stacklevel=2,
            )
        elif sae_id not in sae_directory[release].saes_map:
-            # Handle special cases like Gemma Scope
-            if (
-                "gemma-scope" in release
-                and "canonical" not in release
-                and f"{release}-canonical" in sae_directory
-            ):
-                canonical_ids = list(
-                    sae_directory[release + "-canonical"].saes_map.keys()
-                )
-                # Shorten the lengthy string of valid IDs
-                if len(canonical_ids) > 5:
-                    str_canonical_ids = str(canonical_ids[:5])[:-1] + ", ...]"
-                else:
-                    str_canonical_ids = str(canonical_ids)
-                value_suffix = f" If you don't want to specify an L0 value, consider using release {release}-canonical which has valid IDs {str_canonical_ids}"
-            else:
-                value_suffix = ""
-
            valid_ids = list(sae_directory[release].saes_map.keys())
            # Shorten the lengthy string of valid IDs
            if len(valid_ids) > 5:
@@ -665,7 +646,6 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
 
            raise ValueError(
                f"ID {sae_id} not found in release {release}. Valid IDs are {str_valid_ids}."
-                + value_suffix
            )
 
        conversion_loader = (
@@ -702,17 +682,6 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
        sae.process_state_dict_for_loading(state_dict)
        sae.load_state_dict(state_dict, assign=True)
 
-        # Apply normalization if needed
-        if cfg_dict.get("normalize_activations") == "expected_average_only_in":
-            norm_scaling_factor = get_norm_scaling_factor(release, sae_id)
-            if norm_scaling_factor is not None:
-                sae.fold_activation_norm_scaling_factor(norm_scaling_factor)
-                cfg_dict["normalize_activations"] = "none"
-            else:
-                warnings.warn(
-                    f"norm_scaling_factor not found for {release} and {sae_id}, but normalize_activations is 'expected_average_only_in'. Skipping normalization folding."
-                )
-
        # the loaders should already handle the dtype / device conversion
        # but this is a fallback to guarantee the SAE is on the correct device and dtype
        return (
sae_lens/saes/temporal_sae.py
CHANGED
sae_lens/synthetic/__init__.py
CHANGED
@@ -50,6 +50,13 @@ from sae_lens.synthetic.plotting import (
     find_best_feature_ordering_from_sae,
     plot_sae_feature_similarity,
 )
+from sae_lens.synthetic.stats import (
+    CorrelationMatrixStats,
+    SuperpositionStats,
+    compute_correlation_matrix_stats,
+    compute_low_rank_correlation_matrix_stats,
+    compute_superposition_stats,
+)
 from sae_lens.synthetic.training import (
     SyntheticActivationIterator,
     train_toy_sae,
@@ -80,6 +87,12 @@ __all__ = [
     "orthogonal_initializer",
     "FeatureDictionaryInitializer",
     "cosine_similarities",
+    # Statistics
+    "compute_correlation_matrix_stats",
+    "compute_low_rank_correlation_matrix_stats",
+    "compute_superposition_stats",
+    "CorrelationMatrixStats",
+    "SuperpositionStats",
     # Training utilities
     "SyntheticActivationIterator",
     "SyntheticDataEvalResult",
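With these exports, the new statistics are reachable from the sae_lens.synthetic namespace. A sketch of calling compute_superposition_stats: per the stats.py diff further down it only reads .feature_vectors from its argument, so a duck-typed stand-in is used here instead of constructing a real FeatureDictionary, whose constructor is not shown in this diff.

from types import SimpleNamespace

import torch

from sae_lens.synthetic import compute_superposition_stats

features = SimpleNamespace(feature_vectors=torch.randn(512, 64))  # stand-in, not a FeatureDictionary
stats = compute_superposition_stats(features, percentiles=[95, 99])
print(stats.mean_max_abs_cos_sim, stats.mean_percentile_abs_cos_sim[99])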
sae_lens/synthetic/correlation.py
CHANGED
@@ -3,6 +3,7 @@ from typing import NamedTuple
 
 import torch
 
+from sae_lens import logger
 from sae_lens.util import str_to_dtype
 
 
@@ -268,7 +269,7 @@
 def generate_random_low_rank_correlation_matrix(
     num_features: int,
     rank: int,
-    correlation_scale: float = 0.
+    correlation_scale: float = 0.075,
     seed: int | None = None,
     device: torch.device | str = "cpu",
     dtype: torch.dtype | str = torch.float32,
@@ -331,20 +332,17 @@
     factor_sq_sum = (factor**2).sum(dim=1)
     diag_term = 1 - factor_sq_sum
 
-    #
-
-
-
-
-
-
-
-
-        /
+    # alternatively, we can rescale each row independently to ensure the diagonal is 1
+    mask = diag_term < _MIN_DIAG
+    factor[mask, :] *= torch.sqrt((1 - _MIN_DIAG) / factor_sq_sum[mask].unsqueeze(1))
+    factor_sq_sum = (factor**2).sum(dim=1)
+    diag_term = 1 - factor_sq_sum
+
+    total_rescaled = mask.sum().item()
+    if total_rescaled > 0:
+        logger.warning(
+            f"{total_rescaled} / {num_features} rows were capped. Either reduce the rank or reduce the correlation_scale to avoid rescaling."
         )
-    factor = factor * scale
-    factor_sq_sum = (factor**2).sum(dim=1)
-    diag_term = 1 - factor_sq_sum
 
     return LowRankCorrelationMatrix(
         correlation_factor=factor, correlation_diag=diag_term
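The new capping step rescales any row of the low-rank factor whose squared norm would push the implied diagonal term below the module's minimum. A tiny standalone illustration of that arithmetic (0.05 is an assumed stand-in for the _MIN_DIAG constant, which is not shown in this diff):

import torch

_MIN_DIAG = 0.05  # assumed value for illustration only
factor = torch.tensor([[0.8, 0.7], [0.1, 0.2]])  # row 0 has squared norm 1.13 > 1 - _MIN_DIAG
factor_sq_sum = (factor**2).sum(dim=1)
mask = (1 - factor_sq_sum) < _MIN_DIAG
factor[mask, :] *= torch.sqrt((1 - _MIN_DIAG) / factor_sq_sum[mask].unsqueeze(1))
# After rescaling, every row satisfies 1 - ||factor[i]||^2 >= _MIN_DIAG, so the implied
# correlation matrix keeps a non-negative diagonal term.
print(1 - (factor**2).sum(dim=1))  # tensor([0.0500, 0.9500])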
sae_lens/synthetic/stats.py
ADDED
@@ -0,0 +1,205 @@
+from dataclasses import dataclass
+
+import torch
+
+from sae_lens.synthetic.correlation import LowRankCorrelationMatrix
+from sae_lens.synthetic.feature_dictionary import FeatureDictionary
+
+
+@dataclass
+class CorrelationMatrixStats:
+    """Statistics computed from a correlation matrix."""
+
+    rms_correlation: float  # Root mean square of off-diagonal correlations
+    mean_correlation: float  # Mean of off-diagonal correlations (not absolute)
+    num_features: int
+
+
+@torch.no_grad()
+def compute_correlation_matrix_stats(
+    correlation_matrix: torch.Tensor,
+) -> CorrelationMatrixStats:
+    """Compute correlation statistics from a dense correlation matrix.
+
+    Args:
+        correlation_matrix: Dense correlation matrix of shape (n, n)
+
+    Returns:
+        CorrelationMatrixStats with correlation statistics
+    """
+    num_features = correlation_matrix.shape[0]
+
+    # Extract off-diagonal elements
+    mask = ~torch.eye(num_features, dtype=torch.bool, device=correlation_matrix.device)
+    off_diag = correlation_matrix[mask]
+
+    rms_correlation = (off_diag**2).mean().sqrt().item()
+    mean_correlation = off_diag.mean().item()
+
+    return CorrelationMatrixStats(
+        rms_correlation=rms_correlation,
+        mean_correlation=mean_correlation,
+        num_features=num_features,
+    )
+
+
+@torch.no_grad()
+def compute_low_rank_correlation_matrix_stats(
+    correlation_matrix: LowRankCorrelationMatrix,
+) -> CorrelationMatrixStats:
+    """Compute correlation statistics from a LowRankCorrelationMatrix.
+
+    The correlation matrix is represented as:
+        correlation = factor @ factor.T + diag(diag_term)
+
+    The off-diagonal elements are simply factor @ factor.T (the diagonal term
+    only affects the diagonal).
+
+    All statistics are computed efficiently in O(n*r²) time and O(r²) memory
+    without materializing the full n×n correlation matrix.
+
+    Args:
+        correlation_matrix: Low-rank correlation matrix
+
+    Returns:
+        CorrelationMatrixStats with correlation statistics
+    """
+
+    factor = correlation_matrix.correlation_factor
+    num_features = factor.shape[0]
+    num_off_diag = num_features * (num_features - 1)
+
+    # RMS correlation: uses ||F @ F.T||_F² = ||F.T @ F||_F²
+    # This avoids computing the (num_features, num_features) matrix
+    G = factor.T @ factor  # (rank, rank) - small!
+    frobenius_sq = (G**2).sum()
+    row_norms_sq = (factor**2).sum(dim=1)  # ||F[i]||² for each row
+    diag_sq_sum = (row_norms_sq**2).sum()  # Σᵢ ||F[i]||⁴
+    off_diag_sq_sum = frobenius_sq - diag_sq_sum
+    rms_correlation = (off_diag_sq_sum / num_off_diag).sqrt().item()
+
+    # Mean correlation (not absolute): sum(C) = ||col_sums(F)||², trace(C) = Σ||F[i]||²
+    col_sums = factor.sum(dim=0)  # (rank,)
+    sum_all = (col_sums**2).sum()  # 1ᵀ C 1
+    trace_C = row_norms_sq.sum()
+    mean_correlation = ((sum_all - trace_C) / num_off_diag).item()
+
+    return CorrelationMatrixStats(
+        rms_correlation=rms_correlation,
+        mean_correlation=mean_correlation,
+        num_features=num_features,
+    )
+
+
+@dataclass
+class SuperpositionStats:
+    """Statistics measuring superposition in a feature dictionary."""
+
+    # Per-latent statistics: for each latent, max and percentile of |cos_sim| with others
+    max_abs_cos_sims: torch.Tensor  # Shape: (num_features,)
+    percentile_abs_cos_sims: dict[int, torch.Tensor]  # {percentile: (num_features,)}
+
+    # Summary statistics (means of the per-latent values)
+    mean_max_abs_cos_sim: float
+    mean_percentile_abs_cos_sim: dict[int, float]
+    mean_abs_cos_sim: float  # Mean |cos_sim| across all pairs
+
+    # Metadata
+    num_features: int
+    hidden_dim: int
+
+
+@torch.no_grad()
+def compute_superposition_stats(
+    feature_dictionary: FeatureDictionary,
+    batch_size: int = 1024,
+    device: str | torch.device | None = None,
+    percentiles: list[int] | None = None,
+) -> SuperpositionStats:
+    """Compute superposition statistics for a feature dictionary.
+
+    Computes pairwise cosine similarities in batches to handle large dictionaries.
+
+    For each latent i, computes:
+
+    - max |cos_sim(i, j)| over all j != i
+    - kth percentile of |cos_sim(i, j)| over all j != i (for each k in percentiles)
+
+    Args:
+        feature_dictionary: FeatureDictionary containing the feature vectors
+        batch_size: Number of features to process per batch
+        device: Device for computation (defaults to feature dictionary's device)
+        percentiles: List of percentiles to compute per latent (default: [95, 99])
+
+    Returns:
+        SuperpositionStats with superposition metrics
+    """
+    if percentiles is None:
+        percentiles = [95, 99]
+
+    feature_vectors = feature_dictionary.feature_vectors
+    num_features, hidden_dim = feature_vectors.shape
+
+    if num_features < 2:
+        raise ValueError("Need at least 2 features to compute superposition stats")
+    if device is None:
+        device = feature_vectors.device
+
+    # Normalize features to unit norm for cosine similarity
+    features_normalized = feature_vectors.to(device).float()
+    norms = torch.linalg.norm(features_normalized, dim=1, keepdim=True)
+    features_normalized = features_normalized / norms.clamp(min=1e-8)
+
+    # Track per-latent statistics
+    max_abs_cos_sims = torch.zeros(num_features, device=device)
+    percentile_abs_cos_sims = {
+        p: torch.zeros(num_features, device=device) for p in percentiles
+    }
+    sum_abs_cos_sim = 0.0
+    n_pairs = 0
+
+    # Process in batches: for each batch of features, compute similarities with all others
+    for i in range(0, num_features, batch_size):
+        batch_end = min(i + batch_size, num_features)
+        batch = features_normalized[i:batch_end]  # (batch_size, hidden_dim)
+
+        # Compute cosine similarities with all features: (batch_size, num_features)
+        cos_sims = batch @ features_normalized.T
+
+        # Absolute cosine similarities
+        abs_cos_sims = cos_sims.abs()
+
+        # Process each latent in the batch
+        for j, idx in enumerate(range(i, batch_end)):
+            # Get similarities with all other features (exclude self)
+            row = abs_cos_sims[j].clone()
+            row[idx] = 0.0  # Exclude self for max
+            max_abs_cos_sims[idx] = row.max()
+
+            # For percentiles, exclude self and compute
+            other_sims = torch.cat([abs_cos_sims[j, :idx], abs_cos_sims[j, idx + 1 :]])
+            for p in percentiles:
+                percentile_abs_cos_sims[p][idx] = torch.quantile(other_sims, p / 100.0)
+
+            # Sum for mean computation (only count pairs once - with features after this one)
+            sum_abs_cos_sim += abs_cos_sims[j, idx + 1 :].sum().item()
+            n_pairs += num_features - idx - 1
+
+    # Compute summary statistics
+    mean_max_abs_cos_sim = max_abs_cos_sims.mean().item()
+    mean_percentile_abs_cos_sim = {
+        p: percentile_abs_cos_sims[p].mean().item() for p in percentiles
+    }
+    mean_abs_cos_sim = sum_abs_cos_sim / n_pairs if n_pairs > 0 else 0.0
+
+    return SuperpositionStats(
+        max_abs_cos_sims=max_abs_cos_sims.cpu(),
+        percentile_abs_cos_sims={
+            p: v.cpu() for p, v in percentile_abs_cos_sims.items()
+        },
+        mean_max_abs_cos_sim=mean_max_abs_cos_sim,
+        mean_percentile_abs_cos_sim=mean_percentile_abs_cos_sim,
+        mean_abs_cos_sim=mean_abs_cos_sim,
+        num_features=num_features,
+        hidden_dim=hidden_dim,
+    )
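The low-rank path relies on the identities ||F Fᵀ||_F² = ||Fᵀ F||_F² and 1ᵀ(F Fᵀ)1 = ||Fᵀ1||², so it never materializes the n×n matrix. A sketch that checks it against the dense computation, using only functions and attributes shown in this diff (sizes are arbitrary):

import torch

from sae_lens.synthetic.correlation import generate_random_low_rank_correlation_matrix
from sae_lens.synthetic.stats import (
    compute_correlation_matrix_stats,
    compute_low_rank_correlation_matrix_stats,
)

low_rank = generate_random_low_rank_correlation_matrix(num_features=64, rank=4, seed=0)
dense = (
    low_rank.correlation_factor @ low_rank.correlation_factor.T
    + torch.diag(low_rank.correlation_diag)
)

fast = compute_low_rank_correlation_matrix_stats(low_rank)
slow = compute_correlation_matrix_stats(dense)
assert abs(fast.rms_correlation - slow.rms_correlation) < 1e-4
assert abs(fast.mean_correlation - slow.mean_correlation) < 1e-4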
sae_lens/training/activation_scaler.py
CHANGED
@@ -28,7 +28,9 @@ class ActivationScaler:
     ) -> float:
         norms_per_batch: list[float] = []
         for _ in tqdm(
-            range(n_batches_for_norm_estimate),
+            range(n_batches_for_norm_estimate),
+            desc="Estimating norm scaling factor",
+            leave=False,
         ):
             acts = next(data_provider)
             norms_per_batch.append(acts.norm(dim=-1).mean().item())
{sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sae-lens
-Version: 6.29.1
+Version: 6.33.0
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 License-File: LICENSE
@@ -27,7 +27,7 @@ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
 Requires-Dist: safetensors (>=0.4.2,<1.0.0)
 Requires-Dist: simple-parsing (>=0.1.6,<0.2.0)
 Requires-Dist: tenacity (>=9.0.0)
-Requires-Dist: transformer-lens (>=2.16.1
+Requires-Dist: transformer-lens (>=2.16.1)
 Requires-Dist: transformers (>=4.38.1,<5.0.0)
 Requires-Dist: typing-extensions (>=4.10.0,<5.0.0)
 Project-URL: Homepage, https://decoderesearch.github.io/SAELens
{sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/RECORD
CHANGED
@@ -1,18 +1,20 @@
-sae_lens/__init__.py,sha256=
-sae_lens/analysis/__init__.py,sha256=
-sae_lens/analysis/
+sae_lens/__init__.py,sha256=gHaxlySzLskrAUg2oUZ3aOpnI3U_AVIHce-agGJL9rI,5168
+sae_lens/analysis/__init__.py,sha256=FZExlMviNwWR7OGUSGRbd0l-yUDGSp80gglI_ivILrY,412
+sae_lens/analysis/compat.py,sha256=cgE3nhFcJTcuhppxbL71VanJS7YqVEOefuneB5eOaPw,538
+sae_lens/analysis/hooked_sae_transformer.py,sha256=LpnjxSAcItqqXA4SJyZuxY4Ki0UOuWV683wg9laYAsY,14050
 sae_lens/analysis/neuronpedia_integration.py,sha256=Gx1W7hUBEuMoasNcnOnZ1wmqbXDd1pSZ1nqKEya1HQc,4962
+sae_lens/analysis/sae_transformer_bridge.py,sha256=xpJRRcB0g47EOQcmNCwMyrJJsbqMsGxVViDrV6C3upU,14916
 sae_lens/cache_activations_runner.py,sha256=TjqNWIc46Nw09jHWFjzQzgzG5wdu_87Ahe-iFjI5_0Q,13117
-sae_lens/config.py,sha256=
+sae_lens/config.py,sha256=V0BXV8rvpbm5YuVukow9FURPpdyE4HSflbdymAo0Ycg,31205
 sae_lens/constants.py,sha256=CM-h9AjZNAl2aP7hVpKk7YsFHpu-_Lfhhmq2d5qPEVc,887
-sae_lens/evals.py,sha256=
+sae_lens/evals.py,sha256=nEZpUfEUN-plw6Mj9GEqm-cU_tb1qrIF9km9ktQ0vVU,39624
 sae_lens/llm_sae_training_runner.py,sha256=M7BK55gSFYu2qFQKABHX3c8i46P1LfODCeyHFzGGuqU,15196
 sae_lens/load_model.py,sha256=C8AMykctj6H7tz_xRwB06-EXj6TfW64PtSJZR5Jxn1Y,8649
 sae_lens/loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sae_lens/loading/pretrained_sae_loaders.py,sha256=
-sae_lens/loading/pretrained_saes_directory.py,sha256=
+sae_lens/loading/pretrained_sae_loaders.py,sha256=kshvA0NivOc7B3sL19lHr_zrC_DDfW2T6YWb5j0hgAk,63930
+sae_lens/loading/pretrained_saes_directory.py,sha256=lSnHl77IO5dd7iO21ynCzZNMrzuJAT8Za4W5THNq0qw,3554
 sae_lens/pretokenize_runner.py,sha256=amJwIz3CKi2s2wNQn-10E7eAV7VFhNqtFDNTeTkwEI8,7133
-sae_lens/pretrained_saes.yaml,sha256=
+sae_lens/pretrained_saes.yaml,sha256=IVBLLR8_XNllJ1O-kVv9ED4u0u44Yn8UOL9R-f8Idp4,1511936
 sae_lens/registry.py,sha256=nhy7BPSudSATqW4lo9H_k3Na7sfGHmAf9v-3wpnLL_o,1490
 sae_lens/saes/__init__.py,sha256=SBqPaP6Gl5uPFwHlumAZATC4Wd26xKIYLAAAo4MSa5Q,2200
 sae_lens/saes/batchtopk_sae.py,sha256=x4EbgZl0GUickRPcCmtKNGS2Ra3Uy1Z1OtF2FnrSabQ,5422
@@ -20,24 +22,25 @@ sae_lens/saes/gated_sae.py,sha256=V_2ZNlV4gRD-rX5JSx1xqY7idT8ChfdQ5yxWDdu_6hg,88
 sae_lens/saes/jumprelu_sae.py,sha256=miiF-xI_yXdV9EkKjwAbU9zSMsx9KtKCz5YdXEzkN8g,13313
 sae_lens/saes/matching_pursuit_sae.py,sha256=08_G9p1YMLnE5qZVCPp6gll-iG6nHRbMMASf4_bkFt8,13207
 sae_lens/saes/matryoshka_batchtopk_sae.py,sha256=Qr6htt1HHOuO9FXI9hyaPSnGFIiJG-v7y1t1CEmkFzM,5995
-sae_lens/saes/sae.py,sha256=
+sae_lens/saes/sae.py,sha256=wkwqzNragj-1189cV52S3_XeRtEgBd2ZNwvL2EsKkWw,39429
 sae_lens/saes/standard_sae.py,sha256=_hldNZkFPAf9VGrxouR1-tN8T2OEk8IkWBcXoatrC1o,5749
-sae_lens/saes/temporal_sae.py,sha256=
+sae_lens/saes/temporal_sae.py,sha256=S44sPddVj2xujA02CC8gT1tG0in7c_CSAhspu9FHbaA,13273
 sae_lens/saes/topk_sae.py,sha256=vrMRPrCQR1o8G_kXqY_EAoGZARupkQNFB2dNZVLsusE,21073
 sae_lens/saes/transcoder.py,sha256=CTpJs8ASOK06npih7gZHygZuxqTR7HICWlOYfTiKjI4,13501
-sae_lens/synthetic/__init__.py,sha256=
+sae_lens/synthetic/__init__.py,sha256=hRRA3xhEQUacGyFbJXkLVYg_8A1bbSYYWlVovb0g4KU,3503
 sae_lens/synthetic/activation_generator.py,sha256=8L9nwC4jFRv_wg3QN-n1sFwX8w1NqwJMysWaJ41lLlY,15197
-sae_lens/synthetic/correlation.py,sha256=
+sae_lens/synthetic/correlation.py,sha256=tD8J9abWfuFtGZrEbbFn4P8FeTcNKF2V5JhBLwDUmkg,13146
 sae_lens/synthetic/evals.py,sha256=Nhi314ZnRgLfhBj-3tm_zzI-pGyFTcwllDXbIpPFXeU,4584
 sae_lens/synthetic/feature_dictionary.py,sha256=Nd4xjSTxKMnKilZ3uYi8Gv5SS5D4bv4wHiSL1uGB69E,6933
 sae_lens/synthetic/firing_probabilities.py,sha256=yclz1pWl5gE1r8LAxFvzQS88Lxwk5-3r8BCX9HLVejA,3370
 sae_lens/synthetic/hierarchy.py,sha256=nm7nwnTswktVJeKUsRZ0hLOdXcFWGbxnA1b6lefHm-4,33592
 sae_lens/synthetic/initialization.py,sha256=orMGW-786wRDHIS2W7bEH0HmlVFQ4g2z4bnnwdv5w4s,1386
 sae_lens/synthetic/plotting.py,sha256=5lFrej1QOkGAcImFNo5-o-8mI_rUVqvEI57KzUQPPtQ,8208
+sae_lens/synthetic/stats.py,sha256=BoDPKDx8pgFF5Ko_IaBRZTczm7-ANUIRjjF5W5Qh3Lk,7441
 sae_lens/synthetic/training.py,sha256=fHcX2cZ6nDupr71GX0Gk17f1NvQ0SKIVXIA6IuAb2dw,5692
 sae_lens/tokenization_and_batching.py,sha256=uoHtAs9z3XqG0Fh-iQVYVlrbyB_E3kFFhrKU30BosCo,5438
 sae_lens/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sae_lens/training/activation_scaler.py,sha256=
+sae_lens/training/activation_scaler.py,sha256=SJZzIMX1TGdeN_wT_wqgx2ij6f4p5Dm5lWH6DGNSt5g,2011
 sae_lens/training/activations_store.py,sha256=kp4-6R4rTJUSt-g-Ifg5B1h7iIe7jZj-XQSKDvDpQMI,32187
 sae_lens/training/mixing_buffer.py,sha256=1Z-S2CcQXMWGxRZJFnXeZFxbZcALkO_fP6VO37XdJQQ,2519
 sae_lens/training/optim.py,sha256=bJpqqcK4enkcPvQAJkeH4Ci1LUOlfjIMTv6-IlaAbRA,5588
@@ -46,7 +49,7 @@ sae_lens/training/types.py,sha256=1FpLx_Doda9vZpmfm-x1e8wGBYpyhe9Kpb_JuM5nIFM,90
 sae_lens/training/upload_saes_to_huggingface.py,sha256=r_WzI1zLtGZ5TzAxuG3xa_8T09j3zXJrWd_vzPsPGkQ,4469
 sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
 sae_lens/util.py,sha256=oIMoeyEP2IzcPFmRbKUzOAycgEyMcOasGeO_BGVZbc4,4846
-sae_lens-6.
-sae_lens-6.
-sae_lens-6.
-sae_lens-6.
+sae_lens-6.33.0.dist-info/METADATA,sha256=X6XqngWTNEsfdaPPWXxtF8Kvdp8fAk8i68sfRtDb2xo,6566
+sae_lens-6.33.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+sae_lens-6.33.0.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
+sae_lens-6.33.0.dist-info/RECORD,,
{sae_lens-6.29.1.dist-info → sae_lens-6.33.0.dist-info}/licenses/LICENSE
File without changes