PyPI - sae-lens - Versions diffs - 6.15.0__py3-none-any.whl → 6.22.1__py3-none-any.whl - Mend

sae-lens 6.15.0__py3-none-any.whl → 6.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

sae_lens/__init__.py +6 -1
sae_lens/analysis/hooked_sae_transformer.py +4 -13
sae_lens/cache_activations_runner.py +3 -4
sae_lens/config.py +39 -2
sae_lens/constants.py +1 -0
sae_lens/llm_sae_training_runner.py +9 -4
sae_lens/loading/pretrained_sae_loaders.py +188 -0
sae_lens/loading/pretrained_saes_directory.py +5 -3
sae_lens/pretrained_saes.yaml +77 -1
sae_lens/saes/__init__.py +3 -0
sae_lens/saes/batchtopk_sae.py +3 -1
sae_lens/saes/gated_sae.py +4 -9
sae_lens/saes/jumprelu_sae.py +4 -9
sae_lens/saes/matryoshka_batchtopk_sae.py +8 -15
sae_lens/saes/sae.py +19 -31
sae_lens/saes/standard_sae.py +4 -9
sae_lens/saes/temporal_sae.py +365 -0
sae_lens/saes/topk_sae.py +7 -10
sae_lens/training/activation_scaler.py +7 -0
sae_lens/training/activations_store.py +49 -7
sae_lens/training/optim.py +11 -0
sae_lens/training/sae_trainer.py +50 -11
{sae_lens-6.15.0.dist-info → sae_lens-6.22.1.dist-info}/METADATA +16 -16
sae_lens-6.22.1.dist-info/RECORD +41 -0
sae_lens-6.15.0.dist-info/RECORD +0 -40
{sae_lens-6.15.0.dist-info → sae_lens-6.22.1.dist-info}/WHEEL +0 -0
{sae_lens-6.15.0.dist-info → sae_lens-6.22.1.dist-info}/licenses/LICENSE +0 -0

sae_lens/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.15.0"
+__version__ = "6.22.1"
 import logging
@@ -28,6 +28,8 @@ from sae_lens.saes import (
     StandardSAEConfig,
     StandardTrainingSAE,
     StandardTrainingSAEConfig,
+    TemporalSAE,
+    TemporalSAEConfig,
     TopKSAE,
     TopKSAEConfig,
     TopKTrainingSAE,
@@ -105,6 +107,8 @@ __all__ = [
     "JumpReLUTranscoderConfig",
     "MatryoshkaBatchTopKTrainingSAE",
     "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
 ]
@@ -127,3 +131,4 @@ register_sae_training_class(
 register_sae_class("transcoder", Transcoder, TranscoderConfig)
 register_sae_class("skip_transcoder", SkipTranscoder, SkipTranscoderConfig)
 register_sae_class("jumprelu_transcoder", JumpReLUTranscoder, JumpReLUTranscoderConfig)
+register_sae_class("temporal", TemporalSAE, TemporalSAEConfig)

sae_lens/analysis/hooked_sae_transformer.py CHANGED Viewed

@@ -3,7 +3,6 @@ from contextlib import contextmanager
 from typing import Any, Callable
 import torch
-from jaxtyping import Float
 from transformer_lens.ActivationCache import ActivationCache
 from transformer_lens.components.mlps.can_be_used_as_mlp import CanBeUsedAsMLP
 from transformer_lens.hook_points import HookPoint  # Hooking utilities
@@ -11,8 +10,8 @@ from transformer_lens.HookedTransformer import HookedTransformer
 from sae_lens.saes.sae import SAE
-SingleLoss = Float[torch.Tensor, ""]  # Type alias for a single element tensor
-LossPerToken = Float[torch.Tensor, "batch pos-1"]
+SingleLoss = torch.Tensor  # Type alias for a single element tensor
+LossPerToken = torch.Tensor
 Loss = SingleLoss | LossPerToken
@@ -171,12 +170,7 @@ class HookedSAETransformer(HookedTransformer):
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         **model_kwargs: Any,
-    ) -> (
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss]
-    ):
+    ) -> None | torch.Tensor | Loss | tuple[torch.Tensor, Loss]:
         """Wrapper around HookedTransformer forward pass.
         Runs the model with the given SAEs attached for one forward pass, then removes them. By default, will reset all SAEs to original state after.
@@ -203,10 +197,7 @@ class HookedSAETransformer(HookedTransformer):
         remove_batch_dim: bool = False,
         **kwargs: Any,
     ) -> tuple[
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss],
+        None | torch.Tensor | Loss | tuple[torch.Tensor, Loss],
         ActivationCache | dict[str, torch.Tensor],
     ]:
         """Wrapper around 'run_with_cache' in HookedTransformer.

sae_lens/cache_activations_runner.py CHANGED Viewed

@@ -9,8 +9,7 @@ import torch
 from datasets import Array2D, Dataset, Features, Sequence, Value
 from datasets.fingerprint import generate_fingerprint
 from huggingface_hub import HfApi
-from jaxtyping import Float, Int
-from tqdm import tqdm
+from tqdm.auto import tqdm
 from transformer_lens.HookedTransformer import HookedRootModule
 from sae_lens import logger
@@ -318,8 +317,8 @@ class CacheActivationsRunner:
     def _create_shard(
         self,
         buffer: tuple[
-            Float[torch.Tensor, "(bs context_size) d_in"],
-            Int[torch.Tensor, "(bs context_size)"] | None,
+            torch.Tensor,  # shape: (bs context_size) d_in
+            torch.Tensor | None,  # shape: (bs context_size) or None
         ],
     ) -> Dataset:
         hook_names = [self.cfg.hook_name]

sae_lens/config.py CHANGED Viewed

@@ -18,6 +18,7 @@ from datasets import (
 from sae_lens import __version__, logger
 from sae_lens.constants import DTYPE_MAP
+from sae_lens.registry import get_sae_training_class
 from sae_lens.saes.sae import TrainingSAEConfig
 if TYPE_CHECKING:
@@ -171,6 +172,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         n_checkpoints (int): The number of checkpoints to save during training. 0 means no checkpoints.
         checkpoint_path (str | None): The path to save checkpoints. A unique ID will be appended to this path. Set to None to disable checkpoint saving. (default is "checkpoints")
         save_final_checkpoint (bool): Whether to include an additional final checkpoint when training is finished. (default is False).
+        resume_from_checkpoint (str | None): The path to the checkpoint to resume training from. (default is None).
         output_path (str | None): The path to save outputs. Set to None to disable output saving. (default is "output")
         verbose (bool): Whether to print verbose output. (default is True)
         model_kwargs (dict[str, Any]): Keyword arguments for `model.run_with_cache`
@@ -261,6 +263,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     checkpoint_path: str | None = "checkpoints"
     save_final_checkpoint: bool = False
     output_path: str | None = "output"
+    resume_from_checkpoint: str | None = None
     # Misc
     verbose: bool = True
@@ -385,8 +388,11 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         return self.sae.to_dict()
     def to_dict(self) -> dict[str, Any]:
-        # Make a shallow copy of config's dictionary
-        d = dict(self.__dict__)
+        """
+        Convert the config to a dictionary.
+        """
+        d = asdict(self)
         d["logger"] = asdict(self.logger)
         d["sae"] = self.sae.to_dict()
@@ -396,6 +402,37 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         d["act_store_device"] = str(self.act_store_device)
         return d
+    @classmethod
+    def from_dict(cls, cfg_dict: dict[str, Any]) -> "LanguageModelSAERunnerConfig[Any]":
+        """
+        Load a LanguageModelSAERunnerConfig from a dictionary given by `to_dict`.
+        Args:
+            cfg_dict (dict[str, Any]): The dictionary to load the config from.
+        Returns:
+            LanguageModelSAERunnerConfig: The loaded config.
+        """
+        if "sae" not in cfg_dict:
+            raise ValueError("sae field is required in the config dictionary")
+        if "architecture" not in cfg_dict["sae"]:
+            raise ValueError("architecture field is required in the sae dictionary")
+        if "logger" not in cfg_dict:
+            raise ValueError("logger field is required in the config dictionary")
+        sae_config_class = get_sae_training_class(cfg_dict["sae"]["architecture"])[1]
+        sae_cfg = sae_config_class.from_dict(cfg_dict["sae"])
+        logger_cfg = LoggingConfig(**cfg_dict["logger"])
+        updated_cfg_dict: dict[str, Any] = {
+            **cfg_dict,
+            "sae": sae_cfg,
+            "logger": logger_cfg,
+        }
+        output = cls(**updated_cfg_dict)
+        # the post_init always appends to checkpoint path, so we need to set it explicitly here.
+        if "checkpoint_path" in cfg_dict:
+            output.checkpoint_path = cfg_dict["checkpoint_path"]
+        return output
     def to_sae_trainer_config(self) -> "SAETrainerConfig":
         return SAETrainerConfig(
             n_checkpoints=self.n_checkpoints,

sae_lens/constants.py CHANGED Viewed

@@ -17,5 +17,6 @@ SAE_WEIGHTS_FILENAME = "sae_weights.safetensors"
 SAE_CFG_FILENAME = "cfg.json"
 RUNNER_CFG_FILENAME = "runner_cfg.json"
 SPARSIFY_WEIGHTS_FILENAME = "sae.safetensors"
+TRAINER_STATE_FILENAME = "trainer_state.pt"
 ACTIVATIONS_STORE_STATE_FILENAME = "activations_store_state.safetensors"
 ACTIVATION_SCALER_CFG_FILENAME = "activation_scaler.json"

sae_lens/llm_sae_training_runner.py CHANGED Viewed

@@ -16,7 +16,6 @@ from typing_extensions import deprecated
 from sae_lens import logger
 from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
 from sae_lens.constants import (
-    ACTIVATIONS_STORE_STATE_FILENAME,
     RUNNER_CFG_FILENAME,
     SPARSITY_FILENAME,
 )
@@ -112,6 +111,7 @@ class LanguageModelSAETrainingRunner:
         override_dataset: HfDataset | None = None,
         override_model: HookedRootModule | None = None,
         override_sae: TrainingSAE[Any] | None = None,
+        resume_from_checkpoint: Path | str | None = None,
     ):
         if override_dataset is not None:
             logger.warning(
@@ -153,6 +153,7 @@ class LanguageModelSAETrainingRunner:
                 )
         else:
             self.sae = override_sae
         self.sae.to(self.cfg.device)
     def run(self):
@@ -185,6 +186,12 @@ class LanguageModelSAETrainingRunner:
             cfg=self.cfg.to_sae_trainer_config(),
         )
+        if self.cfg.resume_from_checkpoint is not None:
+            logger.info(f"Resuming from checkpoint: {self.cfg.resume_from_checkpoint}")
+            trainer.load_trainer_state(self.cfg.resume_from_checkpoint)
+            self.sae.load_weights_from_checkpoint(self.cfg.resume_from_checkpoint)
+            self.activations_store.load_from_checkpoint(self.cfg.resume_from_checkpoint)
         self._compile_if_needed()
         sae = self.run_trainer_with_interruption_handling(trainer)
@@ -304,9 +311,7 @@ class LanguageModelSAETrainingRunner:
         if checkpoint_path is None:
             return
-        self.activations_store.save(
-            str(checkpoint_path / ACTIVATIONS_STORE_STATE_FILENAME)
-        )
+        self.activations_store.save_to_checkpoint(checkpoint_path)
         runner_config = self.cfg.to_dict()
         with open(checkpoint_path / RUNNER_CFG_FILENAME, "w") as f:

sae_lens/loading/pretrained_sae_loaders.py CHANGED Viewed

@@ -523,6 +523,82 @@ def gemma_2_sae_huggingface_loader(
     return cfg_dict, state_dict, log_sparsity
+def get_goodfire_config_from_hf(
+    repo_id: str,
+    folder_name: str,  # noqa: ARG001
+    device: str,
+    force_download: bool = False,  # noqa: ARG001
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    cfg_dict = None
+    if repo_id == "Goodfire/Llama-3.3-70B-Instruct-SAE-l50":
+        if folder_name != "Llama-3.3-70B-Instruct-SAE-l50.pt":
+            raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+        cfg_dict = {
+            "architecture": "standard",
+            "d_in": 8192,
+            "d_sae": 65536,
+            "model_name": "meta-llama/Llama-3.3-70B-Instruct",
+            "hook_name": "blocks.50.hook_resid_post",
+            "hook_head_index": None,
+            "dataset_path": "lmsys/lmsys-chat-1m",
+            "apply_b_dec_to_input": False,
+        }
+    elif repo_id == "Goodfire/Llama-3.1-8B-Instruct-SAE-l19":
+        if folder_name != "Llama-3.1-8B-Instruct-SAE-l19.pth":
+            raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+        cfg_dict = {
+            "architecture": "standard",
+            "d_in": 4096,
+            "d_sae": 65536,
+            "model_name": "meta-llama/Llama-3.1-8B-Instruct",
+            "hook_name": "blocks.19.hook_resid_post",
+            "hook_head_index": None,
+            "dataset_path": "lmsys/lmsys-chat-1m",
+            "apply_b_dec_to_input": False,
+        }
+    if cfg_dict is None:
+        raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+    if device is not None:
+        cfg_dict["device"] = device
+    if cfg_overrides is not None:
+        cfg_dict.update(cfg_overrides)
+    return cfg_dict
+def get_goodfire_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
+    cfg_dict = get_goodfire_config_from_hf(
+        repo_id,
+        folder_name,
+        device,
+        force_download,
+        cfg_overrides,
+    )
+    # Download the SAE weights
+    sae_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=folder_name,
+        force_download=force_download,
+    )
+    raw_state_dict = torch.load(sae_path, map_location=device)
+    state_dict = {
+        "W_enc": raw_state_dict["encoder_linear.weight"].T,
+        "W_dec": raw_state_dict["decoder_linear.weight"].T,
+        "b_enc": raw_state_dict["encoder_linear.bias"],
+        "b_dec": raw_state_dict["decoder_linear.bias"],
+    }
+    return cfg_dict, state_dict, None
 def get_llama_scope_config_from_hf(
     repo_id: str,
     folder_name: str,
@@ -1475,6 +1551,114 @@ def get_mntss_clt_layer_config_from_hf(
     }
+def get_temporal_sae_config_from_hf(
+    repo_id: str,
+    folder_name: str,
+    device: str,
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Get TemporalSAE config without loading weights."""
+    # Download config file
+    conf_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"{folder_name}/conf.yaml",
+        force_download=force_download,
+    )
+    # Load and parse config
+    with open(conf_path) as f:
+        yaml_config = yaml.safe_load(f)
+    # Extract parameters
+    d_in = yaml_config["llm"]["dimin"]
+    exp_factor = yaml_config["sae"]["exp_factor"]
+    d_sae = int(d_in * exp_factor)
+    # extract layer from folder_name eg : "layer_12/temporal"
+    layer = re.search(r"layer_(\d+)", folder_name)
+    if layer is None:
+        raise ValueError(f"Could not find layer in folder_name: {folder_name}")
+    layer = int(layer.group(1))
+    # Build config dict
+    cfg_dict = {
+        "architecture": "temporal",
+        "hook_name": f"blocks.{layer}.hook_resid_post",
+        "d_in": d_in,
+        "d_sae": d_sae,
+        "n_heads": yaml_config["sae"]["n_heads"],
+        "n_attn_layers": yaml_config["sae"]["n_attn_layers"],
+        "bottleneck_factor": yaml_config["sae"]["bottleneck_factor"],
+        "sae_diff_type": yaml_config["sae"]["sae_diff_type"],
+        "kval_topk": yaml_config["sae"]["kval_topk"],
+        "tied_weights": yaml_config["sae"]["tied_weights"],
+        "dtype": yaml_config["data"]["dtype"],
+        "device": device,
+        "normalize_activations": "constant_scalar_rescale",
+        "activation_normalization_factor": yaml_config["sae"]["scaling_factor"],
+        "apply_b_dec_to_input": True,
+    }
+    if cfg_overrides:
+        cfg_dict.update(cfg_overrides)
+    return cfg_dict
+def temporal_sae_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
+    """
+    Load TemporalSAE from canrager/temporalSAEs format (safetensors version).
+    Expects folder_name to contain:
+    - conf.yaml (configuration)
+    - latest_ckpt.safetensors (model weights)
+    """
+    cfg_dict = get_temporal_sae_config_from_hf(
+        repo_id=repo_id,
+        folder_name=folder_name,
+        device=device,
+        force_download=force_download,
+        cfg_overrides=cfg_overrides,
+    )
+    # Download checkpoint (safetensors format)
+    ckpt_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"{folder_name}/latest_ckpt.safetensors",
+        force_download=force_download,
+    )
+    # Load checkpoint from safetensors
+    state_dict_raw = load_file(ckpt_path, device=device)
+    # Convert to SAELens naming convention
+    # TemporalSAE uses: D (decoder), E (encoder), b (bias), attn_layers.*
+    state_dict = {}
+    # Copy attention layers as-is
+    for key, value in state_dict_raw.items():
+        if key.startswith("attn_layers."):
+            state_dict[key] = value.to(device)
+    # Main parameters
+    state_dict["W_dec"] = state_dict_raw["D"].to(device)
+    state_dict["b_dec"] = state_dict_raw["b"].to(device)
+    # Handle tied/untied weights
+    if "E" in state_dict_raw:
+        state_dict["W_enc"] = state_dict_raw["E"].to(device)
+    return cfg_dict, state_dict, None
 NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "sae_lens": sae_lens_huggingface_loader,
     "connor_rob_hook_z": connor_rob_hook_z_huggingface_loader,
@@ -1487,6 +1671,8 @@ NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "gemma_2_transcoder": gemma_2_transcoder_huggingface_loader,
     "mwhanna_transcoder": mwhanna_transcoder_huggingface_loader,
     "mntss_clt_layer_transcoder": mntss_clt_layer_huggingface_loader,
+    "temporal": temporal_sae_huggingface_loader,
+    "goodfire": get_goodfire_huggingface_loader,
 }
@@ -1502,4 +1688,6 @@ NAMED_PRETRAINED_SAE_CONFIG_GETTERS: dict[str, PretrainedSaeConfigHuggingfaceLoa
     "gemma_2_transcoder": get_gemma_2_transcoder_config_from_hf,
     "mwhanna_transcoder": get_mwhanna_transcoder_config_from_hf,
     "mntss_clt_layer_transcoder": get_mntss_clt_layer_config_from_hf,
+    "temporal": get_temporal_sae_config_from_hf,
+    "goodfire": get_goodfire_config_from_hf,
 }

sae_lens/loading/pretrained_saes_directory.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cache
-from importlib import resources
+from importlib.resources import files
 from typing import Any
 import yaml
@@ -24,7 +24,8 @@ def get_pretrained_saes_directory() -> dict[str, PretrainedSAELookup]:
     package = "sae_lens"
     # Access the file within the package using importlib.resources
     directory: dict[str, PretrainedSAELookup] = {}
-    with resources.open_text(package, "pretrained_saes.yaml") as file:
+    yaml_file = files(package).joinpath("pretrained_saes.yaml")
+    with yaml_file.open("r") as file:
         # Load the YAML file content
         data = yaml.safe_load(file)
         for release, value in data.items():
@@ -68,7 +69,8 @@ def get_norm_scaling_factor(release: str, sae_id: str) -> float | None:
         float | None: The norm_scaling_factor if it exists, None otherwise.
     """
     package = "sae_lens"
-    with resources.open_text(package, "pretrained_saes.yaml") as file:
+    yaml_file = files(package).joinpath("pretrained_saes.yaml")
+    with yaml_file.open("r") as file:
         data = yaml.safe_load(file)
         if release in data:
             for sae_info in data[release]["saes"]:

sae_lens/pretrained_saes.yaml CHANGED Viewed

@@ -1,3 +1,35 @@
+temporal-sae-gemma-2-2b:
+  conversion_func: temporal
+  model: gemma-2-2b
+  repo_id: canrager/temporalSAEs
+  config_overrides:
+    model_name: gemma-2-2b
+    hook_name: blocks.12.hook_resid_post
+    dataset_path: monology/pile-uncopyrighted
+  saes:
+  - id: blocks.12.hook_resid_post
+    l0: 192
+    norm_scaling_factor: 0.00666666667
+    path: gemma-2-2B/layer_12/temporal
+    neuronpedia: gemma-2-2b/12-temporal-res
+temporal-sae-llama-3.1-8b:
+  conversion_func: temporal
+  model: meta-llama/Llama-3.1-8B
+  repo_id: canrager/temporalSAEs
+  config_overrides:
+    model_name: meta-llama/Llama-3.1-8B
+    dataset_path: monology/pile-uncopyrighted
+  saes:
+  - id: blocks.15.hook_resid_post
+    l0: 256
+    norm_scaling_factor: 0.029
+    path: llama-3.1-8B/layer_15/temporal
+    neuronpedia: llama3.1-8b/15-temporal-res
+  - id: blocks.26.hook_resid_post
+    l0: 256
+    norm_scaling_factor: 0.029
+    path: llama-3.1-8B/layer_26/temporal
+    neuronpedia: llama3.1-8b/26-temporal-res
 deepseek-r1-distill-llama-8b-qresearch:
   conversion_func: deepseek_r1
   model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
@@ -14882,4 +14914,48 @@ qwen2.5-7b-instruct-andyrdt:
     neuronpedia: qwen2.5-7b-it/23-resid-post-aa
   - id: resid_post_layer_27_trainer_1
     path: resid_post_layer_27/trainer_1
-    neuronpedia: qwen2.5-7b-it/27-resid-post-aa
+    neuronpedia: qwen2.5-7b-it/27-resid-post-aa
+gpt-oss-20b-andyrdt:
+  conversion_func: dictionary_learning_1
+  model: openai/gpt-oss-20b
+  repo_id: andyrdt/saes-gpt-oss-20b
+  saes:
+  - id: resid_post_layer_3_trainer_0
+    path: resid_post_layer_3/trainer_0
+    neuronpedia: gpt-oss-20b/3-resid-post-aa
+  - id: resid_post_layer_7_trainer_0
+    path: resid_post_layer_7/trainer_0
+    neuronpedia: gpt-oss-20b/7-resid-post-aa
+  - id: resid_post_layer_11_trainer_0
+    path: resid_post_layer_11/trainer_0
+    neuronpedia: gpt-oss-20b/11-resid-post-aa
+  - id: resid_post_layer_15_trainer_0
+    path: resid_post_layer_15/trainer_0
+    neuronpedia: gpt-oss-20b/15-resid-post-aa
+  - id: resid_post_layer_19_trainer_0
+    path: resid_post_layer_19/trainer_0
+    neuronpedia: gpt-oss-20b/19-resid-post-aa
+  - id: resid_post_layer_23_trainer_0
+    path: resid_post_layer_23/trainer_0
+    neuronpedia: gpt-oss-20b/23-resid-post-aa
+goodfire-llama-3.3-70b-instruct:
+  conversion_func: goodfire
+  model: meta-llama/Llama-3.3-70B-Instruct
+  repo_id: Goodfire/Llama-3.3-70B-Instruct-SAE-l50
+  saes:
+  - id: layer_50
+    path: Llama-3.3-70B-Instruct-SAE-l50.pt
+    l0: 121
+    neuronpedia: llama3.3-70b-it/50-resid-post-gf
+goodfire-llama-3.1-8b-instruct:
+  conversion_func: goodfire
+  model: meta-llama/Llama-3.1-8B-Instruct
+  repo_id: Goodfire/Llama-3.1-8B-Instruct-SAE-l19
+  saes:
+  - id: layer_19
+    path: Llama-3.1-8B-Instruct-SAE-l19.pth
+    l0: 91
+    neuronpedia: llama3.1-8b-it/19-resid-post-gf

sae_lens/saes/__init__.py CHANGED Viewed

@@ -25,6 +25,7 @@ from .standard_sae import (
     StandardTrainingSAE,
     StandardTrainingSAEConfig,
 )
+from .temporal_sae import TemporalSAE, TemporalSAEConfig
 from .topk_sae import (
     TopKSAE,
     TopKSAEConfig,
@@ -71,4 +72,6 @@ __all__ = [
     "JumpReLUTranscoderConfig",
     "MatryoshkaBatchTopKTrainingSAE",
     "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
 ]

sae_lens/saes/batchtopk_sae.py CHANGED Viewed

@@ -23,7 +23,9 @@ class BatchTopK(nn.Module):
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         acts = x.relu()
         flat_acts = acts.flatten()
-        acts_topk_flat = torch.topk(flat_acts, int(self.k * acts.shape[0]), dim=-1)
+        # Calculate total number of samples across all non-feature dimensions
+        num_samples = acts.shape[:-1].numel()
+        acts_topk_flat = torch.topk(flat_acts, int(self.k * num_samples), dim=-1)
         return (
             torch.zeros_like(flat_acts)
             .scatter(-1, acts_topk_flat.indices, acts_topk_flat.values)

sae_lens/saes/gated_sae.py CHANGED Viewed

@@ -2,7 +2,6 @@ from dataclasses import dataclass
 from typing import Any
 import torch
-from jaxtyping import Float
 from numpy.typing import NDArray
 from torch import nn
 from typing_extensions import override
@@ -49,9 +48,7 @@ class GatedSAE(SAE[GatedSAEConfig]):
         super().initialize_weights()
         _init_weights_gated(self)
-    def encode(
-        self, x: Float[torch.Tensor, "... d_in"]
-    ) -> Float[torch.Tensor, "... d_sae"]:
+    def encode(self, x: torch.Tensor) -> torch.Tensor:
         """
         Encode the input tensor into the feature space using a gated encoder.
         This must match the original encode_gated implementation from SAE class.
@@ -72,9 +69,7 @@ class GatedSAE(SAE[GatedSAEConfig]):
         # Combine gating and magnitudes
         return self.hook_sae_acts_post(active_features * feature_magnitudes)
-    def decode(
-        self, feature_acts: Float[torch.Tensor, "... d_sae"]
-    ) -> Float[torch.Tensor, "... d_in"]:
+    def decode(self, feature_acts: torch.Tensor) -> torch.Tensor:
         """
         Decode the feature activations back into the input space:
           1) Apply optional finetuning scaling.
@@ -147,8 +142,8 @@ class GatedTrainingSAE(TrainingSAE[GatedTrainingSAEConfig]):
         _init_weights_gated(self)
     def encode_with_hidden_pre(
-        self, x: Float[torch.Tensor, "... d_in"]
-    ) -> tuple[Float[torch.Tensor, "... d_sae"], Float[torch.Tensor, "... d_sae"]]:
+        self, x: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Gated forward pass with pre-activation (for training).
         """

sae_lens/saes/jumprelu_sae.py CHANGED Viewed

@@ -3,7 +3,6 @@ from typing import Any, Literal
 import numpy as np
 import torch
-from jaxtyping import Float
 from torch import nn
 from typing_extensions import override
@@ -130,9 +129,7 @@ class JumpReLUSAE(SAE[JumpReLUSAEConfig]):
             torch.zeros(self.cfg.d_sae, dtype=self.dtype, device=self.device)
         )
-    def encode(
-        self, x: Float[torch.Tensor, "... d_in"]
-    ) -> Float[torch.Tensor, "... d_sae"]:
+    def encode(self, x: torch.Tensor) -> torch.Tensor:
         """
         Encode the input tensor into the feature space using JumpReLU.
         The threshold parameter determines which units remain active.
@@ -150,9 +147,7 @@ class JumpReLUSAE(SAE[JumpReLUSAEConfig]):
         # 3) Multiply the normally activated units by that mask.
         return self.hook_sae_acts_post(base_acts * jump_relu_mask)
-    def decode(
-        self, feature_acts: Float[torch.Tensor, "... d_sae"]
-    ) -> Float[torch.Tensor, "... d_in"]:
+    def decode(self, feature_acts: torch.Tensor) -> torch.Tensor:
         """
         Decode the feature activations back to the input space.
         Follows the same steps as StandardSAE: apply scaling, transform, hook, and optionally reshape.
@@ -265,8 +260,8 @@ class JumpReLUTrainingSAE(TrainingSAE[JumpReLUTrainingSAEConfig]):
         return torch.exp(self.log_threshold)
     def encode_with_hidden_pre(
-        self, x: Float[torch.Tensor, "... d_in"]
-    ) -> tuple[Float[torch.Tensor, "... d_sae"], Float[torch.Tensor, "... d_sae"]]:
+        self, x: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         sae_in = self.process_sae_in(x)
         hidden_pre = sae_in @ self.W_enc + self.b_enc

sae-lens 6.15.0__py3-none-any.whl → 6.22.1__py3-none-any.whl

sae-lens 6.15.0__py3-none-any.whl → 6.22.1__py3-none-any.whl