PyPI - sae-lens - Versions diffs - 6.14.1__py3-none-any.whl → 6.22.1__py3-none-any.whl - Mend

sae-lens 6.14.1__py3-none-any.whl → 6.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

sae_lens/__init__.py +15 -1
sae_lens/analysis/hooked_sae_transformer.py +4 -13
sae_lens/cache_activations_runner.py +3 -4
sae_lens/config.py +39 -2
sae_lens/constants.py +1 -0
sae_lens/evals.py +18 -14
sae_lens/llm_sae_training_runner.py +17 -18
sae_lens/loading/pretrained_sae_loaders.py +188 -0
sae_lens/loading/pretrained_saes_directory.py +5 -3
sae_lens/pretrained_saes.yaml +77 -1
sae_lens/saes/__init__.py +9 -0
sae_lens/saes/batchtopk_sae.py +3 -1
sae_lens/saes/gated_sae.py +4 -9
sae_lens/saes/jumprelu_sae.py +4 -9
sae_lens/saes/matryoshka_batchtopk_sae.py +136 -0
sae_lens/saes/sae.py +19 -31
sae_lens/saes/standard_sae.py +4 -9
sae_lens/saes/temporal_sae.py +365 -0
sae_lens/saes/topk_sae.py +7 -10
sae_lens/training/activation_scaler.py +7 -0
sae_lens/training/activations_store.py +54 -34
sae_lens/training/optim.py +11 -0
sae_lens/training/sae_trainer.py +50 -11
sae_lens/util.py +27 -0
{sae_lens-6.14.1.dist-info → sae_lens-6.22.1.dist-info}/METADATA +16 -16
sae_lens-6.22.1.dist-info/RECORD +41 -0
sae_lens-6.14.1.dist-info/RECORD +0 -39
{sae_lens-6.14.1.dist-info → sae_lens-6.22.1.dist-info}/WHEEL +0 -0
{sae_lens-6.14.1.dist-info → sae_lens-6.22.1.dist-info}/licenses/LICENSE +0 -0

sae_lens/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.14.1"
+__version__ = "6.22.1"
 import logging
@@ -19,6 +19,8 @@ from sae_lens.saes import (
     JumpReLUTrainingSAEConfig,
     JumpReLUTranscoder,
     JumpReLUTranscoderConfig,
+    MatryoshkaBatchTopKTrainingSAE,
+    MatryoshkaBatchTopKTrainingSAEConfig,
     SAEConfig,
     SkipTranscoder,
     SkipTranscoderConfig,
@@ -26,6 +28,8 @@ from sae_lens.saes import (
     StandardSAEConfig,
     StandardTrainingSAE,
     StandardTrainingSAEConfig,
+    TemporalSAE,
+    TemporalSAEConfig,
     TopKSAE,
     TopKSAEConfig,
     TopKTrainingSAE,
@@ -101,6 +105,10 @@ __all__ = [
     "SkipTranscoderConfig",
     "JumpReLUTranscoder",
     "JumpReLUTranscoderConfig",
+    "MatryoshkaBatchTopKTrainingSAE",
+    "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
 ]
@@ -115,6 +123,12 @@ register_sae_training_class("jumprelu", JumpReLUTrainingSAE, JumpReLUTrainingSAE
 register_sae_training_class(
     "batchtopk", BatchTopKTrainingSAE, BatchTopKTrainingSAEConfig
 )
+register_sae_training_class(
+    "matryoshka_batchtopk",
+    MatryoshkaBatchTopKTrainingSAE,
+    MatryoshkaBatchTopKTrainingSAEConfig,
+)
 register_sae_class("transcoder", Transcoder, TranscoderConfig)
 register_sae_class("skip_transcoder", SkipTranscoder, SkipTranscoderConfig)
 register_sae_class("jumprelu_transcoder", JumpReLUTranscoder, JumpReLUTranscoderConfig)
+register_sae_class("temporal", TemporalSAE, TemporalSAEConfig)

sae_lens/analysis/hooked_sae_transformer.py CHANGED Viewed

@@ -3,7 +3,6 @@ from contextlib import contextmanager
 from typing import Any, Callable
 import torch
-from jaxtyping import Float
 from transformer_lens.ActivationCache import ActivationCache
 from transformer_lens.components.mlps.can_be_used_as_mlp import CanBeUsedAsMLP
 from transformer_lens.hook_points import HookPoint  # Hooking utilities
@@ -11,8 +10,8 @@ from transformer_lens.HookedTransformer import HookedTransformer
 from sae_lens.saes.sae import SAE
-SingleLoss = Float[torch.Tensor, ""]  # Type alias for a single element tensor
-LossPerToken = Float[torch.Tensor, "batch pos-1"]
+SingleLoss = torch.Tensor  # Type alias for a single element tensor
+LossPerToken = torch.Tensor
 Loss = SingleLoss | LossPerToken
@@ -171,12 +170,7 @@ class HookedSAETransformer(HookedTransformer):
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         **model_kwargs: Any,
-    ) -> (
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss]
-    ):
+    ) -> None | torch.Tensor | Loss | tuple[torch.Tensor, Loss]:
         """Wrapper around HookedTransformer forward pass.
         Runs the model with the given SAEs attached for one forward pass, then removes them. By default, will reset all SAEs to original state after.
@@ -203,10 +197,7 @@ class HookedSAETransformer(HookedTransformer):
         remove_batch_dim: bool = False,
         **kwargs: Any,
     ) -> tuple[
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss],
+        None | torch.Tensor | Loss | tuple[torch.Tensor, Loss],
         ActivationCache | dict[str, torch.Tensor],
     ]:
         """Wrapper around 'run_with_cache' in HookedTransformer.

sae_lens/cache_activations_runner.py CHANGED Viewed

@@ -9,8 +9,7 @@ import torch
 from datasets import Array2D, Dataset, Features, Sequence, Value
 from datasets.fingerprint import generate_fingerprint
 from huggingface_hub import HfApi
-from jaxtyping import Float, Int
-from tqdm import tqdm
+from tqdm.auto import tqdm
 from transformer_lens.HookedTransformer import HookedRootModule
 from sae_lens import logger
@@ -318,8 +317,8 @@ class CacheActivationsRunner:
     def _create_shard(
         self,
         buffer: tuple[
-            Float[torch.Tensor, "(bs context_size) d_in"],
-            Int[torch.Tensor, "(bs context_size)"] | None,
+            torch.Tensor,  # shape: (bs context_size) d_in
+            torch.Tensor | None,  # shape: (bs context_size) or None
         ],
     ) -> Dataset:
         hook_names = [self.cfg.hook_name]

sae_lens/config.py CHANGED Viewed

@@ -18,6 +18,7 @@ from datasets import (
 from sae_lens import __version__, logger
 from sae_lens.constants import DTYPE_MAP
+from sae_lens.registry import get_sae_training_class
 from sae_lens.saes.sae import TrainingSAEConfig
 if TYPE_CHECKING:
@@ -171,6 +172,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         n_checkpoints (int): The number of checkpoints to save during training. 0 means no checkpoints.
         checkpoint_path (str | None): The path to save checkpoints. A unique ID will be appended to this path. Set to None to disable checkpoint saving. (default is "checkpoints")
         save_final_checkpoint (bool): Whether to include an additional final checkpoint when training is finished. (default is False).
+        resume_from_checkpoint (str | None): The path to the checkpoint to resume training from. (default is None).
         output_path (str | None): The path to save outputs. Set to None to disable output saving. (default is "output")
         verbose (bool): Whether to print verbose output. (default is True)
         model_kwargs (dict[str, Any]): Keyword arguments for `model.run_with_cache`
@@ -261,6 +263,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     checkpoint_path: str | None = "checkpoints"
     save_final_checkpoint: bool = False
     output_path: str | None = "output"
+    resume_from_checkpoint: str | None = None
     # Misc
     verbose: bool = True
@@ -385,8 +388,11 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         return self.sae.to_dict()
     def to_dict(self) -> dict[str, Any]:
-        # Make a shallow copy of config's dictionary
-        d = dict(self.__dict__)
+        """
+        Convert the config to a dictionary.
+        """
+        d = asdict(self)
         d["logger"] = asdict(self.logger)
         d["sae"] = self.sae.to_dict()
@@ -396,6 +402,37 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         d["act_store_device"] = str(self.act_store_device)
         return d
+    @classmethod
+    def from_dict(cls, cfg_dict: dict[str, Any]) -> "LanguageModelSAERunnerConfig[Any]":
+        """
+        Load a LanguageModelSAERunnerConfig from a dictionary given by `to_dict`.
+        Args:
+            cfg_dict (dict[str, Any]): The dictionary to load the config from.
+        Returns:
+            LanguageModelSAERunnerConfig: The loaded config.
+        """
+        if "sae" not in cfg_dict:
+            raise ValueError("sae field is required in the config dictionary")
+        if "architecture" not in cfg_dict["sae"]:
+            raise ValueError("architecture field is required in the sae dictionary")
+        if "logger" not in cfg_dict:
+            raise ValueError("logger field is required in the config dictionary")
+        sae_config_class = get_sae_training_class(cfg_dict["sae"]["architecture"])[1]
+        sae_cfg = sae_config_class.from_dict(cfg_dict["sae"])
+        logger_cfg = LoggingConfig(**cfg_dict["logger"])
+        updated_cfg_dict: dict[str, Any] = {
+            **cfg_dict,
+            "sae": sae_cfg,
+            "logger": logger_cfg,
+        }
+        output = cls(**updated_cfg_dict)
+        # the post_init always appends to checkpoint path, so we need to set it explicitly here.
+        if "checkpoint_path" in cfg_dict:
+            output.checkpoint_path = cfg_dict["checkpoint_path"]
+        return output
     def to_sae_trainer_config(self) -> "SAETrainerConfig":
         return SAETrainerConfig(
             n_checkpoints=self.n_checkpoints,

sae_lens/constants.py CHANGED Viewed

@@ -17,5 +17,6 @@ SAE_WEIGHTS_FILENAME = "sae_weights.safetensors"
 SAE_CFG_FILENAME = "cfg.json"
 RUNNER_CFG_FILENAME = "runner_cfg.json"
 SPARSIFY_WEIGHTS_FILENAME = "sae.safetensors"
+TRAINER_STATE_FILENAME = "trainer_state.pt"
 ACTIVATIONS_STORE_STATE_FILENAME = "activations_store_state.safetensors"
 ACTIVATION_SCALER_CFG_FILENAME = "activation_scaler.json"

sae_lens/evals.py CHANGED Viewed

@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
 from functools import partial
 from importlib.metadata import PackageNotFoundError, version
 from pathlib import Path
-from typing import Any
+from typing import Any, Iterable
 import einops
 import pandas as pd
@@ -24,7 +24,10 @@ from sae_lens.loading.pretrained_saes_directory import get_pretrained_saes_direc
 from sae_lens.saes.sae import SAE, SAEConfig
 from sae_lens.training.activation_scaler import ActivationScaler
 from sae_lens.training.activations_store import ActivationsStore
-from sae_lens.util import extract_stop_at_layer_from_tlens_hook_name
+from sae_lens.util import (
+    extract_stop_at_layer_from_tlens_hook_name,
+    get_special_token_ids,
+)
 def get_library_version() -> str:
@@ -109,9 +112,15 @@ def run_evals(
     activation_scaler: ActivationScaler,
     eval_config: EvalConfig = EvalConfig(),
     model_kwargs: Mapping[str, Any] = {},
-    ignore_tokens: set[int | None] = set(),
+    exclude_special_tokens: Iterable[int] | bool = True,
     verbose: bool = False,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
+    ignore_tokens = None
+    if exclude_special_tokens is True:
+        ignore_tokens = list(get_special_token_ids(model.tokenizer))  # type: ignore
+    elif exclude_special_tokens:
+        ignore_tokens = list(exclude_special_tokens)
     hook_name = sae.cfg.metadata.hook_name
     actual_batch_size = (
         eval_config.batch_size_prompts or activation_store.store_batch_size_prompts
@@ -312,7 +321,7 @@ def get_downstream_reconstruction_metrics(
     compute_ce_loss: bool,
     n_batches: int,
     eval_batch_size_prompts: int,
-    ignore_tokens: set[int | None] = set(),
+    ignore_tokens: list[int] | None = None,
     verbose: bool = False,
 ):
     metrics_dict = {}
@@ -339,7 +348,7 @@ def get_downstream_reconstruction_metrics(
             compute_ce_loss=compute_ce_loss,
             ignore_tokens=ignore_tokens,
         ).items():
-            if len(ignore_tokens) > 0:
+            if ignore_tokens:
                 mask = torch.logical_not(
                     torch.any(
                         torch.stack(
@@ -384,7 +393,7 @@ def get_sparsity_and_variance_metrics(
     compute_featurewise_density_statistics: bool,
     eval_batch_size_prompts: int,
     model_kwargs: Mapping[str, Any],
-    ignore_tokens: set[int | None] = set(),
+    ignore_tokens: list[int] | None = None,
     verbose: bool = False,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
     hook_name = sae.cfg.metadata.hook_name
@@ -426,7 +435,7 @@ def get_sparsity_and_variance_metrics(
     for _ in batch_iter:
         batch_tokens = activation_store.get_batch_tokens(eval_batch_size_prompts)
-        if len(ignore_tokens) > 0:
+        if ignore_tokens:
             mask = torch.logical_not(
                 torch.any(
                     torch.stack(
@@ -596,7 +605,7 @@ def get_recons_loss(
     batch_tokens: torch.Tensor,
     compute_kl: bool,
     compute_ce_loss: bool,
-    ignore_tokens: set[int | None] = set(),
+    ignore_tokens: list[int] | None = None,
     model_kwargs: Mapping[str, Any] = {},
     hook_name: str | None = None,
 ) -> dict[str, Any]:
@@ -610,7 +619,7 @@ def get_recons_loss(
         batch_tokens, return_type="both", loss_per_token=True, **model_kwargs
     )
-    if len(ignore_tokens) > 0:
+    if ignore_tokens:
         mask = torch.logical_not(
             torch.any(
                 torch.stack([batch_tokens == token for token in ignore_tokens], dim=0),
@@ -856,11 +865,6 @@ def multiple_evals(
                     activation_scaler=ActivationScaler(),
                     model=current_model,
                     eval_config=eval_config,
-                    ignore_tokens={
-                        current_model.tokenizer.pad_token_id,  # type: ignore
-                        current_model.tokenizer.eos_token_id,  # type: ignore
-                        current_model.tokenizer.bos_token_id,  # type: ignore
-                    },
                     verbose=verbose,
                 )
                 eval_metrics["metrics"] = scalar_metrics

sae_lens/llm_sae_training_runner.py CHANGED Viewed

@@ -16,23 +16,18 @@ from typing_extensions import deprecated
 from sae_lens import logger
 from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
 from sae_lens.constants import (
-    ACTIVATIONS_STORE_STATE_FILENAME,
     RUNNER_CFG_FILENAME,
     SPARSITY_FILENAME,
 )
 from sae_lens.evals import EvalConfig, run_evals
 from sae_lens.load_model import load_model
-from sae_lens.saes.batchtopk_sae import BatchTopKTrainingSAEConfig
-from sae_lens.saes.gated_sae import GatedTrainingSAEConfig
-from sae_lens.saes.jumprelu_sae import JumpReLUTrainingSAEConfig
+from sae_lens.registry import SAE_TRAINING_CLASS_REGISTRY
 from sae_lens.saes.sae import (
     T_TRAINING_SAE,
     T_TRAINING_SAE_CONFIG,
     TrainingSAE,
     TrainingSAEConfig,
 )
-from sae_lens.saes.standard_sae import StandardTrainingSAEConfig
-from sae_lens.saes.topk_sae import TopKTrainingSAEConfig
 from sae_lens.training.activation_scaler import ActivationScaler
 from sae_lens.training.activations_store import ActivationsStore
 from sae_lens.training.sae_trainer import SAETrainer
@@ -61,9 +56,11 @@ class LLMSaeEvaluator(Generic[T_TRAINING_SAE]):
         data_provider: DataProvider,
         activation_scaler: ActivationScaler,
     ) -> dict[str, Any]:
-        ignore_tokens = set()
+        exclude_special_tokens = False
         if self.activations_store.exclude_special_tokens is not None:
-            ignore_tokens = set(self.activations_store.exclude_special_tokens.tolist())
+            exclude_special_tokens = (
+                self.activations_store.exclude_special_tokens.tolist()
+            )
         eval_config = EvalConfig(
             batch_size_prompts=self.eval_batch_size_prompts,
@@ -81,7 +78,7 @@ class LLMSaeEvaluator(Generic[T_TRAINING_SAE]):
             model=self.model,
             activation_scaler=activation_scaler,
             eval_config=eval_config,
-            ignore_tokens=ignore_tokens,
+            exclude_special_tokens=exclude_special_tokens,
             model_kwargs=self.model_kwargs,
         )  # not calculating featurwise metrics here.
@@ -114,6 +111,7 @@ class LanguageModelSAETrainingRunner:
         override_dataset: HfDataset | None = None,
         override_model: HookedRootModule | None = None,
         override_sae: TrainingSAE[Any] | None = None,
+        resume_from_checkpoint: Path | str | None = None,
     ):
         if override_dataset is not None:
             logger.warning(
@@ -155,6 +153,7 @@ class LanguageModelSAETrainingRunner:
                 )
         else:
             self.sae = override_sae
         self.sae.to(self.cfg.device)
     def run(self):
@@ -187,6 +186,12 @@ class LanguageModelSAETrainingRunner:
             cfg=self.cfg.to_sae_trainer_config(),
         )
+        if self.cfg.resume_from_checkpoint is not None:
+            logger.info(f"Resuming from checkpoint: {self.cfg.resume_from_checkpoint}")
+            trainer.load_trainer_state(self.cfg.resume_from_checkpoint)
+            self.sae.load_weights_from_checkpoint(self.cfg.resume_from_checkpoint)
+            self.activations_store.load_from_checkpoint(self.cfg.resume_from_checkpoint)
         self._compile_if_needed()
         sae = self.run_trainer_with_interruption_handling(trainer)
@@ -306,9 +311,7 @@ class LanguageModelSAETrainingRunner:
         if checkpoint_path is None:
             return
-        self.activations_store.save(
-            str(checkpoint_path / ACTIVATIONS_STORE_STATE_FILENAME)
-        )
+        self.activations_store.save_to_checkpoint(checkpoint_path)
         runner_config = self.cfg.to_dict()
         with open(checkpoint_path / RUNNER_CFG_FILENAME, "w") as f:
@@ -393,12 +396,8 @@ def _parse_cfg_args(
         )
     # Map architecture to concrete config class
-    sae_config_map = {
-        "standard": StandardTrainingSAEConfig,
-        "gated": GatedTrainingSAEConfig,
-        "jumprelu": JumpReLUTrainingSAEConfig,
-        "topk": TopKTrainingSAEConfig,
-        "batchtopk": BatchTopKTrainingSAEConfig,
+    sae_config_map: dict[str, type[TrainingSAEConfig]] = {
+        name: cfg for name, (_, cfg) in SAE_TRAINING_CLASS_REGISTRY.items()
     }
     sae_config_type = sae_config_map[architecture]

sae_lens/loading/pretrained_sae_loaders.py CHANGED Viewed

@@ -523,6 +523,82 @@ def gemma_2_sae_huggingface_loader(
     return cfg_dict, state_dict, log_sparsity
+def get_goodfire_config_from_hf(
+    repo_id: str,
+    folder_name: str,  # noqa: ARG001
+    device: str,
+    force_download: bool = False,  # noqa: ARG001
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    cfg_dict = None
+    if repo_id == "Goodfire/Llama-3.3-70B-Instruct-SAE-l50":
+        if folder_name != "Llama-3.3-70B-Instruct-SAE-l50.pt":
+            raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+        cfg_dict = {
+            "architecture": "standard",
+            "d_in": 8192,
+            "d_sae": 65536,
+            "model_name": "meta-llama/Llama-3.3-70B-Instruct",
+            "hook_name": "blocks.50.hook_resid_post",
+            "hook_head_index": None,
+            "dataset_path": "lmsys/lmsys-chat-1m",
+            "apply_b_dec_to_input": False,
+        }
+    elif repo_id == "Goodfire/Llama-3.1-8B-Instruct-SAE-l19":
+        if folder_name != "Llama-3.1-8B-Instruct-SAE-l19.pth":
+            raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+        cfg_dict = {
+            "architecture": "standard",
+            "d_in": 4096,
+            "d_sae": 65536,
+            "model_name": "meta-llama/Llama-3.1-8B-Instruct",
+            "hook_name": "blocks.19.hook_resid_post",
+            "hook_head_index": None,
+            "dataset_path": "lmsys/lmsys-chat-1m",
+            "apply_b_dec_to_input": False,
+        }
+    if cfg_dict is None:
+        raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+    if device is not None:
+        cfg_dict["device"] = device
+    if cfg_overrides is not None:
+        cfg_dict.update(cfg_overrides)
+    return cfg_dict
+def get_goodfire_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
+    cfg_dict = get_goodfire_config_from_hf(
+        repo_id,
+        folder_name,
+        device,
+        force_download,
+        cfg_overrides,
+    )
+    # Download the SAE weights
+    sae_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=folder_name,
+        force_download=force_download,
+    )
+    raw_state_dict = torch.load(sae_path, map_location=device)
+    state_dict = {
+        "W_enc": raw_state_dict["encoder_linear.weight"].T,
+        "W_dec": raw_state_dict["decoder_linear.weight"].T,
+        "b_enc": raw_state_dict["encoder_linear.bias"],
+        "b_dec": raw_state_dict["decoder_linear.bias"],
+    }
+    return cfg_dict, state_dict, None
 def get_llama_scope_config_from_hf(
     repo_id: str,
     folder_name: str,
@@ -1475,6 +1551,114 @@ def get_mntss_clt_layer_config_from_hf(
     }
+def get_temporal_sae_config_from_hf(
+    repo_id: str,
+    folder_name: str,
+    device: str,
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Get TemporalSAE config without loading weights."""
+    # Download config file
+    conf_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"{folder_name}/conf.yaml",
+        force_download=force_download,
+    )
+    # Load and parse config
+    with open(conf_path) as f:
+        yaml_config = yaml.safe_load(f)
+    # Extract parameters
+    d_in = yaml_config["llm"]["dimin"]
+    exp_factor = yaml_config["sae"]["exp_factor"]
+    d_sae = int(d_in * exp_factor)
+    # extract layer from folder_name eg : "layer_12/temporal"
+    layer = re.search(r"layer_(\d+)", folder_name)
+    if layer is None:
+        raise ValueError(f"Could not find layer in folder_name: {folder_name}")
+    layer = int(layer.group(1))
+    # Build config dict
+    cfg_dict = {
+        "architecture": "temporal",
+        "hook_name": f"blocks.{layer}.hook_resid_post",
+        "d_in": d_in,
+        "d_sae": d_sae,
+        "n_heads": yaml_config["sae"]["n_heads"],
+        "n_attn_layers": yaml_config["sae"]["n_attn_layers"],
+        "bottleneck_factor": yaml_config["sae"]["bottleneck_factor"],
+        "sae_diff_type": yaml_config["sae"]["sae_diff_type"],
+        "kval_topk": yaml_config["sae"]["kval_topk"],
+        "tied_weights": yaml_config["sae"]["tied_weights"],
+        "dtype": yaml_config["data"]["dtype"],
+        "device": device,
+        "normalize_activations": "constant_scalar_rescale",
+        "activation_normalization_factor": yaml_config["sae"]["scaling_factor"],
+        "apply_b_dec_to_input": True,
+    }
+    if cfg_overrides:
+        cfg_dict.update(cfg_overrides)
+    return cfg_dict
+def temporal_sae_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
+    """
+    Load TemporalSAE from canrager/temporalSAEs format (safetensors version).
+    Expects folder_name to contain:
+    - conf.yaml (configuration)
+    - latest_ckpt.safetensors (model weights)
+    """
+    cfg_dict = get_temporal_sae_config_from_hf(
+        repo_id=repo_id,
+        folder_name=folder_name,
+        device=device,
+        force_download=force_download,
+        cfg_overrides=cfg_overrides,
+    )
+    # Download checkpoint (safetensors format)
+    ckpt_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"{folder_name}/latest_ckpt.safetensors",
+        force_download=force_download,
+    )
+    # Load checkpoint from safetensors
+    state_dict_raw = load_file(ckpt_path, device=device)
+    # Convert to SAELens naming convention
+    # TemporalSAE uses: D (decoder), E (encoder), b (bias), attn_layers.*
+    state_dict = {}
+    # Copy attention layers as-is
+    for key, value in state_dict_raw.items():
+        if key.startswith("attn_layers."):
+            state_dict[key] = value.to(device)
+    # Main parameters
+    state_dict["W_dec"] = state_dict_raw["D"].to(device)
+    state_dict["b_dec"] = state_dict_raw["b"].to(device)
+    # Handle tied/untied weights
+    if "E" in state_dict_raw:
+        state_dict["W_enc"] = state_dict_raw["E"].to(device)
+    return cfg_dict, state_dict, None
 NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "sae_lens": sae_lens_huggingface_loader,
     "connor_rob_hook_z": connor_rob_hook_z_huggingface_loader,
@@ -1487,6 +1671,8 @@ NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "gemma_2_transcoder": gemma_2_transcoder_huggingface_loader,
     "mwhanna_transcoder": mwhanna_transcoder_huggingface_loader,
     "mntss_clt_layer_transcoder": mntss_clt_layer_huggingface_loader,
+    "temporal": temporal_sae_huggingface_loader,
+    "goodfire": get_goodfire_huggingface_loader,
 }
@@ -1502,4 +1688,6 @@ NAMED_PRETRAINED_SAE_CONFIG_GETTERS: dict[str, PretrainedSaeConfigHuggingfaceLoa
     "gemma_2_transcoder": get_gemma_2_transcoder_config_from_hf,
     "mwhanna_transcoder": get_mwhanna_transcoder_config_from_hf,
     "mntss_clt_layer_transcoder": get_mntss_clt_layer_config_from_hf,
+    "temporal": get_temporal_sae_config_from_hf,
+    "goodfire": get_goodfire_config_from_hf,
 }

sae_lens/loading/pretrained_saes_directory.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cache
-from importlib import resources
+from importlib.resources import files
 from typing import Any
 import yaml
@@ -24,7 +24,8 @@ def get_pretrained_saes_directory() -> dict[str, PretrainedSAELookup]:
     package = "sae_lens"
     # Access the file within the package using importlib.resources
     directory: dict[str, PretrainedSAELookup] = {}
-    with resources.open_text(package, "pretrained_saes.yaml") as file:
+    yaml_file = files(package).joinpath("pretrained_saes.yaml")
+    with yaml_file.open("r") as file:
         # Load the YAML file content
         data = yaml.safe_load(file)
         for release, value in data.items():
@@ -68,7 +69,8 @@ def get_norm_scaling_factor(release: str, sae_id: str) -> float | None:
         float | None: The norm_scaling_factor if it exists, None otherwise.
     """
     package = "sae_lens"
-    with resources.open_text(package, "pretrained_saes.yaml") as file:
+    yaml_file = files(package).joinpath("pretrained_saes.yaml")
+    with yaml_file.open("r") as file:
         data = yaml.safe_load(file)
         if release in data:
             for sae_info in data[release]["saes"]:

sae-lens 6.14.1__py3-none-any.whl → 6.22.1__py3-none-any.whl

sae-lens 6.14.1__py3-none-any.whl → 6.22.1__py3-none-any.whl