sae-lens 6.16.3__py3-none-any.whl → 6.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +6 -1
- sae_lens/cache_activations_runner.py +1 -1
- sae_lens/config.py +39 -2
- sae_lens/constants.py +1 -0
- sae_lens/llm_sae_training_runner.py +9 -4
- sae_lens/loading/pretrained_sae_loaders.py +188 -0
- sae_lens/loading/pretrained_saes_directory.py +5 -3
- sae_lens/pretrained_saes.yaml +75 -1
- sae_lens/saes/__init__.py +3 -0
- sae_lens/saes/sae.py +11 -13
- sae_lens/saes/temporal_sae.py +372 -0
- sae_lens/training/activation_scaler.py +7 -0
- sae_lens/training/activations_store.py +47 -4
- sae_lens/training/optim.py +11 -0
- sae_lens/training/sae_trainer.py +49 -11
- {sae_lens-6.16.3.dist-info → sae_lens-6.21.0.dist-info}/METADATA +16 -16
- {sae_lens-6.16.3.dist-info → sae_lens-6.21.0.dist-info}/RECORD +19 -18
- {sae_lens-6.16.3.dist-info → sae_lens-6.21.0.dist-info}/WHEEL +0 -0
- {sae_lens-6.16.3.dist-info → sae_lens-6.21.0.dist-info}/licenses/LICENSE +0 -0
sae_lens/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.16.3"
+__version__ = "6.21.0"
 
 import logging
 
@@ -28,6 +28,8 @@ from sae_lens.saes import (
     StandardSAEConfig,
     StandardTrainingSAE,
     StandardTrainingSAEConfig,
+    TemporalSAE,
+    TemporalSAEConfig,
     TopKSAE,
     TopKSAEConfig,
     TopKTrainingSAE,
@@ -105,6 +107,8 @@ __all__ = [
     "JumpReLUTranscoderConfig",
     "MatryoshkaBatchTopKTrainingSAE",
     "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
 ]
 
 
@@ -127,3 +131,4 @@ register_sae_training_class(
 register_sae_class("transcoder", Transcoder, TranscoderConfig)
 register_sae_class("skip_transcoder", SkipTranscoder, SkipTranscoderConfig)
 register_sae_class("jumprelu_transcoder", JumpReLUTranscoder, JumpReLUTranscoderConfig)
+register_sae_class("temporal", TemporalSAE, TemporalSAEConfig)
sae_lens/cache_activations_runner.py
CHANGED
@@ -10,7 +10,7 @@ from datasets import Array2D, Dataset, Features, Sequence, Value
 from datasets.fingerprint import generate_fingerprint
 from huggingface_hub import HfApi
 from jaxtyping import Float, Int
-from tqdm import tqdm
+from tqdm.auto import tqdm
 from transformer_lens.HookedTransformer import HookedRootModule
 
 from sae_lens import logger
sae_lens/config.py
CHANGED
@@ -18,6 +18,7 @@ from datasets import (
 
 from sae_lens import __version__, logger
 from sae_lens.constants import DTYPE_MAP
+from sae_lens.registry import get_sae_training_class
 from sae_lens.saes.sae import TrainingSAEConfig
 
 if TYPE_CHECKING:
@@ -171,6 +172,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
        n_checkpoints (int): The number of checkpoints to save during training. 0 means no checkpoints.
        checkpoint_path (str | None): The path to save checkpoints. A unique ID will be appended to this path. Set to None to disable checkpoint saving. (default is "checkpoints")
        save_final_checkpoint (bool): Whether to include an additional final checkpoint when training is finished. (default is False).
+       resume_from_checkpoint (str | None): The path to the checkpoint to resume training from. (default is None).
        output_path (str | None): The path to save outputs. Set to None to disable output saving. (default is "output")
        verbose (bool): Whether to print verbose output. (default is True)
        model_kwargs (dict[str, Any]): Keyword arguments for `model.run_with_cache`
@@ -261,6 +263,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     checkpoint_path: str | None = "checkpoints"
     save_final_checkpoint: bool = False
     output_path: str | None = "output"
+    resume_from_checkpoint: str | None = None
 
     # Misc
     verbose: bool = True
@@ -385,8 +388,11 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         return self.sae.to_dict()
 
     def to_dict(self) -> dict[str, Any]:
-
-
+        """
+        Convert the config to a dictionary.
+        """
+
+        d = asdict(self)
 
         d["logger"] = asdict(self.logger)
         d["sae"] = self.sae.to_dict()
@@ -396,6 +402,37 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         d["act_store_device"] = str(self.act_store_device)
         return d
 
+    @classmethod
+    def from_dict(cls, cfg_dict: dict[str, Any]) -> "LanguageModelSAERunnerConfig[Any]":
+        """
+        Load a LanguageModelSAERunnerConfig from a dictionary given by `to_dict`.
+
+        Args:
+            cfg_dict (dict[str, Any]): The dictionary to load the config from.
+
+        Returns:
+            LanguageModelSAERunnerConfig: The loaded config.
+        """
+        if "sae" not in cfg_dict:
+            raise ValueError("sae field is required in the config dictionary")
+        if "architecture" not in cfg_dict["sae"]:
+            raise ValueError("architecture field is required in the sae dictionary")
+        if "logger" not in cfg_dict:
+            raise ValueError("logger field is required in the config dictionary")
+        sae_config_class = get_sae_training_class(cfg_dict["sae"]["architecture"])[1]
+        sae_cfg = sae_config_class.from_dict(cfg_dict["sae"])
+        logger_cfg = LoggingConfig(**cfg_dict["logger"])
+        updated_cfg_dict: dict[str, Any] = {
+            **cfg_dict,
+            "sae": sae_cfg,
+            "logger": logger_cfg,
+        }
+        output = cls(**updated_cfg_dict)
+        # the post_init always appends to checkpoint path, so we need to set it explicitly here.
+        if "checkpoint_path" in cfg_dict:
+            output.checkpoint_path = cfg_dict["checkpoint_path"]
+        return output
+
     def to_sae_trainer_config(self) -> "SAETrainerConfig":
         return SAETrainerConfig(
             n_checkpoints=self.n_checkpoints,
sae_lens/constants.py
CHANGED
@@ -17,5 +17,6 @@ SAE_WEIGHTS_FILENAME = "sae_weights.safetensors"
 SAE_CFG_FILENAME = "cfg.json"
 RUNNER_CFG_FILENAME = "runner_cfg.json"
 SPARSIFY_WEIGHTS_FILENAME = "sae.safetensors"
+TRAINER_STATE_FILENAME = "trainer_state.pt"
 ACTIVATIONS_STORE_STATE_FILENAME = "activations_store_state.safetensors"
 ACTIVATION_SCALER_CFG_FILENAME = "activation_scaler.json"
sae_lens/llm_sae_training_runner.py
CHANGED
@@ -16,7 +16,6 @@ from typing_extensions import deprecated
 from sae_lens import logger
 from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
 from sae_lens.constants import (
-    ACTIVATIONS_STORE_STATE_FILENAME,
     RUNNER_CFG_FILENAME,
     SPARSITY_FILENAME,
 )
@@ -112,6 +111,7 @@ class LanguageModelSAETrainingRunner:
         override_dataset: HfDataset | None = None,
         override_model: HookedRootModule | None = None,
         override_sae: TrainingSAE[Any] | None = None,
+        resume_from_checkpoint: Path | str | None = None,
     ):
         if override_dataset is not None:
             logger.warning(
@@ -153,6 +153,7 @@ class LanguageModelSAETrainingRunner:
             )
         else:
             self.sae = override_sae
+
         self.sae.to(self.cfg.device)
 
     def run(self):
@@ -185,6 +186,12 @@ class LanguageModelSAETrainingRunner:
             cfg=self.cfg.to_sae_trainer_config(),
         )
 
+        if self.cfg.resume_from_checkpoint is not None:
+            logger.info(f"Resuming from checkpoint: {self.cfg.resume_from_checkpoint}")
+            trainer.load_trainer_state(self.cfg.resume_from_checkpoint)
+            self.sae.load_weights_from_checkpoint(self.cfg.resume_from_checkpoint)
+            self.activations_store.load_from_checkpoint(self.cfg.resume_from_checkpoint)
+
         self._compile_if_needed()
         sae = self.run_trainer_with_interruption_handling(trainer)
 
@@ -304,9 +311,7 @@ class LanguageModelSAETrainingRunner:
         if checkpoint_path is None:
             return
 
-        self.activations_store.save(
-            str(checkpoint_path / ACTIVATIONS_STORE_STATE_FILENAME)
-        )
+        self.activations_store.save_to_checkpoint(checkpoint_path)
 
         runner_config = self.cfg.to_dict()
         with open(checkpoint_path / RUNNER_CFG_FILENAME, "w") as f:
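With resume_from_checkpoint set, run() restores the trainer state, the SAE weights, and the activations-store state from the checkpoint directory before training continues. A rough usage sketch, with a hypothetical checkpoint path and reusing the config round trip shown earlier:

import json
from pathlib import Path

from sae_lens.config import LanguageModelSAERunnerConfig
from sae_lens.llm_sae_training_runner import LanguageModelSAETrainingRunner

ckpt = Path("checkpoints/my_run/12288")  # hypothetical checkpoint directory

# Rebuild the original run's config, point it at the checkpoint, and resume.
cfg = LanguageModelSAERunnerConfig.from_dict(
    json.loads((ckpt / "runner_cfg.json").read_text())
)
cfg.resume_from_checkpoint = str(ckpt)
sae = LanguageModelSAETrainingRunner(cfg).run()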
sae_lens/loading/pretrained_sae_loaders.py
CHANGED
@@ -523,6 +523,82 @@ def gemma_2_sae_huggingface_loader(
     return cfg_dict, state_dict, log_sparsity
 
 
+def get_goodfire_config_from_hf(
+    repo_id: str,
+    folder_name: str,  # noqa: ARG001
+    device: str,
+    force_download: bool = False,  # noqa: ARG001
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    cfg_dict = None
+    if repo_id == "Goodfire/Llama-3.3-70B-Instruct-SAE-l50":
+        if folder_name != "Llama-3.3-70B-Instruct-SAE-l50.pt":
+            raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+        cfg_dict = {
+            "architecture": "standard",
+            "d_in": 8192,
+            "d_sae": 65536,
+            "model_name": "meta-llama/Llama-3.3-70B-Instruct",
+            "hook_name": "blocks.50.hook_resid_post",
+            "hook_head_index": None,
+            "dataset_path": "lmsys/lmsys-chat-1m",
+            "apply_b_dec_to_input": False,
+        }
+    elif repo_id == "Goodfire/Llama-3.1-8B-Instruct-SAE-l19":
+        if folder_name != "Llama-3.1-8B-Instruct-SAE-l19.pth":
+            raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+        cfg_dict = {
+            "architecture": "standard",
+            "d_in": 4096,
+            "d_sae": 65536,
+            "model_name": "meta-llama/Llama-3.1-8B-Instruct",
+            "hook_name": "blocks.19.hook_resid_post",
+            "hook_head_index": None,
+            "dataset_path": "lmsys/lmsys-chat-1m",
+            "apply_b_dec_to_input": False,
+        }
+    if cfg_dict is None:
+        raise ValueError(f"Unsupported Goodfire SAE: {repo_id}/{folder_name}")
+    if device is not None:
+        cfg_dict["device"] = device
+    if cfg_overrides is not None:
+        cfg_dict.update(cfg_overrides)
+    return cfg_dict
+
+
+def get_goodfire_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
+    cfg_dict = get_goodfire_config_from_hf(
+        repo_id,
+        folder_name,
+        device,
+        force_download,
+        cfg_overrides,
+    )
+
+    # Download the SAE weights
+    sae_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=folder_name,
+        force_download=force_download,
+    )
+    raw_state_dict = torch.load(sae_path, map_location=device)
+
+    state_dict = {
+        "W_enc": raw_state_dict["encoder_linear.weight"].T,
+        "W_dec": raw_state_dict["decoder_linear.weight"].T,
+        "b_enc": raw_state_dict["encoder_linear.bias"],
+        "b_dec": raw_state_dict["decoder_linear.bias"],
+    }
+
+    return cfg_dict, state_dict, None
+
+
 def get_llama_scope_config_from_hf(
     repo_id: str,
     folder_name: str,
@@ -1475,6 +1551,114 @@ def get_mntss_clt_layer_config_from_hf(
 }
 
 
+def get_temporal_sae_config_from_hf(
+    repo_id: str,
+    folder_name: str,
+    device: str,
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Get TemporalSAE config without loading weights."""
+    # Download config file
+    conf_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"{folder_name}/conf.yaml",
+        force_download=force_download,
+    )
+
+    # Load and parse config
+    with open(conf_path) as f:
+        yaml_config = yaml.safe_load(f)
+
+    # Extract parameters
+    d_in = yaml_config["llm"]["dimin"]
+    exp_factor = yaml_config["sae"]["exp_factor"]
+    d_sae = int(d_in * exp_factor)
+
+    # extract layer from folder_name eg : "layer_12/temporal"
+    layer = re.search(r"layer_(\d+)", folder_name)
+    if layer is None:
+        raise ValueError(f"Could not find layer in folder_name: {folder_name}")
+    layer = int(layer.group(1))
+
+    # Build config dict
+    cfg_dict = {
+        "architecture": "temporal",
+        "hook_name": f"blocks.{layer}.hook_resid_post",
+        "d_in": d_in,
+        "d_sae": d_sae,
+        "n_heads": yaml_config["sae"]["n_heads"],
+        "n_attn_layers": yaml_config["sae"]["n_attn_layers"],
+        "bottleneck_factor": yaml_config["sae"]["bottleneck_factor"],
+        "sae_diff_type": yaml_config["sae"]["sae_diff_type"],
+        "kval_topk": yaml_config["sae"]["kval_topk"],
+        "tied_weights": yaml_config["sae"]["tied_weights"],
+        "dtype": yaml_config["data"]["dtype"],
+        "device": device,
+        "normalize_activations": "constant_scalar_rescale",
+        "activation_normalization_factor": yaml_config["sae"]["scaling_factor"],
+        "apply_b_dec_to_input": True,
+    }
+
+    if cfg_overrides:
+        cfg_dict.update(cfg_overrides)
+
+    return cfg_dict
+
+
+def temporal_sae_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
+    """
+    Load TemporalSAE from canrager/temporalSAEs format (safetensors version).
+
+    Expects folder_name to contain:
+    - conf.yaml (configuration)
+    - latest_ckpt.safetensors (model weights)
+    """
+
+    cfg_dict = get_temporal_sae_config_from_hf(
+        repo_id=repo_id,
+        folder_name=folder_name,
+        device=device,
+        force_download=force_download,
+        cfg_overrides=cfg_overrides,
+    )
+
+    # Download checkpoint (safetensors format)
+    ckpt_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=f"{folder_name}/latest_ckpt.safetensors",
+        force_download=force_download,
+    )
+
+    # Load checkpoint from safetensors
+    state_dict_raw = load_file(ckpt_path, device=device)
+
+    # Convert to SAELens naming convention
+    # TemporalSAE uses: D (decoder), E (encoder), b (bias), attn_layers.*
+    state_dict = {}
+
+    # Copy attention layers as-is
+    for key, value in state_dict_raw.items():
+        if key.startswith("attn_layers."):
+            state_dict[key] = value.to(device)
+
+    # Main parameters
+    state_dict["W_dec"] = state_dict_raw["D"].to(device)
+    state_dict["b_dec"] = state_dict_raw["b"].to(device)
+
+    # Handle tied/untied weights
+    if "E" in state_dict_raw:
+        state_dict["W_enc"] = state_dict_raw["E"].to(device)
+
+    return cfg_dict, state_dict, None
+
+
 NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "sae_lens": sae_lens_huggingface_loader,
     "connor_rob_hook_z": connor_rob_hook_z_huggingface_loader,
@@ -1487,6 +1671,8 @@ NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "gemma_2_transcoder": gemma_2_transcoder_huggingface_loader,
     "mwhanna_transcoder": mwhanna_transcoder_huggingface_loader,
     "mntss_clt_layer_transcoder": mntss_clt_layer_huggingface_loader,
+    "temporal": temporal_sae_huggingface_loader,
+    "goodfire": get_goodfire_huggingface_loader,
 }
 
 
@@ -1502,4 +1688,6 @@ NAMED_PRETRAINED_SAE_CONFIG_GETTERS: dict[str, PretrainedSaeConfigHuggingfaceLoader] = {
     "gemma_2_transcoder": get_gemma_2_transcoder_config_from_hf,
     "mwhanna_transcoder": get_mwhanna_transcoder_config_from_hf,
     "mntss_clt_layer_transcoder": get_mntss_clt_layer_config_from_hf,
+    "temporal": get_temporal_sae_config_from_hf,
+    "goodfire": get_goodfire_config_from_hf,
 }
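The two new loader names are wired into both registries above, so they can be driven through the normal pretrained-SAE machinery or called directly. A small sketch of the Goodfire path, using only values from the diff (the weights are a sizeable download from the Hugging Face Hub):

from sae_lens.loading.pretrained_sae_loaders import get_goodfire_huggingface_loader

# Fetch the Goodfire Llama-3.1-8B-Instruct SAE and remap its parameters to the
# SAE Lens naming convention (W_enc / W_dec / b_enc / b_dec).
cfg_dict, state_dict, _ = get_goodfire_huggingface_loader(
    repo_id="Goodfire/Llama-3.1-8B-Instruct-SAE-l19",
    folder_name="Llama-3.1-8B-Instruct-SAE-l19.pth",
    device="cpu",
)
print(cfg_dict["hook_name"])             # blocks.19.hook_resid_post
print(tuple(state_dict["W_enc"].shape))  # (4096, 65536)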
sae_lens/loading/pretrained_saes_directory.py
CHANGED
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cache
-from importlib import resources
+from importlib.resources import files
 from typing import Any
 
 import yaml
@@ -24,7 +24,8 @@ def get_pretrained_saes_directory() -> dict[str, PretrainedSAELookup]:
     package = "sae_lens"
     # Access the file within the package using importlib.resources
     directory: dict[str, PretrainedSAELookup] = {}
-
+    yaml_file = files(package).joinpath("pretrained_saes.yaml")
+    with yaml_file.open("r") as file:
         # Load the YAML file content
         data = yaml.safe_load(file)
         for release, value in data.items():
@@ -68,7 +69,8 @@ def get_norm_scaling_factor(release: str, sae_id: str) -> float | None:
         float | None: The norm_scaling_factor if it exists, None otherwise.
     """
     package = "sae_lens"
-
+    yaml_file = files(package).joinpath("pretrained_saes.yaml")
+    with yaml_file.open("r") as file:
         data = yaml.safe_load(file)
         if release in data:
             for sae_info in data[release]["saes"]:
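From the caller's side the directory helpers are unchanged; only the file access now goes through importlib.resources.files. A quick check that the new releases show up, using names from the pretrained_saes.yaml additions below:

from sae_lens.loading.pretrained_saes_directory import (
    get_norm_scaling_factor,
    get_pretrained_saes_directory,
)

directory = get_pretrained_saes_directory()
print("temporal-sae-gemma-2-2b" in directory)  # True
print(get_norm_scaling_factor("temporal-sae-gemma-2-2b", "blocks.12.hook_resid_post"))
# 0.00666666667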
sae_lens/pretrained_saes.yaml
CHANGED
@@ -1,3 +1,35 @@
+temporal-sae-gemma-2-2b:
+  conversion_func: temporal
+  model: gemma-2-2b
+  repo_id: canrager/temporalSAEs
+  config_overrides:
+    model_name: gemma-2-2b
+    hook_name: blocks.12.hook_resid_post
+    dataset_path: monology/pile-uncopyrighted
+  saes:
+  - id: blocks.12.hook_resid_post
+    l0: 192
+    norm_scaling_factor: 0.00666666667
+    path: gemma-2-2B/layer_12/temporal
+    neuronpedia: gemma-2-2b/12-temporal-res
+temporal-sae-llama-3.1-8b:
+  conversion_func: temporal
+  model: meta-llama/Llama-3.1-8B
+  repo_id: canrager/temporalSAEs
+  config_overrides:
+    model_name: meta-llama/Llama-3.1-8B
+    dataset_path: monology/pile-uncopyrighted
+  saes:
+  - id: blocks.15.hook_resid_post
+    l0: 256
+    norm_scaling_factor: 0.029
+    path: llama-3.1-8B/layer_15/temporal
+    neuronpedia: llama3.1-8b/15-temporal-res
+  - id: blocks.26.hook_resid_post
+    l0: 256
+    norm_scaling_factor: 0.029
+    path: llama-3.1-8B/layer_26/temporal
+    neuronpedia: llama3.1-8b/26-temporal-res
 deepseek-r1-distill-llama-8b-qresearch:
   conversion_func: deepseek_r1
   model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
@@ -14882,4 +14914,46 @@ qwen2.5-7b-instruct-andyrdt:
     neuronpedia: qwen2.5-7b-it/23-resid-post-aa
   - id: resid_post_layer_27_trainer_1
     path: resid_post_layer_27/trainer_1
-    neuronpedia: qwen2.5-7b-it/27-resid-post-aa
+    neuronpedia: qwen2.5-7b-it/27-resid-post-aa
+
+gpt-oss-20b-andyrdt:
+  conversion_func: dictionary_learning_1
+  model: openai/gpt-oss-20b
+  repo_id: andyrdt/saes-gpt-oss-20b
+  saes:
+  - id: resid_post_layer_3_trainer_0
+    path: resid_post_layer_3/trainer_0
+    neuronpedia: gpt-oss-20b/3-resid-post-aa
+  - id: resid_post_layer_7_trainer_0
+    path: resid_post_layer_7/trainer_0
+    neuronpedia: gpt-oss-20b/7-resid-post-aa
+  - id: resid_post_layer_11_trainer_0
+    path: resid_post_layer_11/trainer_0
+    neuronpedia: gpt-oss-20b/11-resid-post-aa
+  - id: resid_post_layer_15_trainer_0
+    path: resid_post_layer_15/trainer_0
+    neuronpedia: gpt-oss-20b/15-resid-post-aa
+  - id: resid_post_layer_19_trainer_0
+    path: resid_post_layer_19/trainer_0
+    neuronpedia: gpt-oss-20b/19-resid-post-aa
+  - id: resid_post_layer_23_trainer_0
+    path: resid_post_layer_23/trainer_0
+    neuronpedia: gpt-oss-20b/23-resid-post-aa
+
+goodfire-llama-3.3-70b-instruct:
+  conversion_func: goodfire
+  model: meta-llama/Llama-3.3-70B-Instruct
+  repo_id: Goodfire/Llama-3.3-70B-Instruct-SAE-l50
+  saes:
+  - id: layer_50
+    path: Llama-3.3-70B-Instruct-SAE-l50.pt
+    l0: 121
+
+goodfire-llama-3.1-8b-instruct:
+  conversion_func: goodfire
+  model: meta-llama/Llama-3.1-8B-Instruct
+  repo_id: Goodfire/Llama-3.1-8B-Instruct-SAE-l19
+  saes:
+  - id: layer_19
+    path: Llama-3.1-8B-Instruct-SAE-l19.pth
+    l0: 91
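Each new YAML entry maps a release name and SAE id to a conversion_func registered in the loader tables above, so the standard loading entry point picks them up. A sketch, assuming the v6-style SAE.from_pretrained that returns the SAE directly:

from sae_lens import SAE

# "temporal" routes through temporal_sae_huggingface_loader, which reads
# conf.yaml and latest_ckpt.safetensors from canrager/temporalSAEs.
sae = SAE.from_pretrained(
    release="temporal-sae-gemma-2-2b",
    sae_id="blocks.12.hook_resid_post",
    device="cpu",
)
print(sae.cfg.d_in, sae.cfg.d_sae)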
sae_lens/saes/__init__.py
CHANGED
@@ -25,6 +25,7 @@ from .standard_sae import (
     StandardTrainingSAE,
     StandardTrainingSAEConfig,
 )
+from .temporal_sae import TemporalSAE, TemporalSAEConfig
 from .topk_sae import (
     TopKSAE,
     TopKSAEConfig,
@@ -71,4 +72,6 @@ __all__ = [
     "JumpReLUTranscoderConfig",
     "MatryoshkaBatchTopKTrainingSAE",
     "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
 ]
sae_lens/saes/sae.py
CHANGED
@@ -21,7 +21,7 @@ import einops
 import torch
 from jaxtyping import Float
 from numpy.typing import NDArray
-from safetensors.torch import save_file
+from safetensors.torch import load_file, save_file
 from torch import nn
 from transformer_lens.hook_points import HookedRootModule, HookPoint
 from typing_extensions import deprecated, overload, override
@@ -155,9 +155,9 @@ class SAEConfig(ABC):
     dtype: str = "float32"
     device: str = "cpu"
     apply_b_dec_to_input: bool = True
-    normalize_activations: Literal[
-        "none",
-
+    normalize_activations: Literal["none", "expected_average_only_in", "layer_norm"] = (
+        "none"  # none, expected_average_only_in (Anthropic April Update)
+    )
     reshape_activations: Literal["none", "hook_z"] = "none"
     metadata: SAEMetadata = field(default_factory=SAEMetadata)
 
@@ -309,6 +309,7 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
 
             self.run_time_activation_norm_fn_in = run_time_activation_norm_fn_in
             self.run_time_activation_norm_fn_out = run_time_activation_norm_fn_out
+
         elif self.cfg.normalize_activations == "layer_norm":
             # we need to scale the norm of the input and store the scaling factor
             def run_time_activation_ln_in(
@@ -452,23 +453,14 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     def process_sae_in(
         self, sae_in: Float[torch.Tensor, "... d_in"]
     ) -> Float[torch.Tensor, "... d_in"]:
-        # print(f"Input shape to process_sae_in: {sae_in.shape}")
-        # print(f"self.cfg.hook_name: {self.cfg.hook_name}")
-        # print(f"self.b_dec shape: {self.b_dec.shape}")
-        # print(f"Hook z reshaping mode: {getattr(self, 'hook_z_reshaping_mode', False)}")
-
         sae_in = sae_in.to(self.dtype)
-
-        # print(f"Shape before reshape_fn_in: {sae_in.shape}")
         sae_in = self.reshape_fn_in(sae_in)
-        # print(f"Shape after reshape_fn_in: {sae_in.shape}")
 
         sae_in = self.hook_sae_input(sae_in)
         sae_in = self.run_time_activation_norm_fn_in(sae_in)
 
         # Here's where the error happens
         bias_term = self.b_dec * self.cfg.apply_b_dec_to_input
-        # print(f"Bias term shape: {bias_term.shape}")
 
         return sae_in - bias_term
 
@@ -1018,6 +1010,12 @@ class TrainingSAE(SAE[T_TRAINING_SAE_CONFIG], ABC):
     ) -> type[TrainingSAEConfig]:
         return get_sae_training_class(architecture)[1]
 
+    def load_weights_from_checkpoint(self, checkpoint_path: Path | str) -> None:
+        checkpoint_path = Path(checkpoint_path)
+        state_dict = load_file(checkpoint_path / SAE_WEIGHTS_FILENAME)
+        self.process_state_dict_for_loading(state_dict)
+        self.load_state_dict(state_dict)
+
 
 _blank_hook = nn.Identity()
 
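load_weights_from_checkpoint is what the training runner calls when resuming, but it can also be used on its own to pull sae_weights.safetensors back into a freshly constructed training SAE. A minimal sketch with hypothetical dimensions and path, assuming the remaining StandardTrainingSAEConfig fields have defaults:

from pathlib import Path

from sae_lens import StandardTrainingSAE, StandardTrainingSAEConfig

# Build an SAE with the same shape as the checkpointed one, then load weights
# from a checkpoint directory containing sae_weights.safetensors.
sae = StandardTrainingSAE(StandardTrainingSAEConfig(d_in=768, d_sae=16384))
sae.load_weights_from_checkpoint(Path("checkpoints/my_run/12288"))  # hypothetical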