sae-lens 6.15.0__py3-none-any.whl → 6.24.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +13 -1
- sae_lens/analysis/hooked_sae_transformer.py +4 -13
- sae_lens/cache_activations_runner.py +3 -4
- sae_lens/config.py +39 -2
- sae_lens/constants.py +1 -0
- sae_lens/llm_sae_training_runner.py +9 -4
- sae_lens/loading/pretrained_sae_loaders.py +430 -24
- sae_lens/loading/pretrained_saes_directory.py +5 -3
- sae_lens/pretokenize_runner.py +3 -3
- sae_lens/pretrained_saes.yaml +26977 -65
- sae_lens/saes/__init__.py +7 -0
- sae_lens/saes/batchtopk_sae.py +3 -1
- sae_lens/saes/gated_sae.py +6 -11
- sae_lens/saes/jumprelu_sae.py +8 -13
- sae_lens/saes/matryoshka_batchtopk_sae.py +8 -15
- sae_lens/saes/sae.py +20 -32
- sae_lens/saes/standard_sae.py +4 -9
- sae_lens/saes/temporal_sae.py +365 -0
- sae_lens/saes/topk_sae.py +8 -11
- sae_lens/saes/transcoder.py +41 -0
- sae_lens/training/activation_scaler.py +7 -0
- sae_lens/training/activations_store.py +54 -12
- sae_lens/training/optim.py +11 -0
- sae_lens/training/sae_trainer.py +50 -11
- {sae_lens-6.15.0.dist-info → sae_lens-6.24.1.dist-info}/METADATA +16 -16
- sae_lens-6.24.1.dist-info/RECORD +41 -0
- sae_lens-6.15.0.dist-info/RECORD +0 -40
- {sae_lens-6.15.0.dist-info → sae_lens-6.24.1.dist-info}/WHEEL +0 -0
- {sae_lens-6.15.0.dist-info → sae_lens-6.24.1.dist-info}/licenses/LICENSE +0 -0
sae_lens/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.15.0"
+__version__ = "6.24.1"
 
 import logging
 
@@ -15,6 +15,8 @@ from sae_lens.saes import (
     GatedTrainingSAEConfig,
     JumpReLUSAE,
     JumpReLUSAEConfig,
+    JumpReLUSkipTranscoder,
+    JumpReLUSkipTranscoderConfig,
     JumpReLUTrainingSAE,
     JumpReLUTrainingSAEConfig,
     JumpReLUTranscoder,
@@ -28,6 +30,8 @@ from sae_lens.saes import (
     StandardSAEConfig,
     StandardTrainingSAE,
     StandardTrainingSAEConfig,
+    TemporalSAE,
+    TemporalSAEConfig,
     TopKSAE,
     TopKSAEConfig,
     TopKTrainingSAE,
@@ -103,8 +107,12 @@ __all__ = [
     "SkipTranscoderConfig",
     "JumpReLUTranscoder",
     "JumpReLUTranscoderConfig",
+    "JumpReLUSkipTranscoder",
+    "JumpReLUSkipTranscoderConfig",
     "MatryoshkaBatchTopKTrainingSAE",
     "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
 ]
 
 
@@ -127,3 +135,7 @@ register_sae_training_class(
 register_sae_class("transcoder", Transcoder, TranscoderConfig)
 register_sae_class("skip_transcoder", SkipTranscoder, SkipTranscoderConfig)
 register_sae_class("jumprelu_transcoder", JumpReLUTranscoder, JumpReLUTranscoderConfig)
+register_sae_class(
+    "jumprelu_skip_transcoder", JumpReLUSkipTranscoder, JumpReLUSkipTranscoderConfig
+)
+register_sae_class("temporal", TemporalSAE, TemporalSAEConfig)
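Two architectures are new in this range: JumpReLUSkipTranscoder and TemporalSAE, both exported at the package root and registered under the keys "jumprelu_skip_transcoder" and "temporal". A minimal illustrative sketch of the new surface, using only names that appear in the diff above:

    from sae_lens import (
        JumpReLUSkipTranscoder,
        JumpReLUSkipTranscoderConfig,
        TemporalSAE,
        TemporalSAEConfig,
    )

    # Because of the register_sae_class calls above, saved configs whose
    # "architecture" field is "temporal" or "jumprelu_skip_transcoder" should
    # now resolve to these classes when SAEs are loaded from disk.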
sae_lens/analysis/hooked_sae_transformer.py
CHANGED
@@ -3,7 +3,6 @@ from contextlib import contextmanager
 from typing import Any, Callable
 
 import torch
-from jaxtyping import Float
 from transformer_lens.ActivationCache import ActivationCache
 from transformer_lens.components.mlps.can_be_used_as_mlp import CanBeUsedAsMLP
 from transformer_lens.hook_points import HookPoint  # Hooking utilities
@@ -11,8 +10,8 @@ from transformer_lens.HookedTransformer import HookedTransformer
 
 from sae_lens.saes.sae import SAE
 
-SingleLoss = Float[torch.Tensor, ""]  # Type alias for a single element tensor
-LossPerToken = Float[torch.Tensor, "batch pos-1"]
+SingleLoss = torch.Tensor  # Type alias for a single element tensor
+LossPerToken = torch.Tensor
 Loss = SingleLoss | LossPerToken
 
 
@@ -171,12 +170,7 @@ class HookedSAETransformer(HookedTransformer):
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         **model_kwargs: Any,
-    ) -> (
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss]
-    ):
+    ) -> None | torch.Tensor | Loss | tuple[torch.Tensor, Loss]:
         """Wrapper around HookedTransformer forward pass.
 
         Runs the model with the given SAEs attached for one forward pass, then removes them. By default, will reset all SAEs to original state after.
@@ -203,10 +197,7 @@ class HookedSAETransformer(HookedTransformer):
         remove_batch_dim: bool = False,
         **kwargs: Any,
     ) -> tuple[
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss],
+        None | torch.Tensor | Loss | tuple[torch.Tensor, Loss],
         ActivationCache | dict[str, torch.Tensor],
     ]:
         """Wrapper around 'run_with_cache' in HookedTransformer.
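The signatures above belong to HookedSAETransformer's wrappers that splice SAEs into a single forward pass; only the jaxtyping shape annotations were dropped, the behaviour is unchanged. A hedged usage sketch, not part of the diff: the pretrained release/ID and the assumption that SAE.from_pretrained returns the SAE directly in 6.x are illustrative.

    from sae_lens import SAE, HookedSAETransformer

    model = HookedSAETransformer.from_pretrained("gpt2")  # standard HookedTransformer loading
    # Release/ID below come from the public SAE directory, not from this diff.
    sae = SAE.from_pretrained("gpt2-small-res-jb", "blocks.0.hook_resid_pre", device="cpu")

    tokens = model.to_tokens("Hello world")
    # Attaches the SAE for one forward pass; reset_saes_end=True (the default shown
    # above) restores the model afterwards.
    logits = model.run_with_saes(tokens, saes=[sae])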
sae_lens/cache_activations_runner.py
CHANGED
@@ -9,8 +9,7 @@ import torch
 from datasets import Array2D, Dataset, Features, Sequence, Value
 from datasets.fingerprint import generate_fingerprint
 from huggingface_hub import HfApi
-from jaxtyping import Float, Int
-from tqdm import tqdm
+from tqdm.auto import tqdm
 from transformer_lens.HookedTransformer import HookedRootModule
 
 from sae_lens import logger
@@ -318,8 +317,8 @@ class CacheActivationsRunner:
     def _create_shard(
         self,
         buffer: tuple[
-            Float[torch.Tensor, "(bs context_size) d_in"],
-            Int[torch.Tensor, "(bs context_size)"] | None,
+            torch.Tensor,  # shape: (bs context_size) d_in
+            torch.Tensor | None,  # shape: (bs context_size) or None
         ],
     ) -> Dataset:
         hook_names = [self.cfg.hook_name]
sae_lens/config.py
CHANGED
@@ -18,6 +18,7 @@ from datasets import (
 
 from sae_lens import __version__, logger
 from sae_lens.constants import DTYPE_MAP
+from sae_lens.registry import get_sae_training_class
 from sae_lens.saes.sae import TrainingSAEConfig
 
 if TYPE_CHECKING:
@@ -171,6 +172,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         n_checkpoints (int): The number of checkpoints to save during training. 0 means no checkpoints.
         checkpoint_path (str | None): The path to save checkpoints. A unique ID will be appended to this path. Set to None to disable checkpoint saving. (default is "checkpoints")
         save_final_checkpoint (bool): Whether to include an additional final checkpoint when training is finished. (default is False).
+        resume_from_checkpoint (str | None): The path to the checkpoint to resume training from. (default is None).
         output_path (str | None): The path to save outputs. Set to None to disable output saving. (default is "output")
         verbose (bool): Whether to print verbose output. (default is True)
         model_kwargs (dict[str, Any]): Keyword arguments for `model.run_with_cache`
@@ -261,6 +263,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     checkpoint_path: str | None = "checkpoints"
     save_final_checkpoint: bool = False
     output_path: str | None = "output"
+    resume_from_checkpoint: str | None = None
 
     # Misc
     verbose: bool = True
@@ -385,8 +388,11 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         return self.sae.to_dict()
 
     def to_dict(self) -> dict[str, Any]:
-
-
+        """
+        Convert the config to a dictionary.
+        """
+
+        d = asdict(self)
 
         d["logger"] = asdict(self.logger)
         d["sae"] = self.sae.to_dict()
@@ -396,6 +402,37 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         d["act_store_device"] = str(self.act_store_device)
         return d
 
+    @classmethod
+    def from_dict(cls, cfg_dict: dict[str, Any]) -> "LanguageModelSAERunnerConfig[Any]":
+        """
+        Load a LanguageModelSAERunnerConfig from a dictionary given by `to_dict`.
+
+        Args:
+            cfg_dict (dict[str, Any]): The dictionary to load the config from.
+
+        Returns:
+            LanguageModelSAERunnerConfig: The loaded config.
+        """
+        if "sae" not in cfg_dict:
+            raise ValueError("sae field is required in the config dictionary")
+        if "architecture" not in cfg_dict["sae"]:
+            raise ValueError("architecture field is required in the sae dictionary")
+        if "logger" not in cfg_dict:
+            raise ValueError("logger field is required in the config dictionary")
+        sae_config_class = get_sae_training_class(cfg_dict["sae"]["architecture"])[1]
+        sae_cfg = sae_config_class.from_dict(cfg_dict["sae"])
+        logger_cfg = LoggingConfig(**cfg_dict["logger"])
+        updated_cfg_dict: dict[str, Any] = {
+            **cfg_dict,
+            "sae": sae_cfg,
+            "logger": logger_cfg,
+        }
+        output = cls(**updated_cfg_dict)
+        # the post_init always appends to checkpoint path, so we need to set it explicitly here.
+        if "checkpoint_path" in cfg_dict:
+            output.checkpoint_path = cfg_dict["checkpoint_path"]
+        return output
+
     def to_sae_trainer_config(self) -> "SAETrainerConfig":
         return SAETrainerConfig(
             n_checkpoints=self.n_checkpoints,
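The new from_dict classmethod is the inverse of to_dict, so runner configs (including the runner_cfg.json written at checkpoints) can be rebuilt from plain dictionaries. A minimal sketch; the SAE field values are illustrative, not defaults taken from this diff:

    from sae_lens import LanguageModelSAERunnerConfig, StandardTrainingSAEConfig

    cfg = LanguageModelSAERunnerConfig(
        sae=StandardTrainingSAEConfig(d_in=512, d_sae=4096),  # illustrative sizes
    )

    cfg_dict = cfg.to_dict()  # the "sae" entry must carry an "architecture" key; "logger" is a plain dict
    # from_dict uses the registry to pick the right TrainingSAEConfig subclass and
    # restores checkpoint_path verbatim (post_init would otherwise append a unique ID).
    restored = LanguageModelSAERunnerConfig.from_dict(cfg_dict)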
sae_lens/constants.py
CHANGED
@@ -17,5 +17,6 @@ SAE_WEIGHTS_FILENAME = "sae_weights.safetensors"
 SAE_CFG_FILENAME = "cfg.json"
 RUNNER_CFG_FILENAME = "runner_cfg.json"
 SPARSIFY_WEIGHTS_FILENAME = "sae.safetensors"
+TRAINER_STATE_FILENAME = "trainer_state.pt"
 ACTIVATIONS_STORE_STATE_FILENAME = "activations_store_state.safetensors"
 ACTIVATION_SCALER_CFG_FILENAME = "activation_scaler.json"
sae_lens/llm_sae_training_runner.py
CHANGED
@@ -16,7 +16,6 @@ from typing_extensions import deprecated
 from sae_lens import logger
 from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
 from sae_lens.constants import (
-    ACTIVATIONS_STORE_STATE_FILENAME,
     RUNNER_CFG_FILENAME,
     SPARSITY_FILENAME,
 )
@@ -112,6 +111,7 @@ class LanguageModelSAETrainingRunner:
         override_dataset: HfDataset | None = None,
         override_model: HookedRootModule | None = None,
         override_sae: TrainingSAE[Any] | None = None,
+        resume_from_checkpoint: Path | str | None = None,
     ):
         if override_dataset is not None:
             logger.warning(
@@ -153,6 +153,7 @@ class LanguageModelSAETrainingRunner:
             )
         else:
             self.sae = override_sae
+
         self.sae.to(self.cfg.device)
 
     def run(self):
@@ -185,6 +186,12 @@ class LanguageModelSAETrainingRunner:
             cfg=self.cfg.to_sae_trainer_config(),
         )
 
+        if self.cfg.resume_from_checkpoint is not None:
+            logger.info(f"Resuming from checkpoint: {self.cfg.resume_from_checkpoint}")
+            trainer.load_trainer_state(self.cfg.resume_from_checkpoint)
+            self.sae.load_weights_from_checkpoint(self.cfg.resume_from_checkpoint)
+            self.activations_store.load_from_checkpoint(self.cfg.resume_from_checkpoint)
+
         self._compile_if_needed()
         sae = self.run_trainer_with_interruption_handling(trainer)
 
@@ -304,9 +311,7 @@ class LanguageModelSAETrainingRunner:
         if checkpoint_path is None:
            return
 
-        self.activations_store.save(
-            str(checkpoint_path / ACTIVATIONS_STORE_STATE_FILENAME)
-        )
+        self.activations_store.save_to_checkpoint(checkpoint_path)
 
        runner_config = self.cfg.to_dict()
        with open(checkpoint_path / RUNNER_CFG_FILENAME, "w") as f:
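Combined with the new resume_from_checkpoint field and the save_to_checkpoint/load_from_checkpoint pairs above, an interrupted run can be restarted from its checkpoint directory. A hedged sketch; the model and SAE values are placeholders, and the checkpoint path is whatever an earlier run wrote under its checkpoint_path:

    from sae_lens import LanguageModelSAERunnerConfig, StandardTrainingSAEConfig
    from sae_lens.llm_sae_training_runner import LanguageModelSAETrainingRunner

    cfg = LanguageModelSAERunnerConfig(
        model_name="gpt2",                                     # placeholder
        sae=StandardTrainingSAEConfig(d_in=768, d_sae=8192),   # placeholder sizes
        # Directory produced by a previous run; it holds trainer_state.pt,
        # the SAE weights, the activations-store state, and runner_cfg.json.
        resume_from_checkpoint="checkpoints/<run-id>/<step>",
    )

    runner = LanguageModelSAETrainingRunner(cfg)
    sae = runner.run()  # trainer, SAE, and store state are restored before training continues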