sae-lens 6.16.3__tar.gz → 6.18.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sae-lens might be problematic.
- {sae_lens-6.16.3 → sae_lens-6.18.0}/PKG-INFO +1 -1
- {sae_lens-6.16.3 → sae_lens-6.18.0}/pyproject.toml +1 -1
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/__init__.py +1 -1
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/cache_activations_runner.py +1 -1
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/config.py +39 -2
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/constants.py +1 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/llm_sae_training_runner.py +9 -4
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/sae.py +7 -1
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/activation_scaler.py +7 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/activations_store.py +46 -3
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/optim.py +11 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/sae_trainer.py +49 -11
- {sae_lens-6.16.3 → sae_lens-6.18.0}/LICENSE +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/README.md +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/analysis/__init__.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/analysis/hooked_sae_transformer.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/analysis/neuronpedia_integration.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/evals.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/load_model.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/loading/__init__.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/loading/pretrained_sae_loaders.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/loading/pretrained_saes_directory.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/pretokenize_runner.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/pretrained_saes.yaml +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/registry.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/__init__.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/batchtopk_sae.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/gated_sae.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/jumprelu_sae.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/matryoshka_batchtopk_sae.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/standard_sae.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/topk_sae.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/saes/transcoder.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/tokenization_and_batching.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/__init__.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/mixing_buffer.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/types.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/training/upload_saes_to_huggingface.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/tutorial/tsea.py +0 -0
- {sae_lens-6.16.3 → sae_lens-6.18.0}/sae_lens/util.py +0 -0
sae_lens/cache_activations_runner.py

@@ -10,7 +10,7 @@ from datasets import Array2D, Dataset, Features, Sequence, Value
 from datasets.fingerprint import generate_fingerprint
 from huggingface_hub import HfApi
 from jaxtyping import Float, Int
-from tqdm import tqdm
+from tqdm.auto import tqdm
 from transformer_lens.HookedTransformer import HookedRootModule
 
 from sae_lens import logger
sae_lens/config.py

@@ -18,6 +18,7 @@ from datasets import (
 
 from sae_lens import __version__, logger
 from sae_lens.constants import DTYPE_MAP
+from sae_lens.registry import get_sae_training_class
 from sae_lens.saes.sae import TrainingSAEConfig
 
 if TYPE_CHECKING:
@@ -171,6 +172,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         n_checkpoints (int): The number of checkpoints to save during training. 0 means no checkpoints.
         checkpoint_path (str | None): The path to save checkpoints. A unique ID will be appended to this path. Set to None to disable checkpoint saving. (default is "checkpoints")
         save_final_checkpoint (bool): Whether to include an additional final checkpoint when training is finished. (default is False).
+        resume_from_checkpoint (str | None): The path to the checkpoint to resume training from. (default is None).
         output_path (str | None): The path to save outputs. Set to None to disable output saving. (default is "output")
         verbose (bool): Whether to print verbose output. (default is True)
         model_kwargs (dict[str, Any]): Keyword arguments for `model.run_with_cache`
@@ -261,6 +263,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     checkpoint_path: str | None = "checkpoints"
     save_final_checkpoint: bool = False
     output_path: str | None = "output"
+    resume_from_checkpoint: str | None = None
 
     # Misc
     verbose: bool = True
@@ -385,8 +388,11 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         return self.sae.to_dict()
 
     def to_dict(self) -> dict[str, Any]:
-
-
+        """
+        Convert the config to a dictionary.
+        """
+
+        d = asdict(self)
 
         d["logger"] = asdict(self.logger)
         d["sae"] = self.sae.to_dict()
@@ -396,6 +402,37 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         d["act_store_device"] = str(self.act_store_device)
         return d
 
+    @classmethod
+    def from_dict(cls, cfg_dict: dict[str, Any]) -> "LanguageModelSAERunnerConfig[Any]":
+        """
+        Load a LanguageModelSAERunnerConfig from a dictionary given by `to_dict`.
+
+        Args:
+            cfg_dict (dict[str, Any]): The dictionary to load the config from.
+
+        Returns:
+            LanguageModelSAERunnerConfig: The loaded config.
+        """
+        if "sae" not in cfg_dict:
+            raise ValueError("sae field is required in the config dictionary")
+        if "architecture" not in cfg_dict["sae"]:
+            raise ValueError("architecture field is required in the sae dictionary")
+        if "logger" not in cfg_dict:
+            raise ValueError("logger field is required in the config dictionary")
+        sae_config_class = get_sae_training_class(cfg_dict["sae"]["architecture"])[1]
+        sae_cfg = sae_config_class.from_dict(cfg_dict["sae"])
+        logger_cfg = LoggingConfig(**cfg_dict["logger"])
+        updated_cfg_dict: dict[str, Any] = {
+            **cfg_dict,
+            "sae": sae_cfg,
+            "logger": logger_cfg,
+        }
+        output = cls(**updated_cfg_dict)
+        # the post_init always appends to checkpoint path, so we need to set it explicitly here.
+        if "checkpoint_path" in cfg_dict:
+            output.checkpoint_path = cfg_dict["checkpoint_path"]
+        return output
+
     def to_sae_trainer_config(self) -> "SAETrainerConfig":
         return SAETrainerConfig(
             n_checkpoints=self.n_checkpoints,
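The config changes above pair the existing `to_dict` with a new `from_dict` and add a `resume_from_checkpoint` field. A minimal sketch of how the pair might be used to reload the runner config saved alongside a checkpoint; the checkpoint directory path is a hypothetical placeholder, not taken from the diff:

    import json

    from sae_lens.config import LanguageModelSAERunnerConfig
    from sae_lens.constants import RUNNER_CFG_FILENAME

    checkpoint_dir = "checkpoints/my_run"  # hypothetical path
    with open(f"{checkpoint_dir}/{RUNNER_CFG_FILENAME}") as f:
        cfg_dict = json.load(f)

    # from_dict rebuilds the nested sae/logger configs from the serialized dict
    cfg = LanguageModelSAERunnerConfig.from_dict(cfg_dict)
    cfg.resume_from_checkpoint = checkpoint_dir  # new field in this release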
sae_lens/constants.py

@@ -17,5 +17,6 @@ SAE_WEIGHTS_FILENAME = "sae_weights.safetensors"
 SAE_CFG_FILENAME = "cfg.json"
 RUNNER_CFG_FILENAME = "runner_cfg.json"
 SPARSIFY_WEIGHTS_FILENAME = "sae.safetensors"
+TRAINER_STATE_FILENAME = "trainer_state.pt"
 ACTIVATIONS_STORE_STATE_FILENAME = "activations_store_state.safetensors"
 ACTIVATION_SCALER_CFG_FILENAME = "activation_scaler.json"
sae_lens/llm_sae_training_runner.py

@@ -16,7 +16,6 @@ from typing_extensions import deprecated
 from sae_lens import logger
 from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
 from sae_lens.constants import (
-    ACTIVATIONS_STORE_STATE_FILENAME,
     RUNNER_CFG_FILENAME,
     SPARSITY_FILENAME,
 )
@@ -112,6 +111,7 @@ class LanguageModelSAETrainingRunner:
         override_dataset: HfDataset | None = None,
         override_model: HookedRootModule | None = None,
         override_sae: TrainingSAE[Any] | None = None,
+        resume_from_checkpoint: Path | str | None = None,
     ):
         if override_dataset is not None:
             logger.warning(
@@ -153,6 +153,7 @@ class LanguageModelSAETrainingRunner:
             )
         else:
             self.sae = override_sae
+
         self.sae.to(self.cfg.device)
 
     def run(self):
@@ -185,6 +186,12 @@ class LanguageModelSAETrainingRunner:
             cfg=self.cfg.to_sae_trainer_config(),
         )
 
+        if self.cfg.resume_from_checkpoint is not None:
+            logger.info(f"Resuming from checkpoint: {self.cfg.resume_from_checkpoint}")
+            trainer.load_trainer_state(self.cfg.resume_from_checkpoint)
+            self.sae.load_weights_from_checkpoint(self.cfg.resume_from_checkpoint)
+            self.activations_store.load_from_checkpoint(self.cfg.resume_from_checkpoint)
+
         self._compile_if_needed()
         sae = self.run_trainer_with_interruption_handling(trainer)
 
@@ -304,9 +311,7 @@ class LanguageModelSAETrainingRunner:
         if checkpoint_path is None:
             return
 
-        self.activations_store.save(
-            str(checkpoint_path / ACTIVATIONS_STORE_STATE_FILENAME)
-        )
+        self.activations_store.save_to_checkpoint(checkpoint_path)
 
         runner_config = self.cfg.to_dict()
         with open(checkpoint_path / RUNNER_CFG_FILENAME, "w") as f:
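Together with the config change, resuming a run is driven by `cfg.resume_from_checkpoint`: when set, `run()` restores the trainer state, SAE weights, and activations-store position before training continues. A hedged sketch, assuming the runner still takes the config as its first constructor argument and that a checkpoint directory from an earlier run exists at the (hypothetical) path below:

    import json

    from sae_lens.config import LanguageModelSAERunnerConfig
    from sae_lens.constants import RUNNER_CFG_FILENAME
    from sae_lens.llm_sae_training_runner import LanguageModelSAETrainingRunner

    checkpoint_dir = "checkpoints/my_run"  # hypothetical
    with open(f"{checkpoint_dir}/{RUNNER_CFG_FILENAME}") as f:
        cfg = LanguageModelSAERunnerConfig.from_dict(json.load(f))

    cfg.resume_from_checkpoint = checkpoint_dir
    LanguageModelSAETrainingRunner(cfg).run()  # restores state instead of starting fresh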
sae_lens/saes/sae.py

@@ -21,7 +21,7 @@ import einops
 import torch
 from jaxtyping import Float
 from numpy.typing import NDArray
-from safetensors.torch import save_file
+from safetensors.torch import load_file, save_file
 from torch import nn
 from transformer_lens.hook_points import HookedRootModule, HookPoint
 from typing_extensions import deprecated, overload, override
@@ -1018,6 +1018,12 @@ class TrainingSAE(SAE[T_TRAINING_SAE_CONFIG], ABC):
     ) -> type[TrainingSAEConfig]:
         return get_sae_training_class(architecture)[1]
 
+    def load_weights_from_checkpoint(self, checkpoint_path: Path | str) -> None:
+        checkpoint_path = Path(checkpoint_path)
+        state_dict = load_file(checkpoint_path / SAE_WEIGHTS_FILENAME)
+        self.process_state_dict_for_loading(state_dict)
+        self.load_state_dict(state_dict)
+
 
 _blank_hook = nn.Identity()
 
sae_lens/training/activation_scaler.py

@@ -1,5 +1,6 @@
 import json
 from dataclasses import dataclass
+from pathlib import Path
 from statistics import mean
 
 import torch
@@ -51,3 +52,9 @@ class ActivationScaler:
 
         with open(file_path, "w") as f:
             json.dump({"scaling_factor": self.scaling_factor}, f)
+
+    def load(self, file_path: str | Path):
+        """load the state dict from a file in json format"""
+        with open(file_path) as f:
+            data = json.load(f)
+            self.scaling_factor = data["scaling_factor"]
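`ActivationScaler` now has a `load` to mirror its existing `save`; both operate on a small JSON file holding the scaling factor. A quick sketch of the round trip, assuming the class is a plain dataclass constructible with its `scaling_factor` field (the file path is illustrative):

    from sae_lens.training.activation_scaler import ActivationScaler

    scaler = ActivationScaler(scaling_factor=1.5)
    scaler.save("activation_scaler.json")  # writes {"scaling_factor": 1.5}

    restored = ActivationScaler(scaling_factor=None)
    restored.load("activation_scaler.json")  # reads it back into restored.scaling_factor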
sae_lens/training/activations_store.py

@@ -4,6 +4,7 @@ import json
 import os
 import warnings
 from collections.abc import Generator, Iterator, Sequence
+from pathlib import Path
 from typing import Any, Literal, cast
 
 import datasets
@@ -13,8 +14,8 @@ from huggingface_hub import hf_hub_download
 from huggingface_hub.utils import HfHubHTTPError
 from jaxtyping import Float, Int
 from requests import HTTPError
-from safetensors.torch import save_file
-from tqdm import tqdm
+from safetensors.torch import load_file, save_file
+from tqdm.auto import tqdm
 from transformer_lens.hook_points import HookedRootModule
 from transformers import AutoTokenizer, PreTrainedTokenizerBase
 
@@ -24,7 +25,7 @@ from sae_lens.config import (
     HfDataset,
     LanguageModelSAERunnerConfig,
 )
-from sae_lens.constants import DTYPE_MAP
+from sae_lens.constants import ACTIVATIONS_STORE_STATE_FILENAME, DTYPE_MAP
 from sae_lens.pretokenize_runner import get_special_token_from_cfg
 from sae_lens.saes.sae import SAE, T_SAE_CONFIG, T_TRAINING_SAE_CONFIG
 from sae_lens.tokenization_and_batching import concat_and_batch_sequences
@@ -729,6 +730,48 @@ class ActivationsStore:
         """save the state dict to a file in safetensors format"""
         save_file(self.state_dict(), file_path)
 
+    def save_to_checkpoint(self, checkpoint_path: str | Path):
+        """Save the state dict to a checkpoint path"""
+        self.save(str(Path(checkpoint_path) / ACTIVATIONS_STORE_STATE_FILENAME))
+
+    def load_from_checkpoint(self, checkpoint_path: str | Path):
+        """Load the state dict from a checkpoint path"""
+        self.load(str(Path(checkpoint_path) / ACTIVATIONS_STORE_STATE_FILENAME))
+
+    def load(self, file_path: str):
+        """Load the state dict from a file in safetensors format"""
+
+        state_dict = load_file(file_path)
+
+        if "n_dataset_processed" in state_dict:
+            target_n_dataset_processed = state_dict["n_dataset_processed"].item()
+
+            # Only fast-forward if needed
+
+            if target_n_dataset_processed > self.n_dataset_processed:
+                logger.info(
+                    "Fast-forwarding through dataset samples to match checkpoint position"
+                )
+                samples_to_skip = target_n_dataset_processed - self.n_dataset_processed
+
+                pbar = tqdm(
+                    total=samples_to_skip,
+                    desc="Fast-forwarding through dataset",
+                    leave=False,
+                )
+                while target_n_dataset_processed > self.n_dataset_processed:
+                    start = self.n_dataset_processed
+                    try:
+                        # Just consume and ignore the values to fast-forward
+                        next(self.iterable_sequences)
+                    except StopIteration:
+                        logger.warning(
+                            "Dataset exhausted during fast-forward. Resetting dataset."
+                        )
+                        self.iterable_sequences = self._iterate_tokenized_sequences()
+                    pbar.update(self.n_dataset_processed - start)
+                pbar.close()
+
 
 def validate_pretokenized_dataset_tokenizer(
     dataset_path: str, model_tokenizer: PreTrainedTokenizerBase
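The new `load`/`load_from_checkpoint` do more than restore counters: if the saved `n_dataset_processed` is ahead of the current store, the token iterator is fast-forwarded so a resumed run continues from roughly the same dataset position. A sketch under the assumption that you already have a configured `ActivationsStore` (building one requires a model and dataset, omitted here):

    from pathlib import Path

    from sae_lens.training.activations_store import ActivationsStore

    def checkpoint_and_restore(store: ActivationsStore, checkpoint_dir: str | Path) -> None:
        """Persist the store's position, then restore it (fast-forwarding if needed)."""
        store.save_to_checkpoint(checkpoint_dir)    # writes activations_store_state.safetensors
        store.load_from_checkpoint(checkpoint_dir)  # reads it back; skips samples to catch up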
sae_lens/training/optim.py

@@ -2,6 +2,8 @@
 Took the LR scheduler from my previous work: https://github.com/jbloomAus/DecisionTransformerInterpretability/blob/ee55df35cdb92e81d689c72fb9dd5a7252893363/src/decision_transformer/utils.py#L425
 """
 
+from typing import Any
+
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
 
@@ -150,3 +152,12 @@ class CoefficientScheduler:
     def value(self) -> float:
         """Returns the current scalar value."""
         return self.current_value
+
+    def state_dict(self) -> dict[str, Any]:
+        return {
+            "current_step": self.current_step,
+        }
+
+    def load_state_dict(self, state_dict: dict[str, Any]):
+        for k in state_dict:
+            setattr(self, k, state_dict[k])
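`CoefficientScheduler` gains `state_dict`/`load_state_dict` so the trainer can serialize scheduler progress alongside the optimizer. A small sketch assuming an existing scheduler instance (its constructor arguments are not part of this diff):

    from sae_lens.training.optim import CoefficientScheduler

    def snapshot_and_restore(scheduler: CoefficientScheduler) -> None:
        """Round-trip the scheduler's progress through its new state_dict API."""
        state = scheduler.state_dict()    # {"current_step": ...}
        scheduler.load_state_dict(state)  # restores current_step via setattr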
sae_lens/training/sae_trainer.py

@@ -1,4 +1,5 @@
 import contextlib
+import math
 from pathlib import Path
 from typing import Any, Callable, Generic, Protocol
 
@@ -10,7 +11,11 @@ from tqdm.auto import tqdm
 
 from sae_lens import __version__
 from sae_lens.config import SAETrainerConfig
-from sae_lens.constants import ACTIVATION_SCALER_CFG_FILENAME, SPARSITY_FILENAME
+from sae_lens.constants import (
+    ACTIVATION_SCALER_CFG_FILENAME,
+    SPARSITY_FILENAME,
+    TRAINER_STATE_FILENAME,
+)
 from sae_lens.saes.sae import (
     T_TRAINING_SAE,
     T_TRAINING_SAE_CONFIG,
@@ -56,6 +61,7 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
     data_provider: DataProvider
     activation_scaler: ActivationScaler
     evaluator: Evaluator[T_TRAINING_SAE] | None
+    coefficient_schedulers: dict[str, CoefficientScheduler]
 
     def __init__(
         self,
@@ -84,7 +90,9 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
             range(
                 0,
                 cfg.total_training_samples,
-
+                math.ceil(
+                    cfg.total_training_samples / (self.cfg.n_checkpoints + 1)
+                ),
             )
         )[1:]
 
@@ -93,11 +101,6 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
             sae.cfg.d_sae, device=cfg.device
         )
        self.n_frac_active_samples = 0
-        # we don't train the scaling factor (initially)
-        # set requires grad to false for the scaling factor
-        for name, param in self.sae.named_parameters():
-            if "scaling_factor" in name:
-                param.requires_grad = False
 
         self.optimizer = Adam(
             sae.parameters(),
@@ -210,10 +213,7 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
         sparsity_path = checkpoint_path / SPARSITY_FILENAME
         save_file({"sparsity": self.log_feature_sparsity}, sparsity_path)
 
-        activation_scaler_path = (
-            checkpoint_path / ACTIVATION_SCALER_CFG_FILENAME
-        )
-        self.activation_scaler.save(str(activation_scaler_path))
+        self.save_trainer_state(checkpoint_path)
 
         if self.cfg.logger.log_to_wandb:
             self.cfg.logger.log(
@@ -227,6 +227,44 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
         if self.save_checkpoint_fn is not None:
             self.save_checkpoint_fn(checkpoint_path=checkpoint_path)
 
+    def save_trainer_state(self, checkpoint_path: Path) -> None:
+        checkpoint_path.mkdir(exist_ok=True, parents=True)
+        scheduler_state_dicts = {
+            name: scheduler.state_dict()
+            for name, scheduler in self.coefficient_schedulers.items()
+        }
+        torch.save(
+            {
+                "optimizer": self.optimizer.state_dict(),
+                "lr_scheduler": self.lr_scheduler.state_dict(),
+                "n_training_samples": self.n_training_samples,
+                "n_training_steps": self.n_training_steps,
+                "act_freq_scores": self.act_freq_scores,
+                "n_forward_passes_since_fired": self.n_forward_passes_since_fired,
+                "n_frac_active_samples": self.n_frac_active_samples,
+                "started_fine_tuning": self.started_fine_tuning,
+                "coefficient_schedulers": scheduler_state_dicts,
+            },
+            str(checkpoint_path / TRAINER_STATE_FILENAME),
+        )
+        activation_scaler_path = checkpoint_path / ACTIVATION_SCALER_CFG_FILENAME
+        self.activation_scaler.save(str(activation_scaler_path))
+
+    def load_trainer_state(self, checkpoint_path: Path | str) -> None:
+        checkpoint_path = Path(checkpoint_path)
+        self.activation_scaler.load(checkpoint_path / ACTIVATION_SCALER_CFG_FILENAME)
+        state_dict = torch.load(checkpoint_path / TRAINER_STATE_FILENAME)
+        self.optimizer.load_state_dict(state_dict["optimizer"])
+        self.lr_scheduler.load_state_dict(state_dict["lr_scheduler"])
+        self.n_training_samples = state_dict["n_training_samples"]
+        self.n_training_steps = state_dict["n_training_steps"]
+        self.act_freq_scores = state_dict["act_freq_scores"]
+        self.n_forward_passes_since_fired = state_dict["n_forward_passes_since_fired"]
+        self.n_frac_active_samples = state_dict["n_frac_active_samples"]
+        self.started_fine_tuning = state_dict["started_fine_tuning"]
+        for name, scheduler_state_dict in state_dict["coefficient_schedulers"].items():
+            self.coefficient_schedulers[name].load_state_dict(scheduler_state_dict)
+
     def _train_step(
         self,
         sae: T_TRAINING_SAE,
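`save_trainer_state` and `load_trainer_state` bundle everything needed to resume mid-run: optimizer and LR-scheduler state, step and sample counters, feature-firing statistics, coefficient-scheduler progress, and the activation scaler. A hedged sketch assuming an already constructed `SAETrainer` (its constructor arguments are outside this diff):

    from pathlib import Path

    from sae_lens.training.sae_trainer import SAETrainer

    def roundtrip_trainer_state(trainer: SAETrainer, checkpoint_dir: str | Path) -> None:
        """Write trainer_state.pt and activation_scaler.json, then reload them in place."""
        path = Path(checkpoint_dir)
        trainer.save_trainer_state(path)  # torch.save of optimizer/scheduler/counters
        trainer.load_trainer_state(path)  # restores the same state on the trainer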