sae-lens 6.0.0rc1.tar.gz → 6.0.0rc3.tar.gz

This diff shows the contents of publicly available package versions as published to their public registry. It is provided for informational purposes only and reflects the changes between the two released versions.
Files changed (40)
  1. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/PKG-INFO +1 -1
  2. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/pyproject.toml +3 -2
  3. sae_lens-6.0.0rc3/sae_lens/__init__.py +98 -0
  4. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/analysis/hooked_sae_transformer.py +10 -10
  5. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/analysis/neuronpedia_integration.py +13 -11
  6. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/cache_activations_runner.py +9 -7
  7. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/config.py +105 -235
  8. sae_lens-6.0.0rc3/sae_lens/constants.py +20 -0
  9. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/evals.py +34 -31
  10. sae_lens-6.0.0rc1/sae_lens/sae_training_runner.py → sae_lens-6.0.0rc3/sae_lens/llm_sae_training_runner.py +103 -70
  11. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/load_model.py +53 -5
  12. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/loading/pretrained_sae_loaders.py +36 -10
  13. sae_lens-6.0.0rc3/sae_lens/registry.py +49 -0
  14. sae_lens-6.0.0rc3/sae_lens/saes/__init__.py +48 -0
  15. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/saes/gated_sae.py +70 -59
  16. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/saes/jumprelu_sae.py +58 -72
  17. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/saes/sae.py +248 -273
  18. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/saes/standard_sae.py +75 -57
  19. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/saes/topk_sae.py +72 -83
  20. sae_lens-6.0.0rc3/sae_lens/training/activation_scaler.py +53 -0
  21. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/training/activations_store.py +105 -184
  22. sae_lens-6.0.0rc3/sae_lens/training/mixing_buffer.py +56 -0
  23. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/training/optim.py +60 -36
  24. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/training/sae_trainer.py +134 -158
  25. sae_lens-6.0.0rc3/sae_lens/training/types.py +5 -0
  26. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/training/upload_saes_to_huggingface.py +11 -5
  27. sae_lens-6.0.0rc3/sae_lens/util.py +47 -0
  28. sae_lens-6.0.0rc1/sae_lens/__init__.py +0 -61
  29. sae_lens-6.0.0rc1/sae_lens/regsitry.py +0 -34
  30. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/LICENSE +0 -0
  31. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/README.md +0 -0
  32. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/analysis/__init__.py +0 -0
  33. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/loading/__init__.py +0 -0
  34. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/loading/pretrained_saes_directory.py +0 -0
  35. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/pretokenize_runner.py +0 -0
  36. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/pretrained_saes.yaml +0 -0
  37. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/tokenization_and_batching.py +0 -0
  38. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/training/__init__.py +0 -0
  39. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/training/geometric_median.py +0 -0
  40. {sae_lens-6.0.0rc1 → sae_lens-6.0.0rc3}/sae_lens/tutorial/tsea.py +0 -0
--- sae_lens-6.0.0rc1/PKG-INFO
+++ sae_lens-6.0.0rc3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: sae-lens
-Version: 6.0.0rc1
+Version: 6.0.0rc3
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 Keywords: deep-learning,sparse-autoencoders,mechanistic-interpretability,PyTorch
--- sae_lens-6.0.0rc1/pyproject.toml
+++ sae_lens-6.0.0rc3/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sae-lens"
-version = "6.0.0-rc.1"
+version = "6.0.0-rc.3"
 description = "Training and Analyzing Sparse Autoencoders (SAEs)"
 authors = ["Joseph Bloom"]
 readme = "README.md"
@@ -52,13 +52,14 @@ boto3 = "^1.34.101"
 docstr-coverage = "^2.3.2"
 mkdocs = "^1.6.1"
 mkdocs-material = "^9.5.34"
-mkdocs-autorefs = "^1.1.0"
+mkdocs-autorefs = "^1.4.2"
 mkdocs-section-index = "^0.3.9"
 mkdocstrings = "^0.25.2"
 mkdocstrings-python = "^1.10.9"
 tabulate = "^0.9.0"
 ruff = "^0.7.4"
 eai-sparsify = "^1.1.1"
+mike = "^2.0.0"

 [tool.poetry.extras]
 mamba = ["mamba-lens"]
--- /dev/null
+++ sae_lens-6.0.0rc3/sae_lens/__init__.py
@@ -0,0 +1,98 @@
+# ruff: noqa: E402
+__version__ = "6.0.0-rc.3"
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+from sae_lens.saes import (
+    SAE,
+    GatedSAE,
+    GatedSAEConfig,
+    GatedTrainingSAE,
+    GatedTrainingSAEConfig,
+    JumpReLUSAE,
+    JumpReLUSAEConfig,
+    JumpReLUTrainingSAE,
+    JumpReLUTrainingSAEConfig,
+    SAEConfig,
+    StandardSAE,
+    StandardSAEConfig,
+    StandardTrainingSAE,
+    StandardTrainingSAEConfig,
+    TopKSAE,
+    TopKSAEConfig,
+    TopKTrainingSAE,
+    TopKTrainingSAEConfig,
+    TrainingSAE,
+    TrainingSAEConfig,
+)
+
+from .analysis.hooked_sae_transformer import HookedSAETransformer
+from .cache_activations_runner import CacheActivationsRunner
+from .config import (
+    CacheActivationsRunnerConfig,
+    LanguageModelSAERunnerConfig,
+    LoggingConfig,
+    PretokenizeRunnerConfig,
+)
+from .evals import run_evals
+from .llm_sae_training_runner import LanguageModelSAETrainingRunner, SAETrainingRunner
+from .loading.pretrained_sae_loaders import (
+    PretrainedSaeDiskLoader,
+    PretrainedSaeHuggingfaceLoader,
+)
+from .pretokenize_runner import PretokenizeRunner, pretokenize_runner
+from .registry import register_sae_class, register_sae_training_class
+from .training.activations_store import ActivationsStore
+from .training.upload_saes_to_huggingface import upload_saes_to_huggingface
+
+__all__ = [
+    "SAE",
+    "SAEConfig",
+    "TrainingSAE",
+    "TrainingSAEConfig",
+    "HookedSAETransformer",
+    "ActivationsStore",
+    "LanguageModelSAERunnerConfig",
+    "LanguageModelSAETrainingRunner",
+    "CacheActivationsRunnerConfig",
+    "CacheActivationsRunner",
+    "PretokenizeRunnerConfig",
+    "PretokenizeRunner",
+    "pretokenize_runner",
+    "run_evals",
+    "upload_saes_to_huggingface",
+    "PretrainedSaeHuggingfaceLoader",
+    "PretrainedSaeDiskLoader",
+    "register_sae_class",
+    "register_sae_training_class",
+    "StandardSAE",
+    "StandardSAEConfig",
+    "StandardTrainingSAE",
+    "StandardTrainingSAEConfig",
+    "GatedSAE",
+    "GatedSAEConfig",
+    "GatedTrainingSAE",
+    "GatedTrainingSAEConfig",
+    "TopKSAE",
+    "TopKSAEConfig",
+    "TopKTrainingSAE",
+    "TopKTrainingSAEConfig",
+    "JumpReLUSAE",
+    "JumpReLUSAEConfig",
+    "JumpReLUTrainingSAE",
+    "JumpReLUTrainingSAEConfig",
+    "SAETrainingRunner",
+    "LoggingConfig",
+]
+
+
+register_sae_class("standard", StandardSAE, StandardSAEConfig)
+register_sae_training_class("standard", StandardTrainingSAE, StandardTrainingSAEConfig)
+register_sae_class("gated", GatedSAE, GatedSAEConfig)
+register_sae_training_class("gated", GatedTrainingSAE, GatedTrainingSAEConfig)
+register_sae_class("topk", TopKSAE, TopKSAEConfig)
+register_sae_training_class("topk", TopKTrainingSAE, TopKTrainingSAEConfig)
+register_sae_class("jumprelu", JumpReLUSAE, JumpReLUSAEConfig)
+register_sae_training_class("jumprelu", JumpReLUTrainingSAE, JumpReLUTrainingSAEConfig)
--- sae_lens-6.0.0rc1/sae_lens/analysis/hooked_sae_transformer.py
+++ sae_lens-6.0.0rc3/sae_lens/analysis/hooked_sae_transformer.py
@@ -68,7 +68,7 @@ class HookedSAETransformer(HookedTransformer):
         super().__init__(*model_args, **model_kwargs)
         self.acts_to_saes: dict[str, SAE] = {}  # type: ignore

-    def add_sae(self, sae: SAE, use_error_term: bool | None = None):
+    def add_sae(self, sae: SAE[Any], use_error_term: bool | None = None):
         """Attaches an SAE to the model

         WARNING: This sae will be permanantly attached until you remove it with reset_saes. This function will also overwrite any existing SAE attached to the same hook point.
@@ -77,7 +77,7 @@ class HookedSAETransformer(HookedTransformer):
             sae: SparseAutoencoderBase. The SAE to attach to the model
             use_error_term: (bool | None) If provided, will set the use_error_term attribute of the SAE to this value. Determines whether the SAE returns input or reconstruction. Defaults to None.
         """
-        act_name = sae.cfg.hook_name
+        act_name = sae.cfg.metadata.hook_name
         if (act_name not in self.acts_to_saes) and (act_name not in self.hook_dict):
             logging.warning(
                 f"No hook found for {act_name}. Skipping. Check model.hook_dict for available hooks."
@@ -92,7 +92,7 @@ class HookedSAETransformer(HookedTransformer):
         set_deep_attr(self, act_name, sae)
         self.setup()

-    def _reset_sae(self, act_name: str, prev_sae: SAE | None = None):
+    def _reset_sae(self, act_name: str, prev_sae: SAE[Any] | None = None):
         """Resets an SAE that was attached to the model

         By default will remove the SAE from that hook_point.
@@ -124,7 +124,7 @@ class HookedSAETransformer(HookedTransformer):
     def reset_saes(
         self,
         act_names: str | list[str] | None = None,
-        prev_saes: list[SAE | None] | None = None,
+        prev_saes: list[SAE[Any] | None] | None = None,
     ):
         """Reset the SAEs attached to the model

@@ -154,7 +154,7 @@ class HookedSAETransformer(HookedTransformer):
     def run_with_saes(
         self,
         *model_args: Any,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         **model_kwargs: Any,
@@ -183,7 +183,7 @@ class HookedSAETransformer(HookedTransformer):
     def run_with_cache_with_saes(
         self,
         *model_args: Any,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         return_cache_object: bool = True,
@@ -225,7 +225,7 @@ class HookedSAETransformer(HookedTransformer):
     def run_with_hooks_with_saes(
         self,
         *model_args: Any,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         fwd_hooks: list[tuple[str | Callable, Callable]] = [],  # type: ignore
         bwd_hooks: list[tuple[str | Callable, Callable]] = [],  # type: ignore
@@ -261,7 +261,7 @@ class HookedSAETransformer(HookedTransformer):
     @contextmanager
     def saes(
         self,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
     ):
@@ -295,8 +295,8 @@ class HookedSAETransformer(HookedTransformer):
             saes = [saes]
         try:
             for sae in saes:
-                act_names_to_reset.append(sae.cfg.hook_name)
-                prev_sae = self.acts_to_saes.get(sae.cfg.hook_name, None)
+                act_names_to_reset.append(sae.cfg.metadata.hook_name)
+                prev_sae = self.acts_to_saes.get(sae.cfg.metadata.hook_name, None)
                 prev_saes.append(prev_sae)
                 self.add_sae(sae, use_error_term=use_error_term)
             yield self
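As background, the hunks above reflect two 6.0 API changes: SAE is now generic over its config type (hence SAE[Any] in signatures), and the hook point is read from sae.cfg.metadata.hook_name instead of sae.cfg.hook_name. A minimal sketch of caller code adapting to the renamed attribute, assuming model is a HookedSAETransformer and sae an already-constructed SAE (their construction is outside this diff):

# Sketch only: `model` and `sae` are assumed to already exist.
# 5.x callers read the hook point as `sae.cfg.hook_name`;
# 6.0 callers read it from the config metadata, matching add_sae() above.
hook_name = sae.cfg.metadata.hook_name

model.add_sae(sae, use_error_term=True)  # attached until reset_saes() is called
logits = model("Hello world")            # forward pass now runs through the SAE at hook_name
model.reset_saes([hook_name])            # detach the SAE again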
--- sae_lens-6.0.0rc1/sae_lens/analysis/neuronpedia_integration.py
+++ sae_lens-6.0.0rc3/sae_lens/analysis/neuronpedia_integration.py
@@ -58,7 +58,7 @@ def NanAndInfReplacer(value: str):
     return NAN_REPLACEMENT


-def open_neuronpedia_feature_dashboard(sae: SAE, index: int):
+def open_neuronpedia_feature_dashboard(sae: SAE[Any], index: int):
     sae_id = sae.cfg.neuronpedia_id
     if sae_id is None:
         logger.warning(
@@ -70,7 +70,7 @@ def open_neuronpedia_feature_dashboard(sae: SAE, index: int):


 def get_neuronpedia_quick_list(
-    sae: SAE,
+    sae: SAE[Any],
     features: list[int],
     name: str = "temporary_list",
 ):
@@ -157,9 +157,10 @@ def sleep_identity(x: T) -> T:


 @retry(wait=wait_random_exponential(min=1, max=500), stop=stop_after_attempt(10))
-async def simulate_and_score(
-    simulator: NeuronSimulator, activation_records: list[ActivationRecord]
-) -> ScoredSimulation:
+async def simulate_and_score(  # type: ignore
+    simulator: NeuronSimulator,
+    activation_records: list[ActivationRecord],  # type: ignore
+) -> ScoredSimulation:  # type: ignore
     """Score an explanation of a neuron by how well it predicts activations on the given text sequences."""
     scored_sequence_simulations = await asyncio.gather(
         *[
@@ -330,8 +331,9 @@ async def autointerp_neuronpedia_features(  # noqa: C901
             feature.activations = []
         activation_records = [
             ActivationRecord(
-                tokens=activation.tokens, activations=activation.act_values
-            )
+                tokens=activation.tokens,  # type: ignore
+                activations=activation.act_values,  # type: ignore
+            )  # type: ignore
             for activation in feature.activations
         ]

@@ -384,15 +386,15 @@ async def autointerp_neuronpedia_features(  # noqa: C901

         temp_activation_records = [
             ActivationRecord(
-                tokens=[
+                tokens=[  # type: ignore
                     token.replace("<|endoftext|>", "<|not_endoftext|>")
                     .replace(" 55", "_55")
                     .encode("ascii", errors="backslashreplace")
                     .decode("ascii")
-                    for token in activation_record.tokens
+                    for token in activation_record.tokens  # type: ignore
                 ],
-                activations=activation_record.activations,
-            )
+                activations=activation_record.activations,  # type: ignore
+            )  # type: ignore
             for activation_record in activation_records
         ]

--- sae_lens-6.0.0rc1/sae_lens/cache_activations_runner.py
+++ sae_lens-6.0.0rc3/sae_lens/cache_activations_runner.py
@@ -14,7 +14,8 @@ from tqdm import tqdm
 from transformer_lens.HookedTransformer import HookedRootModule

 from sae_lens import logger
-from sae_lens.config import DTYPE_MAP, CacheActivationsRunnerConfig
+from sae_lens.config import CacheActivationsRunnerConfig
+from sae_lens.constants import DTYPE_MAP
 from sae_lens.load_model import load_model
 from sae_lens.training.activations_store import ActivationsStore

@@ -33,7 +34,6 @@ def _mk_activations_store(
         dataset=override_dataset or cfg.dataset_path,
         streaming=cfg.streaming,
         hook_name=cfg.hook_name,
-        hook_layer=cfg.hook_layer,
         hook_head_index=None,
         context_size=cfg.context_size,
         d_in=cfg.d_in,
@@ -264,7 +264,7 @@ class CacheActivationsRunner:

         for i in tqdm(range(self.cfg.n_buffers), desc="Caching activations"):
             try:
-                buffer = self.activations_store.get_buffer(
+                buffer = self.activations_store.get_raw_buffer(
                     self.cfg.n_batches_in_buffer, shuffle=False
                 )
                 shard = self._create_shard(buffer)
@@ -318,7 +318,7 @@ class CacheActivationsRunner:
     def _create_shard(
         self,
         buffer: tuple[
-            Float[torch.Tensor, "(bs context_size) num_layers d_in"],
+            Float[torch.Tensor, "(bs context_size) d_in"],
            Int[torch.Tensor, "(bs context_size)"] | None,
         ],
     ) -> Dataset:
@@ -326,13 +326,15 @@ class CacheActivationsRunner:
         acts, token_ids = buffer
         acts = einops.rearrange(
             acts,
-            "(bs context_size) num_layers d_in -> num_layers bs context_size d_in",
+            "(bs context_size) d_in -> bs context_size d_in",
             bs=self.cfg.n_seq_in_buffer,
             context_size=self.context_size,
             d_in=self.cfg.d_in,
-            num_layers=len(hook_names),
         )
-        shard_dict = {hook_name: act for hook_name, act in zip(hook_names, acts)}
+        shard_dict: dict[str, object] = {
+            hook_name: act_batch
+            for hook_name, act_batch in zip(hook_names, [acts], strict=True)
+        }

         if token_ids is not None:
             token_ids = einops.rearrange(
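For reference, dropping the num_layers axis means a raw buffer now holds activations for a single hook with shape (bs * context_size, d_in), and _create_shard reshapes it back to one row per sequence before building the shard dict. A standalone toy check of that einops pattern (illustrative shapes, not sae-lens code):

import einops
import torch

# Toy sizes standing in for cfg.n_seq_in_buffer, context_size and d_in.
bs, context_size, d_in = 4, 8, 16

# A flattened single-hook buffer, shaped like the first element of the tuple above.
acts = torch.randn(bs * context_size, d_in)

# Same rearrange pattern as the new _create_shard code.
per_sequence = einops.rearrange(
    acts,
    "(bs context_size) d_in -> bs context_size d_in",
    bs=bs,
    context_size=context_size,
    d_in=d_in,
)
assert per_sequence.shape == (bs, context_size, d_in)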