sae-lens 6.26.0__tar.gz → 6.28.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sae_lens-6.26.0 → sae_lens-6.28.0}/PKG-INFO +3 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/README.md +2 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/pyproject.toml +3 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/__init__.py +3 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/cache_activations_runner.py +12 -5
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/config.py +2 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/loading/pretrained_sae_loaders.py +2 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/pretrained_saes.yaml +144 -144
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/gated_sae.py +1 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/jumprelu_sae.py +3 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/standard_sae.py +2 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/temporal_sae.py +1 -0
- sae_lens-6.28.0/sae_lens/synthetic/__init__.py +89 -0
- sae_lens-6.28.0/sae_lens/synthetic/activation_generator.py +215 -0
- sae_lens-6.28.0/sae_lens/synthetic/correlation.py +170 -0
- sae_lens-6.28.0/sae_lens/synthetic/evals.py +141 -0
- sae_lens-6.28.0/sae_lens/synthetic/feature_dictionary.py +138 -0
- sae_lens-6.28.0/sae_lens/synthetic/firing_probabilities.py +104 -0
- sae_lens-6.28.0/sae_lens/synthetic/hierarchy.py +335 -0
- sae_lens-6.28.0/sae_lens/synthetic/initialization.py +40 -0
- sae_lens-6.28.0/sae_lens/synthetic/plotting.py +230 -0
- sae_lens-6.28.0/sae_lens/synthetic/training.py +145 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/activations_store.py +51 -91
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/mixing_buffer.py +14 -5
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/sae_trainer.py +1 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/util.py +26 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/LICENSE +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/analysis/__init__.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/analysis/hooked_sae_transformer.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/analysis/neuronpedia_integration.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/constants.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/evals.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/llm_sae_training_runner.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/load_model.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/loading/__init__.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/loading/pretrained_saes_directory.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/pretokenize_runner.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/registry.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/__init__.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/batchtopk_sae.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/matching_pursuit_sae.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/matryoshka_batchtopk_sae.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/sae.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/topk_sae.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/transcoder.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/tokenization_and_batching.py +1 -1
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/__init__.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/activation_scaler.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/optim.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/types.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/training/upload_saes_to_huggingface.py +0 -0
- {sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/tutorial/tsea.py +0 -0
{sae_lens-6.26.0 → sae_lens-6.28.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sae-lens
-Version: 6.26.0
+Version: 6.28.0
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 License-File: LICENSE
@@ -77,6 +77,8 @@ The new v6 update is a major refactor to SAELens and changes the way training co
   [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/logits_lens_with_features.ipynb)
 - [Training a Sparse Autoencoder](tutorials/training_a_sparse_autoencoder.ipynb)
   [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/training_a_sparse_autoencoder.ipynb)
+- [Training SAEs on Synthetic Data](tutorials/training_saes_on_synthetic_data.ipynb)
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/training_saes_on_synthetic_data.ipynb)

 ## Join the Slack!

{sae_lens-6.26.0 → sae_lens-6.28.0}/README.md

@@ -41,6 +41,8 @@ The new v6 update is a major refactor to SAELens and changes the way training co
   [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/logits_lens_with_features.ipynb)
 - [Training a Sparse Autoencoder](tutorials/training_a_sparse_autoencoder.ipynb)
   [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/training_a_sparse_autoencoder.ipynb)
+- [Training SAEs on Synthetic Data](tutorials/training_saes_on_synthetic_data.ipynb)
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/training_saes_on_synthetic_data.ipynb)

 ## Join the Slack!

{sae_lens-6.26.0 → sae_lens-6.28.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sae-lens"
-version = "6.26.0"
+version = "6.28.0"
 description = "Training and Analyzing Sparse Autoencoders (SAEs)"
 authors = ["Joseph Bloom"]
 readme = "README.md"
@@ -51,12 +51,14 @@ mkdocs-redirects = "^1.2.1"
 mkdocs-section-index = "^0.3.9"
 mkdocstrings = "^0.25.2"
 mkdocstrings-python = "^1.10.9"
+beautifulsoup4 = "^4.12.0"
 tabulate = "^0.9.0"
 ruff = "^0.7.4"
 eai-sparsify = "^1.1.1"
 mike = "^2.0.0"
 trio = "^0.30.0"
 dictionary-learning = "^0.1.0"
+kaleido = "^1.2.0"

 [tool.poetry.extras]
 mamba = ["mamba-lens"]
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/__init__.py

@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.26.0"
+__version__ = "6.28.0"

 import logging

@@ -63,6 +63,7 @@ from .loading.pretrained_sae_loaders import (
 from .pretokenize_runner import PretokenizeRunner, pretokenize_runner
 from .registry import register_sae_class, register_sae_training_class
 from .training.activations_store import ActivationsStore
+from .training.sae_trainer import SAETrainer
 from .training.upload_saes_to_huggingface import upload_saes_to_huggingface

 __all__ = [
@@ -102,6 +103,7 @@ __all__ = [
     "JumpReLUTrainingSAE",
     "JumpReLUTrainingSAEConfig",
     "SAETrainingRunner",
+    "SAETrainer",
     "LoggingConfig",
     "BatchTopKTrainingSAE",
     "BatchTopKTrainingSAEConfig",
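With this change, `SAETrainer` is re-exported from the package root alongside `SAETrainingRunner`. A quick illustrative check of the new import surface (not taken from the release's docs):

```python
# Both import paths refer to the same class in 6.28.0: the root-level export is
# new, the submodule path already existed.
from sae_lens import SAETrainer
from sae_lens.training.sae_trainer import SAETrainer as SAETrainerDirect

assert SAETrainer is SAETrainerDirect
```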
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/cache_activations_runner.py

@@ -263,14 +263,21 @@ class CacheActivationsRunner:

         for i in tqdm(range(self.cfg.n_buffers), desc="Caching activations"):
             try:
-                …
-                …
-                )
-                …
+                # Accumulate n_batches_in_buffer batches into one shard
+                buffers: list[tuple[torch.Tensor, torch.Tensor | None]] = []
+                for _ in range(self.cfg.n_batches_in_buffer):
+                    buffers.append(self.activations_store.get_raw_llm_batch())
+                # Concatenate all batches
+                acts = torch.cat([b[0] for b in buffers], dim=0)
+                token_ids: torch.Tensor | None = None
+                if buffers[0][1] is not None:
+                    # All batches have token_ids if the first one does
+                    token_ids = torch.cat([b[1] for b in buffers], dim=0)  # type: ignore[arg-type]
+                shard = self._create_shard((acts, token_ids))
                 shard.save_to_disk(
                     f"{tmp_cached_activation_path}/shard_{i:05d}", num_shards=1
                 )
-                del …
+                del buffers, acts, token_ids, shard
             except StopIteration:
                 logger.warning(
                     f"Warning: Ran out of samples while filling the buffer at batch {i} before reaching {self.cfg.n_buffers} batches."
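The new loop body accumulates `n_batches_in_buffer` raw LLM batches and concatenates them before writing a shard. A self-contained sketch of that concatenation pattern is below; the function and variable names are illustrative, not part of the library.

```python
import torch


def concat_batches(
    batches: list[tuple[torch.Tensor, torch.Tensor | None]],
) -> tuple[torch.Tensor, torch.Tensor | None]:
    # Concatenate activations along the batch dimension.
    acts = torch.cat([acts for acts, _ in batches], dim=0)
    token_ids = None
    if batches[0][1] is not None:
        # Assume every batch carries token_ids if the first one does.
        token_ids = torch.cat([ids for _, ids in batches], dim=0)  # type: ignore[arg-type]
    return acts, token_ids


batches = [(torch.randn(4, 8), torch.randint(0, 100, (4,))) for _ in range(3)]
acts, token_ids = concat_batches(batches)
print(acts.shape, None if token_ids is None else token_ids.shape)  # (12, 8) (12,)
```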
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/config.py

@@ -148,6 +148,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         seqpos_slice (tuple[int | None, ...]): Determines slicing of activations when constructing batches during training. The slice should be (start_pos, end_pos, optional[step_size]), e.g. for Othello we sometimes use (5, -5). Note, step_size > 0.
         disable_concat_sequences (bool): Whether to disable concatenating sequences and ignore sequences shorter than the context size. If True, disables concatenating and ignores short sequences.
         sequence_separator_token (int | Literal["bos", "eos", "sep"] | None): If not `None`, this token will be placed between sentences in a batch to act as a separator. By default, this is the `<bos>` token.
+        activations_mixing_fraction (float): Fraction of the activation buffer to keep for mixing with new activations (default 0.5). Higher values mean more temporal shuffling but slower throughput. If 0, activations are served in order without shuffling (no temporal mixing).
         device (str): The device to use. Usually "cuda".
         act_store_device (str): The device to use for the activation store. "cpu" is advised in order to save VRAM. Defaults to "with_model" which uses the same device as the main model.
         seed (int): The seed to use.
@@ -217,6 +218,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     sequence_separator_token: int | Literal["bos", "eos", "sep"] | None = (
         special_token_field(default="bos")
     )
+    activations_mixing_fraction: float = 0.5

     # Misc
     device: str = "cpu"
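A hedged sketch of how the new field might be set on the runner config. Only `activations_mixing_fraction` and its default of 0.5 come from this diff; the other constructor arguments (and the `StandardTrainingSAEConfig` fields) are placeholders assumed for illustration.

```python
from sae_lens import LanguageModelSAERunnerConfig, StandardTrainingSAEConfig

cfg = LanguageModelSAERunnerConfig(
    sae=StandardTrainingSAEConfig(d_in=512, d_sae=4096),  # placeholder SAE config
    # Keep 25% of the buffer for temporal mixing; 0.0 would serve activations in
    # order with no shuffling, 0.5 is the default.
    activations_mixing_fraction=0.25,
)
```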
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/loading/pretrained_sae_loaders.py

@@ -959,7 +959,7 @@ def get_dictionary_learning_config_1_from_hf(
     architecture = "standard"
     if trainer["dict_class"] == "GatedAutoEncoder":
         architecture = "gated"
-    elif trainer["dict_class"] …
+    elif trainer["dict_class"] in ["MatryoshkaBatchTopKSAE", "BatchTopKSAE"]:
         architecture = "jumprelu"

     return {
@@ -1831,6 +1831,7 @@ def temporal_sae_huggingface_loader(
     Load TemporalSAE from canrager/temporalSAEs format (safetensors version).

     Expects folder_name to contain:
+
     - conf.yaml (configuration)
     - latest_ckpt.safetensors (model weights)
     """
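For readability, the new branching can be restated as a standalone helper. This is an illustrative restatement of the logic shown in the hunk, not a function that exists in the library.

```python
def detect_architecture(dict_class: str) -> str:
    """Map a dictionary_learning dict_class name to an SAELens architecture."""
    if dict_class == "GatedAutoEncoder":
        return "gated"
    if dict_class in ["MatryoshkaBatchTopKSAE", "BatchTopKSAE"]:
        # BatchTopK-style dictionaries are loaded as JumpReLU SAEs for inference.
        return "jumprelu"
    return "standard"


print(detect_architecture("BatchTopKSAE"))  # jumprelu
```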
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/pretrained_saes.yaml

@@ -9072,150 +9072,150 @@ gemma-scope-2-27b-it-transcoders-all:
   - id: layer_5_width_262k_l0_small_affine
     path: transcoder_all/layer_5_width_262k_l0_small_affine
     l0: 12
-  … [144 removed lines; their content is not shown in this diff view]
+  - id: layer_60_width_16k_l0_big
+    path: transcoder_all/layer_60_width_16k_l0_big
+    l0: 120
+  - id: layer_60_width_16k_l0_big_affine
+    path: transcoder_all/layer_60_width_16k_l0_big_affine
+    l0: 120
+  - id: layer_60_width_16k_l0_small
+    path: transcoder_all/layer_60_width_16k_l0_small
+    l0: 20
+  - id: layer_60_width_16k_l0_small_affine
+    path: transcoder_all/layer_60_width_16k_l0_small_affine
+    l0: 20
+  - id: layer_60_width_262k_l0_big
+    path: transcoder_all/layer_60_width_262k_l0_big
+    l0: 120
+  - id: layer_60_width_262k_l0_big_affine
+    path: transcoder_all/layer_60_width_262k_l0_big_affine
+    l0: 120
+  - id: layer_60_width_262k_l0_small
+    path: transcoder_all/layer_60_width_262k_l0_small
+    l0: 20
+  - id: layer_60_width_262k_l0_small_affine
+    path: transcoder_all/layer_60_width_262k_l0_small_affine
+    l0: 20
+  - id: layer_61_width_16k_l0_big
+    path: transcoder_all/layer_61_width_16k_l0_big
+    l0: 120
+  - id: layer_61_width_16k_l0_big_affine
+    path: transcoder_all/layer_61_width_16k_l0_big_affine
+    l0: 120
+  - id: layer_61_width_16k_l0_small
+    path: transcoder_all/layer_61_width_16k_l0_small
+    l0: 20
+  - id: layer_61_width_16k_l0_small_affine
+    path: transcoder_all/layer_61_width_16k_l0_small_affine
+    l0: 20
+  - id: layer_61_width_262k_l0_big
+    path: transcoder_all/layer_61_width_262k_l0_big
+    l0: 120
+  - id: layer_61_width_262k_l0_big_affine
+    path: transcoder_all/layer_61_width_262k_l0_big_affine
+    l0: 120
+  - id: layer_61_width_262k_l0_small
+    path: transcoder_all/layer_61_width_262k_l0_small
+    l0: 20
+  - id: layer_61_width_262k_l0_small_affine
+    path: transcoder_all/layer_61_width_262k_l0_small_affine
+    l0: 20
+  - id: layer_6_width_16k_l0_big
+    path: transcoder_all/layer_6_width_16k_l0_big
+    l0: 77
+  - id: layer_6_width_16k_l0_big_affine
+    path: transcoder_all/layer_6_width_16k_l0_big_affine
+    l0: 77
+  - id: layer_6_width_16k_l0_small
+    path: transcoder_all/layer_6_width_16k_l0_small
+    l0: 12
+  - id: layer_6_width_16k_l0_small_affine
+    path: transcoder_all/layer_6_width_16k_l0_small_affine
+    l0: 12
+  - id: layer_6_width_262k_l0_big
+    path: transcoder_all/layer_6_width_262k_l0_big
+    l0: 77
+  - id: layer_6_width_262k_l0_big_affine
+    path: transcoder_all/layer_6_width_262k_l0_big_affine
+    l0: 77
+  - id: layer_6_width_262k_l0_small
+    path: transcoder_all/layer_6_width_262k_l0_small
+    l0: 12
+  - id: layer_6_width_262k_l0_small_affine
+    path: transcoder_all/layer_6_width_262k_l0_small_affine
+    l0: 12
+  - id: layer_7_width_16k_l0_big
+    path: transcoder_all/layer_7_width_16k_l0_big
+    l0: 80
+  - id: layer_7_width_16k_l0_big_affine
+    path: transcoder_all/layer_7_width_16k_l0_big_affine
+    l0: 80
+  - id: layer_7_width_16k_l0_small
+    path: transcoder_all/layer_7_width_16k_l0_small
+    l0: 13
+  - id: layer_7_width_16k_l0_small_affine
+    path: transcoder_all/layer_7_width_16k_l0_small_affine
+    l0: 13
+  - id: layer_7_width_262k_l0_big
+    path: transcoder_all/layer_7_width_262k_l0_big
+    l0: 80
+  - id: layer_7_width_262k_l0_big_affine
+    path: transcoder_all/layer_7_width_262k_l0_big_affine
+    l0: 80
+  - id: layer_7_width_262k_l0_small
+    path: transcoder_all/layer_7_width_262k_l0_small
+    l0: 13
+  - id: layer_7_width_262k_l0_small_affine
+    path: transcoder_all/layer_7_width_262k_l0_small_affine
+    l0: 13
+  - id: layer_8_width_16k_l0_big
+    path: transcoder_all/layer_8_width_16k_l0_big
+    l0: 83
+  - id: layer_8_width_16k_l0_big_affine
+    path: transcoder_all/layer_8_width_16k_l0_big_affine
+    l0: 83
+  - id: layer_8_width_16k_l0_small
+    path: transcoder_all/layer_8_width_16k_l0_small
+    l0: 13
+  - id: layer_8_width_16k_l0_small_affine
+    path: transcoder_all/layer_8_width_16k_l0_small_affine
+    l0: 13
+  - id: layer_8_width_262k_l0_big
+    path: transcoder_all/layer_8_width_262k_l0_big
+    l0: 83
+  - id: layer_8_width_262k_l0_big_affine
+    path: transcoder_all/layer_8_width_262k_l0_big_affine
+    l0: 83
+  - id: layer_8_width_262k_l0_small
+    path: transcoder_all/layer_8_width_262k_l0_small
+    l0: 13
+  - id: layer_8_width_262k_l0_small_affine
+    path: transcoder_all/layer_8_width_262k_l0_small_affine
+    l0: 13
+  - id: layer_9_width_16k_l0_big
+    path: transcoder_all/layer_9_width_16k_l0_big
+    l0: 86
+  - id: layer_9_width_16k_l0_big_affine
+    path: transcoder_all/layer_9_width_16k_l0_big_affine
+    l0: 86
+  - id: layer_9_width_16k_l0_small
+    path: transcoder_all/layer_9_width_16k_l0_small
+    l0: 14
+  - id: layer_9_width_16k_l0_small_affine
+    path: transcoder_all/layer_9_width_16k_l0_small_affine
+    l0: 14
+  - id: layer_9_width_262k_l0_big
+    path: transcoder_all/layer_9_width_262k_l0_big
+    l0: 86
+  - id: layer_9_width_262k_l0_big_affine
+    path: transcoder_all/layer_9_width_262k_l0_big_affine
+    l0: 86
+  - id: layer_9_width_262k_l0_small
+    path: transcoder_all/layer_9_width_262k_l0_small
+    l0: 14
+  - id: layer_9_width_262k_l0_small_affine
+    path: transcoder_all/layer_9_width_262k_l0_small_affine
+    l0: 14
 gemma-scope-2-27b-it-transcoders:
   conversion_func: gemma_3
   model: google/gemma-3-27b-it
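Entries in pretrained_saes.yaml are addressed by release name and id. A hedged sketch of loading one of the entries listed above: the release and `sae_id` strings come from the YAML, but the exact shape of `SAE.from_pretrained` (whether it returns the SAE alone or a tuple) depends on the installed SAELens version and is an assumption here.

```python
from sae_lens import SAE

sae = SAE.from_pretrained(
    release="gemma-scope-2-27b-it-transcoders-all",
    sae_id="layer_60_width_16k_l0_small",  # l0: 20 per the YAML above
    device="cpu",
)
```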
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/gated_sae.py

@@ -118,6 +118,7 @@ class GatedTrainingSAE(TrainingSAE[GatedTrainingSAEConfig]):
     """
     GatedTrainingSAE is a concrete implementation of BaseTrainingSAE for the "gated" SAE architecture.
     It implements:
+
     - initialize_weights: sets up gating parameters (as in GatedSAE) plus optional training-specific init.
     - encode: calls encode_with_hidden_pre (standard training approach).
     - decode: linear transformation + hooking, same as GatedSAE or StandardTrainingSAE.
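The docstring above refers to gating parameters. A conceptual sketch of a gated encoder under simplified assumptions (a shared encoder matrix and no magnitude rescaling), not the library's implementation:

```python
import torch

d_in, d_sae = 16, 64
x = torch.randn(2, d_in)
W_enc = torch.randn(d_in, d_sae)
b_gate, b_mag, b_dec = torch.zeros(d_sae), torch.zeros(d_sae), torch.zeros(d_in)

pre = (x - b_dec) @ W_enc
gate = (pre + b_gate) > 0        # gate path: which features are active
mag = torch.relu(pre + b_mag)    # magnitude path: how strongly they fire
feats = gate * mag               # gated feature activations
```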
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/jumprelu_sae.py

@@ -105,6 +105,7 @@ class JumpReLUSAE(SAE[JumpReLUSAEConfig]):
     activation function (e.g., ReLU etc.).

     It implements:
+
     - initialize_weights: sets up parameters, including a threshold.
     - encode: computes the feature activations using JumpReLU.
     - decode: reconstructs the input from the feature activations.
@@ -216,10 +217,12 @@ class JumpReLUTrainingSAE(TrainingSAE[JumpReLUTrainingSAEConfig]):
     JumpReLUTrainingSAE is a training-focused implementation of a SAE using a JumpReLU activation.

     Similar to the inference-only JumpReLUSAE, but with:
+
     - A learnable log-threshold parameter (instead of a raw threshold).
     - A specialized auxiliary loss term for sparsity (L0 or similar).

     Methods of interest include:
+
     - initialize_weights: sets up W_enc, b_enc, W_dec, b_dec, and log_threshold.
     - encode_with_hidden_pre_jumprelu: runs a forward pass for training.
     - training_forward_pass: calculates MSE and auxiliary losses, returning a TrainStepOutput.
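A minimal illustration of the learnable log-threshold mentioned above: parameterizing the threshold as exp(log_threshold) keeps it positive during optimization, and JumpReLU passes pre-activations only where they exceed the threshold. This is a conceptual sketch, not the library's training code.

```python
import torch

log_threshold = torch.nn.Parameter(torch.zeros(8))  # threshold starts at exp(0) = 1
pre_acts = torch.randn(4, 8)

threshold = log_threshold.exp()
feats = pre_acts * (pre_acts > threshold)  # JumpReLU: keep values above threshold, else 0
```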
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/standard_sae.py

@@ -34,6 +34,7 @@ class StandardSAE(SAE[StandardSAEConfig]):
     using a simple linear encoder and decoder.

     It implements the required abstract methods from BaseSAE:
+
     - initialize_weights: sets up simple parameter initializations for W_enc, b_enc, W_dec, and b_dec.
     - encode: computes the feature activations from an input.
     - decode: reconstructs the input from the feature activations.
@@ -99,6 +100,7 @@ class StandardTrainingSAE(TrainingSAE[StandardTrainingSAEConfig]):
     """
     StandardTrainingSAE is a concrete implementation of BaseTrainingSAE using the "standard" SAE architecture.
     It implements:
+
     - initialize_weights: basic weight initialization for encoder/decoder.
     - encode: inference encoding (invokes encode_with_hidden_pre).
     - decode: a simple linear decoder.
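A minimal sketch of the standard encode/decode pair the docstring lists; the handling of the decoder bias and any normalization is an assumption here, not the library's exact implementation.

```python
import torch

d_in, d_sae = 16, 64
W_enc, b_enc = torch.randn(d_in, d_sae), torch.zeros(d_sae)
W_dec, b_dec = torch.randn(d_sae, d_in), torch.zeros(d_in)

x = torch.randn(2, d_in)
feats = torch.relu((x - b_dec) @ W_enc + b_enc)  # encode: sparse feature activations
x_hat = feats @ W_dec + b_dec                    # decode: linear reconstruction
```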
{sae_lens-6.26.0 → sae_lens-6.28.0}/sae_lens/saes/temporal_sae.py

@@ -167,6 +167,7 @@ class TemporalSAE(SAE[TemporalSAEConfig]):
     """TemporalSAE: Sparse Autoencoder with temporal attention.

     This SAE decomposes each activation x_t into:
+
     - x_pred: Information aggregated from context {x_0, ..., x_{t-1}}
     - x_novel: Novel information at position t (encoded sparsely)

sae_lens-6.28.0/sae_lens/synthetic/__init__.py (new file)

@@ -0,0 +1,89 @@
+"""
+Synthetic data utilities for SAE experiments.
+
+This module provides tools for creating feature dictionaries and generating
+synthetic activations for testing and experimenting with SAEs.
+
+Main components:
+
+- FeatureDictionary: Maps sparse feature activations to dense hidden activations
+- ActivationGenerator: Generates batches of synthetic feature activations
+- HierarchyNode: Enforces hierarchical structure on feature activations
+- Training utilities: Helpers for training and evaluating SAEs on synthetic data
+- Plotting utilities: Visualization helpers for understanding SAE behavior
+"""
+
+from sae_lens.synthetic.activation_generator import (
+    ActivationGenerator,
+    ActivationsModifier,
+    ActivationsModifierInput,
+)
+from sae_lens.synthetic.correlation import (
+    create_correlation_matrix_from_correlations,
+    generate_random_correlation_matrix,
+    generate_random_correlations,
+)
+from sae_lens.synthetic.evals import (
+    SyntheticDataEvalResult,
+    eval_sae_on_synthetic_data,
+    mean_correlation_coefficient,
+)
+from sae_lens.synthetic.feature_dictionary import (
+    FeatureDictionary,
+    FeatureDictionaryInitializer,
+    orthogonal_initializer,
+    orthogonalize_embeddings,
+)
+from sae_lens.synthetic.firing_probabilities import (
+    linear_firing_probabilities,
+    random_firing_probabilities,
+    zipfian_firing_probabilities,
+)
+from sae_lens.synthetic.hierarchy import HierarchyNode, hierarchy_modifier
+from sae_lens.synthetic.initialization import init_sae_to_match_feature_dict
+from sae_lens.synthetic.plotting import (
+    find_best_feature_ordering,
+    find_best_feature_ordering_across_saes,
+    find_best_feature_ordering_from_sae,
+    plot_sae_feature_similarity,
+)
+from sae_lens.synthetic.training import (
+    SyntheticActivationIterator,
+    train_toy_sae,
+)
+from sae_lens.util import cosine_similarities
+
+__all__ = [
+    # Main classes
+    "FeatureDictionary",
+    "HierarchyNode",
+    "hierarchy_modifier",
+    "ActivationGenerator",
+    # Activation generation
+    "zipfian_firing_probabilities",
+    "linear_firing_probabilities",
+    "random_firing_probabilities",
+    "create_correlation_matrix_from_correlations",
+    "generate_random_correlations",
+    "generate_random_correlation_matrix",
+    # Feature modifiers
+    "ActivationsModifier",
+    "ActivationsModifierInput",
+    # Utilities
+    "orthogonalize_embeddings",
+    "orthogonal_initializer",
+    "FeatureDictionaryInitializer",
+    "cosine_similarities",
+    # Training utilities
+    "SyntheticActivationIterator",
+    "SyntheticDataEvalResult",
+    "train_toy_sae",
+    "eval_sae_on_synthetic_data",
+    "mean_correlation_coefficient",
+    "init_sae_to_match_feature_dict",
+    # Plotting utilities
+    "find_best_feature_ordering",
+    "find_best_feature_ordering_from_sae",
+    "find_best_feature_ordering_across_saes",
+    "plot_sae_feature_similarity",
+]
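The names below are exactly what the new sae_lens.synthetic package exports in 6.28.0 (taken from its `__all__`); the comments restate their roles from the module docstring. No call signatures are shown because they are not part of this diff.

```python
from sae_lens.synthetic import (
    ActivationGenerator,          # generates batches of synthetic feature activations
    FeatureDictionary,            # maps sparse feature activations to dense hidden activations
    HierarchyNode,                # enforces hierarchical structure on feature activations
    eval_sae_on_synthetic_data,   # evaluation helper for SAEs trained on synthetic data
    train_toy_sae,                # training helper for toy/synthetic experiments
    zipfian_firing_probabilities, # firing-probability schedule for synthetic features
)
```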