PyPI - sae-lens - Versions diffs - 6.26.2__py3-none-any.whl → 6.27.0__py3-none-any.whl - Mend

sae-lens 6.26.2__py3-none-any.whl → 6.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

sae_lens/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.26.2"
+__version__ = "6.27.0"
 import logging

sae_lens/config.py CHANGED Viewed

@@ -148,6 +148,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         seqpos_slice (tuple[int | None, ...]): Determines slicing of activations when constructing batches during training. The slice should be (start_pos, end_pos, optional[step_size]), e.g. for Othello we sometimes use (5, -5). Note, step_size > 0.
         disable_concat_sequences (bool): Whether to disable concatenating sequences and ignore sequences shorter than the context size. If True, disables concatenating and ignores short sequences.
         sequence_separator_token (int | Literal["bos", "eos", "sep"] | None): If not `None`, this token will be placed between sentences in a batch to act as a separator. By default, this is the `<bos>` token.
+        activations_mixing_fraction (float): Fraction of the activation buffer to keep for mixing with new activations (default 0.5). Higher values mean more temporal shuffling but slower throughput. If 0, activations are served in order without shuffling (no temporal mixing).
         device (str): The device to use. Usually "cuda".
         act_store_device (str): The device to use for the activation store. "cpu" is advised in order to save VRAM. Defaults to "with_model" which uses the same device as the main model.
         seed (int): The seed to use.
@@ -217,6 +218,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     sequence_separator_token: int | Literal["bos", "eos", "sep"] | None = (
         special_token_field(default="bos")
     )
+    activations_mixing_fraction: float = 0.5
     # Misc
     device: str = "cpu"

sae_lens/training/activations_store.py CHANGED Viewed

@@ -148,6 +148,7 @@ class ActivationsStore:
             exclude_special_tokens=exclude_special_tokens,
             disable_concat_sequences=cfg.disable_concat_sequences,
             sequence_separator_token=cfg.sequence_separator_token,
+            activations_mixing_fraction=cfg.activations_mixing_fraction,
         )
     @classmethod
@@ -222,6 +223,7 @@ class ActivationsStore:
         exclude_special_tokens: torch.Tensor | None = None,
         disable_concat_sequences: bool = False,
         sequence_separator_token: int | Literal["bos", "eos", "sep"] | None = "bos",
+        activations_mixing_fraction: float = 0.5,
     ):
         self.model = model
         if model_kwargs is None:
@@ -269,6 +271,7 @@ class ActivationsStore:
         self.sequence_separator_token: int | Literal["bos", "eos", "sep"] | None = (
             sequence_separator_token
         )
+        self.activations_mixing_fraction = activations_mixing_fraction
         self.n_dataset_processed = 0
@@ -708,6 +711,7 @@ class ActivationsStore:
             buffer_size=self.n_batches_in_buffer * self.training_context_size,
             batch_size=self.train_batch_size_tokens,
             activations_loader=self._iterate_filtered_activations(),
+            mix_fraction=self.activations_mixing_fraction,
         )
     def next_batch(self) -> torch.Tensor:

sae_lens/training/mixing_buffer.py CHANGED Viewed

@@ -8,15 +8,19 @@ def mixing_buffer(
     buffer_size: int,
     batch_size: int,
     activations_loader: Iterator[torch.Tensor],
+    mix_fraction: float = 0.5,
 ) -> Iterator[torch.Tensor]:
     """
     A generator that maintains a mix of old and new activations for better training.
-    It stores half of the activations and mixes them with new ones to create batches.
+    It keeps a portion of activations and mixes them with new ones to create batches.
     Args:
-        buffer_size: Total size of the buffer (will store buffer_size/2 activations)
+        buffer_size: Total size of the buffer
         batch_size: Size of batches to return
         activations_loader: Iterator providing new activations
+        mix_fraction: Fraction of buffer to keep for mixing (default 0.5).
+                      Higher values mean more temporal mixing but slower throughput.
+                      If 0, no shuffling occurs (passthrough mode).
     Yields:
         Batches of activations of shape (batch_size, *activation_dims)
@@ -24,6 +28,8 @@ def mixing_buffer(
     if buffer_size < batch_size:
         raise ValueError("Buffer size must be greater than or equal to batch size")
+    if not 0 <= mix_fraction <= 1:
+        raise ValueError("mix_fraction must be in [0, 1]")
     storage_buffer: torch.Tensor | None = None
@@ -35,10 +41,12 @@ def mixing_buffer(
         )
         if storage_buffer.shape[0] >= buffer_size:
-            # Shuffle
-            storage_buffer = storage_buffer[torch.randperm(storage_buffer.shape[0])]
+            if mix_fraction > 0:
+                storage_buffer = storage_buffer[torch.randperm(storage_buffer.shape[0])]
-            num_serving_batches = max(1, storage_buffer.shape[0] // (2 * batch_size))
+            num_serving_batches = max(
+                1, int(storage_buffer.shape[0] * (1 - mix_fraction)) // batch_size
+            )
             serving_cutoff = num_serving_batches * batch_size
             serving_buffer = storage_buffer[:serving_cutoff]
             storage_buffer = storage_buffer[serving_cutoff:]

{sae_lens-6.26.2.dist-info → sae_lens-6.27.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sae-lens
-Version: 6.26.2
+Version: 6.27.0
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 License-File: LICENSE

{sae_lens-6.26.2.dist-info → sae_lens-6.27.0.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-sae_lens/__init__.py,sha256=8muF12kzUe8sePiovnUMEXCu1OcotIVw-VvDjGEK2Zw,4725
+sae_lens/__init__.py,sha256=379YK4TU5y4Gl_sjF9JG5b7c_ywo3PjcY37e3EW2IyA,4725
 sae_lens/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sae_lens/analysis/hooked_sae_transformer.py,sha256=dQRgGVwce8XwylL2AzJE7l9elhtMRFCs2hdUj-Qyy4g,14038
 sae_lens/analysis/neuronpedia_integration.py,sha256=Gx1W7hUBEuMoasNcnOnZ1wmqbXDd1pSZ1nqKEya1HQc,4962
 sae_lens/cache_activations_runner.py,sha256=Lvlz-k5-3XxVRtUdC4b1CiKyx5s0ckLa8GDGv9_kcxs,12566
-sae_lens/config.py,sha256=C982bUELhGHcfTwzeMTtXIf2hPtc946thYpUyctLiBo,30516
+sae_lens/config.py,sha256=sseYcRMsAyopj8FICup1RGTXjFxzAithZ2OH7OpQV3Y,30839
 sae_lens/constants.py,sha256=CM-h9AjZNAl2aP7hVpKk7YsFHpu-_Lfhhmq2d5qPEVc,887
 sae_lens/evals.py,sha256=P0NUsJeGzYxFBiVKhbPzd72IFKY4gH40HHlEZ3jEAmg,39598
 sae_lens/llm_sae_training_runner.py,sha256=M7BK55gSFYu2qFQKABHX3c8i46P1LfODCeyHFzGGuqU,15196
@@ -28,15 +28,15 @@ sae_lens/saes/transcoder.py,sha256=CTpJs8ASOK06npih7gZHygZuxqTR7HICWlOYfTiKjI4,1
 sae_lens/tokenization_and_batching.py,sha256=D_o7cXvRqhT89H3wNzoRymNALNE6eHojBWLdXOUwUGE,5438
 sae_lens/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sae_lens/training/activation_scaler.py,sha256=FzNfgBplLWmyiSlZ6TUvE-nur3lOiGTrlvC97ys8S24,1973
-sae_lens/training/activations_store.py,sha256=rQadexm2BiwK7_MZIPlRkcKSqabi3iuOTC-R8aJchS8,33778
-sae_lens/training/mixing_buffer.py,sha256=vDpYG5ZE70szDvBsRKcNHEES3h_WTKJ16qDYk5jPOVA,2015
+sae_lens/training/activations_store.py,sha256=2BVajHRcozKQFf1tkeraUCdFuut3spdk0hhgtdpizzI,34031
+sae_lens/training/mixing_buffer.py,sha256=DK22yPwEop4suG0K-8XFw5ZGNl0JrgCEjypmKEUAaGY,2394
 sae_lens/training/optim.py,sha256=bJpqqcK4enkcPvQAJkeH4Ci1LUOlfjIMTv6-IlaAbRA,5588
 sae_lens/training/sae_trainer.py,sha256=zhkabyIKxI_tZTV3_kwz6zMrHZ95Ecr97krmwc-9ffs,17600
 sae_lens/training/types.py,sha256=1FpLx_Doda9vZpmfm-x1e8wGBYpyhe9Kpb_JuM5nIFM,90
 sae_lens/training/upload_saes_to_huggingface.py,sha256=r_WzI1zLtGZ5TzAxuG3xa_8T09j3zXJrWd_vzPsPGkQ,4469
 sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
 sae_lens/util.py,sha256=spkcmQUsjVYFn5H2032nQYr1CKGVnv3tAdfIpY59-Mg,3919
-sae_lens-6.26.2.dist-info/METADATA,sha256=TPTLR3wKbPcGOsJ9P5hxVSQu-O6JIioFxoXUHP4Tj2w,5361
-sae_lens-6.26.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-sae_lens-6.26.2.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
-sae_lens-6.26.2.dist-info/RECORD,,
+sae_lens-6.27.0.dist-info/METADATA,sha256=S3GYpJhfYx05i-ZfX8rpwbiR1IFDlFAR0nSURgJQmJk,5361
+sae_lens-6.27.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+sae_lens-6.27.0.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
+sae_lens-6.27.0.dist-info/RECORD,,

{sae_lens-6.26.2.dist-info → sae_lens-6.27.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sae_lens-6.26.2.dist-info → sae_lens-6.27.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sae-lens 6.26.2__py3-none-any.whl → 6.27.0__py3-none-any.whl

sae-lens 6.26.2__py3-none-any.whl → 6.27.0__py3-none-any.whl