PyPI - sae-lens - Versions diffs - 6.12.2__py3-none-any.whl → 6.13.0__py3-none-any.whl - Mend

sae-lens 6.12.2__py3-none-any.whl → 6.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

sae_lens/__init__.py +1 -1
sae_lens/evals.py +2 -0
sae_lens/loading/pretrained_sae_loaders.py +6 -0
sae_lens/pretokenize_runner.py +2 -1
sae_lens/saes/sae.py +9 -10
sae_lens/saes/topk_sae.py +211 -9
sae_lens/tokenization_and_batching.py +1 -1
sae_lens/training/sae_trainer.py +7 -5
sae_lens/training/types.py +1 -1
{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/METADATA +1 -1
{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/RECORD +13 -13
{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/WHEEL +0 -0
{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/licenses/LICENSE +0 -0

sae_lens/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ruff: noqa: E402
-__version__ = "6.12.2"
+__version__ = "6.13.0"
 import logging

sae_lens/evals.py CHANGED Viewed

@@ -466,6 +466,8 @@ def get_sparsity_and_variance_metrics(
         sae_out_scaled = sae.decode(sae_feature_activations).to(
             original_act_scaled.device
         )
+        if sae_feature_activations.is_sparse:
+            sae_feature_activations = sae_feature_activations.to_dense()
         del cache
         sae_out = activation_scaler.unscale(sae_out_scaled)

sae_lens/loading/pretrained_sae_loaders.py CHANGED Viewed

@@ -233,6 +233,12 @@ def handle_pre_6_0_config(cfg_dict: dict[str, Any]) -> dict[str, Any]:
         "reshape_activations",
         "hook_z" if "hook_z" in new_cfg.get("hook_name", "") else "none",
     )
+    if (
+        new_cfg.get("activation_fn") == "topk"
+        and new_cfg.get("activation_fn_kwargs", {}).get("k") is not None
+    ):
+        new_cfg["architecture"] = "topk"
+        new_cfg["k"] = new_cfg["activation_fn_kwargs"]["k"]
     if "normalize_activations" in new_cfg and isinstance(
         new_cfg["normalize_activations"], bool

sae_lens/pretokenize_runner.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import io
 import json
 import sys
+from collections.abc import Iterator
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Iterator, Literal, cast
+from typing import Literal, cast
 import torch
 from datasets import Dataset, DatasetDict, load_dataset

sae_lens/saes/sae.py CHANGED Viewed

@@ -14,7 +14,6 @@ from typing import (
     Generic,
     Literal,
     NamedTuple,
-    Type,
     TypeVar,
 )
@@ -534,7 +533,7 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     @classmethod
     @deprecated("Use load_from_disk instead")
     def load_from_pretrained(
-        cls: Type[T_SAE],
+        cls: type[T_SAE],
         path: str | Path,
         device: str = "cpu",
         dtype: str | None = None,
@@ -543,7 +542,7 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     @classmethod
     def load_from_disk(
-        cls: Type[T_SAE],
+        cls: type[T_SAE],
         path: str | Path,
         device: str = "cpu",
         dtype: str | None = None,
@@ -564,7 +563,7 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     @classmethod
     def from_pretrained(
-        cls: Type[T_SAE],
+        cls: type[T_SAE],
         release: str,
         sae_id: str,
         device: str = "cpu",
@@ -585,7 +584,7 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     @classmethod
     def from_pretrained_with_cfg_and_sparsity(
-        cls: Type[T_SAE],
+        cls: type[T_SAE],
         release: str,
         sae_id: str,
         device: str = "cpu",
@@ -684,7 +683,7 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
         return sae, cfg_dict, log_sparsities
     @classmethod
-    def from_dict(cls: Type[T_SAE], config_dict: dict[str, Any]) -> T_SAE:
+    def from_dict(cls: type[T_SAE], config_dict: dict[str, Any]) -> T_SAE:
         """Create an SAE from a config dictionary."""
         sae_cls = cls.get_sae_class_for_architecture(config_dict["architecture"])
         sae_config_cls = cls.get_sae_config_class_for_architecture(
@@ -694,8 +693,8 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     @classmethod
     def get_sae_class_for_architecture(
-        cls: Type[T_SAE], architecture: str
-    ) -> Type[T_SAE]:
+        cls: type[T_SAE], architecture: str
+    ) -> type[T_SAE]:
         """Get the SAE class for a given architecture."""
         sae_cls, _ = get_sae_class(architecture)
         if not issubclass(sae_cls, cls):
@@ -1000,8 +999,8 @@ class TrainingSAE(SAE[T_TRAINING_SAE_CONFIG], ABC):
     @classmethod
     def get_sae_class_for_architecture(
-        cls: Type[T_TRAINING_SAE], architecture: str
-    ) -> Type[T_TRAINING_SAE]:
+        cls: type[T_TRAINING_SAE], architecture: str
+    ) -> type[T_TRAINING_SAE]:
         """Get the SAE class for a given architecture."""
         sae_cls, _ = get_sae_training_class(architecture)
         if not issubclass(sae_cls, cls):

sae_lens/saes/topk_sae.py CHANGED Viewed

@@ -6,6 +6,7 @@ from typing import Callable
 import torch
 from jaxtyping import Float
 from torch import nn
+from transformer_lens.hook_points import HookPoint
 from typing_extensions import override
 from sae_lens.saes.sae import (
@@ -15,34 +16,102 @@ from sae_lens.saes.sae import (
     TrainingSAE,
     TrainingSAEConfig,
     TrainStepInput,
+    _disable_hooks,
 )
+class SparseHookPoint(HookPoint):
+    """
+    A HookPoint that takes in a sparse tensor.
+    Overrides TransformerLens's HookPoint.
+    """
+    def __init__(self, d_sae: int):
+        super().__init__()
+        self.d_sae = d_sae
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        using_hooks = (
+            self._forward_hooks is not None and len(self._forward_hooks) > 0
+        ) or (self._backward_hooks is not None and len(self._backward_hooks) > 0)
+        if using_hooks and x.is_sparse:
+            return x.to_dense()
+        return x  # if no hooks are being used, use passthrough
 class TopK(nn.Module):
     """
     A simple TopK activation that zeroes out all but the top K elements along the last dimension,
     and applies ReLU to the top K elements.
     """
-    b_enc: nn.Parameter
+    use_sparse_activations: bool
     def __init__(
         self,
         k: int,
+        use_sparse_activations: bool = False,
     ):
         super().__init__()
         self.k = k
+        self.use_sparse_activations = use_sparse_activations
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
+    def forward(
+        self,
+        x: torch.Tensor,
+    ) -> torch.Tensor:
         """
         1) Select top K elements along the last dimension.
         2) Apply ReLU.
         3) Zero out all other entries.
         """
-        topk = torch.topk(x, k=self.k, dim=-1)
-        values = topk.values.relu()
+        topk_values, topk_indices = torch.topk(x, k=self.k, dim=-1, sorted=False)
+        values = topk_values.relu()
+        if self.use_sparse_activations:
+            # Produce a COO sparse tensor (use sparse matrix multiply in decode)
+            original_shape = x.shape
+            # Create indices for all dimensions
+            # For each element in topk_indices, we need to map it back to the original tensor coordinates
+            batch_dims = original_shape[:-1]  # All dimensions except the last one
+            num_batch_elements = torch.prod(torch.tensor(batch_dims)).item()
+            # Create batch indices - each batch element repeated k times
+            batch_indices_flat = torch.arange(
+                num_batch_elements, device=x.device
+            ).repeat_interleave(self.k)
+            # Convert flat batch indices back to multi-dimensional indices
+            if len(batch_dims) == 1:
+                # 2D case: [batch, features]
+                sparse_indices = torch.stack(
+                    [
+                        batch_indices_flat,
+                        topk_indices.flatten(),
+                    ]
+                )
+            else:
+                # 3D+ case: need to unravel the batch indices
+                batch_indices_multi = []
+                remaining = batch_indices_flat
+                for dim_size in reversed(batch_dims):
+                    batch_indices_multi.append(remaining % dim_size)
+                    remaining = remaining // dim_size
+                batch_indices_multi.reverse()
+                sparse_indices = torch.stack(
+                    [
+                        *batch_indices_multi,
+                        topk_indices.flatten(),
+                    ]
+                )
+            return torch.sparse_coo_tensor(
+                sparse_indices, values.flatten(), original_shape
+            )
         result = torch.zeros_like(x)
-        result.scatter_(-1, topk.indices, values)
+        result.scatter_(-1, topk_indices, values)
         return result
@@ -60,6 +129,63 @@ class TopKSAEConfig(SAEConfig):
         return "topk"
+def _sparse_matmul_nd(
+    sparse_tensor: torch.Tensor, dense_matrix: torch.Tensor
+) -> torch.Tensor:
+    """
+    Multiply a sparse tensor of shape [..., d_sae] with a dense matrix of shape [d_sae, d_out]
+    to get a result of shape [..., d_out].
+    This function handles sparse tensors with arbitrary batch dimensions by flattening
+    the batch dimensions, performing 2D sparse matrix multiplication, and reshaping back.
+    """
+    original_shape = sparse_tensor.shape
+    batch_dims = original_shape[:-1]
+    d_sae = original_shape[-1]
+    d_out = dense_matrix.shape[-1]
+    if sparse_tensor.ndim == 2:
+        # Simple 2D case - use torch.sparse.mm directly
+        # sparse.mm errors with bfloat16 :(
+        with torch.autocast(device_type=sparse_tensor.device.type, enabled=False):
+            return torch.sparse.mm(sparse_tensor, dense_matrix)
+    # For 3D+ case, reshape to 2D, multiply, then reshape back
+    batch_size = int(torch.prod(torch.tensor(batch_dims)).item())
+    # Ensure tensor is coalesced for efficient access to indices/values
+    if not sparse_tensor.is_coalesced():
+        sparse_tensor = sparse_tensor.coalesce()
+    # Get indices and values
+    indices = sparse_tensor.indices()  # [ndim, nnz]
+    values = sparse_tensor.values()  # [nnz]
+    # Convert multi-dimensional batch indices to flat indices
+    flat_batch_indices = torch.zeros_like(indices[0])
+    multiplier = 1
+    for i in reversed(range(len(batch_dims))):
+        flat_batch_indices += indices[i] * multiplier
+        multiplier *= batch_dims[i]
+    # Create 2D sparse tensor indices [batch_flat, feature]
+    sparse_2d_indices = torch.stack([flat_batch_indices, indices[-1]])
+    # Create 2D sparse tensor
+    sparse_2d = torch.sparse_coo_tensor(
+        sparse_2d_indices, values, (batch_size, d_sae)
+    ).coalesce()
+    # sparse.mm errors with bfloat16 :(
+    with torch.autocast(device_type=sparse_tensor.device.type, enabled=False):
+        # Do the matrix multiplication
+        result_2d = torch.sparse.mm(sparse_2d, dense_matrix)  # [batch_size, d_out]
+    # Reshape back to original batch dimensions
+    result_shape = tuple(batch_dims) + (d_out,)
+    return result_2d.view(result_shape)
 class TopKSAE(SAE[TopKSAEConfig]):
     """
     An inference-only sparse autoencoder using a "topk" activation function.
@@ -96,21 +222,26 @@ class TopKSAE(SAE[TopKSAEConfig]):
         return self.hook_sae_acts_post(self.activation_fn(hidden_pre))
     def decode(
-        self, feature_acts: Float[torch.Tensor, "... d_sae"]
+        self,
+        feature_acts: Float[torch.Tensor, "... d_sae"],
     ) -> Float[torch.Tensor, "... d_in"]:
         """
         Reconstructs the input from topk feature activations.
         Applies optional finetuning scaling, hooking to recons, out normalization,
         and optional head reshaping.
         """
-        sae_out_pre = feature_acts @ self.W_dec + self.b_dec
+        # Handle sparse tensors using efficient sparse matrix multiplication
+        if feature_acts.is_sparse:
+            sae_out_pre = _sparse_matmul_nd(feature_acts, self.W_dec) + self.b_dec
+        else:
+            sae_out_pre = feature_acts @ self.W_dec + self.b_dec
         sae_out_pre = self.hook_sae_recons(sae_out_pre)
         sae_out_pre = self.run_time_activation_norm_fn_out(sae_out_pre)
         return self.reshape_fn_out(sae_out_pre, self.d_head)
     @override
     def get_activation_fn(self) -> Callable[[torch.Tensor], torch.Tensor]:
-        return TopK(self.cfg.k)
+        return TopK(self.cfg.k, use_sparse_activations=False)
     @override
     @torch.no_grad()
@@ -124,9 +255,43 @@ class TopKSAE(SAE[TopKSAEConfig]):
 class TopKTrainingSAEConfig(TrainingSAEConfig):
     """
     Configuration class for training a TopKTrainingSAE.
+    Args:
+        k (int): Number of top features to keep active. Only the top k features
+            with the highest pre-activations will be non-zero. Defaults to 100.
+        use_sparse_activations (bool): Whether to use sparse tensor representations
+            for activations during training. This can reduce memory usage and improve
+            performance when k is small relative to d_sae, but is only worthwhile if
+            using float32 and not using autocast. Defaults to False.
+        aux_loss_coefficient (float): Coefficient for the auxiliary loss that encourages
+            dead neurons to learn useful features. This loss helps prevent neuron death
+            in TopK SAEs by having dead neurons reconstruct the residual error from
+            live neurons. Defaults to 1.0.
+        decoder_init_norm (float | None): Norm to initialize decoder weights to.
+            0.1 corresponds to the "heuristic" initialization from Anthropic's April update.
+            Use None to disable. Inherited from TrainingSAEConfig. Defaults to 0.1.
+        d_in (int): Input dimension (dimensionality of the activations being encoded).
+            Inherited from SAEConfig.
+        d_sae (int): SAE latent dimension (number of features in the SAE).
+            Inherited from SAEConfig.
+        dtype (str): Data type for the SAE parameters. Inherited from SAEConfig.
+            Defaults to "float32".
+        device (str): Device to place the SAE on. Inherited from SAEConfig.
+            Defaults to "cpu".
+        apply_b_dec_to_input (bool): Whether to apply decoder bias to the input
+            before encoding. Inherited from SAEConfig. Defaults to True.
+        normalize_activations (Literal["none", "expected_average_only_in", "constant_norm_rescale", "layer_norm"]):
+            Normalization strategy for input activations. Inherited from SAEConfig.
+            Defaults to "none".
+        reshape_activations (Literal["none", "hook_z"]): How to reshape activations
+            (useful for attention head outputs). Inherited from SAEConfig.
+            Defaults to "none".
+        metadata (SAEMetadata): Metadata about the SAE training (model name, hook name, etc.).
+            Inherited from SAEConfig.
     """
     k: int = 100
+    use_sparse_activations: bool = False
     aux_loss_coefficient: float = 1.0
     @override
@@ -144,6 +309,8 @@ class TopKTrainingSAE(TrainingSAE[TopKTrainingSAEConfig]):
     def __init__(self, cfg: TopKTrainingSAEConfig, use_error_term: bool = False):
         super().__init__(cfg, use_error_term)
+        self.hook_sae_acts_post = SparseHookPoint(self.cfg.d_sae)
+        self.setup()
     @override
     def initialize_weights(self) -> None:
@@ -163,6 +330,41 @@ class TopKTrainingSAE(TrainingSAE[TopKTrainingSAEConfig]):
         feature_acts = self.hook_sae_acts_post(self.activation_fn(hidden_pre))
         return feature_acts, hidden_pre
+    @override
+    def decode(
+        self,
+        feature_acts: Float[torch.Tensor, "... d_sae"],
+    ) -> Float[torch.Tensor, "... d_in"]:
+        """
+        Decodes feature activations back into input space,
+        applying optional finetuning scale, hooking, out normalization, etc.
+        """
+        # Handle sparse tensors using efficient sparse matrix multiplication
+        if feature_acts.is_sparse:
+            sae_out_pre = _sparse_matmul_nd(feature_acts, self.W_dec) + self.b_dec
+        else:
+            sae_out_pre = feature_acts @ self.W_dec + self.b_dec
+        sae_out_pre = self.hook_sae_recons(sae_out_pre)
+        sae_out_pre = self.run_time_activation_norm_fn_out(sae_out_pre)
+        return self.reshape_fn_out(sae_out_pre, self.d_head)
+    @override
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Forward pass through the SAE."""
+        feature_acts = self.encode(x)
+        sae_out = self.decode(feature_acts)
+        if self.use_error_term:
+            with torch.no_grad():
+                # Recompute without hooks for true error term
+                with _disable_hooks(self):
+                    feature_acts_clean = self.encode(x)
+                    x_reconstruct_clean = self.decode(feature_acts_clean)
+                sae_error = self.hook_sae_error(x - x_reconstruct_clean)
+            sae_out = sae_out + sae_error
+        return self.hook_sae_output(sae_out)
     @override
     def calculate_aux_loss(
         self,
@@ -189,7 +391,7 @@ class TopKTrainingSAE(TrainingSAE[TopKTrainingSAEConfig]):
     @override
     def get_activation_fn(self) -> Callable[[torch.Tensor], torch.Tensor]:
-        return TopK(self.cfg.k)
+        return TopK(self.cfg.k, use_sparse_activations=self.cfg.use_sparse_activations)
     @override
     def get_coefficients(self) -> dict[str, TrainCoefficientConfig | float]:

sae_lens/tokenization_and_batching.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Generator, Iterator
+from collections.abc import Generator, Iterator
 import torch

sae_lens/training/sae_trainer.py CHANGED Viewed

@@ -253,12 +253,14 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
             )
             with torch.no_grad():
-                did_fire = (train_step_output.feature_acts > 0).float().sum(-2) > 0
+                # calling .bool() should be equivalent to .abs() > 0, and work with coo tensors
+                firing_feats = train_step_output.feature_acts.bool().float()
+                did_fire = firing_feats.sum(-2).bool()
+                if did_fire.is_sparse:
+                    did_fire = did_fire.to_dense()
                 self.n_forward_passes_since_fired += 1
                 self.n_forward_passes_since_fired[did_fire] = 0
-                self.act_freq_scores += (
-                    (train_step_output.feature_acts.abs() > 0).float().sum(0)
-                )
+                self.act_freq_scores += firing_feats.sum(0)
                 self.n_frac_active_samples += self.cfg.train_batch_size_samples
         # Grad scaler will rescale gradients if autocast is enabled
@@ -310,7 +312,7 @@ class SAETrainer(Generic[T_TRAINING_SAE, T_TRAINING_SAE_CONFIG]):
         loss = output.loss.item()
         # metrics for currents acts
-        l0 = (feature_acts > 0).float().sum(-1).mean()
+        l0 = feature_acts.bool().float().sum(-1).to_dense().mean()
         current_learning_rate = self.optimizer.param_groups[0]["lr"]
         per_token_l2_loss = (sae_out - sae_in).pow(2).sum(dim=-1).squeeze()

sae_lens/training/types.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Iterator
+from collections.abc import Iterator
 import torch

{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sae-lens
-Version: 6.12.2
+Version: 6.13.0
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 License-File: LICENSE

{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/RECORD RENAMED Viewed

@@ -1,39 +1,39 @@
-sae_lens/__init__.py,sha256=TptOdqP3B6E_TTQ4n6DXAIDA9c1_9LUUsDkoqyrSSBg,3589
+sae_lens/__init__.py,sha256=6cL-2l4CIzZJfgyRP5I90zu2Tty196wOgFg1JGlQd1c,3589
 sae_lens/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sae_lens/analysis/hooked_sae_transformer.py,sha256=vRu6JseH1lZaEeILD5bEkQEQ1wYHHDcxD-f2olKmE9Y,14275
 sae_lens/analysis/neuronpedia_integration.py,sha256=Gx1W7hUBEuMoasNcnOnZ1wmqbXDd1pSZ1nqKEya1HQc,4962
 sae_lens/cache_activations_runner.py,sha256=cNeAtp2JQ_vKbeddZVM-tcPLYyyfTWL8NDna5KQpkLI,12583
 sae_lens/config.py,sha256=IdRXSKPfYY3hwUovj-u83eep8z52gkJHII0mY0KseYY,28739
 sae_lens/constants.py,sha256=CSjmiZ-bhjQeVLyRvWxAjBokCgkfM8mnvd7-vxLIWTY,639
-sae_lens/evals.py,sha256=4hanbyG8qZLItWqft94F4ZjUoytPVB7fw5s0P4Oi0VE,39504
+sae_lens/evals.py,sha256=p4AOueeemhJXyfLx2TxOva8LXxXj63JSKe9Lnib3mHs,39623
 sae_lens/llm_sae_training_runner.py,sha256=sJTcDX1bUJJ_jZLUT88-8KUYIAPeUGoXktX68PsBqw0,15137
 sae_lens/load_model.py,sha256=C8AMykctj6H7tz_xRwB06-EXj6TfW64PtSJZR5Jxn1Y,8649
 sae_lens/loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sae_lens/loading/pretrained_sae_loaders.py,sha256=CVzHntSUKR1X3_gAqn8K_Ajq8D85qBrmrgEgU93IV4A,49609
+sae_lens/loading/pretrained_sae_loaders.py,sha256=SM4aT8NM6ezYix5c2u7p72Fz2RfvTtf7gw5RdOSKXhc,49846
 sae_lens/loading/pretrained_saes_directory.py,sha256=4Vn-Jex6SveD7EbxcSOBv8cx1gkPfUMLU1QOP-ww1ZE,3752
-sae_lens/pretokenize_runner.py,sha256=w0f6SfZLAxbp5eAAKnet8RqUB_DKofZ9RGsoJwFnYbA,7058
+sae_lens/pretokenize_runner.py,sha256=x-reJzVPFDS9iRFbZtrFYSzNguJYki9gd0pbHjYJ3r4,7085
 sae_lens/pretrained_saes.yaml,sha256=6ca3geEB6NyhULUrmdtPDK8ea0YdpLp8_au78vIFC5w,602553
 sae_lens/registry.py,sha256=nhy7BPSudSATqW4lo9H_k3Na7sfGHmAf9v-3wpnLL_o,1490
 sae_lens/saes/__init__.py,sha256=jVwazK8Q6dW5J6_zFXPoNAuBvSxgziQ8eMOjGM3t-X8,1475
 sae_lens/saes/batchtopk_sae.py,sha256=GX_J0vH4vzeLqYxl0mkfsZQpFEoCEHMR4dIG8fz8N8w,3449
 sae_lens/saes/gated_sae.py,sha256=qcmM9JwBA8aZR8z_IRHV1_gQX-q_63tKewWXRnhdXuo,8986
 sae_lens/saes/jumprelu_sae.py,sha256=HHBF1sJ95lZvxwP5vwLSQFKdnJN2KKYK0WAEaLTrta0,13399
-sae_lens/saes/sae.py,sha256=McpF4pTh70r6SQUbHFm0YQ9X2c2qPULBUSd_YmnEk4Y,38284
+sae_lens/saes/sae.py,sha256=nuII6ZmaVtJWhPjyhasHQyiv_Wj-zdAtRQqJRYbVBQs,38274
 sae_lens/saes/standard_sae.py,sha256=9UqYyYtQuThYxXKNaDjYcyowpOx2-7cShG-TeUP6JCQ,5940
-sae_lens/saes/topk_sae.py,sha256=CXMBI6CFvI5829bOhoQ350VXR9d8uFHUDlULTIWHXoU,8686
+sae_lens/saes/topk_sae.py,sha256=pM26I9uDeh_ZWx0HXUyPVFfEV2pfuRJmAPNWR5pmRhY,17615
 sae_lens/saes/transcoder.py,sha256=BfLSbTYVNZh-ruGxseZiZJ_acEL6_7QyTdfqUr0lDOg,12156
-sae_lens/tokenization_and_batching.py,sha256=now7caLbU3p-iGokNwmqZDyIvxYoXgnG1uklhgiLZN4,4656
+sae_lens/tokenization_and_batching.py,sha256=jV7Rx5wHHcYMmexFhvbSk2q5R0gYBjtKoJKpowAgMEo,4665
 sae_lens/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sae_lens/training/activation_scaler.py,sha256=seEE-2Qd2JMHxqgnsNWPt-DGtYGZxWPnOwCGuVNSOtI,1719
 sae_lens/training/activations_store.py,sha256=2EUY2abqpT5El3T95sypM_JRDgiKL3VeT73U9SQIFGY,32903
 sae_lens/training/mixing_buffer.py,sha256=vDpYG5ZE70szDvBsRKcNHEES3h_WTKJ16qDYk5jPOVA,2015
 sae_lens/training/optim.py,sha256=TiI9nbffzXNsI8WjcIsqa2uheW6suxqL_KDDmWXobWI,5312
-sae_lens/training/sae_trainer.py,sha256=Jh5AyBGtfZjnprv9H3k0p_luWWnM7YFjlmHuO1W_J6U,15465
-sae_lens/training/types.py,sha256=qSjmGzXf3MLalygG0psnVjmhX_mpLmL47MQtZfe7qxg,81
+sae_lens/training/sae_trainer.py,sha256=il4Evf-c4F3Uf2n_v-AOItCasX-uPxYTzn_sZLvLkl0,15633
+sae_lens/training/types.py,sha256=1FpLx_Doda9vZpmfm-x1e8wGBYpyhe9Kpb_JuM5nIFM,90
 sae_lens/training/upload_saes_to_huggingface.py,sha256=r_WzI1zLtGZ5TzAxuG3xa_8T09j3zXJrWd_vzPsPGkQ,4469
 sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
 sae_lens/util.py,sha256=lW7fBn_b8quvRYlen9PUmB7km60YhKyjmuelB1f6KzQ,2253
-sae_lens-6.12.2.dist-info/METADATA,sha256=m8hF8tj-b70b5iAvN21ZDxOXzRHRxDmwpJclZzHqPw4,5318
-sae_lens-6.12.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-sae_lens-6.12.2.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
-sae_lens-6.12.2.dist-info/RECORD,,
+sae_lens-6.13.0.dist-info/METADATA,sha256=rqSlR_xjf3fqZga4OHpNtrhKzaA4tIrobj-e6yq8sbA,5318
+sae_lens-6.13.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+sae_lens-6.13.0.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
+sae_lens-6.13.0.dist-info/RECORD,,

{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sae_lens-6.12.2.dist-info → sae_lens-6.13.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sae-lens 6.12.2__py3-none-any.whl → 6.13.0__py3-none-any.whl

sae-lens 6.12.2__py3-none-any.whl → 6.13.0__py3-none-any.whl