sae-lens 6.28.2-py3-none-any.whl → 6.32.1-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- sae_lens/__init__.py +14 -1
- sae_lens/analysis/__init__.py +15 -0
- sae_lens/analysis/compat.py +16 -0
- sae_lens/analysis/hooked_sae_transformer.py +1 -1
- sae_lens/analysis/sae_transformer_bridge.py +348 -0
- sae_lens/config.py +9 -1
- sae_lens/evals.py +2 -2
- sae_lens/loading/pretrained_sae_loaders.py +11 -4
- sae_lens/pretrained_saes.yaml +36 -0
- sae_lens/saes/temporal_sae.py +1 -1
- sae_lens/synthetic/__init__.py +6 -0
- sae_lens/synthetic/activation_generator.py +197 -25
- sae_lens/synthetic/correlation.py +217 -36
- sae_lens/synthetic/feature_dictionary.py +11 -2
- sae_lens/synthetic/hierarchy.py +314 -2
- sae_lens/synthetic/training.py +16 -3
- sae_lens/training/activation_scaler.py +3 -1
- {sae_lens-6.28.2.dist-info → sae_lens-6.32.1.dist-info}/METADATA +2 -2
- {sae_lens-6.28.2.dist-info → sae_lens-6.32.1.dist-info}/RECORD +21 -19
- {sae_lens-6.28.2.dist-info → sae_lens-6.32.1.dist-info}/WHEEL +1 -1
- {sae_lens-6.28.2.dist-info → sae_lens-6.32.1.dist-info}/licenses/LICENSE +0 -0
sae_lens/pretrained_saes.yaml
CHANGED
@@ -4148,6 +4148,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_17_width_16k_l0_medium
       path: resid_post/layer_17_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/17-gemmascope-2-res-16k
     - id: layer_17_width_16k_l0_small
       path: resid_post/layer_17_width_16k_l0_small
       l0: 20
@@ -4166,6 +4167,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_17_width_262k_l0_medium
       path: resid_post/layer_17_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/17-gemmascope-2-res-262k
     - id: layer_17_width_262k_l0_medium_seed_1
       path: resid_post/layer_17_width_262k_l0_medium_seed_1
       l0: 60
@@ -4178,6 +4180,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_17_width_65k_l0_medium
       path: resid_post/layer_17_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/17-gemmascope-2-res-65k
     - id: layer_17_width_65k_l0_small
       path: resid_post/layer_17_width_65k_l0_small
       l0: 20
@@ -4187,6 +4190,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_22_width_16k_l0_medium
       path: resid_post/layer_22_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/22-gemmascope-2-res-16k
     - id: layer_22_width_16k_l0_small
       path: resid_post/layer_22_width_16k_l0_small
       l0: 20
@@ -4205,6 +4209,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_22_width_262k_l0_medium
       path: resid_post/layer_22_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/22-gemmascope-2-res-262k
     - id: layer_22_width_262k_l0_medium_seed_1
       path: resid_post/layer_22_width_262k_l0_medium_seed_1
       l0: 60
@@ -4217,6 +4222,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_22_width_65k_l0_medium
       path: resid_post/layer_22_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/22-gemmascope-2-res-65k
     - id: layer_22_width_65k_l0_small
       path: resid_post/layer_22_width_65k_l0_small
       l0: 20
@@ -4226,6 +4232,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_29_width_16k_l0_medium
       path: resid_post/layer_29_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/29-gemmascope-2-res-16k
     - id: layer_29_width_16k_l0_small
       path: resid_post/layer_29_width_16k_l0_small
       l0: 20
@@ -4244,6 +4251,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_29_width_262k_l0_medium
       path: resid_post/layer_29_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/29-gemmascope-2-res-262k
     - id: layer_29_width_262k_l0_medium_seed_1
       path: resid_post/layer_29_width_262k_l0_medium_seed_1
       l0: 60
@@ -4256,6 +4264,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_29_width_65k_l0_medium
       path: resid_post/layer_29_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-4b-it/29-gemmascope-2-res-65k
     - id: layer_29_width_65k_l0_small
       path: resid_post/layer_29_width_65k_l0_small
       l0: 20
@@ -4265,6 +4274,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_9_width_16k_l0_medium
       path: resid_post/layer_9_width_16k_l0_medium
       l0: 53
+      neuronpedia: gemma-3-4b-it/9-gemmascope-2-res-16k
     - id: layer_9_width_16k_l0_small
       path: resid_post/layer_9_width_16k_l0_small
       l0: 17
@@ -4283,6 +4293,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_9_width_262k_l0_medium
       path: resid_post/layer_9_width_262k_l0_medium
       l0: 53
+      neuronpedia: gemma-3-4b-it/9-gemmascope-2-res-262k
     - id: layer_9_width_262k_l0_medium_seed_1
       path: resid_post/layer_9_width_262k_l0_medium_seed_1
       l0: 53
@@ -4295,6 +4306,7 @@ gemma-scope-2-4b-it-res:
     - id: layer_9_width_65k_l0_medium
       path: resid_post/layer_9_width_65k_l0_medium
       l0: 53
+      neuronpedia: gemma-3-4b-it/9-gemmascope-2-res-65k
     - id: layer_9_width_65k_l0_small
       path: resid_post/layer_9_width_65k_l0_small
       l0: 17
@@ -14491,6 +14503,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_12_width_16k_l0_medium
       path: resid_post/layer_12_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/12-gemmascope-2-res-16k
     - id: layer_12_width_16k_l0_small
       path: resid_post/layer_12_width_16k_l0_small
       l0: 20
@@ -14509,6 +14522,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_12_width_262k_l0_medium
       path: resid_post/layer_12_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/12-gemmascope-2-res-262k
     - id: layer_12_width_262k_l0_medium_seed_1
       path: resid_post/layer_12_width_262k_l0_medium_seed_1
       l0: 60
@@ -14521,6 +14535,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_12_width_65k_l0_medium
       path: resid_post/layer_12_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/12-gemmascope-2-res-65k
     - id: layer_12_width_65k_l0_small
       path: resid_post/layer_12_width_65k_l0_small
       l0: 20
@@ -14530,6 +14545,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_15_width_16k_l0_medium
       path: resid_post/layer_15_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/15-gemmascope-2-res-16k
     - id: layer_15_width_16k_l0_small
       path: resid_post/layer_15_width_16k_l0_small
       l0: 20
@@ -14548,6 +14564,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_15_width_262k_l0_medium
       path: resid_post/layer_15_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/15-gemmascope-2-res-262k
     - id: layer_15_width_262k_l0_medium_seed_1
       path: resid_post/layer_15_width_262k_l0_medium_seed_1
       l0: 60
@@ -14560,6 +14577,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_15_width_65k_l0_medium
       path: resid_post/layer_15_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/15-gemmascope-2-res-65k
     - id: layer_15_width_65k_l0_small
       path: resid_post/layer_15_width_65k_l0_small
       l0: 20
@@ -14569,6 +14587,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_5_width_16k_l0_medium
       path: resid_post/layer_5_width_16k_l0_medium
       l0: 55
+      neuronpedia: gemma-3-270m-it/5-gemmascope-2-res-16k
     - id: layer_5_width_16k_l0_small
       path: resid_post/layer_5_width_16k_l0_small
       l0: 18
@@ -14587,6 +14606,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_5_width_262k_l0_medium
       path: resid_post/layer_5_width_262k_l0_medium
       l0: 55
+      neuronpedia: gemma-3-270m-it/5-gemmascope-2-res-262k
     - id: layer_5_width_262k_l0_medium_seed_1
       path: resid_post/layer_5_width_262k_l0_medium_seed_1
       l0: 55
@@ -14599,6 +14619,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_5_width_65k_l0_medium
       path: resid_post/layer_5_width_65k_l0_medium
       l0: 55
+      neuronpedia: gemma-3-270m-it/5-gemmascope-2-res-65k
     - id: layer_5_width_65k_l0_small
       path: resid_post/layer_5_width_65k_l0_small
       l0: 18
@@ -14608,6 +14629,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_9_width_16k_l0_medium
       path: resid_post/layer_9_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/9-gemmascope-2-res-16k
     - id: layer_9_width_16k_l0_small
       path: resid_post/layer_9_width_16k_l0_small
       l0: 20
@@ -14626,6 +14648,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_9_width_262k_l0_medium
       path: resid_post/layer_9_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/9-gemmascope-2-res-262k
     - id: layer_9_width_262k_l0_medium_seed_1
       path: resid_post/layer_9_width_262k_l0_medium_seed_1
       l0: 60
@@ -14638,6 +14661,7 @@ gemma-scope-2-270m-it-res:
     - id: layer_9_width_65k_l0_medium
       path: resid_post/layer_9_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-270m-it/9-gemmascope-2-res-65k
     - id: layer_9_width_65k_l0_small
       path: resid_post/layer_9_width_65k_l0_small
       l0: 20
@@ -18727,6 +18751,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_13_width_16k_l0_medium
       path: resid_post/layer_13_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/13-gemmascope-2-res-16k
     - id: layer_13_width_16k_l0_small
       path: resid_post/layer_13_width_16k_l0_small
       l0: 20
@@ -18745,6 +18770,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_13_width_262k_l0_medium
       path: resid_post/layer_13_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/13-gemmascope-2-res-262k
     - id: layer_13_width_262k_l0_medium_seed_1
       path: resid_post/layer_13_width_262k_l0_medium_seed_1
       l0: 60
@@ -18757,6 +18783,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_13_width_65k_l0_medium
       path: resid_post/layer_13_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/13-gemmascope-2-res-65k
     - id: layer_13_width_65k_l0_small
       path: resid_post/layer_13_width_65k_l0_small
       l0: 20
@@ -18766,6 +18793,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_17_width_16k_l0_medium
       path: resid_post/layer_17_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/17-gemmascope-2-res-16k
     - id: layer_17_width_16k_l0_small
       path: resid_post/layer_17_width_16k_l0_small
       l0: 20
@@ -18784,6 +18812,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_17_width_262k_l0_medium
       path: resid_post/layer_17_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/17-gemmascope-2-res-262k
     - id: layer_17_width_262k_l0_medium_seed_1
       path: resid_post/layer_17_width_262k_l0_medium_seed_1
       l0: 60
@@ -18796,6 +18825,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_17_width_65k_l0_medium
       path: resid_post/layer_17_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/17-gemmascope-2-res-65k
     - id: layer_17_width_65k_l0_small
       path: resid_post/layer_17_width_65k_l0_small
       l0: 20
@@ -18805,6 +18835,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_22_width_16k_l0_medium
       path: resid_post/layer_22_width_16k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/22-gemmascope-2-res-16k
     - id: layer_22_width_16k_l0_small
       path: resid_post/layer_22_width_16k_l0_small
       l0: 20
@@ -18823,6 +18854,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_22_width_262k_l0_medium
       path: resid_post/layer_22_width_262k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/22-gemmascope-2-res-262k
     - id: layer_22_width_262k_l0_medium_seed_1
       path: resid_post/layer_22_width_262k_l0_medium_seed_1
       l0: 60
@@ -18835,6 +18867,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_22_width_65k_l0_medium
       path: resid_post/layer_22_width_65k_l0_medium
       l0: 60
+      neuronpedia: gemma-3-1b-it/22-gemmascope-2-res-65k
     - id: layer_22_width_65k_l0_small
       path: resid_post/layer_22_width_65k_l0_small
       l0: 20
@@ -18844,6 +18877,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_7_width_16k_l0_medium
       path: resid_post/layer_7_width_16k_l0_medium
       l0: 54
+      neuronpedia: gemma-3-1b-it/7-gemmascope-2-res-16k
     - id: layer_7_width_16k_l0_small
       path: resid_post/layer_7_width_16k_l0_small
       l0: 18
@@ -18862,6 +18896,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_7_width_262k_l0_medium
       path: resid_post/layer_7_width_262k_l0_medium
       l0: 54
+      neuronpedia: gemma-3-1b-it/7-gemmascope-2-res-262k
     - id: layer_7_width_262k_l0_medium_seed_1
       path: resid_post/layer_7_width_262k_l0_medium_seed_1
       l0: 54
@@ -18874,6 +18909,7 @@ gemma-scope-2-1b-it-res:
     - id: layer_7_width_65k_l0_medium
       path: resid_post/layer_7_width_65k_l0_medium
       l0: 54
+      neuronpedia: gemma-3-1b-it/7-gemmascope-2-res-65k
     - id: layer_7_width_65k_l0_small
       path: resid_post/layer_7_width_65k_l0_small
       l0: 18
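Every hunk in this file follows the same pattern: each Gemma Scope 2 instruction-tuned SAE entry gains a `neuronpedia` key linking it to its dashboard. A minimal loading sketch, assuming the usual `SAE.from_pretrained` loader (the exact return value has varied across sae_lens versions, so treat this as illustrative):

from sae_lens import SAE

# The release and sae_id strings come straight from the
# pretrained_saes.yaml entries shown above.
sae = SAE.from_pretrained(
    release="gemma-scope-2-4b-it-res",
    sae_id="layer_17_width_16k_l0_medium",
    device="cpu",
)
# The new `neuronpedia` field ("gemma-3-4b-it/17-gemmascope-2-res-16k")
# identifies the matching dashboard on neuronpedia.org.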
sae_lens/saes/temporal_sae.py
CHANGED
sae_lens/synthetic/__init__.py
CHANGED
@@ -17,11 +17,14 @@ from sae_lens.synthetic.activation_generator import (
     ActivationGenerator,
     ActivationsModifier,
     ActivationsModifierInput,
+    CorrelationMatrixInput,
 )
 from sae_lens.synthetic.correlation import (
+    LowRankCorrelationMatrix,
     create_correlation_matrix_from_correlations,
     generate_random_correlation_matrix,
     generate_random_correlations,
+    generate_random_low_rank_correlation_matrix,
 )
 from sae_lens.synthetic.evals import (
     SyntheticDataEvalResult,
@@ -66,6 +69,9 @@ __all__ = [
     "create_correlation_matrix_from_correlations",
     "generate_random_correlations",
     "generate_random_correlation_matrix",
+    "generate_random_low_rank_correlation_matrix",
+    "LowRankCorrelationMatrix",
+    "CorrelationMatrixInput",
     # Feature modifiers
     "ActivationsModifier",
     "ActivationsModifierInput",
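With these exports, the low-rank correlation utilities become part of the public API and can be imported directly from the package:

# The three names newly exported from sae_lens.synthetic:
from sae_lens.synthetic import (
    CorrelationMatrixInput,
    LowRankCorrelationMatrix,
    generate_random_low_rank_correlation_matrix,
)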
sae_lens/synthetic/activation_generator.py
CHANGED
@@ -2,17 +2,21 @@
 Functions for generating synthetic feature activations.
 """
 
+import math
 from collections.abc import Callable, Sequence
 
 import torch
-from scipy.stats import norm
 from torch import nn
 from torch.distributions import MultivariateNormal
 
+from sae_lens.synthetic.correlation import LowRankCorrelationMatrix
 from sae_lens.util import str_to_dtype
 
 ActivationsModifier = Callable[[torch.Tensor], torch.Tensor]
 ActivationsModifierInput = ActivationsModifier | Sequence[ActivationsModifier] | None
+CorrelationMatrixInput = (
+    torch.Tensor | LowRankCorrelationMatrix | tuple[torch.Tensor, torch.Tensor]
+)
 
 
 class ActivationGenerator(nn.Module):
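The new `CorrelationMatrixInput` alias means a correlation structure can be supplied in three interchangeable forms. A sketch of the low-memory forms, assuming only that `LowRankCorrelationMatrix` behaves like the `(factor, diag)` pair the constructor indexes below:

import torch

num_features, rank = 10_000, 16

# Form 1: a full (num_features, num_features) correlation matrix.
full = torch.eye(num_features)

# Forms 2 and 3: low-rank structure cov = factor @ factor.T + diag(d),
# either as a LowRankCorrelationMatrix or as a plain (factor, diag) tuple.
factor = 0.1 * torch.randn(num_features, rank)
diag = 1.0 - (factor * factor).sum(dim=-1)  # keeps unit variance per feature
low_rank = (factor, diag)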
@@ -28,7 +32,9 @@ class ActivationGenerator(nn.Module):
     mean_firing_magnitudes: torch.Tensor
     modify_activations: ActivationsModifier | None
     correlation_matrix: torch.Tensor | None
+    low_rank_correlation: tuple[torch.Tensor, torch.Tensor] | None
     correlation_thresholds: torch.Tensor | None
+    use_sparse_tensors: bool
 
     def __init__(
         self,
@@ -37,10 +43,37 @@ class ActivationGenerator(nn.Module):
         std_firing_magnitudes: torch.Tensor | float = 0.0,
         mean_firing_magnitudes: torch.Tensor | float = 1.0,
         modify_activations: ActivationsModifierInput = None,
-        correlation_matrix: torch.Tensor | None = None,
+        correlation_matrix: CorrelationMatrixInput | None = None,
         device: torch.device | str = "cpu",
         dtype: torch.dtype | str = "float32",
+        use_sparse_tensors: bool = False,
     ):
+        """
+        Create a new ActivationGenerator.
+
+        Args:
+            num_features: Number of features to generate activations for.
+            firing_probabilities: Probability of each feature firing. Can be a single
+                float (applied to all features) or a tensor of shape (num_features,).
+            std_firing_magnitudes: Standard deviation of firing magnitudes. Can be a
+                single float or a tensor of shape (num_features,). Defaults to 0.0
+                (deterministic magnitudes).
+            mean_firing_magnitudes: Mean firing magnitude when a feature fires. Can be
+                a single float or a tensor of shape (num_features,). Defaults to 1.0.
+            modify_activations: Optional function(s) to modify activations after
+                generation. Can be a single callable, a sequence of callables (applied
+                in order), or None. Useful for applying hierarchy constraints.
+            correlation_matrix: Optional correlation structure between features. Can be:
+
+                - A full correlation matrix tensor of shape (num_features, num_features)
+                - A LowRankCorrelationMatrix for memory-efficient large-scale correlations
+                - A tuple of (factor, diag) tensors representing low-rank structure
+
+            device: Device to place tensors on. Defaults to "cpu".
+            dtype: Data type for tensors. Defaults to "float32".
+            use_sparse_tensors: If True, return sparse COO tensors from sample().
+                Only recommended when using massive numbers of features. Defaults to False.
+        """
         super().__init__()
         self.num_features = num_features
         self.firing_probabilities = _to_tensor(
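Putting the new constructor arguments together, a hedged sketch (illustrative values only) of a generator that would be impractical with a dense 50k × 50k correlation matrix:

import torch
from sae_lens.synthetic import ActivationGenerator

factor = 0.05 * torch.randn(50_000, 32)
diag = 1.0 - (factor * factor).sum(dim=-1)  # must stay strictly positive

gen = ActivationGenerator(
    num_features=50_000,
    firing_probabilities=0.01,
    correlation_matrix=(factor, diag),  # low-rank (factor, diag) form
    use_sparse_tensors=True,            # sample() returns sparse COO tensors
)
acts = gen.sample(batch_size=64)        # sparse, shape (64, 50000)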
@@ -54,14 +87,34 @@ class ActivationGenerator(nn.Module):
         )
         self.modify_activations = _normalize_modifiers(modify_activations)
         self.correlation_thresholds = None
+        self.correlation_matrix = None
+        self.low_rank_correlation = None
+        self.use_sparse_tensors = use_sparse_tensors
+
         if correlation_matrix is not None:
-
-
-
-
-
+            if isinstance(correlation_matrix, torch.Tensor):
+                # Full correlation matrix
+                _validate_correlation_matrix(correlation_matrix, num_features)
+                self.correlation_matrix = correlation_matrix
+            else:
+                # Low-rank correlation (tuple or LowRankCorrelationMatrix)
+                correlation_factor, correlation_diag = (
+                    correlation_matrix[0],
+                    correlation_matrix[1],
+                )
+                _validate_low_rank_correlation(
+                    correlation_factor, correlation_diag, num_features
+                )
+                # Pre-compute sqrt for efficiency (used every sample call)
+                self.low_rank_correlation = (
+                    correlation_factor,
+                    correlation_diag.sqrt(),
+                )
+
+            # Vectorized inverse normal CDF: norm.ppf(1-p) = sqrt(2) * erfinv(1 - 2*p)
+            self.correlation_thresholds = math.sqrt(2) * torch.erfinv(
+                1 - 2 * self.firing_probabilities
             )
-        self.correlation_matrix = correlation_matrix
 
     @torch.no_grad()
     def sample(self, batch_size: int) -> torch.Tensor:
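The scipy dependency (`norm.ppf`) could be dropped because the inverse normal CDF has the closed form Φ⁻¹(1 − p) = √2 · erfinv(1 − 2p). A quick equivalence check, with scipy imported here only for comparison:

import math

import torch
from scipy.stats import norm

p = torch.tensor([0.001, 0.01, 0.1, 0.5])
thresholds = math.sqrt(2) * torch.erfinv(1 - 2 * p)        # new torch path
reference = torch.tensor(norm.ppf(1 - p.numpy())).float()  # old scipy path
assert torch.allclose(thresholds, reference, atol=1e-4)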
@@ -84,30 +137,74 @@ class ActivationGenerator(nn.Module):
 
         if self.correlation_matrix is not None:
             assert self.correlation_thresholds is not None
-
+            firing_indices = _generate_correlated_features(
                 batch_size,
                 self.correlation_matrix,
                 self.correlation_thresholds,
                 device,
             )
+        elif self.low_rank_correlation is not None:
+            assert self.correlation_thresholds is not None
+            firing_indices = _generate_low_rank_correlated_features(
+                batch_size,
+                self.low_rank_correlation[0],
+                self.low_rank_correlation[1],
+                self.correlation_thresholds,
+                device,
+            )
         else:
-
+            firing_indices = torch.bernoulli(
                 self.firing_probabilities.unsqueeze(0).expand(batch_size, -1)
+            ).nonzero(as_tuple=True)
+
+        # Compute activations only at firing positions (sparse optimization)
+        feature_indices = firing_indices[1]
+        num_firing = feature_indices.shape[0]
+        mean_at_firing = self.mean_firing_magnitudes[feature_indices]
+        std_at_firing = self.std_firing_magnitudes[feature_indices]
+        random_deltas = (
+            torch.randn(
+                num_firing, device=device, dtype=self.mean_firing_magnitudes.dtype
             )
-
-        firing_magnitude_delta = torch.normal(
-            torch.zeros_like(self.firing_probabilities)
-            .unsqueeze(0)
-            .expand(batch_size, -1),
-            self.std_firing_magnitudes.unsqueeze(0).expand(batch_size, -1),
+            * std_at_firing
         )
-
-
-
-
+        activations_at_firing = (mean_at_firing + random_deltas).relu()
+
+        if self.use_sparse_tensors:
+            # Return sparse COO tensor
+            indices = torch.stack(firing_indices)  # [2, nnz]
+            feature_activations = torch.sparse_coo_tensor(
+                indices,
+                activations_at_firing,
+                size=(batch_size, self.num_features),
+                device=device,
+                dtype=self.mean_firing_magnitudes.dtype,
+            )
+        else:
+            # Dense tensor path
+            feature_activations = torch.zeros(
+                batch_size,
+                self.num_features,
+                device=device,
+                dtype=self.mean_firing_magnitudes.dtype,
+            )
+            feature_activations[firing_indices] = activations_at_firing
 
         if self.modify_activations is not None:
-            feature_activations = self.modify_activations(feature_activations)
+            feature_activations = self.modify_activations(feature_activations)
+            if feature_activations.is_sparse:
+                # Apply relu to sparse values
+                feature_activations = feature_activations.coalesce()
+                feature_activations = torch.sparse_coo_tensor(
+                    feature_activations.indices(),
+                    feature_activations.values().relu(),
+                    feature_activations.shape,
+                    device=feature_activations.device,
+                    dtype=feature_activations.dtype,
+                )
+            else:
+                feature_activations = feature_activations.relu()
+
         return feature_activations
 
     def forward(self, batch_size: int) -> torch.Tensor:
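Because `sample()` can now return a sparse COO tensor, downstream code needs an explicit densify step; a minimal sketch:

import torch
from sae_lens.synthetic import ActivationGenerator

gen = ActivationGenerator(
    num_features=1_000, firing_probabilities=0.05, use_sparse_tensors=True
)
acts = gen.sample(batch_size=8)
if acts.is_sparse:            # sparse COO path from the code above
    acts = acts.to_dense()
assert acts.shape == (8, 1_000)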
@@ -119,7 +216,7 @@ def _generate_correlated_features(
     correlation_matrix: torch.Tensor,
     thresholds: torch.Tensor,
     device: torch.device,
-) -> torch.Tensor:
+) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Generate correlated binary features using multivariate Gaussian sampling.
 
@@ -133,7 +230,7 @@ def _generate_correlated_features(
         device: Device to generate samples on
 
     Returns:
-
+        Tuple of (row_indices, col_indices) for firing features
     """
     num_features = correlation_matrix.shape[0]
 
@@ -143,7 +240,49 @@ def _generate_correlated_features(
     )
 
     gaussian_samples = mvn.sample((batch_size,))
-
+    indices = (gaussian_samples > thresholds.unsqueeze(0)).nonzero(as_tuple=True)
+    return indices[0], indices[1]
+
+
+def _generate_low_rank_correlated_features(
+    batch_size: int,
+    correlation_factor: torch.Tensor,
+    cov_diag_sqrt: torch.Tensor,
+    thresholds: torch.Tensor,
+    device: torch.device,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Generate correlated binary features using low-rank multivariate Gaussian sampling.
+
+    Uses the Gaussian copula approach with a low-rank covariance structure for scalability.
+    The covariance is represented as: cov = factor @ factor.T + diag(diag_term)
+
+    Args:
+        batch_size: Number of samples to generate
+        correlation_factor: Factor matrix of shape (num_features, rank)
+        cov_diag_sqrt: Pre-computed sqrt of diagonal term, shape (num_features,)
+        thresholds: Pre-computed thresholds for each feature (from inverse normal CDF)
+        device: Device to generate samples on
+
+    Returns:
+        Tuple of (row_indices, col_indices) for firing features
+    """
+    # Manual low-rank MVN sampling to enable autocast for the expensive matmul
+    # samples = eps @ cov_factor.T + eta * sqrt(cov_diag)
+    # where eps ~ N(0, I_rank) and eta ~ N(0, I_n)
+
+    num_features, rank = correlation_factor.shape
+
+    # Generate random samples in float32 for numerical stability
+    eps = torch.randn(batch_size, rank, device=device, dtype=correlation_factor.dtype)
+    eta = torch.randn(
+        batch_size, num_features, device=device, dtype=cov_diag_sqrt.dtype
+    )
+
+    gaussian_samples = eps @ correlation_factor.T + eta * cov_diag_sqrt
+
+    indices = (gaussian_samples > thresholds.unsqueeze(0)).nonzero(as_tuple=True)
+    return indices[0], indices[1]
 
 
 def _to_tensor(
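The manual sampler rests on the identity that if eps ~ N(0, I_rank) and eta ~ N(0, I_n), then eps @ F.T + eta * sqrt(d) has covariance F @ F.T + diag(d). A standalone numeric check (demo values only):

import torch

torch.manual_seed(0)
n, rank, n_samples = 8, 3, 200_000
F = 0.3 * torch.randn(n, rank)
d = torch.rand(n) + 0.5

eps = torch.randn(n_samples, rank)
eta = torch.randn(n_samples, n)
x = eps @ F.T + eta * d.sqrt()       # same construction as above

empirical = x.T @ x / n_samples      # samples are zero-mean
expected = F @ F.T + torch.diag(d)
assert torch.allclose(empirical, expected, atol=0.05)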
@@ -194,7 +333,7 @@ def _validate_correlation_matrix(
 
     Args:
         correlation_matrix: The matrix to validate
-        num_features: Expected number of features (matrix should be
+        num_features: Expected number of features (matrix should be (num_features, num_features))
 
     Raises:
         ValueError: If the matrix has incorrect shape, non-unit diagonal, or is not positive definite
@@ -214,3 +353,36 @@
         torch.linalg.cholesky(correlation_matrix)
     except RuntimeError as e:
         raise ValueError("Correlation matrix must be positive definite") from e
+
+
+def _validate_low_rank_correlation(
+    correlation_factor: torch.Tensor,
+    correlation_diag: torch.Tensor,
+    num_features: int,
+) -> None:
+    """Validate that low-rank correlation parameters have correct properties.
+
+    Args:
+        correlation_factor: Factor matrix of shape (num_features, rank)
+        correlation_diag: Diagonal term of shape (num_features,)
+        num_features: Expected number of features
+
+    Raises:
+        ValueError: If shapes are incorrect or diagonal terms are not positive
+    """
+    if correlation_factor.ndim != 2:
+        raise ValueError(
+            f"correlation_factor must be 2D, got {correlation_factor.ndim}D"
+        )
+    if correlation_factor.shape[0] != num_features:
+        raise ValueError(
+            f"correlation_factor must have shape ({num_features}, rank), "
+            f"got {tuple(correlation_factor.shape)}"
+        )
+    if correlation_diag.shape != (num_features,):
+        raise ValueError(
+            f"correlation_diag must have shape ({num_features},), "
+            f"got {tuple(correlation_diag.shape)}"
+        )
+    if torch.any(correlation_diag <= 0):
+        raise ValueError("correlation_diag must have all positive values")