sae-lens 6.0.0rc1__py3-none-any.whl → 6.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +50 -16
- sae_lens/analysis/hooked_sae_transformer.py +10 -10
- sae_lens/analysis/neuronpedia_integration.py +13 -11
- sae_lens/cache_activations_runner.py +2 -1
- sae_lens/config.py +59 -231
- sae_lens/constants.py +18 -0
- sae_lens/evals.py +16 -13
- sae_lens/loading/pretrained_sae_loaders.py +36 -3
- sae_lens/registry.py +49 -0
- sae_lens/sae_training_runner.py +22 -21
- sae_lens/saes/__init__.py +48 -0
- sae_lens/saes/gated_sae.py +70 -59
- sae_lens/saes/jumprelu_sae.py +58 -72
- sae_lens/saes/sae.py +250 -272
- sae_lens/saes/standard_sae.py +75 -57
- sae_lens/saes/topk_sae.py +72 -83
- sae_lens/training/activations_store.py +31 -15
- sae_lens/training/optim.py +60 -36
- sae_lens/training/sae_trainer.py +44 -69
- sae_lens/training/upload_saes_to_huggingface.py +11 -5
- sae_lens/util.py +28 -0
- {sae_lens-6.0.0rc1.dist-info → sae_lens-6.0.0rc2.dist-info}/METADATA +1 -1
- sae_lens-6.0.0rc2.dist-info/RECORD +35 -0
- {sae_lens-6.0.0rc1.dist-info → sae_lens-6.0.0rc2.dist-info}/WHEEL +1 -1
- sae_lens/regsitry.py +0 -34
- sae_lens-6.0.0rc1.dist-info/RECORD +0 -32
- {sae_lens-6.0.0rc1.dist-info → sae_lens-6.0.0rc2.dist-info}/LICENSE +0 -0
sae_lens/saes/standard_sae.py
CHANGED
@@ -1,13 +1,37 @@
+from dataclasses import dataclass
+from typing import Any
+
 import numpy as np
 import torch
 from jaxtyping import Float
 from numpy.typing import NDArray
 from torch import nn
+from typing_extensions import override
+
+from sae_lens.saes.sae import (
+    SAE,
+    SAEConfig,
+    TrainCoefficientConfig,
+    TrainingSAE,
+    TrainingSAEConfig,
+    TrainStepInput,
+)
+from sae_lens.util import filter_valid_dataclass_fields
+
+
+@dataclass
+class StandardSAEConfig(SAEConfig):
+    """
+    Configuration class for a StandardSAE.
+    """

-
+    @override
+    @classmethod
+    def architecture(cls) -> str:
+        return "standard"


-class StandardSAE(SAE):
+class StandardSAE(SAE[StandardSAEConfig]):
     """
     StandardSAE is an inference-only implementation of a Sparse Autoencoder (SAE)
     using a simple linear encoder and decoder.
@@ -23,31 +47,14 @@ class StandardSAE(SAE):

     b_enc: nn.Parameter

-    def __init__(self, cfg:
+    def __init__(self, cfg: StandardSAEConfig, use_error_term: bool = False):
         super().__init__(cfg, use_error_term)

+    @override
     def initialize_weights(self) -> None:
         # Initialize encoder weights and bias.
-
-
-        )
-        self.b_dec = nn.Parameter(
-            torch.zeros(self.cfg.d_in, dtype=self.dtype, device=self.device)
-        )
-
-        # Use Kaiming Uniform for W_enc
-        w_enc_data = torch.empty(
-            self.cfg.d_in, self.cfg.d_sae, dtype=self.dtype, device=self.device
-        )
-        nn.init.kaiming_uniform_(w_enc_data)
-        self.W_enc = nn.Parameter(w_enc_data)
-
-        # Use Kaiming Uniform for W_dec
-        w_dec_data = torch.empty(
-            self.cfg.d_sae, self.cfg.d_in, dtype=self.dtype, device=self.device
-        )
-        nn.init.kaiming_uniform_(w_dec_data)
-        self.W_dec = nn.Parameter(w_dec_data)
+        super().initialize_weights()
+        _init_weights_standard(self)

     def encode(
         self, x: Float[torch.Tensor, "... d_in"]
@@ -70,11 +77,9 @@
         Decode the feature activations back to the input space.
         Now, if hook_z reshaping is turned on, we reverse the flattening.
         """
-        # 1)
-
-        # 2)
-        sae_out_pre = scaled_features @ self.W_dec + self.b_dec
-        # 3) hook reconstruction
+        # 1) linear transform
+        sae_out_pre = feature_acts @ self.W_dec + self.b_dec
+        # 2) hook reconstruction
         sae_out_pre = self.hook_sae_recons(sae_out_pre)
         # 4) optional out-normalization (e.g. constant_norm_rescale or layer_norm)
         sae_out_pre = self.run_time_activation_norm_fn_out(sae_out_pre)
@@ -82,7 +87,23 @@
         return self.reshape_fn_out(sae_out_pre, self.d_head)


-
+@dataclass
+class StandardTrainingSAEConfig(TrainingSAEConfig):
+    """
+    Configuration class for training a StandardTrainingSAE.
+    """
+
+    l1_coefficient: float = 1.0
+    lp_norm: float = 1.0
+    l1_warm_up_steps: int = 0
+
+    @override
+    @classmethod
+    def architecture(cls) -> str:
+        return "standard"
+
+
+class StandardTrainingSAE(TrainingSAE[StandardTrainingSAEConfig]):
     """
     StandardTrainingSAE is a concrete implementation of BaseTrainingSAE using the "standard" SAE architecture.
     It implements:
@@ -96,31 +117,17 @@ class StandardTrainingSAE(TrainingSAE):
     b_enc: nn.Parameter

     def initialize_weights(self) -> None:
-
-
-        StandardSAE.initialize_weights(self)  # type: ignore
-
-        # Complex init logic from original TrainingSAE
-        if self.cfg.decoder_orthogonal_init:
-            self.W_dec.data = nn.init.orthogonal_(self.W_dec.data.T).T
-
-        elif self.cfg.decoder_heuristic_init:
-            self.W_dec.data = torch.rand(  # Changed from Parameter to data assignment
-                self.cfg.d_sae, self.cfg.d_in, dtype=self.dtype, device=self.device
-            )
-            self.initialize_decoder_norm_constant_norm()
-
-        if self.cfg.init_encoder_as_decoder_transpose:
-            self.W_enc.data = self.W_dec.data.T.clone().contiguous()  # type: ignore
+        super().initialize_weights()
+        _init_weights_standard(self)

-
-
-
-
-
-
-
-
+    @override
+    def get_coefficients(self) -> dict[str, float | TrainCoefficientConfig]:
+        return {
+            "l1": TrainCoefficientConfig(
+                value=self.cfg.l1_coefficient,
+                warm_up_steps=self.cfg.l1_warm_up_steps,
+            ),
+        }

     def encode_with_hidden_pre(
         self, x: Float[torch.Tensor, "... d_in"]
@@ -148,13 +155,11 @@ class StandardTrainingSAE(TrainingSAE):
         sae_out: torch.Tensor,
     ) -> dict[str, torch.Tensor]:
         # The "standard" auxiliary loss is a sparsity penalty on the feature activations
-        weighted_feature_acts = feature_acts
-        if self.cfg.scale_sparsity_penalty_by_decoder_norm:
-            weighted_feature_acts = feature_acts * self.W_dec.norm(dim=1)
+        weighted_feature_acts = feature_acts * self.W_dec.norm(dim=1)

         # Compute the p-norm (set by cfg.lp_norm) over the feature dimension
         sparsity = weighted_feature_acts.norm(p=self.cfg.lp_norm, dim=-1)
-        l1_loss = (step_input.
+        l1_loss = (step_input.coefficients["l1"] * sparsity).mean()

         return {"l1_loss": l1_loss}

@@ -165,3 +170,16 @@ class StandardTrainingSAE(TrainingSAE):
             **super().log_histograms(),
             "weights/b_e": b_e_dist,
         }
+
+    def to_inference_config_dict(self) -> dict[str, Any]:
+        return filter_valid_dataclass_fields(
+            self.cfg.to_dict(), StandardSAEConfig, ["architecture"]
+        )
+
+
+def _init_weights_standard(
+    sae: SAE[StandardSAEConfig] | TrainingSAE[StandardTrainingSAEConfig],
+) -> None:
+    sae.b_enc = nn.Parameter(
+        torch.zeros(sae.cfg.d_sae, dtype=sae.dtype, device=sae.device)
+    )
sae_lens/saes/topk_sae.py
CHANGED
@@ -1,18 +1,22 @@
 """Inference-only TopKSAE variant, similar in spirit to StandardSAE but using a TopK-based activation."""

-from
+from dataclasses import dataclass
+from typing import Any, Callable

 import torch
 from jaxtyping import Float
 from torch import nn
+from typing_extensions import override

 from sae_lens.saes.sae import (
     SAE,
     SAEConfig,
+    TrainCoefficientConfig,
     TrainingSAE,
     TrainingSAEConfig,
     TrainStepInput,
 )
+from sae_lens.util import filter_valid_dataclass_fields


 class TopK(nn.Module):
@@ -45,14 +49,30 @@ class TopK(nn.Module):
         return result


-
+@dataclass
+class TopKSAEConfig(SAEConfig):
+    """
+    Configuration class for a TopKSAE.
+    """
+
+    k: int = 100
+
+    @override
+    @classmethod
+    def architecture(cls) -> str:
+        return "topk"
+
+
+class TopKSAE(SAE[TopKSAEConfig]):
     """
     An inference-only sparse autoencoder using a "topk" activation function.
     It uses linear encoder and decoder layers, applying the TopK activation
     to the hidden pre-activation in its encode step.
     """

-
+    b_enc: nn.Parameter
+
+    def __init__(self, cfg: TopKSAEConfig, use_error_term: bool = False):
         """
         Args:
             cfg: SAEConfig defining model size and behavior.
@@ -60,38 +80,11 @@ class TopKSAE(SAE):
         """
         super().__init__(cfg, use_error_term)

-
-            raise ValueError("TopKSAE must use a TopK activation function.")
-
+    @override
     def initialize_weights(self) -> None:
-
-
-
-        - b_enc, b_dec are zero-initialized
-        - W_enc, W_dec are Kaiming Uniform
-        """
-        # encoder bias
-        self.b_enc = nn.Parameter(
-            torch.zeros(self.cfg.d_sae, dtype=self.dtype, device=self.device)
-        )
-        # decoder bias
-        self.b_dec = nn.Parameter(
-            torch.zeros(self.cfg.d_in, dtype=self.dtype, device=self.device)
-        )
-
-        # encoder weight
-        w_enc_data = torch.empty(
-            self.cfg.d_in, self.cfg.d_sae, dtype=self.dtype, device=self.device
-        )
-        nn.init.kaiming_uniform_(w_enc_data)
-        self.W_enc = nn.Parameter(w_enc_data)
-
-        # decoder weight
-        w_dec_data = torch.empty(
-            self.cfg.d_sae, self.cfg.d_in, dtype=self.dtype, device=self.device
-        )
-        nn.init.kaiming_uniform_(w_dec_data)
-        self.W_dec = nn.Parameter(w_dec_data)
+        # Initialize encoder weights and bias.
+        super().initialize_weights()
+        _init_weights_topk(self)

     def encode(
         self, x: Float[torch.Tensor, "... d_in"]
@@ -114,28 +107,31 @@ class TopKSAE(SAE):
         Applies optional finetuning scaling, hooking to recons, out normalization,
         and optional head reshaping.
         """
-
-        sae_out_pre = scaled_features @ self.W_dec + self.b_dec
+        sae_out_pre = feature_acts @ self.W_dec + self.b_dec
         sae_out_pre = self.hook_sae_recons(sae_out_pre)
         sae_out_pre = self.run_time_activation_norm_fn_out(sae_out_pre)
         return self.reshape_fn_out(sae_out_pre, self.d_head)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    @override
+    def get_activation_fn(self) -> Callable[[torch.Tensor], torch.Tensor]:
+        return TopK(self.cfg.k)
+
+
+@dataclass
+class TopKTrainingSAEConfig(TrainingSAEConfig):
+    """
+    Configuration class for training a TopKTrainingSAE.
+    """
+
+    k: int = 100
+
+    @override
+    @classmethod
+    def architecture(cls) -> str:
+        return "topk"
+
+
+class TopKTrainingSAE(TrainingSAE[TopKTrainingSAEConfig]):
     """
     TopK variant with training functionality. Injects noise during training, optionally
     calculates a topk-related auxiliary loss, etc.
@@ -143,32 +139,13 @@ class TopKTrainingSAE(TrainingSAE):

     b_enc: nn.Parameter

-    def __init__(self, cfg:
+    def __init__(self, cfg: TopKTrainingSAEConfig, use_error_term: bool = False):
         super().__init__(cfg, use_error_term)

-
-            raise ValueError("TopKSAE must use a TopK activation function.")
-
+    @override
     def initialize_weights(self) -> None:
-
-        self
-            torch.zeros(self.cfg.d_sae, dtype=self.dtype, device=self.device)
-        )
-        self.b_dec = nn.Parameter(
-            torch.zeros(self.cfg.d_in, dtype=self.dtype, device=self.device)
-        )
-
-        w_enc_data = torch.empty(
-            self.cfg.d_in, self.cfg.d_sae, dtype=self.dtype, device=self.device
-        )
-        nn.init.kaiming_uniform_(w_enc_data)
-        self.W_enc = nn.Parameter(w_enc_data)
-
-        w_dec_data = torch.empty(
-            self.cfg.d_sae, self.cfg.d_in, dtype=self.dtype, device=self.device
-        )
-        nn.init.kaiming_uniform_(w_dec_data)
-        self.W_dec = nn.Parameter(w_dec_data)
+        super().initialize_weights()
+        _init_weights_topk(self)

     def encode_with_hidden_pre(
         self, x: Float[torch.Tensor, "... d_in"]
@@ -207,14 +184,13 @@ class TopKTrainingSAE(TrainingSAE):
         )
         return {"auxiliary_reconstruction_loss": topk_loss}

-
-
-
-
-
-
-
-        return super()._get_activation_fn()
+    @override
+    def get_activation_fn(self) -> Callable[[torch.Tensor], torch.Tensor]:
+        return TopK(self.cfg.k)
+
+    @override
+    def get_coefficients(self) -> dict[str, TrainCoefficientConfig | float]:
+        return {}

     def calculate_topk_aux_loss(
         self,
@@ -288,6 +264,11 @@ class TopKTrainingSAE(TrainingSAE):

         return auxk_acts

+    def to_inference_config_dict(self) -> dict[str, Any]:
+        return filter_valid_dataclass_fields(
+            self.cfg.to_dict(), TopKSAEConfig, ["architecture"]
+        )
+

 def _calculate_topk_aux_acts(
     k_aux: int,
@@ -303,3 +284,11 @@ def _calculate_topk_aux_acts(
     auxk_acts.scatter_(-1, auxk_topk.indices, auxk_topk.values)
     # Set activations to zero for all but top k_aux dead latents
     return auxk_acts
+
+
+def _init_weights_topk(
+    sae: SAE[TopKSAEConfig] | TrainingSAE[TopKTrainingSAEConfig],
+) -> None:
+    sae.b_enc = nn.Parameter(
+        torch.zeros(sae.cfg.d_sae, dtype=sae.dtype, device=sae.device)
+    )
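Note on the TopK change above: both TopKSAE and TopKTrainingSAE now return TopK(cfg.k) from get_activation_fn, with k moving into the config dataclasses. The TopK module's forward pass is not shown in this diff, but the intended behaviour (keep the k largest values along the last dimension, zero everything else) can be sketched with the same topk/scatter pattern used in _calculate_topk_aux_acts. Details of the real implementation, such as a ReLU on the kept values, may differ; this is only an illustration.

    # Illustrative top-k activation: keep the k largest values per row,
    # zero the rest, mirroring the topk/scatter pattern in the diff.
    import torch


    def topk_activation(x: torch.Tensor, k: int) -> torch.Tensor:
        topk = torch.topk(x, k=k, dim=-1)
        result = torch.zeros_like(x)
        result.scatter_(-1, topk.indices, topk.values)
        return result


    acts = torch.randn(2, 10)
    print(topk_activation(acts, k=3))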
sae_lens/training/activations_store.py
CHANGED

@@ -23,12 +23,12 @@ from transformers import AutoTokenizer, PreTrainedTokenizerBase

 from sae_lens import logger
 from sae_lens.config import (
-    DTYPE_MAP,
     CacheActivationsRunnerConfig,
     HfDataset,
     LanguageModelSAERunnerConfig,
 )
-from sae_lens.
+from sae_lens.constants import DTYPE_MAP
+from sae_lens.saes.sae import SAE, T_SAE_CONFIG, T_TRAINING_SAE_CONFIG
 from sae_lens.tokenization_and_batching import concat_and_batch_sequences


@@ -91,7 +91,8 @@ class ActivationsStore:
     def from_config(
         cls,
         model: HookedRootModule,
-        cfg: LanguageModelSAERunnerConfig
+        cfg: LanguageModelSAERunnerConfig[T_TRAINING_SAE_CONFIG]
+        | CacheActivationsRunnerConfig,
         override_dataset: HfDataset | None = None,
     ) -> ActivationsStore:
         if isinstance(cfg, CacheActivationsRunnerConfig):
@@ -128,13 +129,15 @@ class ActivationsStore:
             hook_layer=cfg.hook_layer,
             hook_head_index=cfg.hook_head_index,
             context_size=cfg.context_size,
-            d_in=cfg.d_in
+            d_in=cfg.d_in
+            if isinstance(cfg, CacheActivationsRunnerConfig)
+            else cfg.sae.d_in,
             n_batches_in_buffer=cfg.n_batches_in_buffer,
             total_training_tokens=cfg.training_tokens,
             store_batch_size_prompts=cfg.store_batch_size_prompts,
             train_batch_size_tokens=cfg.train_batch_size_tokens,
             prepend_bos=cfg.prepend_bos,
-            normalize_activations=cfg.normalize_activations,
+            normalize_activations=cfg.sae.normalize_activations,
             device=device,
             dtype=cfg.dtype,
             cached_activations_path=cached_activations_path,
@@ -149,9 +152,10 @@ class ActivationsStore:
     def from_sae(
         cls,
         model: HookedRootModule,
-        sae: SAE,
+        sae: SAE[T_SAE_CONFIG],
+        dataset: HfDataset | str,
+        dataset_trust_remote_code: bool = False,
         context_size: int | None = None,
-        dataset: HfDataset | str | None = None,
         streaming: bool = True,
         store_batch_size_prompts: int = 8,
         n_batches_in_buffer: int = 8,
@@ -159,25 +163,37 @@ class ActivationsStore:
         total_tokens: int = 10**9,
         device: str = "cpu",
     ) -> ActivationsStore:
+        if sae.cfg.metadata.hook_name is None:
+            raise ValueError("hook_name is required")
+        if sae.cfg.metadata.hook_layer is None:
+            raise ValueError("hook_layer is required")
+        if sae.cfg.metadata.hook_head_index is None:
+            raise ValueError("hook_head_index is required")
+        if sae.cfg.metadata.context_size is None:
+            raise ValueError("context_size is required")
+        if sae.cfg.metadata.prepend_bos is None:
+            raise ValueError("prepend_bos is required")
         return cls(
             model=model,
-            dataset=
+            dataset=dataset,
             d_in=sae.cfg.d_in,
-            hook_name=sae.cfg.hook_name,
-            hook_layer=sae.cfg.hook_layer,
-            hook_head_index=sae.cfg.hook_head_index,
-            context_size=sae.cfg.context_size
-
+            hook_name=sae.cfg.metadata.hook_name,
+            hook_layer=sae.cfg.metadata.hook_layer,
+            hook_head_index=sae.cfg.metadata.hook_head_index,
+            context_size=sae.cfg.metadata.context_size
+            if context_size is None
+            else context_size,
+            prepend_bos=sae.cfg.metadata.prepend_bos,
             streaming=streaming,
             store_batch_size_prompts=store_batch_size_prompts,
             train_batch_size_tokens=train_batch_size_tokens,
             n_batches_in_buffer=n_batches_in_buffer,
             total_training_tokens=total_tokens,
             normalize_activations=sae.cfg.normalize_activations,
-            dataset_trust_remote_code=
+            dataset_trust_remote_code=dataset_trust_remote_code,
             dtype=sae.cfg.dtype,
             device=torch.device(device),
-            seqpos_slice=sae.cfg.seqpos_slice or (None,),
+            seqpos_slice=sae.cfg.metadata.seqpos_slice or (None,),
         )

     def __init__(
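Note on the API change above: ActivationsStore.from_sae now requires the dataset to be passed explicitly and reads hook information from sae.cfg.metadata, raising immediately when a required metadata field is unset. The sketch below reproduces a subset of that fail-fast check against a mock metadata object; the mock class and the example values are hypothetical, and only the field names and the validation pattern come from the diff.

    # Mock of the required-field checks that from_sae performs on sae.cfg.metadata.
    from dataclasses import dataclass


    @dataclass
    class MockMetadata:
        hook_name: str | None = None
        hook_layer: int | None = None
        context_size: int | None = None
        prepend_bos: bool | None = None


    def check_metadata(metadata: MockMetadata) -> None:
        # Mirror the fail-fast pattern: every required field must be set.
        for field in ("hook_name", "hook_layer", "context_size", "prepend_bos"):
            if getattr(metadata, field) is None:
                raise ValueError(f"{field} is required")


    check_metadata(MockMetadata("blocks.0.hook_resid_pre", 0, 128, True))  # passes
    check_metadata(MockMetadata())  # raises ValueError("hook_name is required")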
sae_lens/training/optim.py
CHANGED
@@ -101,61 +101,85 @@ def _get_main_lr_scheduler(
         raise ValueError(f"Unsupported scheduler: {scheduler_name}")


-class
+class CoefficientScheduler:
+    """Linearly warms up a scalar value from 0.0 to a final value."""
+
     def __init__(
         self,
-
-
-        final_l1_coefficient: float,
+        warm_up_steps: float,
+        final_value: float,
     ):
-        self.
-
-        if self.l1_warmup_steps != 0:
-            self.current_l1_coefficient = 0.0
-        else:
-            self.current_l1_coefficient = final_l1_coefficient
-
-        self.final_l1_coefficient = final_l1_coefficient
-
+        self.warm_up_steps = warm_up_steps
+        self.final_value = final_value
         self.current_step = 0
-
-        if not isinstance(self.
+
+        if not isinstance(self.final_value, (float, int)):
             raise TypeError(
-                f"
+                f"final_value must be float or int, got {type(self.final_value)}."
             )

+        # Initialize current_value based on whether warm-up is used
+        if self.warm_up_steps > 0:
+            self.current_value = 0.0
+        else:
+            self.current_value = self.final_value
+
     def __repr__(self) -> str:
         return (
-            f"
-            f"
-            f"total_steps={self.total_steps})"
+            f"{self.__class__.__name__}(final_value={self.final_value}, "
+            f"warm_up_steps={self.warm_up_steps})"
         )

-    def step(self):
+    def step(self) -> float:
         """
-        Updates the
+        Updates the scalar value based on the current step.
+
+        Returns:
+            The current scalar value after the step.
         """
-
-
-
-
-        )  # type: ignore
+        if self.current_step < self.warm_up_steps:
+            self.current_value = self.final_value * (
+                (self.current_step + 1) / self.warm_up_steps
+            )
         else:
-
+            # Ensure the value stays at final_value after warm-up
+            self.current_value = self.final_value

         self.current_step += 1
+        return self.current_value

-
-
+    @property
+    def value(self) -> float:
+        """Returns the current scalar value."""
+        return self.current_value
+
+    def state_dict(self) -> dict[str, Any]:
+        """State dict for serialization."""
         return {
-            "
-            "
-            "current_l1_coefficient": self.current_l1_coefficient,
-            "final_l1_coefficient": self.final_l1_coefficient,
+            "warm_up_steps": self.warm_up_steps,
+            "final_value": self.final_value,
             "current_step": self.current_step,
+            "current_value": self.current_value,
         }

     def load_state_dict(self, state_dict: dict[str, Any]):
-        """Loads
-
-
+        """Loads the scheduler state."""
+        self.warm_up_steps = state_dict["warm_up_steps"]
+        self.final_value = state_dict["final_value"]
+        self.current_step = state_dict["current_step"]
+        # Maintain consistency: re-calculate current_value based on loaded step
+        # This handles resuming correctly if stopped mid-warmup.
+        if self.current_step <= self.warm_up_steps and self.warm_up_steps > 0:
+            # Use max(0, ...) to handle case where current_step might be loaded as -1 or similar before first step
+            step_for_calc = max(0, self.current_step)
+            # Recalculate based on the step *before* the one about to be taken
+            # Or simply use the saved current_value if available and consistent
+            if "current_value" in state_dict:
+                self.current_value = state_dict["current_value"]
+            else:  # Legacy state dicts might not have current_value
+                self.current_value = self.final_value * (
+                    step_for_calc / self.warm_up_steps
+                )
+
+        else:
+            self.current_value = self.final_value