sae-lens 6.0.0rc1__py3-none-any.whl → 6.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +55 -18
- sae_lens/analysis/hooked_sae_transformer.py +10 -10
- sae_lens/analysis/neuronpedia_integration.py +13 -11
- sae_lens/cache_activations_runner.py +9 -7
- sae_lens/config.py +105 -235
- sae_lens/constants.py +20 -0
- sae_lens/evals.py +34 -31
- sae_lens/{sae_training_runner.py → llm_sae_training_runner.py} +103 -70
- sae_lens/load_model.py +53 -5
- sae_lens/loading/pretrained_sae_loaders.py +36 -10
- sae_lens/registry.py +49 -0
- sae_lens/saes/__init__.py +48 -0
- sae_lens/saes/gated_sae.py +70 -59
- sae_lens/saes/jumprelu_sae.py +58 -72
- sae_lens/saes/sae.py +248 -273
- sae_lens/saes/standard_sae.py +75 -57
- sae_lens/saes/topk_sae.py +72 -83
- sae_lens/training/activation_scaler.py +53 -0
- sae_lens/training/activations_store.py +105 -184
- sae_lens/training/mixing_buffer.py +56 -0
- sae_lens/training/optim.py +60 -36
- sae_lens/training/sae_trainer.py +134 -158
- sae_lens/training/types.py +5 -0
- sae_lens/training/upload_saes_to_huggingface.py +11 -5
- sae_lens/util.py +47 -0
- {sae_lens-6.0.0rc1.dist-info → sae_lens-6.0.0rc3.dist-info}/METADATA +1 -1
- sae_lens-6.0.0rc3.dist-info/RECORD +38 -0
- {sae_lens-6.0.0rc1.dist-info → sae_lens-6.0.0rc3.dist-info}/WHEEL +1 -1
- sae_lens/regsitry.py +0 -34
- sae_lens-6.0.0rc1.dist-info/RECORD +0 -32
- {sae_lens-6.0.0rc1.dist-info → sae_lens-6.0.0rc3.dist-info}/LICENSE +0 -0
sae_lens/evals.py
CHANGED
```diff
@@ -20,8 +20,10 @@ from transformer_lens import HookedTransformer
 from transformer_lens.hook_points import HookedRootModule
 
 from sae_lens.loading.pretrained_saes_directory import get_pretrained_saes_directory
-from sae_lens.saes.sae import SAE
+from sae_lens.saes.sae import SAE, SAEConfig
+from sae_lens.training.activation_scaler import ActivationScaler
 from sae_lens.training.activations_store import ActivationsStore
+from sae_lens.util import extract_stop_at_layer_from_tlens_hook_name
 
 
 def get_library_version() -> str:
@@ -100,15 +102,16 @@ def get_eval_everything_config(
 
 @torch.no_grad()
 def run_evals(
-    sae: SAE,
+    sae: SAE[Any],
     activation_store: ActivationsStore,
     model: HookedRootModule,
+    activation_scaler: ActivationScaler,
     eval_config: EvalConfig = EvalConfig(),
     model_kwargs: Mapping[str, Any] = {},
     ignore_tokens: set[int | None] = set(),
     verbose: bool = False,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
-    hook_name = sae.cfg.hook_name
+    hook_name = sae.cfg.metadata.hook_name
     actual_batch_size = (
         eval_config.batch_size_prompts or activation_store.store_batch_size_prompts
     )
```
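For orientation, a hedged sketch of a caller-side call under the new signature shown above. `sae`, `model`, and `activation_store` are assumed to be already constructed; the bare `ActivationScaler()` mirrors the `multiple_evals` hunk later in this file's diff.

```python
from sae_lens.evals import EvalConfig, run_evals
from sae_lens.training.activation_scaler import ActivationScaler

# run_evals now takes an explicit ActivationScaler instead of reading the
# norm-scaling state off the ActivationsStore. `sae`, `activation_store`,
# and `model` are assumed to exist already.
all_metrics, feature_metrics = run_evals(
    sae=sae,
    activation_store=activation_store,
    model=model,
    activation_scaler=ActivationScaler(),
    eval_config=EvalConfig(),
)
```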
```diff
@@ -140,6 +143,7 @@ def run_evals(
             sae,
             model,
             activation_store,
+            activation_scaler,
             compute_kl=eval_config.compute_kl,
             compute_ce_loss=eval_config.compute_ce_loss,
             n_batches=eval_config.n_eval_reconstruction_batches,
@@ -189,6 +193,7 @@ def run_evals(
             sae,
             model,
             activation_store,
+            activation_scaler,
             compute_l2_norms=eval_config.compute_l2_norms,
             compute_sparsity_metrics=eval_config.compute_sparsity_metrics,
             compute_variance_metrics=eval_config.compute_variance_metrics,
@@ -274,7 +279,7 @@ def run_evals(
     return all_metrics, feature_metrics
 
 
-def get_featurewise_weight_based_metrics(sae: SAE) -> dict[str, Any]:
+def get_featurewise_weight_based_metrics(sae: SAE[Any]) -> dict[str, Any]:
     unit_norm_encoders = (sae.W_enc / sae.W_enc.norm(dim=0, keepdim=True)).cpu()
     unit_norm_decoder = (sae.W_dec.T / sae.W_dec.T.norm(dim=0, keepdim=True)).cpu()
 
@@ -298,9 +303,10 @@ def get_featurewise_weight_based_metrics(sae: SAE) -> dict[str, Any]:
 
 
 def get_downstream_reconstruction_metrics(
-    sae: SAE,
+    sae: SAE[Any],
     model: HookedRootModule,
     activation_store: ActivationsStore,
+    activation_scaler: ActivationScaler,
     compute_kl: bool,
     compute_ce_loss: bool,
     n_batches: int,
@@ -326,8 +332,8 @@ def get_downstream_reconstruction_metrics(
         for metric_name, metric_value in get_recons_loss(
             sae,
             model,
+            activation_scaler,
             batch_tokens,
-            activation_store,
             compute_kl=compute_kl,
             compute_ce_loss=compute_ce_loss,
             ignore_tokens=ignore_tokens,
@@ -366,9 +372,10 @@ def get_downstream_reconstruction_metrics(
 
 
 def get_sparsity_and_variance_metrics(
-    sae: SAE,
+    sae: SAE[Any],
     model: HookedRootModule,
     activation_store: ActivationsStore,
+    activation_scaler: ActivationScaler,
     n_batches: int,
     compute_l2_norms: bool,
     compute_sparsity_metrics: bool,
@@ -379,8 +386,8 @@ def get_sparsity_and_variance_metrics(
     ignore_tokens: set[int | None] = set(),
     verbose: bool = False,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
-    hook_name = sae.cfg.hook_name
-    hook_head_index = sae.cfg.hook_head_index
+    hook_name = sae.cfg.metadata.hook_name
+    hook_head_index = sae.cfg.metadata.hook_head_index
 
     metric_dict = {}
     feature_metric_dict = {}
@@ -436,7 +443,7 @@ def get_sparsity_and_variance_metrics(
             batch_tokens,
             prepend_bos=False,
             names_filter=[hook_name],
-            stop_at_layer=
+            stop_at_layer=extract_stop_at_layer_from_tlens_hook_name(hook_name),
             **model_kwargs,
         )
 
```
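The new `stop_at_layer` argument is filled in by a helper from `sae_lens.util` whose implementation is not shown in this diff. Purely as an illustration of the likely intent, a sketch assuming TransformerLens-style hook names of the form `blocks.<layer>.<hook>` (the real function may differ):

```python
import re

def extract_stop_at_layer_from_tlens_hook_name_sketch(hook_name: str) -> int | None:
    # Hypothetical stand-in, not the sae_lens implementation: stop one layer past
    # the hooked block so its activations are still computed; None means "run fully".
    match = re.match(r"blocks\.(\d+)\.", hook_name)
    return int(match.group(1)) + 1 if match else None

assert extract_stop_at_layer_from_tlens_hook_name_sketch("blocks.5.hook_resid_pre") == 6
assert extract_stop_at_layer_from_tlens_hook_name_sketch("hook_embed") is None
```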
```diff
@@ -451,16 +458,14 @@ def get_sparsity_and_variance_metrics(
         original_act = cache[hook_name]
 
         # normalise if necessary (necessary in training only, otherwise we should fold the scaling in)
-
-        original_act = activation_store.apply_norm_scaling_factor(original_act)
+        original_act = activation_scaler.scale(original_act)
 
         # send the (maybe normalised) activations into the SAE
         sae_feature_activations = sae.encode(original_act.to(sae.device))
         sae_out = sae.decode(sae_feature_activations).to(original_act.device)
         del cache
 
-
-        sae_out = activation_store.unscale(sae_out)
+        sae_out = activation_scaler.unscale(sae_out)
 
         flattened_sae_input = einops.rearrange(original_act, "b ctx d -> (b ctx) d")
         flattened_sae_feature_acts = einops.rearrange(
```
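The hunk above is the core of the ActivationScaler migration: activations are scaled on the way into the SAE and unscaled on the way out, so metrics stay in the model's original activation space. A minimal sketch of that round trip, assuming `activation_scaler`, `sae`, and an activation tensor `acts` already exist and using only the `scale`/`unscale` methods shown in this diff:

```python
# Scale into the SAE's expected distribution, reconstruct, then map back.
scaled_acts = activation_scaler.scale(acts)
reconstruction = sae.decode(sae.encode(scaled_acts))
reconstruction_in_model_space = activation_scaler.unscale(reconstruction)
```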
```diff
@@ -580,17 +585,21 @@ def get_sparsity_and_variance_metrics(
 
 @torch.no_grad()
 def get_recons_loss(
-    sae: SAE,
+    sae: SAE[SAEConfig],
     model: HookedRootModule,
+    activation_scaler: ActivationScaler,
     batch_tokens: torch.Tensor,
-    activation_store: ActivationsStore,
     compute_kl: bool,
     compute_ce_loss: bool,
     ignore_tokens: set[int | None] = set(),
     model_kwargs: Mapping[str, Any] = {},
+    hook_name: str | None = None,
 ) -> dict[str, Any]:
-    hook_name = sae.cfg.hook_name
-    head_index = sae.cfg.hook_head_index
+    hook_name = hook_name or sae.cfg.metadata.hook_name
+    head_index = sae.cfg.metadata.hook_head_index
+
+    if hook_name is None:
+        raise ValueError("hook_name must be provided")
 
     original_logits, original_ce_loss = model(
         batch_tokens, return_type="both", loss_per_token=True, **model_kwargs
```
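`get_recons_loss` now accepts an optional `hook_name` override and otherwise falls back to `sae.cfg.metadata.hook_name`, raising if neither is set. A hedged usage sketch; `sae`, `model`, `activation_scaler`, and `batch_tokens` are assumed given, and the hook string is hypothetical:

```python
from sae_lens.evals import get_recons_loss

recons_metrics = get_recons_loss(
    sae,
    model,
    activation_scaler,
    batch_tokens,
    compute_kl=True,
    compute_ce_loss=True,
    hook_name="blocks.8.hook_resid_pre",  # hypothetical; omit to use sae.cfg.metadata.hook_name
)
```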
```diff
@@ -614,15 +623,13 @@ def get_recons_loss(
         activations = activations.to(sae.device)
 
         # Handle rescaling if SAE expects it
-
-        activations = activation_store.apply_norm_scaling_factor(activations)
+        activations = activation_scaler.scale(activations)
 
         # SAE class agnost forward forward pass.
         new_activations = sae.decode(sae.encode(activations)).to(activations.dtype)
 
         # Unscale if activations were scaled prior to going into the SAE
-
-        new_activations = activation_store.unscale(new_activations)
+        new_activations = activation_scaler.unscale(new_activations)
 
         new_activations = torch.where(mask[..., None], new_activations, activations)
 
@@ -633,8 +640,7 @@ def get_recons_loss(
         activations = activations.to(sae.device)
 
         # Handle rescaling if SAE expects it
-
-        activations = activation_store.apply_norm_scaling_factor(activations)
+        activations = activation_scaler.scale(activations)
 
         # SAE class agnost forward forward pass.
         new_activations = sae.decode(sae.encode(activations.flatten(-2, -1))).to(
@@ -646,8 +652,7 @@ def get_recons_loss(
         )  # reshape to match original shape
 
         # Unscale if activations were scaled prior to going into the SAE
-
-        new_activations = activation_store.unscale(new_activations)
+        new_activations = activation_scaler.unscale(new_activations)
 
         return new_activations.to(original_device)
 
@@ -656,8 +661,7 @@ def get_recons_loss(
         activations = activations.to(sae.device)
 
         # Handle rescaling if SAE expects it
-
-        activations = activation_store.apply_norm_scaling_factor(activations)
+        activations = activation_scaler.scale(activations)
 
         new_activations = sae.decode(sae.encode(activations[:, :, head_index])).to(
             activations.dtype
@@ -665,8 +669,7 @@ def get_recons_loss(
         activations[:, :, head_index] = new_activations
 
         # Unscale if activations were scaled prior to going into the SAE
-
-        activations = activation_store.unscale(activations)
+        activations = activation_scaler.unscale(activations)
 
         return activations.to(original_device)
 
@@ -806,7 +809,6 @@ def multiple_evals(
 
     current_model = None
     current_model_str = None
-    print(filtered_saes)
    for sae_release_name, sae_id, _, _ in tqdm(filtered_saes):
         sae = SAE.from_pretrained(
             release=sae_release_name, # see other options in sae_lens/pretrained_saes.yaml
@@ -846,6 +848,7 @@ def multiple_evals(
         scalar_metrics, feature_metrics = run_evals(
             sae=sae,
             activation_store=activation_store,
+            activation_scaler=ActivationScaler(),
             model=current_model,
             eval_config=eval_config,
             ignore_tokens={
```
sae_lens/{sae_training_runner.py → llm_sae_training_runner.py}
CHANGED

```diff
@@ -2,21 +2,31 @@ import json
 import signal
 import sys
 from collections.abc import Sequence
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, cast
+from typing import Any, Generic, cast
 
 import torch
 import wandb
 from simple_parsing import ArgumentParser
 from transformer_lens.hook_points import HookedRootModule
+from typing_extensions import deprecated
 
 from sae_lens import logger
 from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
+from sae_lens.constants import ACTIVATIONS_STORE_STATE_FILENAME, RUNNER_CFG_FILENAME
+from sae_lens.evals import EvalConfig, run_evals
 from sae_lens.load_model import load_model
-from sae_lens.saes.sae import
+from sae_lens.saes.sae import (
+    T_TRAINING_SAE,
+    T_TRAINING_SAE_CONFIG,
+    TrainingSAE,
+    TrainingSAEConfig,
+)
+from sae_lens.training.activation_scaler import ActivationScaler
 from sae_lens.training.activations_store import ActivationsStore
-from sae_lens.training.geometric_median import compute_geometric_median
 from sae_lens.training.sae_trainer import SAETrainer
+from sae_lens.training.types import DataProvider
 
 
 class InterruptedException(Exception):
```
```diff
@@ -27,22 +37,73 @@ def interrupt_callback(sig_num: Any, stack_frame: Any): # noqa: ARG001
     raise InterruptedException()
 
 
-class SAETrainingRunner:
+@dataclass
+class LLMSaeEvaluator(Generic[T_TRAINING_SAE]):
+    model: HookedRootModule
+    activations_store: ActivationsStore
+    eval_batch_size_prompts: int | None
+    n_eval_batches: int
+    model_kwargs: dict[str, Any]
+
+    def __call__(
+        self,
+        sae: T_TRAINING_SAE,
+        data_provider: DataProvider,
+        activation_scaler: ActivationScaler,
+    ) -> dict[str, Any]:
+        ignore_tokens = set()
+        if self.activations_store.exclude_special_tokens is not None:
+            ignore_tokens = set(self.activations_store.exclude_special_tokens.tolist())
+
+        eval_config = EvalConfig(
+            batch_size_prompts=self.eval_batch_size_prompts,
+            n_eval_reconstruction_batches=self.n_eval_batches,
+            n_eval_sparsity_variance_batches=self.n_eval_batches,
+            compute_ce_loss=True,
+            compute_l2_norms=True,
+            compute_sparsity_metrics=True,
+            compute_variance_metrics=True,
+        )
+
+        eval_metrics, _ = run_evals(
+            sae=sae,
+            activation_store=self.activations_store,
+            model=self.model,
+            activation_scaler=activation_scaler,
+            eval_config=eval_config,
+            ignore_tokens=ignore_tokens,
+            model_kwargs=self.model_kwargs,
+        ) # not calculating featurwise metrics here.
+
+        # Remove eval metrics that are already logged during training
+        eval_metrics.pop("metrics/explained_variance", None)
+        eval_metrics.pop("metrics/explained_variance_std", None)
+        eval_metrics.pop("metrics/l0", None)
+        eval_metrics.pop("metrics/l1", None)
+        eval_metrics.pop("metrics/mse", None)
+
+        # Remove metrics that are not useful for wandb logging
+        eval_metrics.pop("metrics/total_tokens_evaluated", None)
+
+        return eval_metrics
+
+
+class LanguageModelSAETrainingRunner:
     """
     Class to run the training of a Sparse Autoencoder (SAE) on a TransformerLens model.
     """
 
-    cfg: LanguageModelSAERunnerConfig
+    cfg: LanguageModelSAERunnerConfig[Any]
     model: HookedRootModule
-    sae: TrainingSAE
+    sae: TrainingSAE[Any]
     activations_store: ActivationsStore
 
     def __init__(
         self,
-        cfg: LanguageModelSAERunnerConfig,
+        cfg: LanguageModelSAERunnerConfig[T_TRAINING_SAE_CONFIG],
         override_dataset: HfDataset | None = None,
         override_model: HookedRootModule | None = None,
-        override_sae: TrainingSAE | None = None,
+        override_sae: TrainingSAE[Any] | None = None,
     ):
         if override_dataset is not None:
             logger.warning(
```
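The evaluator introduced above is a plain dataclass whose `__call__` wraps `run_evals` and strips metrics already logged during training. A hedged construction sketch, mirroring the wiring later in this diff; `model`, `activations_store`, and `sae` are assumed to exist, and the batch count is made up:

```python
from sae_lens.llm_sae_training_runner import LLMSaeEvaluator
from sae_lens.training.activation_scaler import ActivationScaler

evaluator = LLMSaeEvaluator(
    model=model,
    activations_store=activations_store,
    eval_batch_size_prompts=None,  # run_evals falls back to the store's batch size
    n_eval_batches=10,             # hypothetical value
    model_kwargs={},
)
# The trainer invokes the evaluator with (sae, data_provider, activation_scaler).
eval_metrics = evaluator(sae, activations_store, ActivationScaler())
```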
```diff
@@ -82,7 +143,6 @@ class SAETrainingRunner:
                     self.cfg.get_training_sae_cfg_dict(),
                 ).to_dict()
             )
-            self._init_sae_group_b_decs()
         else:
             self.sae = override_sae
 
@@ -100,12 +160,20 @@ class SAETrainingRunner:
             id=self.cfg.logger.wandb_id,
         )
 
-
+        evaluator = LLMSaeEvaluator(
             model=self.model,
+            activations_store=self.activations_store,
+            eval_batch_size_prompts=self.cfg.eval_batch_size_prompts,
+            n_eval_batches=self.cfg.n_eval_batches,
+            model_kwargs=self.cfg.model_kwargs,
+        )
+
+        trainer = SAETrainer(
             sae=self.sae,
-
+            data_provider=self.activations_store,
+            evaluator=evaluator,
             save_checkpoint_fn=self.save_checkpoint,
-            cfg=self.cfg,
+            cfg=self.cfg.to_sae_trainer_config(),
         )
 
         self._compile_if_needed()
```
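The hunk above shows the new dependency-injection style: the trainer receives an SAE, a data provider, an evaluator callback, and a trimmed trainer config rather than the whole runner config. A hedged sketch using only the keyword names visible above; `runner` stands for an already-initialised LanguageModelSAETrainingRunner and `evaluator` for the object from the previous sketch:

```python
from sae_lens.training.sae_trainer import SAETrainer

trainer = SAETrainer(
    sae=runner.sae,
    data_provider=runner.activations_store,  # presumably anything satisfying DataProvider
    evaluator=evaluator,
    save_checkpoint_fn=runner.save_checkpoint,
    cfg=runner.cfg.to_sae_trainer_config(),
)
```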
```diff
@@ -141,7 +209,9 @@ class SAETrainingRunner:
             backend=backend,
         ) # type: ignore
 
-    def run_trainer_with_interruption_handling(
+    def run_trainer_with_interruption_handling(
+        self, trainer: SAETrainer[TrainingSAE[TrainingSAEConfig], TrainingSAEConfig]
+    ):
         try:
             # signal handlers (if preempted)
             signal.signal(signal.SIGINT, interrupt_callback)
@@ -152,73 +222,31 @@ class SAETrainingRunner:
 
         except (KeyboardInterrupt, InterruptedException):
             logger.warning("interrupted, saving progress")
-
-
+            checkpoint_path = Path(self.cfg.checkpoint_path) / str(
+                trainer.n_training_samples
+            )
+            self.save_checkpoint(checkpoint_path)
             logger.info("done saving")
             raise
 
         return sae
 
-    # TODO: move this into the SAE trainer or Training SAE class
-    def _init_sae_group_b_decs(
-        self,
-    ) -> None:
-        """
-        extract all activations at a certain layer and use for sae b_dec initialization
-        """
-
-        if self.cfg.b_dec_init_method == "geometric_median":
-            self.activations_store.set_norm_scaling_factor_if_needed()
-            layer_acts = self.activations_store.storage_buffer.detach()[:, 0, :]
-            # get geometric median of the activations if we're using those.
-            median = compute_geometric_median(
-                layer_acts,
-                maxiter=100,
-            ).median
-            self.sae.initialize_b_dec_with_precalculated(median)
-        elif self.cfg.b_dec_init_method == "mean":
-            self.activations_store.set_norm_scaling_factor_if_needed()
-            layer_acts = self.activations_store.storage_buffer.detach().cpu()[:, 0, :]
-            self.sae.initialize_b_dec_with_mean(layer_acts)  # type: ignore
-
-    @staticmethod
     def save_checkpoint(
-
-
-        wandb_aliases: list[str] | None = None,
+        self,
+        checkpoint_path: Path,
     ) -> None:
-
-
-
-        trainer.activations_store.save(
-            str(base_path / "activations_store_state.safetensors")
-        )
-
-        if trainer.sae.cfg.normalize_sae_decoder:
-            trainer.sae.set_decoder_norm_to_unit_norm()
-
-        weights_path, cfg_path, sparsity_path = trainer.sae.save_model(
-            str(base_path),
-            trainer.log_feature_sparsity,
+        self.activations_store.save(
+            str(checkpoint_path / ACTIVATIONS_STORE_STATE_FILENAME)
         )
 
-
-
-
-        with open(cfg_path, "w") as f:
-            json.dump(config, f)
-
-        if trainer.cfg.logger.log_to_wandb:
-            trainer.cfg.logger.log(
-                trainer,
-                weights_path,
-                cfg_path,
-                sparsity_path=sparsity_path,
-                wandb_aliases=wandb_aliases,
-            )
+        runner_config = self.cfg.to_dict()
+        with open(checkpoint_path / RUNNER_CFG_FILENAME, "w") as f:
+            json.dump(runner_config, f)
 
 
-def _parse_cfg_args(
+def _parse_cfg_args(
+    args: Sequence[str],
+) -> LanguageModelSAERunnerConfig[TrainingSAEConfig]:
     if len(args) == 0:
         args = ["--help"]
     parser = ArgumentParser(exit_on_error=False)
```
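`save_checkpoint` is now an instance method that takes a directory `Path` and writes the activations-store state plus the runner config, using filename constants from the new `sae_lens.constants` module (their exact values are not visible in this diff). A hedged usage sketch; `runner` and `trainer` are assumed to exist:

```python
from pathlib import Path

checkpoint_dir = Path(runner.cfg.checkpoint_path) / str(trainer.n_training_samples)
checkpoint_dir.mkdir(parents=True, exist_ok=True)  # assumption: the caller creates the directory
runner.save_checkpoint(checkpoint_dir)
```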
```diff
@@ -229,8 +257,13 @@ def _parse_cfg_args(args: Sequence[str]) -> LanguageModelSAERunnerConfig:
 # moved into its own function to make it easier to test
 def _run_cli(args: Sequence[str]):
     cfg = _parse_cfg_args(args)
-
+    LanguageModelSAETrainingRunner(cfg=cfg).run()
 
 
 if __name__ == "__main__":
     _run_cli(args=sys.argv[1:])
+
+
+@deprecated("Use LanguageModelSAETrainingRunner instead")
+class SAETrainingRunner(LanguageModelSAETrainingRunner):
+    pass
```
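The old entry point is kept as a thin subclass, so existing imports keep working while `typing_extensions.deprecated` steers users (and type checkers) toward the new name. A minimal sketch, assuming the renamed module path shown in the file list at the top of this diff:

```python
from sae_lens.llm_sae_training_runner import (
    LanguageModelSAETrainingRunner,
    SAETrainingRunner,
)

# The alias is a real subclass; instantiating it should emit a DeprecationWarning
# via typing_extensions.deprecated on recent typing_extensions versions.
assert issubclass(SAETrainingRunner, LanguageModelSAETrainingRunner)
```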
sae_lens/load_model.py
CHANGED
```diff
@@ -1,4 +1,4 @@
-from typing import Any, Literal, cast
+from typing import Any, Callable, Literal, cast
 
 import torch
 from transformer_lens import HookedTransformer
@@ -77,6 +77,7 @@ class HookedProxyLM(HookedRootModule):
     # copied and modified from base HookedRootModule
     def setup(self):
         self.mod_dict = {}
+        self.named_modules_dict = {}
         self.hook_dict: dict[str, HookPoint] = {}
         for name, module in self.model.named_modules():
             if name == "":
@@ -89,14 +90,21 @@ class HookedProxyLM(HookedRootModule):
 
             self.hook_dict[name] = hook_point
             self.mod_dict[name] = hook_point
+            self.named_modules_dict[name] = module
+
+    def run_with_cache(self, *args: Any, **kwargs: Any): # type: ignore
+        if "names_filter" in kwargs:
+            # hacky way to make sure that the names_filter is passed to our forward method
+            kwargs["_names_filter"] = kwargs["names_filter"]
+        return super().run_with_cache(*args, **kwargs)
 
     def forward(
         self,
         tokens: torch.Tensor,
         return_type: Literal["both", "logits"] = "logits",
         loss_per_token: bool = False,
-        # TODO: implement real support for stop_at_layer
         stop_at_layer: int | None = None,
+        _names_filter: list[str] | None = None,
         **kwargs: Any,
     ) -> Output | Loss:
         # This is just what's needed for evals, not everything that HookedTransformer has
```
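The `run_with_cache` override above smuggles `names_filter` through to `forward` as `_names_filter`, so the proxy can attach stop hooks and cut the wrapped model's forward pass short once the requested activations are cached. A hedged sketch; `proxy_model` is assumed to be a `HookedProxyLM` around a Hugging Face model, and the module name is hypothetical:

```python
import torch

tokens = torch.tensor([[1, 2, 3, 4]])  # placeholder token ids
_, cache = proxy_model.run_with_cache(
    tokens,
    names_filter=["transformer.h.5"],  # hypothetical submodule name of the wrapped model
    stop_at_layer=6,                   # any non-None value enables the early-stop path
)
acts = cache["transformer.h.5"]
```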
```diff
@@ -107,8 +115,28 @@ class HookedProxyLM(HookedRootModule):
             raise NotImplementedError(
                 "Only return_type supported is 'both' or 'logits' to match what's in evals.py and ActivationsStore"
             )
-
-
+
+        stop_hooks = []
+        if stop_at_layer is not None and _names_filter is not None:
+            if return_type != "logits":
+                raise NotImplementedError(
+                    "stop_at_layer is not supported for return_type='both'"
+                )
+            stop_manager = StopManager(_names_filter)
+
+            for hook_name in _names_filter:
+                module = self.named_modules_dict[hook_name]
+                stop_fn = stop_manager.get_stop_hook_fn(hook_name)
+                stop_hooks.append(module.register_forward_hook(stop_fn))
+        try:
+            output = self.model(tokens)
+            logits = _extract_logits_from_output(output)
+        except StopForward:
+            # If we stop early, we don't care about the return output
+            return None  # type: ignore
+        finally:
+            for stop_hook in stop_hooks:
+                stop_hook.remove()
 
         if return_type == "logits":
             return logits
@@ -159,7 +187,7 @@ class HookedProxyLM(HookedRootModule):
 
         # We don't want to prepend bos but the tokenizer does it automatically, so we remove it manually
         if hasattr(self.tokenizer, "add_bos_token") and self.tokenizer.add_bos_token: # type: ignore
-            tokens = get_tokens_with_bos_removed(self.tokenizer, tokens)
+            tokens = get_tokens_with_bos_removed(self.tokenizer, tokens) # type: ignore
         return tokens # type: ignore
 
 
@@ -183,3 +211,23 @@ def get_hook_fn(hook_point: HookPoint):
         return output
 
     return hook_fn
+
+
+class StopForward(Exception):
+    pass
+
+
+class StopManager:
+    def __init__(self, hook_names: list[str]):
+        self.hook_names = hook_names
+        self.total_hook_names = len(set(hook_names))
+        self.called_hook_names = set()
+
+    def get_stop_hook_fn(self, hook_name: str) -> Callable[[Any, Any, Any], Any]:
+        def stop_hook_fn(module: Any, input: Any, output: Any) -> Any: # noqa: ARG001
+            self.called_hook_names.add(hook_name)
+            if len(self.called_hook_names) == self.total_hook_names:
+                raise StopForward()
+            return output
+
+        return stop_hook_fn
```
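For readers unfamiliar with the pattern `StopManager` implements: register forward hooks on the modules whose activations you need, raise once every requested activation has been seen, and catch the exception so the rest of the forward pass is skipped. A standalone illustration in plain PyTorch with made-up module names; it is not sae-lens API:

```python
import torch
from torch import nn

model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
captured: dict[str, torch.Tensor] = {}

class _Stop(Exception):
    pass

def capture_then_stop(name: str):
    def hook(module, inputs, output):  # capture the output, then abort the forward pass
        captured[name] = output
        raise _Stop()
    return hook

handle = model[0].register_forward_hook(capture_then_stop("layer0"))
try:
    model(torch.randn(3, 4))
except _Stop:
    pass  # layers after the hooked one never run
finally:
    handle.remove()

print(captured["layer0"].shape)  # torch.Size([3, 8])
```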