sae-lens 6.0.0rc2__py3-none-any.whl → 6.0.0rc4__py3-none-any.whl

This diff shows the differences between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
sae_lens/sae_training_runner.py
@@ -1,237 +0,0 @@
- import json
- import signal
- import sys
- from collections.abc import Sequence
- from pathlib import Path
- from typing import Any, cast
-
- import torch
- import wandb
- from safetensors.torch import save_file
- from simple_parsing import ArgumentParser
- from transformer_lens.hook_points import HookedRootModule
-
- from sae_lens import logger
- from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
- from sae_lens.constants import RUNNER_CFG_FILENAME, SPARSITY_FILENAME
- from sae_lens.load_model import load_model
- from sae_lens.saes.sae import T_TRAINING_SAE_CONFIG, TrainingSAE, TrainingSAEConfig
- from sae_lens.training.activations_store import ActivationsStore
- from sae_lens.training.geometric_median import compute_geometric_median
- from sae_lens.training.sae_trainer import SAETrainer
-
-
- class InterruptedException(Exception):
-     pass
-
-
- def interrupt_callback(sig_num: Any, stack_frame: Any):  # noqa: ARG001
-     raise InterruptedException()
-
-
- class SAETrainingRunner:
-     """
-     Class to run the training of a Sparse Autoencoder (SAE) on a TransformerLens model.
-     """
-
-     cfg: LanguageModelSAERunnerConfig[Any]
-     model: HookedRootModule
-     sae: TrainingSAE[Any]
-     activations_store: ActivationsStore
-
-     def __init__(
-         self,
-         cfg: LanguageModelSAERunnerConfig[T_TRAINING_SAE_CONFIG],
-         override_dataset: HfDataset | None = None,
-         override_model: HookedRootModule | None = None,
-         override_sae: TrainingSAE[Any] | None = None,
-     ):
-         if override_dataset is not None:
-             logger.warning(
-                 f"You just passed in a dataset which will override the one specified in your configuration: {cfg.dataset_path}. As a consequence this run will not be reproducible via configuration alone."
-             )
-         if override_model is not None:
-             logger.warning(
-                 f"You just passed in a model which will override the one specified in your configuration: {cfg.model_name}. As a consequence this run will not be reproducible via configuration alone."
-             )
-
-         self.cfg = cfg
-
-         if override_model is None:
-             self.model = load_model(
-                 self.cfg.model_class_name,
-                 self.cfg.model_name,
-                 device=self.cfg.device,
-                 model_from_pretrained_kwargs=self.cfg.model_from_pretrained_kwargs,
-             )
-         else:
-             self.model = override_model
-
-         self.activations_store = ActivationsStore.from_config(
-             self.model,
-             self.cfg,
-             override_dataset=override_dataset,
-         )
-
-         if override_sae is None:
-             if self.cfg.from_pretrained_path is not None:
-                 self.sae = TrainingSAE.load_from_disk(
-                     self.cfg.from_pretrained_path, self.cfg.device
-                 )
-             else:
-                 self.sae = TrainingSAE.from_dict(
-                     TrainingSAEConfig.from_dict(
-                         self.cfg.get_training_sae_cfg_dict(),
-                     ).to_dict()
-                 )
-                 self._init_sae_group_b_decs()
-         else:
-             self.sae = override_sae
-
-     def run(self):
-         """
-         Run the training of the SAE.
-         """
-
-         if self.cfg.logger.log_to_wandb:
-             wandb.init(
-                 project=self.cfg.logger.wandb_project,
-                 entity=self.cfg.logger.wandb_entity,
-                 config=cast(Any, self.cfg),
-                 name=self.cfg.logger.run_name,
-                 id=self.cfg.logger.wandb_id,
-             )
-
-         trainer = SAETrainer(
-             model=self.model,
-             sae=self.sae,
-             activation_store=self.activations_store,
-             save_checkpoint_fn=self.save_checkpoint,
-             cfg=self.cfg,
-         )
-
-         self._compile_if_needed()
-         sae = self.run_trainer_with_interruption_handling(trainer)
-
-         if self.cfg.logger.log_to_wandb:
-             wandb.finish()
-
-         return sae
-
-     def _compile_if_needed(self):
-         # Compile model and SAE
-         # torch.compile can provide significant speedups (10-20% in testing)
-         # using max-autotune gives the best speedups but:
-         # (a) increases VRAM usage,
-         # (b) can't be used on both SAE and LM (some issue with cudagraphs), and
-         # (c) takes some time to compile
-         # optimal settings seem to be:
-         # use max-autotune on SAE and max-autotune-no-cudagraphs on LM
-         # (also pylance seems to really hate this)
-         if self.cfg.compile_llm:
-             self.model = torch.compile(
-                 self.model,
-                 mode=self.cfg.llm_compilation_mode,
-             )  # type: ignore
-
-         if self.cfg.compile_sae:
-             backend = "aot_eager" if self.cfg.device == "mps" else "inductor"
-
-             self.sae.training_forward_pass = torch.compile(  # type: ignore
-                 self.sae.training_forward_pass,
-                 mode=self.cfg.sae_compilation_mode,
-                 backend=backend,
-             )  # type: ignore
-
-     def run_trainer_with_interruption_handling(
-         self, trainer: SAETrainer[TrainingSAE[TrainingSAEConfig], TrainingSAEConfig]
-     ):
-         try:
-             # signal handlers (if preempted)
-             signal.signal(signal.SIGINT, interrupt_callback)
-             signal.signal(signal.SIGTERM, interrupt_callback)
-
-             # train SAE
-             sae = trainer.fit()
-
-         except (KeyboardInterrupt, InterruptedException):
-             logger.warning("interrupted, saving progress")
-             checkpoint_name = str(trainer.n_training_tokens)
-             self.save_checkpoint(trainer, checkpoint_name=checkpoint_name)
-             logger.info("done saving")
-             raise
-
-         return sae
-
-     # TODO: move this into the SAE trainer or Training SAE class
-     def _init_sae_group_b_decs(
-         self,
-     ) -> None:
-         """
-         extract all activations at a certain layer and use for sae b_dec initialization
-         """
-
-         if self.cfg.sae.b_dec_init_method == "geometric_median":
-             self.activations_store.set_norm_scaling_factor_if_needed()
-             layer_acts = self.activations_store.storage_buffer.detach()[:, 0, :]
-             # get geometric median of the activations if we're using those.
-             median = compute_geometric_median(
-                 layer_acts,
-                 maxiter=100,
-             ).median
-             self.sae.initialize_b_dec_with_precalculated(median)
-         elif self.cfg.sae.b_dec_init_method == "mean":
-             self.activations_store.set_norm_scaling_factor_if_needed()
-             layer_acts = self.activations_store.storage_buffer.detach().cpu()[:, 0, :]
-             self.sae.initialize_b_dec_with_mean(layer_acts)  # type: ignore
-
-     @staticmethod
-     def save_checkpoint(
-         trainer: SAETrainer[TrainingSAE[Any], Any],
-         checkpoint_name: str,
-         wandb_aliases: list[str] | None = None,
-     ) -> None:
-         base_path = Path(trainer.cfg.checkpoint_path) / checkpoint_name
-         base_path.mkdir(exist_ok=True, parents=True)
-
-         trainer.activations_store.save(
-             str(base_path / "activations_store_state.safetensors")
-         )
-
-         weights_path, cfg_path = trainer.sae.save_model(str(base_path))
-
-         sparsity_path = base_path / SPARSITY_FILENAME
-         save_file({"sparsity": trainer.log_feature_sparsity}, sparsity_path)
-
-         runner_config = trainer.cfg.to_dict()
-         with open(base_path / RUNNER_CFG_FILENAME, "w") as f:
-             json.dump(runner_config, f)
-
-         if trainer.cfg.logger.log_to_wandb:
-             trainer.cfg.logger.log(
-                 trainer,
-                 weights_path,
-                 cfg_path,
-                 sparsity_path=sparsity_path,
-                 wandb_aliases=wandb_aliases,
-             )
-
-
- def _parse_cfg_args(
-     args: Sequence[str],
- ) -> LanguageModelSAERunnerConfig[TrainingSAEConfig]:
-     if len(args) == 0:
-         args = ["--help"]
-     parser = ArgumentParser(exit_on_error=False)
-     parser.add_arguments(LanguageModelSAERunnerConfig, dest="cfg")
-     return parser.parse_args(args).cfg
-
-
- # moved into its own function to make it easier to test
- def _run_cli(args: Sequence[str]):
-     cfg = _parse_cfg_args(args)
-     SAETrainingRunner(cfg=cfg).run()
-
-
- if __name__ == "__main__":
-     _run_cli(args=sys.argv[1:])
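The removed runner uses two patterns worth noting. First, it converts SIGINT/SIGTERM into an exception so that a preempted training job can save a checkpoint before exiting. A minimal sketch of that pattern; `run_with_checkpointing`, `train_fn`, and `save_fn` are hypothetical stand-ins for `trainer.fit` and `save_checkpoint`:

```python
import signal
from typing import Any, Callable, TypeVar

T = TypeVar("T")


class InterruptedException(Exception):
    pass


def interrupt_callback(sig_num: Any, stack_frame: Any) -> None:
    # Surface the signal as an exception so ordinary try/except cleanup runs.
    raise InterruptedException()


def run_with_checkpointing(train_fn: Callable[[], T], save_fn: Callable[[], None]) -> T:
    signal.signal(signal.SIGINT, interrupt_callback)   # Ctrl-C
    signal.signal(signal.SIGTERM, interrupt_callback)  # e.g. cluster preemption
    try:
        return train_fn()
    except (KeyboardInterrupt, InterruptedException):
        save_fn()  # persist progress before propagating the interruption
        raise
```

Second, the comments in `_compile_if_needed` recommend max-autotune for the SAE forward pass and max-autotune-no-cudagraphs for the language model. Assuming a plain module and forward callable, that advice amounts to roughly this hypothetical helper:

```python
import torch
from typing import Callable


def compile_for_training(model: torch.nn.Module, sae_forward: Callable, device: str):
    # Mirrors the settings described in _compile_if_needed: cudagraphs cannot be
    # used on both the SAE and the LM at once, and the code falls back to the
    # aot_eager backend on Apple Silicon ("mps"), where inductor is not used.
    model = torch.compile(model, mode="max-autotune-no-cudagraphs")
    backend = "aot_eager" if device == "mps" else "inductor"
    sae_forward = torch.compile(sae_forward, mode="max-autotune", backend=backend)
    return model, sae_forward
```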
sae_lens/training/geometric_median.py
@@ -1,101 +0,0 @@
- from types import SimpleNamespace
-
- import torch
- import tqdm
-
-
- def weighted_average(points: torch.Tensor, weights: torch.Tensor):
-     weights = weights / weights.sum()
-     return (points * weights.view(-1, 1)).sum(dim=0)
-
-
- @torch.no_grad()
- def geometric_median_objective(
-     median: torch.Tensor, points: torch.Tensor, weights: torch.Tensor
- ) -> torch.Tensor:
-     norms = torch.linalg.norm(points - median.view(1, -1), dim=1)  # type: ignore
-
-     return (norms * weights).sum()
-
-
- def compute_geometric_median(
-     points: torch.Tensor,
-     weights: torch.Tensor | None = None,
-     eps: float = 1e-6,
-     maxiter: int = 100,
-     ftol: float = 1e-20,
-     do_log: bool = False,
- ):
-     """
-     :param points: ``torch.Tensor`` of shape ``(n, d)``
-     :param weights: Optional ``torch.Tensor`` of shape ``(n,)``.
-     :param eps: Smallest allowed value of denominator, to avoid divide by zero.
-         Equivalently, this is a smoothing parameter. Default 1e-6.
-     :param maxiter: Maximum number of Weiszfeld iterations. Default 100
-     :param ftol: If objective value does not improve by at least this `ftol` fraction, terminate the algorithm. Default 1e-20.
-     :param do_log: If true will return a log of function values encountered through the course of the algorithm
-     :return: SimpleNamespace object with fields
-         - `median`: estimate of the geometric median, which is a ``torch.Tensor`` object of shape ``(d,)``
-         - `termination`: string explaining how the algorithm terminated.
-         - `logs`: function values encountered through the course of the algorithm in a list (None if do_log is false).
-     """
-     with torch.no_grad():
-         if weights is None:
-             weights = torch.ones((points.shape[0],), device=points.device)
-         # initialize median estimate at mean
-         new_weights = weights
-         median = weighted_average(points, weights)
-         objective_value = geometric_median_objective(median, points, weights)
-         logs = [objective_value] if do_log else None
-
-         # Weiszfeld iterations
-         early_termination = False
-         pbar = tqdm.tqdm(range(maxiter))
-         for _ in pbar:
-             prev_obj_value = objective_value
-
-             norms = torch.linalg.norm(points - median.view(1, -1), dim=1)  # type: ignore
-             new_weights = weights / torch.clamp(norms, min=eps)
-             median = weighted_average(points, new_weights)
-             objective_value = geometric_median_objective(median, points, weights)
-
-             if logs is not None:
-                 logs.append(objective_value)
-             if abs(prev_obj_value - objective_value) <= ftol * objective_value:
-                 early_termination = True
-                 break
-
-             pbar.set_description(f"Objective value: {objective_value:.4f}")
-
-     median = weighted_average(points, new_weights)  # allow autodiff to track it
-     return SimpleNamespace(
-         median=median,
-         new_weights=new_weights,
-         termination=(
-             "function value converged within tolerance"
-             if early_termination
-             else "maximum iterations reached"
-         ),
-         logs=logs,
-     )
-
-
- if __name__ == "__main__":
-     import time
-
-     TOLERANCE = 1e-2
-
-     dim1 = 10000
-     dim2 = 768
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-
-     sample = (
-         torch.randn((dim1, dim2), device=device) * 100
-     )  # seems to be the order of magnitude of the actual use case
-     weights = torch.randn((dim1,), device=device)
-
-     torch.tensor(weights, device=device)
-
-     tic = time.perf_counter()
-     new = compute_geometric_median(sample, weights=weights, maxiter=100)
-     print(f"new code takes {time.perf_counter()-tic} seconds!")  # noqa: T201
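The removed module implements the classic Weiszfeld algorithm: each iteration reweights every point by the inverse of its distance to the current estimate and re-averages. A self-contained sketch of the core update (the function name and defaults are illustrative, not the library API):

```python
import torch


def weiszfeld_median(points: torch.Tensor, iters: int = 100, eps: float = 1e-6) -> torch.Tensor:
    """Approximate the geometric median of an (n, d) tensor of points."""
    median = points.mean(dim=0)  # initialize the estimate at the arithmetic mean
    for _ in range(iters):
        # Distance of each point to the current estimate, clamped so the
        # update stays finite when the estimate lands on a data point.
        norms = torch.linalg.norm(points - median, dim=1).clamp(min=eps)
        inv = 1.0 / norms
        median = (points * inv.unsqueeze(1)).sum(dim=0) / inv.sum()
    return median


# e.g. initializing an SAE decoder bias from a buffer of activations:
# b_dec = weiszfeld_median(activations)  # activations: (n, d_model)
```

Unlike the mean, the geometric median minimizes the sum of distances rather than squared distances, which is why the runner offers it as a b_dec initialization that is less sensitive to outlier activations.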
sae_lens-6.0.0rc2.dist-info/RECORD
@@ -1,35 +0,0 @@
- sae_lens/__init__.py,sha256=JZATcdlWGVOXYTHb41hn7dPp7pR2tWgpLAz2ztQOE-A,2747
- sae_lens/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sae_lens/analysis/hooked_sae_transformer.py,sha256=Eyg1Y2hVIHNuiiLOCTgzstOuW6iA-7hPHqaGR8y_vMs,13809
- sae_lens/analysis/neuronpedia_integration.py,sha256=DlI08ThI0zwMrBthICt1OFCMyqmaCUDeZxhOk7b7teY,18680
- sae_lens/cache_activations_runner.py,sha256=27jp2hFxZj4foWCRCJJd2VCwYJtMgkvPx6MuIhQBofc,12591
- sae_lens/config.py,sha256=Ff6MRzRlVk8xtgkvHdJEmuPh9Owc10XIWBaUwdypzkU,26062
- sae_lens/constants.py,sha256=HSiSp0j2Umak2buT30seFhkmj7KNuPmB3u4yLXrgfOg,462
- sae_lens/evals.py,sha256=aR0pJMBWBUdZElXPcxUyNnNYWbM2LC5UeaESKAwdOMY,39098
- sae_lens/load_model.py,sha256=tE70sXsyyyGYW7o506O3eiw1MXyyW6DCQojLG49hWYI,6771
- sae_lens/loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sae_lens/loading/pretrained_sae_loaders.py,sha256=IgQ-XSJ5VTLCzmJavPmk1vExBVB-36wW7w-ZNo7tzPY,31214
- sae_lens/loading/pretrained_saes_directory.py,sha256=4Vn-Jex6SveD7EbxcSOBv8cx1gkPfUMLU1QOP-ww1ZE,3752
- sae_lens/pretokenize_runner.py,sha256=0nHQq3s_d80VS8iVK4-e6y_orAYVO8c4RrLGtIDfK_E,6885
- sae_lens/pretrained_saes.yaml,sha256=C_z-7Lxz6ZIy2V-c-4Xw45eAQ926O9aGjocSNuki0xs,573557
- sae_lens/registry.py,sha256=nhy7BPSudSATqW4lo9H_k3Na7sfGHmAf9v-3wpnLL_o,1490
- sae_lens/sae_training_runner.py,sha256=lI_d3ywS312dIz0wctm_Sgt3W9ffBOS7ahnDXBljX1s,8320
- sae_lens/saes/__init__.py,sha256=v6mfeDzyGYtT6x5SszAQtkldTXwPE-V_iwOlrT_pDwQ,1008
- sae_lens/saes/gated_sae.py,sha256=IgWvZxeJpdiu7VqeUnJLC-VWVhz6o8OXvmwCS-LJ-WQ,9426
- sae_lens/saes/jumprelu_sae.py,sha256=lkhafpoYYn4-62tBlmmufmUomoo3CmFFQQ3NNylBNSM,12264
- sae_lens/saes/sae.py,sha256=edJK3VFzOVBPXUX6QJ5fhhoY0wcfEisDmVXiqFRA7Xg,35089
- sae_lens/saes/standard_sae.py,sha256=tMs6Z6Cv44PWa7pLo53xhXFnHMvO5BM6eVYHtRPLpos,6652
- sae_lens/saes/topk_sae.py,sha256=CfF59K4J2XwUvztwg4fBbvFO3PyucLkg4Elkxdk0ozs,9786
- sae_lens/tokenization_and_batching.py,sha256=oUAscjy_LPOrOb8_Ty6eLAcZ0B3HB_wiWjWktgolhG0,4314
- sae_lens/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sae_lens/training/activations_store.py,sha256=5V5dExeXWoE0dw-ePOZVnQIbBJwrepRMdsQrRam9Lg8,36790
- sae_lens/training/geometric_median.py,sha256=3kH8ZJAgKStlnZgs6s1uYGDYh004Bl0r4RLhuwT3lBY,3719
- sae_lens/training/optim.py,sha256=KXdOym-Ly3f2aFbndRc0JEH0Wa7u1BE5ljxGN3YtouQ,6836
- sae_lens/training/sae_trainer.py,sha256=zYAk_9QJ8AJi2TjDZ1qW_lyoovSBqrJvBHzyYgb89ZY,15251
- sae_lens/training/upload_saes_to_huggingface.py,sha256=tXvR4j25IgMjJ8R9oczwSdy00Tg-P_jAtnPHRt8yF64,4489
- sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
- sae_lens/util.py,sha256=4lqtl7HT9OiyRK8fe8nXtkcn2lOR1uX7ANrAClf6Bv8,1026
- sae_lens-6.0.0rc2.dist-info/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
- sae_lens-6.0.0rc2.dist-info/METADATA,sha256=Z8Zwb6EknAPB5dOvfduYZewr4nldot-1dQoqz50Co3k,5326
- sae_lens-6.0.0rc2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- sae_lens-6.0.0rc2.dist-info/RECORD,,
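Per the wheel spec, each RECORD row is a relative path, a hash of the form `algorithm=urlsafe-b64(digest)` with padding stripped, and a size in bytes; RECORD lists itself with the last two fields empty. A sketch of verifying an unpacked wheel against its RECORD; `verify_record` is a hypothetical helper, not part of sae-lens:

```python
import base64
import csv
import hashlib
from pathlib import Path


def verify_record(root: Path, record: Path) -> list[str]:
    """Return paths whose on-disk contents do not match their RECORD hash."""
    mismatched = []
    with open(record, newline="") as f:
        for path, hash_spec, _size in csv.reader(f):
            if not hash_spec:  # RECORD's own entry carries no hash
                continue
            algo, _, expected = hash_spec.partition("=")
            digest = hashlib.new(algo, (root / path).read_bytes()).digest()
            if base64.urlsafe_b64encode(digest).rstrip(b"=").decode() != expected:
                mismatched.append(path)
    return mismatched
```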