sae-lens 6.3.0__tar.gz → 6.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sae_lens-6.3.0 → sae_lens-6.4.0}/PKG-INFO +1 -1
- {sae_lens-6.3.0 → sae_lens-6.4.0}/pyproject.toml +1 -1
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/__init__.py +1 -1
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/config.py +10 -1
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/llm_sae_training_runner.py +3 -1
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/sae_trainer.py +0 -8
- {sae_lens-6.3.0 → sae_lens-6.4.0}/LICENSE +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/README.md +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/analysis/__init__.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/analysis/hooked_sae_transformer.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/analysis/neuronpedia_integration.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/cache_activations_runner.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/constants.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/evals.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/load_model.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/loading/__init__.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/loading/pretrained_sae_loaders.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/loading/pretrained_saes_directory.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/pretokenize_runner.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/pretrained_saes.yaml +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/registry.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/__init__.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/batchtopk_sae.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/gated_sae.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/jumprelu_sae.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/sae.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/standard_sae.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/saes/topk_sae.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/tokenization_and_batching.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/__init__.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/activation_scaler.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/activations_store.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/mixing_buffer.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/optim.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/types.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/training/upload_saes_to_huggingface.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/tutorial/tsea.py +0 -0
- {sae_lens-6.3.0 → sae_lens-6.4.0}/sae_lens/util.py +0 -0
sae_lens/config.py:

@@ -1,5 +1,6 @@
 import json
 import math
+import warnings
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast
@@ -125,7 +126,7 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
         model_name (str): The name of the model to use. This should be the name of the model in the Hugging Face model hub.
         model_class_name (str): The name of the class of the model to use. This should be either `HookedTransformer` or `HookedMamba`.
         hook_name (str): The name of the hook to use. This should be a valid TransformerLens hook.
-        hook_eval (str): NOT CURRENTLY IN USE. The name of the hook to use for evaluation.
+        hook_eval (str): DEPRECATED: Will be removed in v7.0.0. NOT CURRENTLY IN USE. The name of the hook to use for evaluation.
         hook_head_index (int, optional): When the hook is for an activation with a head index, we can specify a specific head to use here.
         dataset_path (str): A Hugging Face dataset path.
         dataset_trust_remote_code (bool): Whether to trust remote code when loading datasets from Huggingface.
@@ -264,6 +265,14 @@ class LanguageModelSAERunnerConfig(Generic[T_TRAINING_SAE_CONFIG]):
     exclude_special_tokens: bool | list[int] = False

     def __post_init__(self):
+        if self.hook_eval != "NOT_IN_USE":
+            warnings.warn(
+                "The 'hook_eval' field is deprecated and will be removed in v7.0.0. "
+                "It is not currently used and can be safely removed from your config.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
         if self.use_cached_activations and self.cached_activations_path is None:
             self.cached_activations_path = _default_cached_activations_path(
                 self.dataset_path,
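The practical effect of the config.py change: setting hook_eval to anything other than its default now emits a DeprecationWarning when the config is constructed. Below is a minimal, self-contained sketch of that pattern; ExampleConfig and its single field are hypothetical stand-ins for LanguageModelSAERunnerConfig, and the "NOT_IN_USE" default is inferred from the comparison in the diff.

    # Minimal sketch of the deprecation pattern added above. ExampleConfig is a
    # hypothetical stand-in; only the __post_init__ check is mirrored.
    import warnings
    from dataclasses import dataclass

    @dataclass
    class ExampleConfig:
        hook_eval: str = "NOT_IN_USE"  # default value inferred from the check in the diff

        def __post_init__(self):
            if self.hook_eval != "NOT_IN_USE":
                warnings.warn(
                    "The 'hook_eval' field is deprecated and will be removed in v7.0.0. "
                    "It is not currently used and can be safely removed from your config.",
                    DeprecationWarning,
                    stacklevel=2,
                )

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        ExampleConfig(hook_eval="blocks.0.hook_resid_post")  # emits DeprecationWarning
        ExampleConfig()  # default value stays silent

    print(len(caught))  # 1

Configs that leave hook_eval at its default are unaffected; configs that set it can simply drop the field, per the warning message.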
sae_lens/llm_sae_training_runner.py:

@@ -17,6 +17,7 @@ from sae_lens.config import HfDataset, LanguageModelSAERunnerConfig
 from sae_lens.constants import ACTIVATIONS_STORE_STATE_FILENAME, RUNNER_CFG_FILENAME
 from sae_lens.evals import EvalConfig, run_evals
 from sae_lens.load_model import load_model
+from sae_lens.saes.batchtopk_sae import BatchTopKTrainingSAEConfig
 from sae_lens.saes.gated_sae import GatedTrainingSAEConfig
 from sae_lens.saes.jumprelu_sae import JumpReLUTrainingSAEConfig
 from sae_lens.saes.sae import (
@@ -291,7 +292,7 @@ def _parse_cfg_args(
     architecture_parser.add_argument(
         "--architecture",
         type=str,
-        choices=["standard", "gated", "jumprelu", "topk"],
+        choices=["standard", "gated", "jumprelu", "topk", "batchtopk"],
         default="standard",
         help="SAE architecture to use",
     )
@@ -352,6 +353,7 @@ def _parse_cfg_args(
         "gated": GatedTrainingSAEConfig,
         "jumprelu": JumpReLUTrainingSAEConfig,
         "topk": TopKTrainingSAEConfig,
+        "batchtopk": BatchTopKTrainingSAEConfig,
     }

     sae_config_type = sae_config_map[architecture]
sae_lens/training/sae_trainer.py:

@@ -1,5 +1,4 @@
 import contextlib
-from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Callable, Generic, Protocol

@@ -38,13 +37,6 @@ def _update_sae_lens_training_version(sae: TrainingSAE[Any]) -> None:
     sae.cfg.sae_lens_training_version = str(__version__)


-@dataclass
-class TrainSAEOutput:
-    sae: TrainingSAE[Any]
-    checkpoint_path: str
-    log_feature_sparsities: torch.Tensor
-
-
 class SaveCheckpointFn(Protocol):
     def __call__(
         self,
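The TrainSAEOutput dataclass removed above was unused by the trainer itself. If downstream code happened to import it, one possible migration, reconstructed directly from the deleted lines, is to keep a local copy; the sae_lens.saes.sae import path for TrainingSAE is an assumption and may need adjusting.

    # Local copy of the removed TrainSAEOutput dataclass, reconstructed from the
    # deleted lines above. Only needed if external code imported it from
    # sae_lens.training.sae_trainer; the TrainingSAE import path is an assumption.
    from dataclasses import dataclass
    from typing import Any

    import torch

    from sae_lens.saes.sae import TrainingSAE

    @dataclass
    class TrainSAEOutput:
        sae: TrainingSAE[Any]
        checkpoint_path: str
        log_feature_sparsities: torch.Tensor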