sae-lens 6.0.0rc4.tar.gz → 6.0.0rc5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/PKG-INFO +2 -2
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/README.md +1 -1
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/pyproject.toml +1 -1
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/__init__.py +1 -1
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/constants.py +1 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/evals.py +0 -11
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/loading/pretrained_sae_loaders.py +154 -2
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/pretrained_saes.yaml +12 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/saes/sae.py +58 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/activations_store.py +1 -1
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/LICENSE +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/analysis/__init__.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/analysis/hooked_sae_transformer.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/analysis/neuronpedia_integration.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/cache_activations_runner.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/config.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/llm_sae_training_runner.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/load_model.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/loading/__init__.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/loading/pretrained_saes_directory.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/pretokenize_runner.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/registry.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/saes/__init__.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/saes/gated_sae.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/saes/jumprelu_sae.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/saes/standard_sae.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/saes/topk_sae.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/tokenization_and_batching.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/__init__.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/activation_scaler.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/mixing_buffer.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/optim.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/sae_trainer.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/types.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/training/upload_saes_to_huggingface.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/tutorial/tsea.py +0 -0
- {sae_lens-6.0.0rc4 → sae_lens-6.0.0rc5}/sae_lens/util.py +0 -0
PKG-INFO:

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: sae-lens
-Version: 6.0.0rc4
+Version: 6.0.0rc5
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
 Keywords: deep-learning,sparse-autoencoders,mechanistic-interpretability,PyTorch
@@ -80,7 +80,7 @@ Pre-trained SAEs for various models can be imported via SAE Lens. See this [page
 
 ## Join the Slack!
 
-Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-
+Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-375zalm04-GFd5tdBU1yLKlu_T_JSqZQ) for support!
 
 ## Citation
```
README.md:

```diff
@@ -40,7 +40,7 @@ Pre-trained SAEs for various models can be imported via SAE Lens. See this [page
 
 ## Join the Slack!
 
-Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-
+Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-375zalm04-GFd5tdBU1yLKlu_T_JSqZQ) for support!
 
 ## Citation
```
sae_lens/constants.py:

```diff
@@ -16,5 +16,6 @@ SPARSITY_FILENAME = "sparsity.safetensors"
 SAE_WEIGHTS_FILENAME = "sae_weights.safetensors"
 SAE_CFG_FILENAME = "cfg.json"
 RUNNER_CFG_FILENAME = "runner_cfg.json"
+SPARSIFY_WEIGHTS_FILENAME = "sae.safetensors"
 ACTIVATIONS_STORE_STATE_FILENAME = "activations_store_state.safetensors"
 ACTIVATION_SCALER_CFG_FILENAME = "activation_scaler.json"
```
sae_lens/evals.py:

```diff
@@ -769,17 +769,6 @@ def nested_dict() -> defaultdict[Any, Any]:
     return defaultdict(nested_dict)
 
 
-def dict_to_nested(flat_dict: dict[str, Any]) -> defaultdict[Any, Any]:
-    nested = nested_dict()
-    for key, value in flat_dict.items():
-        parts = key.split("/")
-        d = nested
-        for part in parts[:-1]:
-            d = d[part]
-        d[parts[-1]] = value
-    return nested
-
-
 def multiple_evals(
     sae_regex_pattern: str,
     sae_block_pattern: str,
```
sae_lens/loading/pretrained_sae_loaders.py:

```diff
@@ -16,6 +16,7 @@ from sae_lens.constants import (
     DTYPE_MAP,
     SAE_CFG_FILENAME,
     SAE_WEIGHTS_FILENAME,
+    SPARSIFY_WEIGHTS_FILENAME,
     SPARSITY_FILENAME,
 )
 from sae_lens.loading.pretrained_saes_directory import (
```
sae_lens/loading/pretrained_sae_loaders.py:

```diff
@@ -248,7 +249,7 @@ def handle_pre_6_0_config(cfg_dict: dict[str, Any]) -> dict[str, Any]:
     config_class = get_sae_class(architecture)[1]
 
     sae_cfg_dict = filter_valid_dataclass_fields(new_cfg, config_class)
-    if architecture == "topk":
+    if architecture == "topk" and "activation_fn_kwargs" in new_cfg:
         sae_cfg_dict["k"] = new_cfg["activation_fn_kwargs"]["k"]
 
     sae_cfg_dict["metadata"] = {
```
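For illustration, a minimal sketch of what the extra check guards against; the config dict below is hypothetical, not one shipped with the library:

```python
# Hypothetical pre-6.0 style topk config that never stored activation_fn_kwargs.
new_cfg = {"architecture": "topk", "d_in": 768, "d_sae": 12288}
sae_cfg_dict: dict = {}

# Mirrors the guarded lookup above: k is only copied when the kwargs exist.
if new_cfg["architecture"] == "topk" and "activation_fn_kwargs" in new_cfg:
    sae_cfg_dict["k"] = new_cfg["activation_fn_kwargs"]["k"]
# Without the added check, new_cfg["activation_fn_kwargs"] would raise KeyError here.
```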
sae_lens/loading/pretrained_sae_loaders.py:

```diff
@@ -530,11 +531,20 @@ def get_llama_scope_config_from_hf(
     # Model specific parameters
     model_name, d_in = "meta-llama/Llama-3.1-8B", old_cfg_dict["d_model"]
 
+    # Get norm scaling factor to rescale jumprelu threshold.
+    # We need this because sae.fold_activation_norm_scaling_factor folds scaling norm into W_enc.
+    # This requires jumprelu threshold to be scaled in the same way
+    norm_scaling_factor = (
+        d_in**0.5 / old_cfg_dict["dataset_average_activation_norm"]["in"]
+    )
+
     cfg_dict = {
         "architecture": "jumprelu",
-        "jump_relu_threshold": old_cfg_dict["jump_relu_threshold"],
+        "jump_relu_threshold": old_cfg_dict["jump_relu_threshold"]
+        * norm_scaling_factor,
         # We use a scalar jump_relu_threshold for all features
         # This is different from Gemma Scope JumpReLU SAEs.
+        # Scaled with norm_scaling_factor to match sae.fold_activation_norm_scaling_factor
         "d_in": d_in,
         "d_sae": old_cfg_dict["d_sae"],
         "dtype": "bfloat16",
```
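For reference, the rescaling applied above can be sketched in isolation. This is a minimal sketch, not the loader itself; the numeric values are hypothetical stand-ins for fields read from the Llama Scope config:

```python
# fold_activation_norm_scaling_factor() folds sqrt(d_in) / E[||x||] into W_enc,
# so a JumpReLU threshold defined on normalized activations must be multiplied
# by the same factor to keep the gate consistent after folding.
d_in = 4096            # hypothetical d_model
avg_norm_in = 130.0    # hypothetical dataset_average_activation_norm["in"]
raw_threshold = 0.05   # hypothetical jump_relu_threshold from the repo config

norm_scaling_factor = d_in**0.5 / avg_norm_in
folded_threshold = raw_threshold * norm_scaling_factor
print(folded_threshold)
```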
sae_lens/loading/pretrained_sae_loaders.py:

```diff
@@ -942,6 +952,146 @@ def llama_scope_r1_distill_sae_huggingface_loader(
     return cfg_dict, state_dict, log_sparsity
 
 
+def get_sparsify_config_from_hf(
+    repo_id: str,
+    folder_name: str,
+    device: str,
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    cfg_filename = f"{folder_name}/{SAE_CFG_FILENAME}"
+    cfg_path = hf_hub_download(
+        repo_id,
+        filename=cfg_filename,
+        force_download=force_download,
+    )
+    sae_path = Path(cfg_path).parent
+    return get_sparsify_config_from_disk(
+        sae_path, device=device, cfg_overrides=cfg_overrides
+    )
+
+
+def get_sparsify_config_from_disk(
+    path: str | Path,
+    device: str | None = None,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    path = Path(path)
+
+    with open(path / SAE_CFG_FILENAME) as f:
+        old_cfg_dict = json.load(f)
+
+    config_path = path.parent / "config.json"
+    if config_path.exists():
+        with open(config_path) as f:
+            config_dict = json.load(f)
+    else:
+        config_dict = {}
+
+    folder_name = path.name
+    if folder_name == "embed_tokens":
+        hook_name, layer = "hook_embed", 0
+    else:
+        match = re.search(r"layers[._](\d+)", folder_name)
+        if match is None:
+            raise ValueError(f"Unrecognized Sparsify folder: {folder_name}")
+        layer = int(match.group(1))
+        hook_name = f"blocks.{layer}.hook_resid_post"
+
+    cfg_dict: dict[str, Any] = {
+        "architecture": "standard",
+        "d_in": old_cfg_dict["d_in"],
+        "d_sae": old_cfg_dict["d_in"] * old_cfg_dict["expansion_factor"],
+        "dtype": "bfloat16",
+        "device": device or "cpu",
+        "model_name": config_dict.get("model", path.parts[-2]),
+        "hook_name": hook_name,
+        "hook_layer": layer,
+        "hook_head_index": None,
+        "activation_fn_str": "topk",
+        "activation_fn_kwargs": {
+            "k": old_cfg_dict["k"],
+            "signed": old_cfg_dict.get("signed", False),
+        },
+        "apply_b_dec_to_input": not old_cfg_dict.get("normalize_decoder", False),
+        "dataset_path": config_dict.get(
+            "dataset", "togethercomputer/RedPajama-Data-1T-Sample"
+        ),
+        "context_size": config_dict.get("ctx_len", 2048),
+        "finetuning_scaling_factor": False,
+        "sae_lens_training_version": None,
+        "prepend_bos": True,
+        "dataset_trust_remote_code": True,
+        "normalize_activations": "none",
+        "neuronpedia_id": None,
+    }
+
+    if cfg_overrides:
+        cfg_dict.update(cfg_overrides)
+
+    return cfg_dict
+
+
+def sparsify_huggingface_loader(
+    repo_id: str,
+    folder_name: str,
+    device: str = "cpu",
+    force_download: bool = False,
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor], None]:
+    weights_filename = f"{folder_name}/{SPARSIFY_WEIGHTS_FILENAME}"
+    sae_path = hf_hub_download(
+        repo_id,
+        filename=weights_filename,
+        force_download=force_download,
+    )
+    cfg_dict, state_dict = sparsify_disk_loader(
+        Path(sae_path).parent, device=device, cfg_overrides=cfg_overrides
+    )
+    return cfg_dict, state_dict, None
+
+
+def sparsify_disk_loader(
+    path: str | Path,
+    device: str = "cpu",
+    cfg_overrides: dict[str, Any] | None = None,
+) -> tuple[dict[str, Any], dict[str, torch.Tensor]]:
+    cfg_dict = get_sparsify_config_from_disk(path, device, cfg_overrides)
+
+    weight_path = Path(path) / SPARSIFY_WEIGHTS_FILENAME
+    state_dict_loaded = load_file(weight_path, device=device)
+
+    dtype = DTYPE_MAP[cfg_dict["dtype"]]
+
+    W_enc = (
+        state_dict_loaded["W_enc"]
+        if "W_enc" in state_dict_loaded
+        else state_dict_loaded["encoder.weight"].T
+    ).to(dtype)
+
+    if "W_dec" in state_dict_loaded:
+        W_dec = state_dict_loaded["W_dec"].T.to(dtype)
+    else:
+        W_dec = state_dict_loaded["decoder.weight"].T.to(dtype)
+
+    if "b_enc" in state_dict_loaded:
+        b_enc = state_dict_loaded["b_enc"].to(dtype)
+    elif "encoder.bias" in state_dict_loaded:
+        b_enc = state_dict_loaded["encoder.bias"].to(dtype)
+    else:
+        b_enc = torch.zeros(cfg_dict["d_sae"], dtype=dtype, device=device)
+
+    if "b_dec" in state_dict_loaded:
+        b_dec = state_dict_loaded["b_dec"].to(dtype)
+    elif "decoder.bias" in state_dict_loaded:
+        b_dec = state_dict_loaded["decoder.bias"].to(dtype)
+    else:
+        b_dec = torch.zeros(cfg_dict["d_in"], dtype=dtype, device=device)
+
+    state_dict = {"W_enc": W_enc, "b_enc": b_enc, "W_dec": W_dec, "b_dec": b_dec}
+    return cfg_dict, state_dict
+
+
 NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "sae_lens": sae_lens_huggingface_loader,
     "connor_rob_hook_z": connor_rob_hook_z_huggingface_loader,
```
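The config getter infers the hook point from the folder name, so the new loaders expect a layout like the one sketched below. This is a minimal sketch under assumed directory names; only the file names and the hook-name inference come from the code above:

```python
# Hypothetical local checkpoint layout readable by the new sparsify loaders:
#
#   my-sparsify-run/
#     config.json           <- optional run config ("model", "dataset", "ctx_len")
#     layers.12/
#       cfg.json            <- per-SAE config ("d_in", "expansion_factor", "k", ...)
#       sae.safetensors     <- weights (SPARSIFY_WEIGHTS_FILENAME)
from sae_lens.loading.pretrained_sae_loaders import sparsify_disk_loader

cfg_dict, state_dict = sparsify_disk_loader("my-sparsify-run/layers.12", device="cpu")
print(cfg_dict["hook_name"])  # inferred from the folder name: "blocks.12.hook_resid_post"
```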
sae_lens/loading/pretrained_sae_loaders.py:

```diff
@@ -950,6 +1100,7 @@ NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
     "llama_scope_r1_distill": llama_scope_r1_distill_sae_huggingface_loader,
     "dictionary_learning_1": dictionary_learning_sae_huggingface_loader_1,
     "deepseek_r1": deepseek_r1_sae_huggingface_loader,
+    "sparsify": sparsify_huggingface_loader,
 }
 
 
@@ -961,4 +1112,5 @@ NAMED_PRETRAINED_SAE_CONFIG_GETTERS: dict[str, PretrainedSaeConfigHuggingfaceLoader] = {
     "llama_scope_r1_distill": get_llama_scope_r1_distill_config_from_hf,
     "dictionary_learning_1": get_dictionary_learning_config_1_from_hf,
     "deepseek_r1": get_deepseek_r1_config_from_hf,
+    "sparsify": get_sparsify_config_from_hf,
 }
```
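With both entries registered under the "sparsify" key, a Hugging Face repo in this format can also be loaded directly. A minimal sketch; the repo id and folder name are hypothetical placeholders, not SAEs known to exist:

```python
from sae_lens.loading.pretrained_sae_loaders import sparsify_huggingface_loader

# Hypothetical repo id and folder; any repo following the sparsify layout should work.
cfg_dict, state_dict, log_sparsity = sparsify_huggingface_loader(
    repo_id="some-org/some-sparsify-saes",
    folder_name="layers.23",
    device="cpu",
)
assert log_sparsity is None  # sparsify checkpoints ship no sparsity tensor
print(cfg_dict["d_sae"], state_dict["W_enc"].shape)
```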
sae_lens/pretrained_saes.yaml:

```diff
@@ -13634,39 +13634,51 @@ gemma-2-2b-res-matryoshka-dc:
   - id: blocks.13.hook_resid_post
     path: standard/blocks.13.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/13-res-matryoshka-dc
   - id: blocks.14.hook_resid_post
     path: standard/blocks.14.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/14-res-matryoshka-dc
   - id: blocks.15.hook_resid_post
     path: standard/blocks.15.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/15-res-matryoshka-dc
   - id: blocks.16.hook_resid_post
     path: standard/blocks.16.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/16-res-matryoshka-dc
   - id: blocks.17.hook_resid_post
     path: standard/blocks.17.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/17-res-matryoshka-dc
   - id: blocks.18.hook_resid_post
     path: standard/blocks.18.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/18-res-matryoshka-dc
   - id: blocks.19.hook_resid_post
     path: standard/blocks.19.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/19-res-matryoshka-dc
   - id: blocks.20.hook_resid_post
     path: standard/blocks.20.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/20-res-matryoshka-dc
   - id: blocks.21.hook_resid_post
     path: standard/blocks.21.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/21-res-matryoshka-dc
   - id: blocks.22.hook_resid_post
     path: standard/blocks.22.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/22-res-matryoshka-dc
   - id: blocks.23.hook_resid_post
     path: standard/blocks.23.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/23-res-matryoshka-dc
   - id: blocks.24.hook_resid_post
     path: standard/blocks.24.hook_resid_post
     l0: 40.0
+    neuronpedia: gemma-2-2b/24-res-matryoshka-dc
 gemma-2-2b-res-snap-matryoshka-dc:
   conversion_func: null
   links:
```
sae_lens/saes/sae.py:

```diff
@@ -732,6 +732,64 @@ class SAE(HookedRootModule, Generic[T_SAE_CONFIG], ABC):
     ) -> type[SAEConfig]:
         return SAEConfig
 
+    ### Methods to support deprecated usage of SAE.from_pretrained() ###
+
+    def __getitem__(self, index: int) -> Any:
+        """
+        Support indexing for backward compatibility with tuple unpacking.
+        DEPRECATED: SAE.from_pretrained() no longer returns a tuple.
+        Use SAE.from_pretrained_with_cfg_and_sparsity() instead.
+        """
+        warnings.warn(
+            "Indexing SAE objects is deprecated. SAE.from_pretrained() now returns "
+            "only the SAE object. Use SAE.from_pretrained_with_cfg_and_sparsity() "
+            "to get the config dict and sparsity as well.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        if index == 0:
+            return self
+        if index == 1:
+            return self.cfg.to_dict()
+        if index == 2:
+            return None
+        raise IndexError(f"SAE tuple index {index} out of range")
+
+    def __iter__(self):
+        """
+        Support unpacking for backward compatibility with tuple unpacking.
+        DEPRECATED: SAE.from_pretrained() no longer returns a tuple.
+        Use SAE.from_pretrained_with_cfg_and_sparsity() instead.
+        """
+        warnings.warn(
+            "Unpacking SAE objects is deprecated. SAE.from_pretrained() now returns "
+            "only the SAE object. Use SAE.from_pretrained_with_cfg_and_sparsity() "
+            "to get the config dict and sparsity as well.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        yield self
+        yield self.cfg.to_dict()
+        yield None
+
+    def __len__(self) -> int:
+        """
+        Support len() for backward compatibility with tuple unpacking.
+        DEPRECATED: SAE.from_pretrained() no longer returns a tuple.
+        Use SAE.from_pretrained_with_cfg_and_sparsity() instead.
+        """
+        warnings.warn(
+            "Getting length of SAE objects is deprecated. SAE.from_pretrained() now returns "
+            "only the SAE object. Use SAE.from_pretrained_with_cfg_and_sparsity() "
+            "to get the config dict and sparsity as well.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        return 3
+
 
 @dataclass(kw_only=True)
 class TrainingSAEConfig(SAEConfig, ABC):
```
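The shim keeps old call sites working while steering users to the new API. A short sketch of both styles, assuming from_pretrained_with_cfg_and_sparsity() mirrors from_pretrained()'s signature; the release and SAE id below are placeholders to substitute with your own:

```python
from sae_lens import SAE

# New style: from_pretrained() returns only the SAE object.
sae = SAE.from_pretrained("some-release", "some-sae-id")

# Old tuple-unpacking style still works via __iter__/__getitem__/__len__ above,
# but now emits a DeprecationWarning; the sparsity slot is always None here.
sae, cfg_dict, sparsity = SAE.from_pretrained("some-release", "some-sae-id")

# Preferred replacement when the config dict and sparsity are actually needed.
sae, cfg_dict, sparsity = SAE.from_pretrained_with_cfg_and_sparsity(
    "some-release", "some-sae-id"
)
```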
sae_lens/training/activations_store.py:

```diff
@@ -428,7 +428,7 @@ class ActivationsStore:
         ):
             # temporalily set estimated_norm_scaling_factor to 1.0 so the dataloader works
             self.estimated_norm_scaling_factor = 1.0
-            acts = self.next_batch()[0]
+            acts = self.next_batch()[:, 0]
             self.estimated_norm_scaling_factor = None
             norms_per_batch.append(acts.norm(dim=-1).mean().item())
         mean_norm = np.mean(norms_per_batch)
```
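The norm-estimation fix changes which slice of the batch gets averaged. A small sketch of the difference, assuming next_batch() yields activations shaped [batch, n_hook_points, d_in]:

```python
import torch

# Hypothetical activation batch: 8 samples, 1 hook point, d_in = 4.
acts_batch = torch.randn(8, 1, 4)

old_slice = acts_batch[0]     # shape [1, 4]: only the first sample's activations
new_slice = acts_batch[:, 0]  # shape [8, 4]: every sample at the first hook point

# The norm scaling factor is now estimated from the whole batch rather than a single row.
print(old_slice.norm(dim=-1).mean().item(), new_slice.norm(dim=-1).mean().item())
```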