sae-lens 6.5.3__tar.gz → 6.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sae_lens-6.5.3 → sae_lens-6.6.1}/PKG-INFO +8 -13
- {sae_lens-6.5.3 → sae_lens-6.6.1}/pyproject.toml +8 -13
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/__init__.py +1 -1
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/analysis/hooked_sae_transformer.py +13 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/cache_activations_runner.py +1 -1
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/evals.py +6 -4
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/loading/pretrained_sae_loaders.py +79 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/pretrained_saes.yaml +662 -1
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/activations_store.py +1 -1
- {sae_lens-6.5.3 → sae_lens-6.6.1}/LICENSE +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/README.md +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/analysis/__init__.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/analysis/neuronpedia_integration.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/config.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/constants.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/llm_sae_training_runner.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/load_model.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/loading/__init__.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/loading/pretrained_saes_directory.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/pretokenize_runner.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/registry.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/__init__.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/batchtopk_sae.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/gated_sae.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/jumprelu_sae.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/sae.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/standard_sae.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/topk_sae.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/saes/transcoder.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/tokenization_and_batching.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/__init__.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/activation_scaler.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/mixing_buffer.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/optim.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/sae_trainer.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/types.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/training/upload_saes_to_huggingface.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/tutorial/tsea.py +0 -0
- {sae_lens-6.5.3 → sae_lens-6.6.1}/sae_lens/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sae-lens
|
|
3
|
-
Version: 6.5.3
|
|
3
|
+
Version: 6.6.1
|
|
4
4
|
Summary: Training and Analyzing Sparse Autoencoders (SAEs)
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: deep-learning,sparse-autoencoders,mechanistic-interpretability,PyTorch
|
|
@@ -16,24 +16,19 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
16
16
|
Provides-Extra: mamba
|
|
17
17
|
Requires-Dist: automated-interpretability (>=0.0.5,<1.0.0)
|
|
18
18
|
Requires-Dist: babe (>=0.0.7,<0.0.8)
|
|
19
|
-
Requires-Dist: datasets (>=3.1.0,<4.0.0)
|
|
19
|
+
Requires-Dist: datasets (>=3.1.0)
|
|
20
20
|
Requires-Dist: mamba-lens (>=0.0.4,<0.0.5) ; extra == "mamba"
|
|
21
|
-
Requires-Dist: matplotlib (>=3.8.3,<4.0.0)
|
|
22
|
-
Requires-Dist: matplotlib-inline (>=0.1.6,<0.2.0)
|
|
23
21
|
Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
|
24
|
-
Requires-Dist: plotly (>=5.19.0
|
|
25
|
-
Requires-Dist: plotly-express (>=0.4.1
|
|
26
|
-
Requires-Dist: pytest-profiling (>=1.7.0,<2.0.0)
|
|
27
|
-
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
|
22
|
+
Requires-Dist: plotly (>=5.19.0)
|
|
23
|
+
Requires-Dist: plotly-express (>=0.4.1)
|
|
24
|
+
Requires-Dist: python-dotenv (>=1.0.1)
|
|
28
25
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
29
|
-
Requires-Dist: pyzmq (==26.0.0)
|
|
30
|
-
Requires-Dist: safetensors (>=0.4.2,<0.5.0)
|
|
26
|
+
Requires-Dist: safetensors (>=0.4.2,<1.0.0)
|
|
31
27
|
Requires-Dist: simple-parsing (>=0.1.6,<0.2.0)
|
|
32
|
-
Requires-Dist:
|
|
28
|
+
Requires-Dist: tenacity (>=9.0.0)
|
|
29
|
+
Requires-Dist: transformer-lens (>=2.16.1,<3.0.0)
|
|
33
30
|
Requires-Dist: transformers (>=4.38.1,<5.0.0)
|
|
34
|
-
Requires-Dist: typer (>=0.12.3,<0.13.0)
|
|
35
31
|
Requires-Dist: typing-extensions (>=4.10.0,<5.0.0)
|
|
36
|
-
Requires-Dist: zstandard (>=0.22.0,<0.23.0)
|
|
37
32
|
Project-URL: Homepage, https://jbloomaus.github.io/SAELens
|
|
38
33
|
Project-URL: Repository, https://github.com/jbloomAus/SAELens
|
|
39
34
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "sae-lens"
|
|
3
|
-
version = "6.5.3"
|
|
3
|
+
version = "6.6.1"
|
|
4
4
|
description = "Training and Analyzing Sparse Autoencoders (SAEs)"
|
|
5
5
|
authors = ["Joseph Bloom"]
|
|
6
6
|
readme = "README.md"
|
|
@@ -19,26 +19,21 @@ classifiers = ["Topic :: Scientific/Engineering :: Artificial Intelligence"]
|
|
|
19
19
|
|
|
20
20
|
[tool.poetry.dependencies]
|
|
21
21
|
python = "^3.10"
|
|
22
|
-
transformer-lens = "^2.
|
|
22
|
+
transformer-lens = "^2.16.1"
|
|
23
23
|
transformers = "^4.38.1"
|
|
24
|
-
plotly = "
|
|
25
|
-
plotly-express = "
|
|
26
|
-
|
|
27
|
-
matplotlib-inline = "^0.1.6"
|
|
28
|
-
datasets = "^3.1.0"
|
|
24
|
+
plotly = ">=5.19.0"
|
|
25
|
+
plotly-express = ">=0.4.1"
|
|
26
|
+
datasets = ">=3.1.0"
|
|
29
27
|
babe = "^0.0.7"
|
|
30
28
|
nltk = "^3.8.1"
|
|
31
|
-
safetensors = "^0.4.2"
|
|
32
|
-
typer = "^0.12.3"
|
|
29
|
+
safetensors = ">=0.4.2,<1.0.0"
|
|
33
30
|
mamba-lens = { version = "^0.0.4", optional = true }
|
|
34
|
-
pyzmq = "26.0.0"
|
|
35
31
|
automated-interpretability = ">=0.0.5,<1.0.0"
|
|
36
|
-
python-dotenv = "^1.0.1"
|
|
32
|
+
python-dotenv = ">=1.0.1"
|
|
37
33
|
pyyaml = "^6.0.1"
|
|
38
|
-
pytest-profiling = "^1.7.0"
|
|
39
|
-
zstandard = "^0.22.0"
|
|
40
34
|
typing-extensions = "^4.10.0"
|
|
41
35
|
simple-parsing = "^0.1.6"
|
|
36
|
+
tenacity = ">=9.0.0"
|
|
42
37
|
|
|
43
38
|
[tool.poetry.group.dev.dependencies]
|
|
44
39
|
pytest = "^8.0.2"
|
|
@@ -5,6 +5,7 @@ from typing import Any, Callable
|
|
|
5
5
|
import torch
|
|
6
6
|
from jaxtyping import Float
|
|
7
7
|
from transformer_lens.ActivationCache import ActivationCache
|
|
8
|
+
from transformer_lens.components.mlps.can_be_used_as_mlp import CanBeUsedAsMLP
|
|
8
9
|
from transformer_lens.hook_points import HookPoint # Hooking utilities
|
|
9
10
|
from transformer_lens.HookedTransformer import HookedTransformer
|
|
10
11
|
|
|
@@ -50,6 +51,13 @@ def set_deep_attr(obj: Any, path: str, value: Any):
|
|
|
50
51
|
setattr(obj, parts[-1], value)
|
|
51
52
|
|
|
52
53
|
|
|
54
|
+
def add_hook_in_to_mlp(mlp: CanBeUsedAsMLP):
|
|
55
|
+
# Temporary hack to add a `mlp.hook_in` hook to mimic what's in circuit-tracer
|
|
56
|
+
mlp.hook_in = HookPoint()
|
|
57
|
+
original_forward = mlp.forward
|
|
58
|
+
mlp.forward = lambda x: original_forward(mlp.hook_in(x)) # type: ignore
|
|
59
|
+
|
|
60
|
+
|
|
53
61
|
class HookedSAETransformer(HookedTransformer):
|
|
54
62
|
def __init__(
|
|
55
63
|
self,
|
|
@@ -66,6 +74,11 @@ class HookedSAETransformer(HookedTransformer):
|
|
|
66
74
|
**model_kwargs: Keyword arguments for HookedTransformer initialization
|
|
67
75
|
"""
|
|
68
76
|
super().__init__(*model_args, **model_kwargs)
|
|
77
|
+
|
|
78
|
+
for block in self.blocks:
|
|
79
|
+
add_hook_in_to_mlp(block.mlp) # type: ignore
|
|
80
|
+
self.setup()
|
|
81
|
+
|
|
69
82
|
self.acts_to_saes: dict[str, SAE] = {} # type: ignore
|
|
70
83
|
|
|
71
84
|
def add_sae(self, sae: SAE[Any], use_error_term: bool | None = None):
|
|
@@ -82,7 +82,7 @@ class CacheActivationsRunner:
|
|
|
82
82
|
)
|
|
83
83
|
for hook_name in [self.cfg.hook_name]
|
|
84
84
|
}
|
|
85
|
-
features_dict["token_ids"] = Sequence(
|
|
85
|
+
features_dict["token_ids"] = Sequence( # type: ignore
|
|
86
86
|
Value(dtype="int32"), length=self.context_size
|
|
87
87
|
)
|
|
88
88
|
self.features = Features(features_dict)
|
|
@@ -459,14 +459,16 @@ def get_sparsity_and_variance_metrics(
|
|
|
459
459
|
original_act = cache[hook_name]
|
|
460
460
|
|
|
461
461
|
# normalise if necessary (necessary in training only, otherwise we should fold the scaling in)
|
|
462
|
-
|
|
462
|
+
original_act_scaled = activation_scaler.scale(original_act)
|
|
463
463
|
|
|
464
464
|
# send the (maybe normalised) activations into the SAE
|
|
465
|
-
sae_feature_activations = sae.encode(
|
|
466
|
-
|
|
465
|
+
sae_feature_activations = sae.encode(original_act_scaled.to(sae.device))
|
|
466
|
+
sae_out_scaled = sae.decode(sae_feature_activations).to(
|
|
467
|
+
original_act_scaled.device
|
|
468
|
+
)
|
|
467
469
|
del cache
|
|
468
470
|
|
|
469
|
-
sae_out = activation_scaler.unscale(
|
|
471
|
+
sae_out = activation_scaler.unscale(sae_out_scaled)
|
|
470
472
|
|
|
471
473
|
flattened_sae_input = einops.rearrange(original_act, "b ctx d -> (b ctx) d")
|
|
472
474
|
flattened_sae_feature_acts = einops.rearrange(
|
|
@@ -5,6 +5,7 @@ from typing import Any, Protocol
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import torch
|
|
8
|
+
import yaml
|
|
8
9
|
from huggingface_hub import hf_hub_download
|
|
9
10
|
from huggingface_hub.utils import EntryNotFoundError
|
|
10
11
|
from packaging.version import Version
|
|
@@ -1232,6 +1233,82 @@ def gemma_2_transcoder_huggingface_loader(
|
|
|
1232
1233
|
return cfg_dict, state_dict, None
|
|
1233
1234
|
|
|
1234
1235
|
|
|
1236
|
+
def get_mwhanna_transcoder_config_from_hf(
|
|
1237
|
+
repo_id: str,
|
|
1238
|
+
folder_name: str,
|
|
1239
|
+
device: str | None = None,
|
|
1240
|
+
force_download: bool = False, # noqa: ARG001
|
|
1241
|
+
cfg_overrides: dict[str, Any] | None = None,
|
|
1242
|
+
) -> dict[str, Any]:
|
|
1243
|
+
"""Get config for mwhanna transcoders"""
|
|
1244
|
+
|
|
1245
|
+
# Extract layer from folder name
|
|
1246
|
+
layer = int(folder_name.replace(".safetensors", "").split("_")[-1])
|
|
1247
|
+
|
|
1248
|
+
wandb_config_path = hf_hub_download(
|
|
1249
|
+
repo_id, "wanb-config.yaml", force_download=force_download
|
|
1250
|
+
)
|
|
1251
|
+
base_config_path = hf_hub_download(
|
|
1252
|
+
repo_id, "config.yaml", force_download=force_download
|
|
1253
|
+
)
|
|
1254
|
+
with open(base_config_path) as f:
|
|
1255
|
+
base_cfg_info: dict[str, Any] = yaml.safe_load(f)
|
|
1256
|
+
with open(wandb_config_path) as f:
|
|
1257
|
+
wandb_cfg_info: dict[str, Any] = yaml.safe_load(f)
|
|
1258
|
+
|
|
1259
|
+
return {
|
|
1260
|
+
"architecture": "transcoder",
|
|
1261
|
+
"d_in": wandb_cfg_info["d_model"]["value"],
|
|
1262
|
+
"d_out": wandb_cfg_info["d_model"]["value"],
|
|
1263
|
+
"d_sae": wandb_cfg_info["d_feature"]["value"],
|
|
1264
|
+
"dtype": "float32",
|
|
1265
|
+
"device": device if device is not None else "cpu",
|
|
1266
|
+
"activation_fn": "relu",
|
|
1267
|
+
"normalize_activations": "none",
|
|
1268
|
+
"model_name": base_cfg_info["model_name"],
|
|
1269
|
+
"hook_name": f"blocks.{layer}.mlp.hook_in",
|
|
1270
|
+
"hook_name_out": f"blocks.{layer}.hook_mlp_out",
|
|
1271
|
+
"dataset_path": "monology/pile-uncopyrighted",
|
|
1272
|
+
"context_size": wandb_cfg_info["batch_size"]["value"],
|
|
1273
|
+
"apply_b_dec_to_input": False,
|
|
1274
|
+
"model_from_pretrained_kwargs": {"fold_ln": False},
|
|
1275
|
+
**(cfg_overrides or {}),
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
|
|
1279
|
+
def mwhanna_transcoder_huggingface_loader(
|
|
1280
|
+
repo_id: str,
|
|
1281
|
+
folder_name: str,
|
|
1282
|
+
device: str = "cpu",
|
|
1283
|
+
force_download: bool = False,
|
|
1284
|
+
cfg_overrides: dict[str, Any] | None = None,
|
|
1285
|
+
) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
|
|
1286
|
+
"""Load mwhanna transcoders from HuggingFace"""
|
|
1287
|
+
cfg_dict = get_mwhanna_transcoder_config_from_hf(
|
|
1288
|
+
repo_id,
|
|
1289
|
+
folder_name,
|
|
1290
|
+
device,
|
|
1291
|
+
force_download,
|
|
1292
|
+
cfg_overrides,
|
|
1293
|
+
)
|
|
1294
|
+
|
|
1295
|
+
# Download the safetensors file
|
|
1296
|
+
revision = cfg_overrides.get("revision", None) if cfg_overrides else None
|
|
1297
|
+
|
|
1298
|
+
file_path = hf_hub_download(
|
|
1299
|
+
repo_id=repo_id,
|
|
1300
|
+
filename=folder_name,
|
|
1301
|
+
force_download=force_download,
|
|
1302
|
+
revision=revision,
|
|
1303
|
+
)
|
|
1304
|
+
|
|
1305
|
+
# Load weights from safetensors
|
|
1306
|
+
state_dict = load_file(file_path, device=device)
|
|
1307
|
+
state_dict["W_enc"] = state_dict["W_enc"].T
|
|
1308
|
+
|
|
1309
|
+
return cfg_dict, state_dict, None
|
|
1310
|
+
|
|
1311
|
+
|
|
1235
1312
|
NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
|
|
1236
1313
|
"sae_lens": sae_lens_huggingface_loader,
|
|
1237
1314
|
"connor_rob_hook_z": connor_rob_hook_z_huggingface_loader,
|
|
@@ -1242,6 +1319,7 @@ NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
|
|
|
1242
1319
|
"deepseek_r1": deepseek_r1_sae_huggingface_loader,
|
|
1243
1320
|
"sparsify": sparsify_huggingface_loader,
|
|
1244
1321
|
"gemma_2_transcoder": gemma_2_transcoder_huggingface_loader,
|
|
1322
|
+
"mwhanna_transcoder": mwhanna_transcoder_huggingface_loader,
|
|
1245
1323
|
}
|
|
1246
1324
|
|
|
1247
1325
|
|
|
@@ -1255,4 +1333,5 @@ NAMED_PRETRAINED_SAE_CONFIG_GETTERS: dict[str, PretrainedSaeConfigHuggingfaceLoa
|
|
|
1255
1333
|
"deepseek_r1": get_deepseek_r1_config_from_hf,
|
|
1256
1334
|
"sparsify": get_sparsify_config_from_hf,
|
|
1257
1335
|
"gemma_2_transcoder": get_gemma_2_transcoder_config_from_hf,
|
|
1336
|
+
"mwhanna_transcoder": get_mwhanna_transcoder_config_from_hf,
|
|
1258
1337
|
}
|
|
@@ -14083,4 +14083,665 @@ gemma-scope-2b-pt-transcoders:
|
|
|
14083
14083
|
- id: layer_25/width_16k/average_l0_41
|
|
14084
14084
|
neuronpedia: gemma-2-2b/25-gemmascope-transcoder-16k
|
|
14085
14085
|
l0: 41
|
|
14086
|
-
path: layer_25/width_16k/average_l0_41
|
|
14086
|
+
path: layer_25/width_16k/average_l0_41
|
|
14087
|
+
|
|
14088
|
+
|
|
14089
|
+
mwhanna-qwen3-4b-transcoders:
|
|
14090
|
+
conversion_func: mwhanna_transcoder
|
|
14091
|
+
model: qwen3-4b
|
|
14092
|
+
repo_id: mwhanna/qwen3-4b-transcoders
|
|
14093
|
+
saes:
|
|
14094
|
+
- id: layer_0
|
|
14095
|
+
path: layer_0.safetensors
|
|
14096
|
+
neuronpedia: qwen3-4b/0-transcoder-hp
|
|
14097
|
+
- id: layer_1
|
|
14098
|
+
path: layer_1.safetensors
|
|
14099
|
+
neuronpedia: qwen3-4b/1-transcoder-hp
|
|
14100
|
+
- id: layer_2
|
|
14101
|
+
path: layer_2.safetensors
|
|
14102
|
+
neuronpedia: qwen3-4b/2-transcoder-hp
|
|
14103
|
+
- id: layer_3
|
|
14104
|
+
path: layer_3.safetensors
|
|
14105
|
+
neuronpedia: qwen3-4b/3-transcoder-hp
|
|
14106
|
+
- id: layer_4
|
|
14107
|
+
path: layer_4.safetensors
|
|
14108
|
+
neuronpedia: qwen3-4b/4-transcoder-hp
|
|
14109
|
+
- id: layer_5
|
|
14110
|
+
path: layer_5.safetensors
|
|
14111
|
+
neuronpedia: qwen3-4b/5-transcoder-hp
|
|
14112
|
+
- id: layer_6
|
|
14113
|
+
path: layer_6.safetensors
|
|
14114
|
+
neuronpedia: qwen3-4b/6-transcoder-hp
|
|
14115
|
+
- id: layer_7
|
|
14116
|
+
path: layer_7.safetensors
|
|
14117
|
+
neuronpedia: qwen3-4b/7-transcoder-hp
|
|
14118
|
+
- id: layer_8
|
|
14119
|
+
path: layer_8.safetensors
|
|
14120
|
+
neuronpedia: qwen3-4b/8-transcoder-hp
|
|
14121
|
+
- id: layer_9
|
|
14122
|
+
path: layer_9.safetensors
|
|
14123
|
+
neuronpedia: qwen3-4b/9-transcoder-hp
|
|
14124
|
+
- id: layer_10
|
|
14125
|
+
path: layer_10.safetensors
|
|
14126
|
+
neuronpedia: qwen3-4b/10-transcoder-hp
|
|
14127
|
+
- id: layer_11
|
|
14128
|
+
path: layer_11.safetensors
|
|
14129
|
+
neuronpedia: qwen3-4b/11-transcoder-hp
|
|
14130
|
+
- id: layer_12
|
|
14131
|
+
path: layer_12.safetensors
|
|
14132
|
+
neuronpedia: qwen3-4b/12-transcoder-hp
|
|
14133
|
+
- id: layer_13
|
|
14134
|
+
path: layer_13.safetensors
|
|
14135
|
+
neuronpedia: qwen3-4b/13-transcoder-hp
|
|
14136
|
+
- id: layer_14
|
|
14137
|
+
path: layer_14.safetensors
|
|
14138
|
+
neuronpedia: qwen3-4b/14-transcoder-hp
|
|
14139
|
+
- id: layer_15
|
|
14140
|
+
path: layer_15.safetensors
|
|
14141
|
+
neuronpedia: qwen3-4b/15-transcoder-hp
|
|
14142
|
+
- id: layer_16
|
|
14143
|
+
path: layer_16.safetensors
|
|
14144
|
+
neuronpedia: qwen3-4b/16-transcoder-hp
|
|
14145
|
+
- id: layer_17
|
|
14146
|
+
path: layer_17.safetensors
|
|
14147
|
+
neuronpedia: qwen3-4b/17-transcoder-hp
|
|
14148
|
+
- id: layer_18
|
|
14149
|
+
path: layer_18.safetensors
|
|
14150
|
+
neuronpedia: qwen3-4b/18-transcoder-hp
|
|
14151
|
+
- id: layer_19
|
|
14152
|
+
path: layer_19.safetensors
|
|
14153
|
+
neuronpedia: qwen3-4b/19-transcoder-hp
|
|
14154
|
+
- id: layer_20
|
|
14155
|
+
path: layer_20.safetensors
|
|
14156
|
+
neuronpedia: qwen3-4b/20-transcoder-hp
|
|
14157
|
+
- id: layer_21
|
|
14158
|
+
path: layer_21.safetensors
|
|
14159
|
+
neuronpedia: qwen3-4b/21-transcoder-hp
|
|
14160
|
+
- id: layer_22
|
|
14161
|
+
path: layer_22.safetensors
|
|
14162
|
+
neuronpedia: qwen3-4b/22-transcoder-hp
|
|
14163
|
+
- id: layer_23
|
|
14164
|
+
path: layer_23.safetensors
|
|
14165
|
+
neuronpedia: qwen3-4b/23-transcoder-hp
|
|
14166
|
+
- id: layer_24
|
|
14167
|
+
path: layer_24.safetensors
|
|
14168
|
+
neuronpedia: qwen3-4b/24-transcoder-hp
|
|
14169
|
+
- id: layer_25
|
|
14170
|
+
path: layer_25.safetensors
|
|
14171
|
+
neuronpedia: qwen3-4b/25-transcoder-hp
|
|
14172
|
+
- id: layer_26
|
|
14173
|
+
path: layer_26.safetensors
|
|
14174
|
+
neuronpedia: qwen3-4b/26-transcoder-hp
|
|
14175
|
+
- id: layer_27
|
|
14176
|
+
path: layer_27.safetensors
|
|
14177
|
+
neuronpedia: qwen3-4b/27-transcoder-hp
|
|
14178
|
+
- id: layer_28
|
|
14179
|
+
path: layer_28.safetensors
|
|
14180
|
+
neuronpedia: qwen3-4b/28-transcoder-hp
|
|
14181
|
+
- id: layer_29
|
|
14182
|
+
path: layer_29.safetensors
|
|
14183
|
+
neuronpedia: qwen3-4b/29-transcoder-hp
|
|
14184
|
+
- id: layer_30
|
|
14185
|
+
path: layer_30.safetensors
|
|
14186
|
+
neuronpedia: qwen3-4b/30-transcoder-hp
|
|
14187
|
+
- id: layer_31
|
|
14188
|
+
path: layer_31.safetensors
|
|
14189
|
+
neuronpedia: qwen3-4b/31-transcoder-hp
|
|
14190
|
+
- id: layer_32
|
|
14191
|
+
path: layer_32.safetensors
|
|
14192
|
+
neuronpedia: qwen3-4b/32-transcoder-hp
|
|
14193
|
+
- id: layer_33
|
|
14194
|
+
path: layer_33.safetensors
|
|
14195
|
+
neuronpedia: qwen3-4b/33-transcoder-hp
|
|
14196
|
+
- id: layer_34
|
|
14197
|
+
path: layer_34.safetensors
|
|
14198
|
+
neuronpedia: qwen3-4b/34-transcoder-hp
|
|
14199
|
+
- id: layer_35
|
|
14200
|
+
path: layer_35.safetensors
|
|
14201
|
+
neuronpedia: qwen3-4b/35-transcoder-hp
|
|
14202
|
+
|
|
14203
|
+
mwhanna-qwen3-8b-transcoders:
|
|
14204
|
+
conversion_func: mwhanna_transcoder
|
|
14205
|
+
model: qwen3-8b
|
|
14206
|
+
repo_id: mwhanna/qwen3-8b-transcoders
|
|
14207
|
+
saes:
|
|
14208
|
+
- id: layer_0
|
|
14209
|
+
path: layer_0.safetensors
|
|
14210
|
+
neuronpedia: qwen3-8b/0-transcoder-hp
|
|
14211
|
+
- id: layer_1
|
|
14212
|
+
path: layer_1.safetensors
|
|
14213
|
+
neuronpedia: qwen3-8b/1-transcoder-hp
|
|
14214
|
+
- id: layer_2
|
|
14215
|
+
path: layer_2.safetensors
|
|
14216
|
+
neuronpedia: qwen3-8b/2-transcoder-hp
|
|
14217
|
+
- id: layer_3
|
|
14218
|
+
path: layer_3.safetensors
|
|
14219
|
+
neuronpedia: qwen3-8b/3-transcoder-hp
|
|
14220
|
+
- id: layer_4
|
|
14221
|
+
path: layer_4.safetensors
|
|
14222
|
+
neuronpedia: qwen3-8b/4-transcoder-hp
|
|
14223
|
+
- id: layer_5
|
|
14224
|
+
path: layer_5.safetensors
|
|
14225
|
+
neuronpedia: qwen3-8b/5-transcoder-hp
|
|
14226
|
+
- id: layer_6
|
|
14227
|
+
path: layer_6.safetensors
|
|
14228
|
+
neuronpedia: qwen3-8b/6-transcoder-hp
|
|
14229
|
+
- id: layer_7
|
|
14230
|
+
path: layer_7.safetensors
|
|
14231
|
+
neuronpedia: qwen3-8b/7-transcoder-hp
|
|
14232
|
+
- id: layer_8
|
|
14233
|
+
path: layer_8.safetensors
|
|
14234
|
+
neuronpedia: qwen3-8b/8-transcoder-hp
|
|
14235
|
+
- id: layer_9
|
|
14236
|
+
path: layer_9.safetensors
|
|
14237
|
+
neuronpedia: qwen3-8b/9-transcoder-hp
|
|
14238
|
+
- id: layer_10
|
|
14239
|
+
path: layer_10.safetensors
|
|
14240
|
+
neuronpedia: qwen3-8b/10-transcoder-hp
|
|
14241
|
+
- id: layer_11
|
|
14242
|
+
path: layer_11.safetensors
|
|
14243
|
+
neuronpedia: qwen3-8b/11-transcoder-hp
|
|
14244
|
+
- id: layer_12
|
|
14245
|
+
path: layer_12.safetensors
|
|
14246
|
+
neuronpedia: qwen3-8b/12-transcoder-hp
|
|
14247
|
+
- id: layer_13
|
|
14248
|
+
path: layer_13.safetensors
|
|
14249
|
+
neuronpedia: qwen3-8b/13-transcoder-hp
|
|
14250
|
+
- id: layer_14
|
|
14251
|
+
path: layer_14.safetensors
|
|
14252
|
+
neuronpedia: qwen3-8b/14-transcoder-hp
|
|
14253
|
+
- id: layer_15
|
|
14254
|
+
path: layer_15.safetensors
|
|
14255
|
+
neuronpedia: qwen3-8b/15-transcoder-hp
|
|
14256
|
+
- id: layer_16
|
|
14257
|
+
path: layer_16.safetensors
|
|
14258
|
+
neuronpedia: qwen3-8b/16-transcoder-hp
|
|
14259
|
+
- id: layer_17
|
|
14260
|
+
path: layer_17.safetensors
|
|
14261
|
+
neuronpedia: qwen3-8b/17-transcoder-hp
|
|
14262
|
+
- id: layer_18
|
|
14263
|
+
path: layer_18.safetensors
|
|
14264
|
+
neuronpedia: qwen3-8b/18-transcoder-hp
|
|
14265
|
+
- id: layer_19
|
|
14266
|
+
path: layer_19.safetensors
|
|
14267
|
+
neuronpedia: qwen3-8b/19-transcoder-hp
|
|
14268
|
+
- id: layer_20
|
|
14269
|
+
path: layer_20.safetensors
|
|
14270
|
+
neuronpedia: qwen3-8b/20-transcoder-hp
|
|
14271
|
+
- id: layer_21
|
|
14272
|
+
path: layer_21.safetensors
|
|
14273
|
+
neuronpedia: qwen3-8b/21-transcoder-hp
|
|
14274
|
+
- id: layer_22
|
|
14275
|
+
path: layer_22.safetensors
|
|
14276
|
+
neuronpedia: qwen3-8b/22-transcoder-hp
|
|
14277
|
+
- id: layer_23
|
|
14278
|
+
path: layer_23.safetensors
|
|
14279
|
+
neuronpedia: qwen3-8b/23-transcoder-hp
|
|
14280
|
+
- id: layer_24
|
|
14281
|
+
path: layer_24.safetensors
|
|
14282
|
+
neuronpedia: qwen3-8b/24-transcoder-hp
|
|
14283
|
+
- id: layer_25
|
|
14284
|
+
path: layer_25.safetensors
|
|
14285
|
+
neuronpedia: qwen3-8b/25-transcoder-hp
|
|
14286
|
+
- id: layer_26
|
|
14287
|
+
path: layer_26.safetensors
|
|
14288
|
+
neuronpedia: qwen3-8b/26-transcoder-hp
|
|
14289
|
+
- id: layer_27
|
|
14290
|
+
path: layer_27.safetensors
|
|
14291
|
+
neuronpedia: qwen3-8b/27-transcoder-hp
|
|
14292
|
+
- id: layer_28
|
|
14293
|
+
path: layer_28.safetensors
|
|
14294
|
+
neuronpedia: qwen3-8b/28-transcoder-hp
|
|
14295
|
+
- id: layer_29
|
|
14296
|
+
path: layer_29.safetensors
|
|
14297
|
+
neuronpedia: qwen3-8b/29-transcoder-hp
|
|
14298
|
+
- id: layer_30
|
|
14299
|
+
path: layer_30.safetensors
|
|
14300
|
+
neuronpedia: qwen3-8b/30-transcoder-hp
|
|
14301
|
+
- id: layer_31
|
|
14302
|
+
path: layer_31.safetensors
|
|
14303
|
+
neuronpedia: qwen3-8b/31-transcoder-hp
|
|
14304
|
+
- id: layer_32
|
|
14305
|
+
path: layer_32.safetensors
|
|
14306
|
+
neuronpedia: qwen3-8b/32-transcoder-hp
|
|
14307
|
+
- id: layer_33
|
|
14308
|
+
path: layer_33.safetensors
|
|
14309
|
+
neuronpedia: qwen3-8b/33-transcoder-hp
|
|
14310
|
+
- id: layer_34
|
|
14311
|
+
path: layer_34.safetensors
|
|
14312
|
+
neuronpedia: qwen3-8b/34-transcoder-hp
|
|
14313
|
+
- id: layer_35
|
|
14314
|
+
path: layer_35.safetensors
|
|
14315
|
+
neuronpedia: qwen3-8b/35-transcoder-hp
|
|
14316
|
+
|
|
14317
|
+
mwhanna-qwen3-14b-transcoders:
|
|
14318
|
+
conversion_func: mwhanna_transcoder
|
|
14319
|
+
model: qwen3-14b
|
|
14320
|
+
repo_id: mwhanna/qwen3-14b-transcoders
|
|
14321
|
+
saes:
|
|
14322
|
+
- id: layer_0
|
|
14323
|
+
path: layer_0.safetensors
|
|
14324
|
+
neuronpedia: qwen3-14b/0-transcoder-hp
|
|
14325
|
+
- id: layer_1
|
|
14326
|
+
path: layer_1.safetensors
|
|
14327
|
+
neuronpedia: qwen3-14b/1-transcoder-hp
|
|
14328
|
+
- id: layer_2
|
|
14329
|
+
path: layer_2.safetensors
|
|
14330
|
+
neuronpedia: qwen3-14b/2-transcoder-hp
|
|
14331
|
+
- id: layer_3
|
|
14332
|
+
path: layer_3.safetensors
|
|
14333
|
+
neuronpedia: qwen3-14b/3-transcoder-hp
|
|
14334
|
+
- id: layer_4
|
|
14335
|
+
path: layer_4.safetensors
|
|
14336
|
+
neuronpedia: qwen3-14b/4-transcoder-hp
|
|
14337
|
+
- id: layer_5
|
|
14338
|
+
path: layer_5.safetensors
|
|
14339
|
+
neuronpedia: qwen3-14b/5-transcoder-hp
|
|
14340
|
+
- id: layer_6
|
|
14341
|
+
path: layer_6.safetensors
|
|
14342
|
+
neuronpedia: qwen3-14b/6-transcoder-hp
|
|
14343
|
+
- id: layer_7
|
|
14344
|
+
path: layer_7.safetensors
|
|
14345
|
+
neuronpedia: qwen3-14b/7-transcoder-hp
|
|
14346
|
+
- id: layer_8
|
|
14347
|
+
path: layer_8.safetensors
|
|
14348
|
+
neuronpedia: qwen3-14b/8-transcoder-hp
|
|
14349
|
+
- id: layer_9
|
|
14350
|
+
path: layer_9.safetensors
|
|
14351
|
+
neuronpedia: qwen3-14b/9-transcoder-hp
|
|
14352
|
+
- id: layer_10
|
|
14353
|
+
path: layer_10.safetensors
|
|
14354
|
+
neuronpedia: qwen3-14b/10-transcoder-hp
|
|
14355
|
+
- id: layer_11
|
|
14356
|
+
path: layer_11.safetensors
|
|
14357
|
+
neuronpedia: qwen3-14b/11-transcoder-hp
|
|
14358
|
+
- id: layer_12
|
|
14359
|
+
path: layer_12.safetensors
|
|
14360
|
+
neuronpedia: qwen3-14b/12-transcoder-hp
|
|
14361
|
+
- id: layer_13
|
|
14362
|
+
path: layer_13.safetensors
|
|
14363
|
+
neuronpedia: qwen3-14b/13-transcoder-hp
|
|
14364
|
+
- id: layer_14
|
|
14365
|
+
path: layer_14.safetensors
|
|
14366
|
+
neuronpedia: qwen3-14b/14-transcoder-hp
|
|
14367
|
+
- id: layer_15
|
|
14368
|
+
path: layer_15.safetensors
|
|
14369
|
+
neuronpedia: qwen3-14b/15-transcoder-hp
|
|
14370
|
+
- id: layer_16
|
|
14371
|
+
path: layer_16.safetensors
|
|
14372
|
+
neuronpedia: qwen3-14b/16-transcoder-hp
|
|
14373
|
+
- id: layer_17
|
|
14374
|
+
path: layer_17.safetensors
|
|
14375
|
+
neuronpedia: qwen3-14b/17-transcoder-hp
|
|
14376
|
+
- id: layer_18
|
|
14377
|
+
path: layer_18.safetensors
|
|
14378
|
+
neuronpedia: qwen3-14b/18-transcoder-hp
|
|
14379
|
+
- id: layer_19
|
|
14380
|
+
path: layer_19.safetensors
|
|
14381
|
+
neuronpedia: qwen3-14b/19-transcoder-hp
|
|
14382
|
+
- id: layer_20
|
|
14383
|
+
path: layer_20.safetensors
|
|
14384
|
+
neuronpedia: qwen3-14b/20-transcoder-hp
|
|
14385
|
+
- id: layer_21
|
|
14386
|
+
path: layer_21.safetensors
|
|
14387
|
+
neuronpedia: qwen3-14b/21-transcoder-hp
|
|
14388
|
+
- id: layer_22
|
|
14389
|
+
path: layer_22.safetensors
|
|
14390
|
+
neuronpedia: qwen3-14b/22-transcoder-hp
|
|
14391
|
+
- id: layer_23
|
|
14392
|
+
path: layer_23.safetensors
|
|
14393
|
+
neuronpedia: qwen3-14b/23-transcoder-hp
|
|
14394
|
+
- id: layer_24
|
|
14395
|
+
path: layer_24.safetensors
|
|
14396
|
+
neuronpedia: qwen3-14b/24-transcoder-hp
|
|
14397
|
+
- id: layer_25
|
|
14398
|
+
path: layer_25.safetensors
|
|
14399
|
+
neuronpedia: qwen3-14b/25-transcoder-hp
|
|
14400
|
+
- id: layer_26
|
|
14401
|
+
path: layer_26.safetensors
|
|
14402
|
+
neuronpedia: qwen3-14b/26-transcoder-hp
|
|
14403
|
+
- id: layer_27
|
|
14404
|
+
path: layer_27.safetensors
|
|
14405
|
+
neuronpedia: qwen3-14b/27-transcoder-hp
|
|
14406
|
+
- id: layer_28
|
|
14407
|
+
path: layer_28.safetensors
|
|
14408
|
+
neuronpedia: qwen3-14b/28-transcoder-hp
|
|
14409
|
+
- id: layer_29
|
|
14410
|
+
path: layer_29.safetensors
|
|
14411
|
+
neuronpedia: qwen3-14b/29-transcoder-hp
|
|
14412
|
+
- id: layer_30
|
|
14413
|
+
path: layer_30.safetensors
|
|
14414
|
+
neuronpedia: qwen3-14b/30-transcoder-hp
|
|
14415
|
+
- id: layer_31
|
|
14416
|
+
path: layer_31.safetensors
|
|
14417
|
+
neuronpedia: qwen3-14b/31-transcoder-hp
|
|
14418
|
+
- id: layer_32
|
|
14419
|
+
path: layer_32.safetensors
|
|
14420
|
+
neuronpedia: qwen3-14b/32-transcoder-hp
|
|
14421
|
+
- id: layer_33
|
|
14422
|
+
path: layer_33.safetensors
|
|
14423
|
+
neuronpedia: qwen3-14b/33-transcoder-hp
|
|
14424
|
+
- id: layer_34
|
|
14425
|
+
path: layer_34.safetensors
|
|
14426
|
+
neuronpedia: qwen3-14b/34-transcoder-hp
|
|
14427
|
+
- id: layer_35
|
|
14428
|
+
path: layer_35.safetensors
|
|
14429
|
+
neuronpedia: qwen3-14b/35-transcoder-hp
|
|
14430
|
+
- id: layer_36
|
|
14431
|
+
path: layer_36.safetensors
|
|
14432
|
+
neuronpedia: qwen3-14b/36-transcoder-hp
|
|
14433
|
+
- id: layer_37
|
|
14434
|
+
path: layer_37.safetensors
|
|
14435
|
+
neuronpedia: qwen3-14b/37-transcoder-hp
|
|
14436
|
+
- id: layer_38
|
|
14437
|
+
path: layer_38.safetensors
|
|
14438
|
+
neuronpedia: qwen3-14b/38-transcoder-hp
|
|
14439
|
+
- id: layer_39
|
|
14440
|
+
path: layer_39.safetensors
|
|
14441
|
+
neuronpedia: qwen3-14b/39-transcoder-hp
|
|
14442
|
+
|
|
14443
|
+
mwhanna-qwen3-14b-transcoders-lowl0:
|
|
14444
|
+
conversion_func: mwhanna_transcoder
|
|
14445
|
+
model: qwen3-14b
|
|
14446
|
+
repo_id: mwhanna/qwen3-14b-transcoders-lowl0
|
|
14447
|
+
saes:
|
|
14448
|
+
- id: layer_0
|
|
14449
|
+
path: layer_0.safetensors
|
|
14450
|
+
neuronpedia: qwen3-14b/0-transcoder-hp-lowl0
|
|
14451
|
+
- id: layer_1
|
|
14452
|
+
path: layer_1.safetensors
|
|
14453
|
+
neuronpedia: qwen3-14b/1-transcoder-hp-lowl0
|
|
14454
|
+
- id: layer_2
|
|
14455
|
+
path: layer_2.safetensors
|
|
14456
|
+
neuronpedia: qwen3-14b/2-transcoder-hp-lowl0
|
|
14457
|
+
- id: layer_3
|
|
14458
|
+
path: layer_3.safetensors
|
|
14459
|
+
neuronpedia: qwen3-14b/3-transcoder-hp-lowl0
|
|
14460
|
+
- id: layer_4
|
|
14461
|
+
path: layer_4.safetensors
|
|
14462
|
+
neuronpedia: qwen3-14b/4-transcoder-hp-lowl0
|
|
14463
|
+
- id: layer_5
|
|
14464
|
+
path: layer_5.safetensors
|
|
14465
|
+
neuronpedia: qwen3-14b/5-transcoder-hp-lowl0
|
|
14466
|
+
- id: layer_6
|
|
14467
|
+
path: layer_6.safetensors
|
|
14468
|
+
neuronpedia: qwen3-14b/6-transcoder-hp-lowl0
|
|
14469
|
+
- id: layer_7
|
|
14470
|
+
path: layer_7.safetensors
|
|
14471
|
+
neuronpedia: qwen3-14b/7-transcoder-hp-lowl0
|
|
14472
|
+
- id: layer_8
|
|
14473
|
+
path: layer_8.safetensors
|
|
14474
|
+
neuronpedia: qwen3-14b/8-transcoder-hp-lowl0
|
|
14475
|
+
- id: layer_9
|
|
14476
|
+
path: layer_9.safetensors
|
|
14477
|
+
neuronpedia: qwen3-14b/9-transcoder-hp-lowl0
|
|
14478
|
+
- id: layer_10
|
|
14479
|
+
path: layer_10.safetensors
|
|
14480
|
+
neuronpedia: qwen3-14b/10-transcoder-hp-lowl0
|
|
14481
|
+
- id: layer_11
|
|
14482
|
+
path: layer_11.safetensors
|
|
14483
|
+
neuronpedia: qwen3-14b/11-transcoder-hp-lowl0
|
|
14484
|
+
- id: layer_12
|
|
14485
|
+
path: layer_12.safetensors
|
|
14486
|
+
neuronpedia: qwen3-14b/12-transcoder-hp-lowl0
|
|
14487
|
+
- id: layer_13
|
|
14488
|
+
path: layer_13.safetensors
|
|
14489
|
+
neuronpedia: qwen3-14b/13-transcoder-hp-lowl0
|
|
14490
|
+
- id: layer_14
|
|
14491
|
+
path: layer_14.safetensors
|
|
14492
|
+
neuronpedia: qwen3-14b/14-transcoder-hp-lowl0
|
|
14493
|
+
- id: layer_15
|
|
14494
|
+
path: layer_15.safetensors
|
|
14495
|
+
neuronpedia: qwen3-14b/15-transcoder-hp-lowl0
|
|
14496
|
+
- id: layer_16
|
|
14497
|
+
path: layer_16.safetensors
|
|
14498
|
+
neuronpedia: qwen3-14b/16-transcoder-hp-lowl0
|
|
14499
|
+
- id: layer_17
|
|
14500
|
+
path: layer_17.safetensors
|
|
14501
|
+
neuronpedia: qwen3-14b/17-transcoder-hp-lowl0
|
|
14502
|
+
- id: layer_18
|
|
14503
|
+
path: layer_18.safetensors
|
|
14504
|
+
neuronpedia: qwen3-14b/18-transcoder-hp-lowl0
|
|
14505
|
+
- id: layer_19
|
|
14506
|
+
path: layer_19.safetensors
|
|
14507
|
+
neuronpedia: qwen3-14b/19-transcoder-hp-lowl0
|
|
14508
|
+
- id: layer_20
|
|
14509
|
+
path: layer_20.safetensors
|
|
14510
|
+
neuronpedia: qwen3-14b/20-transcoder-hp-lowl0
|
|
14511
|
+
- id: layer_21
|
|
14512
|
+
path: layer_21.safetensors
|
|
14513
|
+
neuronpedia: qwen3-14b/21-transcoder-hp-lowl0
|
|
14514
|
+
- id: layer_22
|
|
14515
|
+
path: layer_22.safetensors
|
|
14516
|
+
neuronpedia: qwen3-14b/22-transcoder-hp-lowl0
|
|
14517
|
+
- id: layer_23
|
|
14518
|
+
path: layer_23.safetensors
|
|
14519
|
+
neuronpedia: qwen3-14b/23-transcoder-hp-lowl0
|
|
14520
|
+
- id: layer_24
|
|
14521
|
+
path: layer_24.safetensors
|
|
14522
|
+
neuronpedia: qwen3-14b/24-transcoder-hp-lowl0
|
|
14523
|
+
- id: layer_25
|
|
14524
|
+
path: layer_25.safetensors
|
|
14525
|
+
neuronpedia: qwen3-14b/25-transcoder-hp-lowl0
|
|
14526
|
+
- id: layer_26
|
|
14527
|
+
path: layer_26.safetensors
|
|
14528
|
+
neuronpedia: qwen3-14b/26-transcoder-hp-lowl0
|
|
14529
|
+
- id: layer_27
|
|
14530
|
+
path: layer_27.safetensors
|
|
14531
|
+
neuronpedia: qwen3-14b/27-transcoder-hp-lowl0
|
|
14532
|
+
- id: layer_28
|
|
14533
|
+
path: layer_28.safetensors
|
|
14534
|
+
neuronpedia: qwen3-14b/28-transcoder-hp-lowl0
|
|
14535
|
+
- id: layer_29
|
|
14536
|
+
path: layer_29.safetensors
|
|
14537
|
+
neuronpedia: qwen3-14b/29-transcoder-hp-lowl0
|
|
14538
|
+
- id: layer_30
|
|
14539
|
+
path: layer_30.safetensors
|
|
14540
|
+
neuronpedia: qwen3-14b/30-transcoder-hp-lowl0
|
|
14541
|
+
- id: layer_31
|
|
14542
|
+
path: layer_31.safetensors
|
|
14543
|
+
neuronpedia: qwen3-14b/31-transcoder-hp-lowl0
|
|
14544
|
+
- id: layer_32
|
|
14545
|
+
path: layer_32.safetensors
|
|
14546
|
+
neuronpedia: qwen3-14b/32-transcoder-hp-lowl0
|
|
14547
|
+
- id: layer_33
|
|
14548
|
+
path: layer_33.safetensors
|
|
14549
|
+
neuronpedia: qwen3-14b/33-transcoder-hp-lowl0
|
|
14550
|
+
- id: layer_34
|
|
14551
|
+
path: layer_34.safetensors
|
|
14552
|
+
neuronpedia: qwen3-14b/34-transcoder-hp-lowl0
|
|
14553
|
+
- id: layer_35
|
|
14554
|
+
path: layer_35.safetensors
|
|
14555
|
+
neuronpedia: qwen3-14b/35-transcoder-hp-lowl0
|
|
14556
|
+
- id: layer_36
|
|
14557
|
+
path: layer_36.safetensors
|
|
14558
|
+
neuronpedia: qwen3-14b/36-transcoder-hp-lowl0
|
|
14559
|
+
- id: layer_37
|
|
14560
|
+
path: layer_37.safetensors
|
|
14561
|
+
neuronpedia: qwen3-14b/37-transcoder-hp-lowl0
|
|
14562
|
+
- id: layer_38
|
|
14563
|
+
path: layer_38.safetensors
|
|
14564
|
+
neuronpedia: qwen3-14b/38-transcoder-hp-lowl0
|
|
14565
|
+
- id: layer_39
|
|
14566
|
+
path: layer_39.safetensors
|
|
14567
|
+
neuronpedia: qwen3-14b/39-transcoder-hp-lowl0
|
|
14568
|
+
|
|
14569
|
+
mwhanna-qwen3-1.7b-transcoders-lowl0:
|
|
14570
|
+
conversion_func: mwhanna_transcoder
|
|
14571
|
+
model: qwen3-1.7b
|
|
14572
|
+
repo_id: mwhanna/qwen3-1.7b-transcoders-lowl0
|
|
14573
|
+
saes:
|
|
14574
|
+
- id: layer_0
|
|
14575
|
+
path: layer_0.safetensors
|
|
14576
|
+
neuronpedia: qwen3-1.7b/0-transcoder-hp-lowl0
|
|
14577
|
+
- id: layer_1
|
|
14578
|
+
path: layer_1.safetensors
|
|
14579
|
+
neuronpedia: qwen3-1.7b/1-transcoder-hp-lowl0
|
|
14580
|
+
- id: layer_2
|
|
14581
|
+
path: layer_2.safetensors
|
|
14582
|
+
neuronpedia: qwen3-1.7b/2-transcoder-hp-lowl0
|
|
14583
|
+
- id: layer_3
|
|
14584
|
+
path: layer_3.safetensors
|
|
14585
|
+
neuronpedia: qwen3-1.7b/3-transcoder-hp-lowl0
|
|
14586
|
+
- id: layer_4
|
|
14587
|
+
path: layer_4.safetensors
|
|
14588
|
+
neuronpedia: qwen3-1.7b/4-transcoder-hp-lowl0
|
|
14589
|
+
- id: layer_5
|
|
14590
|
+
path: layer_5.safetensors
|
|
14591
|
+
neuronpedia: qwen3-1.7b/5-transcoder-hp-lowl0
|
|
14592
|
+
- id: layer_6
|
|
14593
|
+
path: layer_6.safetensors
|
|
14594
|
+
neuronpedia: qwen3-1.7b/6-transcoder-hp-lowl0
|
|
14595
|
+
- id: layer_7
|
|
14596
|
+
path: layer_7.safetensors
|
|
14597
|
+
neuronpedia: qwen3-1.7b/7-transcoder-hp-lowl0
|
|
14598
|
+
- id: layer_8
|
|
14599
|
+
path: layer_8.safetensors
|
|
14600
|
+
neuronpedia: qwen3-1.7b/8-transcoder-hp-lowl0
|
|
14601
|
+
- id: layer_9
|
|
14602
|
+
path: layer_9.safetensors
|
|
14603
|
+
neuronpedia: qwen3-1.7b/9-transcoder-hp-lowl0
|
|
14604
|
+
- id: layer_10
|
|
14605
|
+
path: layer_10.safetensors
|
|
14606
|
+
neuronpedia: qwen3-1.7b/10-transcoder-hp-lowl0
|
|
14607
|
+
- id: layer_11
|
|
14608
|
+
path: layer_11.safetensors
|
|
14609
|
+
neuronpedia: qwen3-1.7b/11-transcoder-hp-lowl0
|
|
14610
|
+
- id: layer_12
|
|
14611
|
+
path: layer_12.safetensors
|
|
14612
|
+
neuronpedia: qwen3-1.7b/12-transcoder-hp-lowl0
|
|
14613
|
+
- id: layer_13
|
|
14614
|
+
path: layer_13.safetensors
|
|
14615
|
+
neuronpedia: qwen3-1.7b/13-transcoder-hp-lowl0
|
|
14616
|
+
- id: layer_14
|
|
14617
|
+
path: layer_14.safetensors
|
|
14618
|
+
neuronpedia: qwen3-1.7b/14-transcoder-hp-lowl0
|
|
14619
|
+
- id: layer_15
|
|
14620
|
+
path: layer_15.safetensors
|
|
14621
|
+
neuronpedia: qwen3-1.7b/15-transcoder-hp-lowl0
|
|
14622
|
+
- id: layer_16
|
|
14623
|
+
path: layer_16.safetensors
|
|
14624
|
+
neuronpedia: qwen3-1.7b/16-transcoder-hp-lowl0
|
|
14625
|
+
- id: layer_17
|
|
14626
|
+
path: layer_17.safetensors
|
|
14627
|
+
neuronpedia: qwen3-1.7b/17-transcoder-hp-lowl0
|
|
14628
|
+
- id: layer_18
|
|
14629
|
+
path: layer_18.safetensors
|
|
14630
|
+
neuronpedia: qwen3-1.7b/18-transcoder-hp-lowl0
|
|
14631
|
+
- id: layer_19
|
|
14632
|
+
path: layer_19.safetensors
|
|
14633
|
+
neuronpedia: qwen3-1.7b/19-transcoder-hp-lowl0
|
|
14634
|
+
- id: layer_20
|
|
14635
|
+
path: layer_20.safetensors
|
|
14636
|
+
neuronpedia: qwen3-1.7b/20-transcoder-hp-lowl0
|
|
14637
|
+
- id: layer_21
|
|
14638
|
+
path: layer_21.safetensors
|
|
14639
|
+
neuronpedia: qwen3-1.7b/21-transcoder-hp-lowl0
|
|
14640
|
+
- id: layer_22
|
|
14641
|
+
path: layer_22.safetensors
|
|
14642
|
+
neuronpedia: qwen3-1.7b/22-transcoder-hp-lowl0
|
|
14643
|
+
- id: layer_23
|
|
14644
|
+
path: layer_23.safetensors
|
|
14645
|
+
neuronpedia: qwen3-1.7b/23-transcoder-hp-lowl0
|
|
14646
|
+
- id: layer_24
|
|
14647
|
+
path: layer_24.safetensors
|
|
14648
|
+
neuronpedia: qwen3-1.7b/24-transcoder-hp-lowl0
|
|
14649
|
+
- id: layer_25
|
|
14650
|
+
path: layer_25.safetensors
|
|
14651
|
+
neuronpedia: qwen3-1.7b/25-transcoder-hp-lowl0
|
|
14652
|
+
- id: layer_26
|
|
14653
|
+
path: layer_26.safetensors
|
|
14654
|
+
neuronpedia: qwen3-1.7b/26-transcoder-hp-lowl0
|
|
14655
|
+
- id: layer_27
|
|
14656
|
+
path: layer_27.safetensors
|
|
14657
|
+
neuronpedia: qwen3-1.7b/27-transcoder-hp-lowl0
|
|
14658
|
+
|
|
14659
|
+
mwhanna-qwen3-0.6b-transcoders-lowl0:
|
|
14660
|
+
conversion_func: mwhanna_transcoder
|
|
14661
|
+
model: qwen3-0.6b
|
|
14662
|
+
repo_id: mwhanna/qwen3-0.6b-transcoders-lowl0
|
|
14663
|
+
saes:
|
|
14664
|
+
- id: layer_0
|
|
14665
|
+
path: layer_0.safetensors
|
|
14666
|
+
neuronpedia: qwen3-0.6b/0-transcoder-hp-lowl0
|
|
14667
|
+
- id: layer_1
|
|
14668
|
+
path: layer_1.safetensors
|
|
14669
|
+
neuronpedia: qwen3-0.6b/1-transcoder-hp-lowl0
|
|
14670
|
+
- id: layer_2
|
|
14671
|
+
path: layer_2.safetensors
|
|
14672
|
+
neuronpedia: qwen3-0.6b/2-transcoder-hp-lowl0
|
|
14673
|
+
- id: layer_3
|
|
14674
|
+
path: layer_3.safetensors
|
|
14675
|
+
neuronpedia: qwen3-0.6b/3-transcoder-hp-lowl0
|
|
14676
|
+
- id: layer_4
|
|
14677
|
+
path: layer_4.safetensors
|
|
14678
|
+
neuronpedia: qwen3-0.6b/4-transcoder-hp-lowl0
|
|
14679
|
+
- id: layer_5
|
|
14680
|
+
path: layer_5.safetensors
|
|
14681
|
+
neuronpedia: qwen3-0.6b/5-transcoder-hp-lowl0
|
|
14682
|
+
- id: layer_6
|
|
14683
|
+
path: layer_6.safetensors
|
|
14684
|
+
neuronpedia: qwen3-0.6b/6-transcoder-hp-lowl0
|
|
14685
|
+
- id: layer_7
|
|
14686
|
+
path: layer_7.safetensors
|
|
14687
|
+
neuronpedia: qwen3-0.6b/7-transcoder-hp-lowl0
|
|
14688
|
+
- id: layer_8
|
|
14689
|
+
path: layer_8.safetensors
|
|
14690
|
+
neuronpedia: qwen3-0.6b/8-transcoder-hp-lowl0
|
|
14691
|
+
- id: layer_9
|
|
14692
|
+
path: layer_9.safetensors
|
|
14693
|
+
neuronpedia: qwen3-0.6b/9-transcoder-hp-lowl0
|
|
14694
|
+
- id: layer_10
|
|
14695
|
+
path: layer_10.safetensors
|
|
14696
|
+
neuronpedia: qwen3-0.6b/10-transcoder-hp-lowl0
|
|
14697
|
+
- id: layer_11
|
|
14698
|
+
path: layer_11.safetensors
|
|
14699
|
+
neuronpedia: qwen3-0.6b/11-transcoder-hp-lowl0
|
|
14700
|
+
- id: layer_12
|
|
14701
|
+
path: layer_12.safetensors
|
|
14702
|
+
neuronpedia: qwen3-0.6b/12-transcoder-hp-lowl0
|
|
14703
|
+
- id: layer_13
|
|
14704
|
+
path: layer_13.safetensors
|
|
14705
|
+
neuronpedia: qwen3-0.6b/13-transcoder-hp-lowl0
|
|
14706
|
+
- id: layer_14
|
|
14707
|
+
path: layer_14.safetensors
|
|
14708
|
+
neuronpedia: qwen3-0.6b/14-transcoder-hp-lowl0
|
|
14709
|
+
- id: layer_15
|
|
14710
|
+
path: layer_15.safetensors
|
|
14711
|
+
neuronpedia: qwen3-0.6b/15-transcoder-hp-lowl0
|
|
14712
|
+
- id: layer_16
|
|
14713
|
+
path: layer_16.safetensors
|
|
14714
|
+
neuronpedia: qwen3-0.6b/16-transcoder-hp-lowl0
|
|
14715
|
+
- id: layer_17
|
|
14716
|
+
path: layer_17.safetensors
|
|
14717
|
+
neuronpedia: qwen3-0.6b/17-transcoder-hp-lowl0
|
|
14718
|
+
- id: layer_18
|
|
14719
|
+
path: layer_18.safetensors
|
|
14720
|
+
neuronpedia: qwen3-0.6b/18-transcoder-hp-lowl0
|
|
14721
|
+
- id: layer_19
|
|
14722
|
+
path: layer_19.safetensors
|
|
14723
|
+
neuronpedia: qwen3-0.6b/19-transcoder-hp-lowl0
|
|
14724
|
+
- id: layer_20
|
|
14725
|
+
path: layer_20.safetensors
|
|
14726
|
+
neuronpedia: qwen3-0.6b/20-transcoder-hp-lowl0
|
|
14727
|
+
- id: layer_21
|
|
14728
|
+
path: layer_21.safetensors
|
|
14729
|
+
neuronpedia: qwen3-0.6b/21-transcoder-hp-lowl0
|
|
14730
|
+
- id: layer_22
|
|
14731
|
+
path: layer_22.safetensors
|
|
14732
|
+
neuronpedia: qwen3-0.6b/22-transcoder-hp-lowl0
|
|
14733
|
+
- id: layer_23
|
|
14734
|
+
path: layer_23.safetensors
|
|
14735
|
+
neuronpedia: qwen3-0.6b/23-transcoder-hp-lowl0
|
|
14736
|
+
- id: layer_24
|
|
14737
|
+
path: layer_24.safetensors
|
|
14738
|
+
neuronpedia: qwen3-0.6b/24-transcoder-hp-lowl0
|
|
14739
|
+
- id: layer_25
|
|
14740
|
+
path: layer_25.safetensors
|
|
14741
|
+
neuronpedia: qwen3-0.6b/25-transcoder-hp-lowl0
|
|
14742
|
+
- id: layer_26
|
|
14743
|
+
path: layer_26.safetensors
|
|
14744
|
+
neuronpedia: qwen3-0.6b/26-transcoder-hp-lowl0
|
|
14745
|
+
- id: layer_27
|
|
14746
|
+
path: layer_27.safetensors
|
|
14747
|
+
neuronpedia: qwen3-0.6b/27-transcoder-hp-lowl0
|
|
@@ -289,7 +289,7 @@ class ActivationsStore:
|
|
|
289
289
|
"Dataset must have a 'tokens', 'input_ids', 'text', or 'problem' column."
|
|
290
290
|
)
|
|
291
291
|
if self.is_dataset_tokenized:
|
|
292
|
-
ds_context_size = len(dataset_sample[self.tokens_column])
|
|
292
|
+
ds_context_size = len(dataset_sample[self.tokens_column]) # type: ignore
|
|
293
293
|
if ds_context_size < self.context_size:
|
|
294
294
|
raise ValueError(
|
|
295
295
|
f"""pretokenized dataset has context_size {ds_context_size}, but the provided context_size is {self.context_size}.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|