sae-lens 6.5.3__py3-none-any.whl → 6.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +1 -1
- sae_lens/analysis/hooked_sae_transformer.py +13 -0
- sae_lens/loading/pretrained_sae_loaders.py +79 -0
- sae_lens/pretrained_saes.yaml +662 -1
- {sae_lens-6.5.3.dist-info → sae_lens-6.6.0.dist-info}/METADATA +2 -2
- {sae_lens-6.5.3.dist-info → sae_lens-6.6.0.dist-info}/RECORD +8 -8
- {sae_lens-6.5.3.dist-info → sae_lens-6.6.0.dist-info}/LICENSE +0 -0
- {sae_lens-6.5.3.dist-info → sae_lens-6.6.0.dist-info}/WHEEL +0 -0
sae_lens/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Any, Callable
|
|
|
5
5
|
import torch
|
|
6
6
|
from jaxtyping import Float
|
|
7
7
|
from transformer_lens.ActivationCache import ActivationCache
|
|
8
|
+
from transformer_lens.components.mlps.can_be_used_as_mlp import CanBeUsedAsMLP
|
|
8
9
|
from transformer_lens.hook_points import HookPoint # Hooking utilities
|
|
9
10
|
from transformer_lens.HookedTransformer import HookedTransformer
|
|
10
11
|
|
|
@@ -50,6 +51,13 @@ def set_deep_attr(obj: Any, path: str, value: Any):
|
|
|
50
51
|
setattr(obj, parts[-1], value)
|
|
51
52
|
|
|
52
53
|
|
|
54
|
+
def add_hook_in_to_mlp(mlp: CanBeUsedAsMLP):
|
|
55
|
+
# Temporary hack to add a `mlp.hook_in` hook to mimic what's in circuit-tracer
|
|
56
|
+
mlp.hook_in = HookPoint()
|
|
57
|
+
original_forward = mlp.forward
|
|
58
|
+
mlp.forward = lambda x: original_forward(mlp.hook_in(x)) # type: ignore
|
|
59
|
+
|
|
60
|
+
|
|
53
61
|
class HookedSAETransformer(HookedTransformer):
|
|
54
62
|
def __init__(
|
|
55
63
|
self,
|
|
@@ -66,6 +74,11 @@ class HookedSAETransformer(HookedTransformer):
|
|
|
66
74
|
**model_kwargs: Keyword arguments for HookedTransformer initialization
|
|
67
75
|
"""
|
|
68
76
|
super().__init__(*model_args, **model_kwargs)
|
|
77
|
+
|
|
78
|
+
for block in self.blocks:
|
|
79
|
+
add_hook_in_to_mlp(block.mlp) # type: ignore
|
|
80
|
+
self.setup()
|
|
81
|
+
|
|
69
82
|
self.acts_to_saes: dict[str, SAE] = {} # type: ignore
|
|
70
83
|
|
|
71
84
|
def add_sae(self, sae: SAE[Any], use_error_term: bool | None = None):
|
|
@@ -5,6 +5,7 @@ from typing import Any, Protocol
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import torch
|
|
8
|
+
import yaml
|
|
8
9
|
from huggingface_hub import hf_hub_download
|
|
9
10
|
from huggingface_hub.utils import EntryNotFoundError
|
|
10
11
|
from packaging.version import Version
|
|
@@ -1232,6 +1233,82 @@ def gemma_2_transcoder_huggingface_loader(
|
|
|
1232
1233
|
return cfg_dict, state_dict, None
|
|
1233
1234
|
|
|
1234
1235
|
|
|
1236
|
+
def get_mwhanna_transcoder_config_from_hf(
|
|
1237
|
+
repo_id: str,
|
|
1238
|
+
folder_name: str,
|
|
1239
|
+
device: str | None = None,
|
|
1240
|
+
force_download: bool = False, # noqa: ARG001
|
|
1241
|
+
cfg_overrides: dict[str, Any] | None = None,
|
|
1242
|
+
) -> dict[str, Any]:
|
|
1243
|
+
"""Get config for mwhanna transcoders"""
|
|
1244
|
+
|
|
1245
|
+
# Extract layer from folder name
|
|
1246
|
+
layer = int(folder_name.replace(".safetensors", "").split("_")[-1])
|
|
1247
|
+
|
|
1248
|
+
wandb_config_path = hf_hub_download(
|
|
1249
|
+
repo_id, "wanb-config.yaml", force_download=force_download
|
|
1250
|
+
)
|
|
1251
|
+
base_config_path = hf_hub_download(
|
|
1252
|
+
repo_id, "config.yaml", force_download=force_download
|
|
1253
|
+
)
|
|
1254
|
+
with open(base_config_path) as f:
|
|
1255
|
+
base_cfg_info: dict[str, Any] = yaml.safe_load(f)
|
|
1256
|
+
with open(wandb_config_path) as f:
|
|
1257
|
+
wandb_cfg_info: dict[str, Any] = yaml.safe_load(f)
|
|
1258
|
+
|
|
1259
|
+
return {
|
|
1260
|
+
"architecture": "transcoder",
|
|
1261
|
+
"d_in": wandb_cfg_info["d_model"]["value"],
|
|
1262
|
+
"d_out": wandb_cfg_info["d_model"]["value"],
|
|
1263
|
+
"d_sae": wandb_cfg_info["d_feature"]["value"],
|
|
1264
|
+
"dtype": "float32",
|
|
1265
|
+
"device": device if device is not None else "cpu",
|
|
1266
|
+
"activation_fn": "relu",
|
|
1267
|
+
"normalize_activations": "none",
|
|
1268
|
+
"model_name": base_cfg_info["model_name"],
|
|
1269
|
+
"hook_name": f"blocks.{layer}.mlp.hook_in",
|
|
1270
|
+
"hook_name_out": f"blocks.{layer}.hook_mlp_out",
|
|
1271
|
+
"dataset_path": "monology/pile-uncopyrighted",
|
|
1272
|
+
"context_size": wandb_cfg_info["batch_size"]["value"],
|
|
1273
|
+
"apply_b_dec_to_input": False,
|
|
1274
|
+
"model_from_pretrained_kwargs": {"fold_ln": False},
|
|
1275
|
+
**(cfg_overrides or {}),
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
|
|
1279
|
+
def mwhanna_transcoder_huggingface_loader(
|
|
1280
|
+
repo_id: str,
|
|
1281
|
+
folder_name: str,
|
|
1282
|
+
device: str = "cpu",
|
|
1283
|
+
force_download: bool = False,
|
|
1284
|
+
cfg_overrides: dict[str, Any] | None = None,
|
|
1285
|
+
) -> tuple[dict[str, Any], dict[str, torch.Tensor], torch.Tensor | None]:
|
|
1286
|
+
"""Load mwhanna transcoders from HuggingFace"""
|
|
1287
|
+
cfg_dict = get_mwhanna_transcoder_config_from_hf(
|
|
1288
|
+
repo_id,
|
|
1289
|
+
folder_name,
|
|
1290
|
+
device,
|
|
1291
|
+
force_download,
|
|
1292
|
+
cfg_overrides,
|
|
1293
|
+
)
|
|
1294
|
+
|
|
1295
|
+
# Download the safetensors file
|
|
1296
|
+
revision = cfg_overrides.get("revision", None) if cfg_overrides else None
|
|
1297
|
+
|
|
1298
|
+
file_path = hf_hub_download(
|
|
1299
|
+
repo_id=repo_id,
|
|
1300
|
+
filename=folder_name,
|
|
1301
|
+
force_download=force_download,
|
|
1302
|
+
revision=revision,
|
|
1303
|
+
)
|
|
1304
|
+
|
|
1305
|
+
# Load weights from safetensors
|
|
1306
|
+
state_dict = load_file(file_path, device=device)
|
|
1307
|
+
state_dict["W_enc"] = state_dict["W_enc"].T
|
|
1308
|
+
|
|
1309
|
+
return cfg_dict, state_dict, None
|
|
1310
|
+
|
|
1311
|
+
|
|
1235
1312
|
NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
|
|
1236
1313
|
"sae_lens": sae_lens_huggingface_loader,
|
|
1237
1314
|
"connor_rob_hook_z": connor_rob_hook_z_huggingface_loader,
|
|
@@ -1242,6 +1319,7 @@ NAMED_PRETRAINED_SAE_LOADERS: dict[str, PretrainedSaeHuggingfaceLoader] = {
|
|
|
1242
1319
|
"deepseek_r1": deepseek_r1_sae_huggingface_loader,
|
|
1243
1320
|
"sparsify": sparsify_huggingface_loader,
|
|
1244
1321
|
"gemma_2_transcoder": gemma_2_transcoder_huggingface_loader,
|
|
1322
|
+
"mwhanna_transcoder": mwhanna_transcoder_huggingface_loader,
|
|
1245
1323
|
}
|
|
1246
1324
|
|
|
1247
1325
|
|
|
@@ -1255,4 +1333,5 @@ NAMED_PRETRAINED_SAE_CONFIG_GETTERS: dict[str, PretrainedSaeConfigHuggingfaceLoa
|
|
|
1255
1333
|
"deepseek_r1": get_deepseek_r1_config_from_hf,
|
|
1256
1334
|
"sparsify": get_sparsify_config_from_hf,
|
|
1257
1335
|
"gemma_2_transcoder": get_gemma_2_transcoder_config_from_hf,
|
|
1336
|
+
"mwhanna_transcoder": get_mwhanna_transcoder_config_from_hf,
|
|
1258
1337
|
}
|
sae_lens/pretrained_saes.yaml
CHANGED
|
@@ -14083,4 +14083,665 @@ gemma-scope-2b-pt-transcoders:
|
|
|
14083
14083
|
- id: layer_25/width_16k/average_l0_41
|
|
14084
14084
|
neuronpedia: gemma-2-2b/25-gemmascope-transcoder-16k
|
|
14085
14085
|
l0: 41
|
|
14086
|
-
path: layer_25/width_16k/average_l0_41
|
|
14086
|
+
path: layer_25/width_16k/average_l0_41
|
|
14087
|
+
|
|
14088
|
+
|
|
14089
|
+
mwhanna-qwen3-4b-transcoders:
|
|
14090
|
+
conversion_func: mwhanna_transcoder
|
|
14091
|
+
model: qwen3-4b
|
|
14092
|
+
repo_id: mwhanna/qwen3-4b-transcoders
|
|
14093
|
+
saes:
|
|
14094
|
+
- id: layer_0
|
|
14095
|
+
path: layer_0.safetensors
|
|
14096
|
+
neuronpedia: qwen3-4b/0-transcoder-hp
|
|
14097
|
+
- id: layer_1
|
|
14098
|
+
path: layer_1.safetensors
|
|
14099
|
+
neuronpedia: qwen3-4b/1-transcoder-hp
|
|
14100
|
+
- id: layer_2
|
|
14101
|
+
path: layer_2.safetensors
|
|
14102
|
+
neuronpedia: qwen3-4b/2-transcoder-hp
|
|
14103
|
+
- id: layer_3
|
|
14104
|
+
path: layer_3.safetensors
|
|
14105
|
+
neuronpedia: qwen3-4b/3-transcoder-hp
|
|
14106
|
+
- id: layer_4
|
|
14107
|
+
path: layer_4.safetensors
|
|
14108
|
+
neuronpedia: qwen3-4b/4-transcoder-hp
|
|
14109
|
+
- id: layer_5
|
|
14110
|
+
path: layer_5.safetensors
|
|
14111
|
+
neuronpedia: qwen3-4b/5-transcoder-hp
|
|
14112
|
+
- id: layer_6
|
|
14113
|
+
path: layer_6.safetensors
|
|
14114
|
+
neuronpedia: qwen3-4b/6-transcoder-hp
|
|
14115
|
+
- id: layer_7
|
|
14116
|
+
path: layer_7.safetensors
|
|
14117
|
+
neuronpedia: qwen3-4b/7-transcoder-hp
|
|
14118
|
+
- id: layer_8
|
|
14119
|
+
path: layer_8.safetensors
|
|
14120
|
+
neuronpedia: qwen3-4b/8-transcoder-hp
|
|
14121
|
+
- id: layer_9
|
|
14122
|
+
path: layer_9.safetensors
|
|
14123
|
+
neuronpedia: qwen3-4b/9-transcoder-hp
|
|
14124
|
+
- id: layer_10
|
|
14125
|
+
path: layer_10.safetensors
|
|
14126
|
+
neuronpedia: qwen3-4b/10-transcoder-hp
|
|
14127
|
+
- id: layer_11
|
|
14128
|
+
path: layer_11.safetensors
|
|
14129
|
+
neuronpedia: qwen3-4b/11-transcoder-hp
|
|
14130
|
+
- id: layer_12
|
|
14131
|
+
path: layer_12.safetensors
|
|
14132
|
+
neuronpedia: qwen3-4b/12-transcoder-hp
|
|
14133
|
+
- id: layer_13
|
|
14134
|
+
path: layer_13.safetensors
|
|
14135
|
+
neuronpedia: qwen3-4b/13-transcoder-hp
|
|
14136
|
+
- id: layer_14
|
|
14137
|
+
path: layer_14.safetensors
|
|
14138
|
+
neuronpedia: qwen3-4b/14-transcoder-hp
|
|
14139
|
+
- id: layer_15
|
|
14140
|
+
path: layer_15.safetensors
|
|
14141
|
+
neuronpedia: qwen3-4b/15-transcoder-hp
|
|
14142
|
+
- id: layer_16
|
|
14143
|
+
path: layer_16.safetensors
|
|
14144
|
+
neuronpedia: qwen3-4b/16-transcoder-hp
|
|
14145
|
+
- id: layer_17
|
|
14146
|
+
path: layer_17.safetensors
|
|
14147
|
+
neuronpedia: qwen3-4b/17-transcoder-hp
|
|
14148
|
+
- id: layer_18
|
|
14149
|
+
path: layer_18.safetensors
|
|
14150
|
+
neuronpedia: qwen3-4b/18-transcoder-hp
|
|
14151
|
+
- id: layer_19
|
|
14152
|
+
path: layer_19.safetensors
|
|
14153
|
+
neuronpedia: qwen3-4b/19-transcoder-hp
|
|
14154
|
+
- id: layer_20
|
|
14155
|
+
path: layer_20.safetensors
|
|
14156
|
+
neuronpedia: qwen3-4b/20-transcoder-hp
|
|
14157
|
+
- id: layer_21
|
|
14158
|
+
path: layer_21.safetensors
|
|
14159
|
+
neuronpedia: qwen3-4b/21-transcoder-hp
|
|
14160
|
+
- id: layer_22
|
|
14161
|
+
path: layer_22.safetensors
|
|
14162
|
+
neuronpedia: qwen3-4b/22-transcoder-hp
|
|
14163
|
+
- id: layer_23
|
|
14164
|
+
path: layer_23.safetensors
|
|
14165
|
+
neuronpedia: qwen3-4b/23-transcoder-hp
|
|
14166
|
+
- id: layer_24
|
|
14167
|
+
path: layer_24.safetensors
|
|
14168
|
+
neuronpedia: qwen3-4b/24-transcoder-hp
|
|
14169
|
+
- id: layer_25
|
|
14170
|
+
path: layer_25.safetensors
|
|
14171
|
+
neuronpedia: qwen3-4b/25-transcoder-hp
|
|
14172
|
+
- id: layer_26
|
|
14173
|
+
path: layer_26.safetensors
|
|
14174
|
+
neuronpedia: qwen3-4b/26-transcoder-hp
|
|
14175
|
+
- id: layer_27
|
|
14176
|
+
path: layer_27.safetensors
|
|
14177
|
+
neuronpedia: qwen3-4b/27-transcoder-hp
|
|
14178
|
+
- id: layer_28
|
|
14179
|
+
path: layer_28.safetensors
|
|
14180
|
+
neuronpedia: qwen3-4b/28-transcoder-hp
|
|
14181
|
+
- id: layer_29
|
|
14182
|
+
path: layer_29.safetensors
|
|
14183
|
+
neuronpedia: qwen3-4b/29-transcoder-hp
|
|
14184
|
+
- id: layer_30
|
|
14185
|
+
path: layer_30.safetensors
|
|
14186
|
+
neuronpedia: qwen3-4b/30-transcoder-hp
|
|
14187
|
+
- id: layer_31
|
|
14188
|
+
path: layer_31.safetensors
|
|
14189
|
+
neuronpedia: qwen3-4b/31-transcoder-hp
|
|
14190
|
+
- id: layer_32
|
|
14191
|
+
path: layer_32.safetensors
|
|
14192
|
+
neuronpedia: qwen3-4b/32-transcoder-hp
|
|
14193
|
+
- id: layer_33
|
|
14194
|
+
path: layer_33.safetensors
|
|
14195
|
+
neuronpedia: qwen3-4b/33-transcoder-hp
|
|
14196
|
+
- id: layer_34
|
|
14197
|
+
path: layer_34.safetensors
|
|
14198
|
+
neuronpedia: qwen3-4b/34-transcoder-hp
|
|
14199
|
+
- id: layer_35
|
|
14200
|
+
path: layer_35.safetensors
|
|
14201
|
+
neuronpedia: qwen3-4b/35-transcoder-hp
|
|
14202
|
+
|
|
14203
|
+
mwhanna-qwen3-8b-transcoders:
|
|
14204
|
+
conversion_func: mwhanna_transcoder
|
|
14205
|
+
model: qwen3-8b
|
|
14206
|
+
repo_id: mwhanna/qwen3-8b-transcoders
|
|
14207
|
+
saes:
|
|
14208
|
+
- id: layer_0
|
|
14209
|
+
path: layer_0.safetensors
|
|
14210
|
+
neuronpedia: qwen3-8b/0-transcoder-hp
|
|
14211
|
+
- id: layer_1
|
|
14212
|
+
path: layer_1.safetensors
|
|
14213
|
+
neuronpedia: qwen3-8b/1-transcoder-hp
|
|
14214
|
+
- id: layer_2
|
|
14215
|
+
path: layer_2.safetensors
|
|
14216
|
+
neuronpedia: qwen3-8b/2-transcoder-hp
|
|
14217
|
+
- id: layer_3
|
|
14218
|
+
path: layer_3.safetensors
|
|
14219
|
+
neuronpedia: qwen3-8b/3-transcoder-hp
|
|
14220
|
+
- id: layer_4
|
|
14221
|
+
path: layer_4.safetensors
|
|
14222
|
+
neuronpedia: qwen3-8b/4-transcoder-hp
|
|
14223
|
+
- id: layer_5
|
|
14224
|
+
path: layer_5.safetensors
|
|
14225
|
+
neuronpedia: qwen3-8b/5-transcoder-hp
|
|
14226
|
+
- id: layer_6
|
|
14227
|
+
path: layer_6.safetensors
|
|
14228
|
+
neuronpedia: qwen3-8b/6-transcoder-hp
|
|
14229
|
+
- id: layer_7
|
|
14230
|
+
path: layer_7.safetensors
|
|
14231
|
+
neuronpedia: qwen3-8b/7-transcoder-hp
|
|
14232
|
+
- id: layer_8
|
|
14233
|
+
path: layer_8.safetensors
|
|
14234
|
+
neuronpedia: qwen3-8b/8-transcoder-hp
|
|
14235
|
+
- id: layer_9
|
|
14236
|
+
path: layer_9.safetensors
|
|
14237
|
+
neuronpedia: qwen3-8b/9-transcoder-hp
|
|
14238
|
+
- id: layer_10
|
|
14239
|
+
path: layer_10.safetensors
|
|
14240
|
+
neuronpedia: qwen3-8b/10-transcoder-hp
|
|
14241
|
+
- id: layer_11
|
|
14242
|
+
path: layer_11.safetensors
|
|
14243
|
+
neuronpedia: qwen3-8b/11-transcoder-hp
|
|
14244
|
+
- id: layer_12
|
|
14245
|
+
path: layer_12.safetensors
|
|
14246
|
+
neuronpedia: qwen3-8b/12-transcoder-hp
|
|
14247
|
+
- id: layer_13
|
|
14248
|
+
path: layer_13.safetensors
|
|
14249
|
+
neuronpedia: qwen3-8b/13-transcoder-hp
|
|
14250
|
+
- id: layer_14
|
|
14251
|
+
path: layer_14.safetensors
|
|
14252
|
+
neuronpedia: qwen3-8b/14-transcoder-hp
|
|
14253
|
+
- id: layer_15
|
|
14254
|
+
path: layer_15.safetensors
|
|
14255
|
+
neuronpedia: qwen3-8b/15-transcoder-hp
|
|
14256
|
+
- id: layer_16
|
|
14257
|
+
path: layer_16.safetensors
|
|
14258
|
+
neuronpedia: qwen3-8b/16-transcoder-hp
|
|
14259
|
+
- id: layer_17
|
|
14260
|
+
path: layer_17.safetensors
|
|
14261
|
+
neuronpedia: qwen3-8b/17-transcoder-hp
|
|
14262
|
+
- id: layer_18
|
|
14263
|
+
path: layer_18.safetensors
|
|
14264
|
+
neuronpedia: qwen3-8b/18-transcoder-hp
|
|
14265
|
+
- id: layer_19
|
|
14266
|
+
path: layer_19.safetensors
|
|
14267
|
+
neuronpedia: qwen3-8b/19-transcoder-hp
|
|
14268
|
+
- id: layer_20
|
|
14269
|
+
path: layer_20.safetensors
|
|
14270
|
+
neuronpedia: qwen3-8b/20-transcoder-hp
|
|
14271
|
+
- id: layer_21
|
|
14272
|
+
path: layer_21.safetensors
|
|
14273
|
+
neuronpedia: qwen3-8b/21-transcoder-hp
|
|
14274
|
+
- id: layer_22
|
|
14275
|
+
path: layer_22.safetensors
|
|
14276
|
+
neuronpedia: qwen3-8b/22-transcoder-hp
|
|
14277
|
+
- id: layer_23
|
|
14278
|
+
path: layer_23.safetensors
|
|
14279
|
+
neuronpedia: qwen3-8b/23-transcoder-hp
|
|
14280
|
+
- id: layer_24
|
|
14281
|
+
path: layer_24.safetensors
|
|
14282
|
+
neuronpedia: qwen3-8b/24-transcoder-hp
|
|
14283
|
+
- id: layer_25
|
|
14284
|
+
path: layer_25.safetensors
|
|
14285
|
+
neuronpedia: qwen3-8b/25-transcoder-hp
|
|
14286
|
+
- id: layer_26
|
|
14287
|
+
path: layer_26.safetensors
|
|
14288
|
+
neuronpedia: qwen3-8b/26-transcoder-hp
|
|
14289
|
+
- id: layer_27
|
|
14290
|
+
path: layer_27.safetensors
|
|
14291
|
+
neuronpedia: qwen3-8b/27-transcoder-hp
|
|
14292
|
+
- id: layer_28
|
|
14293
|
+
path: layer_28.safetensors
|
|
14294
|
+
neuronpedia: qwen3-8b/28-transcoder-hp
|
|
14295
|
+
- id: layer_29
|
|
14296
|
+
path: layer_29.safetensors
|
|
14297
|
+
neuronpedia: qwen3-8b/29-transcoder-hp
|
|
14298
|
+
- id: layer_30
|
|
14299
|
+
path: layer_30.safetensors
|
|
14300
|
+
neuronpedia: qwen3-8b/30-transcoder-hp
|
|
14301
|
+
- id: layer_31
|
|
14302
|
+
path: layer_31.safetensors
|
|
14303
|
+
neuronpedia: qwen3-8b/31-transcoder-hp
|
|
14304
|
+
- id: layer_32
|
|
14305
|
+
path: layer_32.safetensors
|
|
14306
|
+
neuronpedia: qwen3-8b/32-transcoder-hp
|
|
14307
|
+
- id: layer_33
|
|
14308
|
+
path: layer_33.safetensors
|
|
14309
|
+
neuronpedia: qwen3-8b/33-transcoder-hp
|
|
14310
|
+
- id: layer_34
|
|
14311
|
+
path: layer_34.safetensors
|
|
14312
|
+
neuronpedia: qwen3-8b/34-transcoder-hp
|
|
14313
|
+
- id: layer_35
|
|
14314
|
+
path: layer_35.safetensors
|
|
14315
|
+
neuronpedia: qwen3-8b/35-transcoder-hp
|
|
14316
|
+
|
|
14317
|
+
mwhanna-qwen3-14b-transcoders:
|
|
14318
|
+
conversion_func: mwhanna_transcoder
|
|
14319
|
+
model: qwen3-14b
|
|
14320
|
+
repo_id: mwhanna/qwen3-14b-transcoders
|
|
14321
|
+
saes:
|
|
14322
|
+
- id: layer_0
|
|
14323
|
+
path: layer_0.safetensors
|
|
14324
|
+
neuronpedia: qwen3-14b/0-transcoder-hp
|
|
14325
|
+
- id: layer_1
|
|
14326
|
+
path: layer_1.safetensors
|
|
14327
|
+
neuronpedia: qwen3-14b/1-transcoder-hp
|
|
14328
|
+
- id: layer_2
|
|
14329
|
+
path: layer_2.safetensors
|
|
14330
|
+
neuronpedia: qwen3-14b/2-transcoder-hp
|
|
14331
|
+
- id: layer_3
|
|
14332
|
+
path: layer_3.safetensors
|
|
14333
|
+
neuronpedia: qwen3-14b/3-transcoder-hp
|
|
14334
|
+
- id: layer_4
|
|
14335
|
+
path: layer_4.safetensors
|
|
14336
|
+
neuronpedia: qwen3-14b/4-transcoder-hp
|
|
14337
|
+
- id: layer_5
|
|
14338
|
+
path: layer_5.safetensors
|
|
14339
|
+
neuronpedia: qwen3-14b/5-transcoder-hp
|
|
14340
|
+
- id: layer_6
|
|
14341
|
+
path: layer_6.safetensors
|
|
14342
|
+
neuronpedia: qwen3-14b/6-transcoder-hp
|
|
14343
|
+
- id: layer_7
|
|
14344
|
+
path: layer_7.safetensors
|
|
14345
|
+
neuronpedia: qwen3-14b/7-transcoder-hp
|
|
14346
|
+
- id: layer_8
|
|
14347
|
+
path: layer_8.safetensors
|
|
14348
|
+
neuronpedia: qwen3-14b/8-transcoder-hp
|
|
14349
|
+
- id: layer_9
|
|
14350
|
+
path: layer_9.safetensors
|
|
14351
|
+
neuronpedia: qwen3-14b/9-transcoder-hp
|
|
14352
|
+
- id: layer_10
|
|
14353
|
+
path: layer_10.safetensors
|
|
14354
|
+
neuronpedia: qwen3-14b/10-transcoder-hp
|
|
14355
|
+
- id: layer_11
|
|
14356
|
+
path: layer_11.safetensors
|
|
14357
|
+
neuronpedia: qwen3-14b/11-transcoder-hp
|
|
14358
|
+
- id: layer_12
|
|
14359
|
+
path: layer_12.safetensors
|
|
14360
|
+
neuronpedia: qwen3-14b/12-transcoder-hp
|
|
14361
|
+
- id: layer_13
|
|
14362
|
+
path: layer_13.safetensors
|
|
14363
|
+
neuronpedia: qwen3-14b/13-transcoder-hp
|
|
14364
|
+
- id: layer_14
|
|
14365
|
+
path: layer_14.safetensors
|
|
14366
|
+
neuronpedia: qwen3-14b/14-transcoder-hp
|
|
14367
|
+
- id: layer_15
|
|
14368
|
+
path: layer_15.safetensors
|
|
14369
|
+
neuronpedia: qwen3-14b/15-transcoder-hp
|
|
14370
|
+
- id: layer_16
|
|
14371
|
+
path: layer_16.safetensors
|
|
14372
|
+
neuronpedia: qwen3-14b/16-transcoder-hp
|
|
14373
|
+
- id: layer_17
|
|
14374
|
+
path: layer_17.safetensors
|
|
14375
|
+
neuronpedia: qwen3-14b/17-transcoder-hp
|
|
14376
|
+
- id: layer_18
|
|
14377
|
+
path: layer_18.safetensors
|
|
14378
|
+
neuronpedia: qwen3-14b/18-transcoder-hp
|
|
14379
|
+
- id: layer_19
|
|
14380
|
+
path: layer_19.safetensors
|
|
14381
|
+
neuronpedia: qwen3-14b/19-transcoder-hp
|
|
14382
|
+
- id: layer_20
|
|
14383
|
+
path: layer_20.safetensors
|
|
14384
|
+
neuronpedia: qwen3-14b/20-transcoder-hp
|
|
14385
|
+
- id: layer_21
|
|
14386
|
+
path: layer_21.safetensors
|
|
14387
|
+
neuronpedia: qwen3-14b/21-transcoder-hp
|
|
14388
|
+
- id: layer_22
|
|
14389
|
+
path: layer_22.safetensors
|
|
14390
|
+
neuronpedia: qwen3-14b/22-transcoder-hp
|
|
14391
|
+
- id: layer_23
|
|
14392
|
+
path: layer_23.safetensors
|
|
14393
|
+
neuronpedia: qwen3-14b/23-transcoder-hp
|
|
14394
|
+
- id: layer_24
|
|
14395
|
+
path: layer_24.safetensors
|
|
14396
|
+
neuronpedia: qwen3-14b/24-transcoder-hp
|
|
14397
|
+
- id: layer_25
|
|
14398
|
+
path: layer_25.safetensors
|
|
14399
|
+
neuronpedia: qwen3-14b/25-transcoder-hp
|
|
14400
|
+
- id: layer_26
|
|
14401
|
+
path: layer_26.safetensors
|
|
14402
|
+
neuronpedia: qwen3-14b/26-transcoder-hp
|
|
14403
|
+
- id: layer_27
|
|
14404
|
+
path: layer_27.safetensors
|
|
14405
|
+
neuronpedia: qwen3-14b/27-transcoder-hp
|
|
14406
|
+
- id: layer_28
|
|
14407
|
+
path: layer_28.safetensors
|
|
14408
|
+
neuronpedia: qwen3-14b/28-transcoder-hp
|
|
14409
|
+
- id: layer_29
|
|
14410
|
+
path: layer_29.safetensors
|
|
14411
|
+
neuronpedia: qwen3-14b/29-transcoder-hp
|
|
14412
|
+
- id: layer_30
|
|
14413
|
+
path: layer_30.safetensors
|
|
14414
|
+
neuronpedia: qwen3-14b/30-transcoder-hp
|
|
14415
|
+
- id: layer_31
|
|
14416
|
+
path: layer_31.safetensors
|
|
14417
|
+
neuronpedia: qwen3-14b/31-transcoder-hp
|
|
14418
|
+
- id: layer_32
|
|
14419
|
+
path: layer_32.safetensors
|
|
14420
|
+
neuronpedia: qwen3-14b/32-transcoder-hp
|
|
14421
|
+
- id: layer_33
|
|
14422
|
+
path: layer_33.safetensors
|
|
14423
|
+
neuronpedia: qwen3-14b/33-transcoder-hp
|
|
14424
|
+
- id: layer_34
|
|
14425
|
+
path: layer_34.safetensors
|
|
14426
|
+
neuronpedia: qwen3-14b/34-transcoder-hp
|
|
14427
|
+
- id: layer_35
|
|
14428
|
+
path: layer_35.safetensors
|
|
14429
|
+
neuronpedia: qwen3-14b/35-transcoder-hp
|
|
14430
|
+
- id: layer_36
|
|
14431
|
+
path: layer_36.safetensors
|
|
14432
|
+
neuronpedia: qwen3-14b/36-transcoder-hp
|
|
14433
|
+
- id: layer_37
|
|
14434
|
+
path: layer_37.safetensors
|
|
14435
|
+
neuronpedia: qwen3-14b/37-transcoder-hp
|
|
14436
|
+
- id: layer_38
|
|
14437
|
+
path: layer_38.safetensors
|
|
14438
|
+
neuronpedia: qwen3-14b/38-transcoder-hp
|
|
14439
|
+
- id: layer_39
|
|
14440
|
+
path: layer_39.safetensors
|
|
14441
|
+
neuronpedia: qwen3-14b/39-transcoder-hp
|
|
14442
|
+
|
|
14443
|
+
mwhanna-qwen3-14b-transcoders-lowl0:
|
|
14444
|
+
conversion_func: mwhanna_transcoder
|
|
14445
|
+
model: qwen3-14b
|
|
14446
|
+
repo_id: mwhanna/qwen3-14b-transcoders-lowl0
|
|
14447
|
+
saes:
|
|
14448
|
+
- id: layer_0
|
|
14449
|
+
path: layer_0.safetensors
|
|
14450
|
+
neuronpedia: qwen3-14b/0-transcoder-hp-lowl0
|
|
14451
|
+
- id: layer_1
|
|
14452
|
+
path: layer_1.safetensors
|
|
14453
|
+
neuronpedia: qwen3-14b/1-transcoder-hp-lowl0
|
|
14454
|
+
- id: layer_2
|
|
14455
|
+
path: layer_2.safetensors
|
|
14456
|
+
neuronpedia: qwen3-14b/2-transcoder-hp-lowl0
|
|
14457
|
+
- id: layer_3
|
|
14458
|
+
path: layer_3.safetensors
|
|
14459
|
+
neuronpedia: qwen3-14b/3-transcoder-hp-lowl0
|
|
14460
|
+
- id: layer_4
|
|
14461
|
+
path: layer_4.safetensors
|
|
14462
|
+
neuronpedia: qwen3-14b/4-transcoder-hp-lowl0
|
|
14463
|
+
- id: layer_5
|
|
14464
|
+
path: layer_5.safetensors
|
|
14465
|
+
neuronpedia: qwen3-14b/5-transcoder-hp-lowl0
|
|
14466
|
+
- id: layer_6
|
|
14467
|
+
path: layer_6.safetensors
|
|
14468
|
+
neuronpedia: qwen3-14b/6-transcoder-hp-lowl0
|
|
14469
|
+
- id: layer_7
|
|
14470
|
+
path: layer_7.safetensors
|
|
14471
|
+
neuronpedia: qwen3-14b/7-transcoder-hp-lowl0
|
|
14472
|
+
- id: layer_8
|
|
14473
|
+
path: layer_8.safetensors
|
|
14474
|
+
neuronpedia: qwen3-14b/8-transcoder-hp-lowl0
|
|
14475
|
+
- id: layer_9
|
|
14476
|
+
path: layer_9.safetensors
|
|
14477
|
+
neuronpedia: qwen3-14b/9-transcoder-hp-lowl0
|
|
14478
|
+
- id: layer_10
|
|
14479
|
+
path: layer_10.safetensors
|
|
14480
|
+
neuronpedia: qwen3-14b/10-transcoder-hp-lowl0
|
|
14481
|
+
- id: layer_11
|
|
14482
|
+
path: layer_11.safetensors
|
|
14483
|
+
neuronpedia: qwen3-14b/11-transcoder-hp-lowl0
|
|
14484
|
+
- id: layer_12
|
|
14485
|
+
path: layer_12.safetensors
|
|
14486
|
+
neuronpedia: qwen3-14b/12-transcoder-hp-lowl0
|
|
14487
|
+
- id: layer_13
|
|
14488
|
+
path: layer_13.safetensors
|
|
14489
|
+
neuronpedia: qwen3-14b/13-transcoder-hp-lowl0
|
|
14490
|
+
- id: layer_14
|
|
14491
|
+
path: layer_14.safetensors
|
|
14492
|
+
neuronpedia: qwen3-14b/14-transcoder-hp-lowl0
|
|
14493
|
+
- id: layer_15
|
|
14494
|
+
path: layer_15.safetensors
|
|
14495
|
+
neuronpedia: qwen3-14b/15-transcoder-hp-lowl0
|
|
14496
|
+
- id: layer_16
|
|
14497
|
+
path: layer_16.safetensors
|
|
14498
|
+
neuronpedia: qwen3-14b/16-transcoder-hp-lowl0
|
|
14499
|
+
- id: layer_17
|
|
14500
|
+
path: layer_17.safetensors
|
|
14501
|
+
neuronpedia: qwen3-14b/17-transcoder-hp-lowl0
|
|
14502
|
+
- id: layer_18
|
|
14503
|
+
path: layer_18.safetensors
|
|
14504
|
+
neuronpedia: qwen3-14b/18-transcoder-hp-lowl0
|
|
14505
|
+
- id: layer_19
|
|
14506
|
+
path: layer_19.safetensors
|
|
14507
|
+
neuronpedia: qwen3-14b/19-transcoder-hp-lowl0
|
|
14508
|
+
- id: layer_20
|
|
14509
|
+
path: layer_20.safetensors
|
|
14510
|
+
neuronpedia: qwen3-14b/20-transcoder-hp-lowl0
|
|
14511
|
+
- id: layer_21
|
|
14512
|
+
path: layer_21.safetensors
|
|
14513
|
+
neuronpedia: qwen3-14b/21-transcoder-hp-lowl0
|
|
14514
|
+
- id: layer_22
|
|
14515
|
+
path: layer_22.safetensors
|
|
14516
|
+
neuronpedia: qwen3-14b/22-transcoder-hp-lowl0
|
|
14517
|
+
- id: layer_23
|
|
14518
|
+
path: layer_23.safetensors
|
|
14519
|
+
neuronpedia: qwen3-14b/23-transcoder-hp-lowl0
|
|
14520
|
+
- id: layer_24
|
|
14521
|
+
path: layer_24.safetensors
|
|
14522
|
+
neuronpedia: qwen3-14b/24-transcoder-hp-lowl0
|
|
14523
|
+
- id: layer_25
|
|
14524
|
+
path: layer_25.safetensors
|
|
14525
|
+
neuronpedia: qwen3-14b/25-transcoder-hp-lowl0
|
|
14526
|
+
- id: layer_26
|
|
14527
|
+
path: layer_26.safetensors
|
|
14528
|
+
neuronpedia: qwen3-14b/26-transcoder-hp-lowl0
|
|
14529
|
+
- id: layer_27
|
|
14530
|
+
path: layer_27.safetensors
|
|
14531
|
+
neuronpedia: qwen3-14b/27-transcoder-hp-lowl0
|
|
14532
|
+
- id: layer_28
|
|
14533
|
+
path: layer_28.safetensors
|
|
14534
|
+
neuronpedia: qwen3-14b/28-transcoder-hp-lowl0
|
|
14535
|
+
- id: layer_29
|
|
14536
|
+
path: layer_29.safetensors
|
|
14537
|
+
neuronpedia: qwen3-14b/29-transcoder-hp-lowl0
|
|
14538
|
+
- id: layer_30
|
|
14539
|
+
path: layer_30.safetensors
|
|
14540
|
+
neuronpedia: qwen3-14b/30-transcoder-hp-lowl0
|
|
14541
|
+
- id: layer_31
|
|
14542
|
+
path: layer_31.safetensors
|
|
14543
|
+
neuronpedia: qwen3-14b/31-transcoder-hp-lowl0
|
|
14544
|
+
- id: layer_32
|
|
14545
|
+
path: layer_32.safetensors
|
|
14546
|
+
neuronpedia: qwen3-14b/32-transcoder-hp-lowl0
|
|
14547
|
+
- id: layer_33
|
|
14548
|
+
path: layer_33.safetensors
|
|
14549
|
+
neuronpedia: qwen3-14b/33-transcoder-hp-lowl0
|
|
14550
|
+
- id: layer_34
|
|
14551
|
+
path: layer_34.safetensors
|
|
14552
|
+
neuronpedia: qwen3-14b/34-transcoder-hp-lowl0
|
|
14553
|
+
- id: layer_35
|
|
14554
|
+
path: layer_35.safetensors
|
|
14555
|
+
neuronpedia: qwen3-14b/35-transcoder-hp-lowl0
|
|
14556
|
+
- id: layer_36
|
|
14557
|
+
path: layer_36.safetensors
|
|
14558
|
+
neuronpedia: qwen3-14b/36-transcoder-hp-lowl0
|
|
14559
|
+
- id: layer_37
|
|
14560
|
+
path: layer_37.safetensors
|
|
14561
|
+
neuronpedia: qwen3-14b/37-transcoder-hp-lowl0
|
|
14562
|
+
- id: layer_38
|
|
14563
|
+
path: layer_38.safetensors
|
|
14564
|
+
neuronpedia: qwen3-14b/38-transcoder-hp-lowl0
|
|
14565
|
+
- id: layer_39
|
|
14566
|
+
path: layer_39.safetensors
|
|
14567
|
+
neuronpedia: qwen3-14b/39-transcoder-hp-lowl0
|
|
14568
|
+
|
|
14569
|
+
mwhanna-qwen3-1.7b-transcoders-lowl0:
|
|
14570
|
+
conversion_func: mwhanna_transcoder
|
|
14571
|
+
model: qwen3-1.7b
|
|
14572
|
+
repo_id: mwhanna/qwen3-1.7b-transcoders-lowl0
|
|
14573
|
+
saes:
|
|
14574
|
+
- id: layer_0
|
|
14575
|
+
path: layer_0.safetensors
|
|
14576
|
+
neuronpedia: qwen3-1.7b/0-transcoder-hp-lowl0
|
|
14577
|
+
- id: layer_1
|
|
14578
|
+
path: layer_1.safetensors
|
|
14579
|
+
neuronpedia: qwen3-1.7b/1-transcoder-hp-lowl0
|
|
14580
|
+
- id: layer_2
|
|
14581
|
+
path: layer_2.safetensors
|
|
14582
|
+
neuronpedia: qwen3-1.7b/2-transcoder-hp-lowl0
|
|
14583
|
+
- id: layer_3
|
|
14584
|
+
path: layer_3.safetensors
|
|
14585
|
+
neuronpedia: qwen3-1.7b/3-transcoder-hp-lowl0
|
|
14586
|
+
- id: layer_4
|
|
14587
|
+
path: layer_4.safetensors
|
|
14588
|
+
neuronpedia: qwen3-1.7b/4-transcoder-hp-lowl0
|
|
14589
|
+
- id: layer_5
|
|
14590
|
+
path: layer_5.safetensors
|
|
14591
|
+
neuronpedia: qwen3-1.7b/5-transcoder-hp-lowl0
|
|
14592
|
+
- id: layer_6
|
|
14593
|
+
path: layer_6.safetensors
|
|
14594
|
+
neuronpedia: qwen3-1.7b/6-transcoder-hp-lowl0
|
|
14595
|
+
- id: layer_7
|
|
14596
|
+
path: layer_7.safetensors
|
|
14597
|
+
neuronpedia: qwen3-1.7b/7-transcoder-hp-lowl0
|
|
14598
|
+
- id: layer_8
|
|
14599
|
+
path: layer_8.safetensors
|
|
14600
|
+
neuronpedia: qwen3-1.7b/8-transcoder-hp-lowl0
|
|
14601
|
+
- id: layer_9
|
|
14602
|
+
path: layer_9.safetensors
|
|
14603
|
+
neuronpedia: qwen3-1.7b/9-transcoder-hp-lowl0
|
|
14604
|
+
- id: layer_10
|
|
14605
|
+
path: layer_10.safetensors
|
|
14606
|
+
neuronpedia: qwen3-1.7b/10-transcoder-hp-lowl0
|
|
14607
|
+
- id: layer_11
|
|
14608
|
+
path: layer_11.safetensors
|
|
14609
|
+
neuronpedia: qwen3-1.7b/11-transcoder-hp-lowl0
|
|
14610
|
+
- id: layer_12
|
|
14611
|
+
path: layer_12.safetensors
|
|
14612
|
+
neuronpedia: qwen3-1.7b/12-transcoder-hp-lowl0
|
|
14613
|
+
- id: layer_13
|
|
14614
|
+
path: layer_13.safetensors
|
|
14615
|
+
neuronpedia: qwen3-1.7b/13-transcoder-hp-lowl0
|
|
14616
|
+
- id: layer_14
|
|
14617
|
+
path: layer_14.safetensors
|
|
14618
|
+
neuronpedia: qwen3-1.7b/14-transcoder-hp-lowl0
|
|
14619
|
+
- id: layer_15
|
|
14620
|
+
path: layer_15.safetensors
|
|
14621
|
+
neuronpedia: qwen3-1.7b/15-transcoder-hp-lowl0
|
|
14622
|
+
- id: layer_16
|
|
14623
|
+
path: layer_16.safetensors
|
|
14624
|
+
neuronpedia: qwen3-1.7b/16-transcoder-hp-lowl0
|
|
14625
|
+
- id: layer_17
|
|
14626
|
+
path: layer_17.safetensors
|
|
14627
|
+
neuronpedia: qwen3-1.7b/17-transcoder-hp-lowl0
|
|
14628
|
+
- id: layer_18
|
|
14629
|
+
path: layer_18.safetensors
|
|
14630
|
+
neuronpedia: qwen3-1.7b/18-transcoder-hp-lowl0
|
|
14631
|
+
- id: layer_19
|
|
14632
|
+
path: layer_19.safetensors
|
|
14633
|
+
neuronpedia: qwen3-1.7b/19-transcoder-hp-lowl0
|
|
14634
|
+
- id: layer_20
|
|
14635
|
+
path: layer_20.safetensors
|
|
14636
|
+
neuronpedia: qwen3-1.7b/20-transcoder-hp-lowl0
|
|
14637
|
+
- id: layer_21
|
|
14638
|
+
path: layer_21.safetensors
|
|
14639
|
+
neuronpedia: qwen3-1.7b/21-transcoder-hp-lowl0
|
|
14640
|
+
- id: layer_22
|
|
14641
|
+
path: layer_22.safetensors
|
|
14642
|
+
neuronpedia: qwen3-1.7b/22-transcoder-hp-lowl0
|
|
14643
|
+
- id: layer_23
|
|
14644
|
+
path: layer_23.safetensors
|
|
14645
|
+
neuronpedia: qwen3-1.7b/23-transcoder-hp-lowl0
|
|
14646
|
+
- id: layer_24
|
|
14647
|
+
path: layer_24.safetensors
|
|
14648
|
+
neuronpedia: qwen3-1.7b/24-transcoder-hp-lowl0
|
|
14649
|
+
- id: layer_25
|
|
14650
|
+
path: layer_25.safetensors
|
|
14651
|
+
neuronpedia: qwen3-1.7b/25-transcoder-hp-lowl0
|
|
14652
|
+
- id: layer_26
|
|
14653
|
+
path: layer_26.safetensors
|
|
14654
|
+
neuronpedia: qwen3-1.7b/26-transcoder-hp-lowl0
|
|
14655
|
+
- id: layer_27
|
|
14656
|
+
path: layer_27.safetensors
|
|
14657
|
+
neuronpedia: qwen3-1.7b/27-transcoder-hp-lowl0
|
|
14658
|
+
|
|
14659
|
+
mwhanna-qwen3-0.6b-transcoders-lowl0:
|
|
14660
|
+
conversion_func: mwhanna_transcoder
|
|
14661
|
+
model: qwen3-0.6b
|
|
14662
|
+
repo_id: mwhanna/qwen3-0.6b-transcoders-lowl0
|
|
14663
|
+
saes:
|
|
14664
|
+
- id: layer_0
|
|
14665
|
+
path: layer_0.safetensors
|
|
14666
|
+
neuronpedia: qwen3-0.6b/0-transcoder-hp-lowl0
|
|
14667
|
+
- id: layer_1
|
|
14668
|
+
path: layer_1.safetensors
|
|
14669
|
+
neuronpedia: qwen3-0.6b/1-transcoder-hp-lowl0
|
|
14670
|
+
- id: layer_2
|
|
14671
|
+
path: layer_2.safetensors
|
|
14672
|
+
neuronpedia: qwen3-0.6b/2-transcoder-hp-lowl0
|
|
14673
|
+
- id: layer_3
|
|
14674
|
+
path: layer_3.safetensors
|
|
14675
|
+
neuronpedia: qwen3-0.6b/3-transcoder-hp-lowl0
|
|
14676
|
+
- id: layer_4
|
|
14677
|
+
path: layer_4.safetensors
|
|
14678
|
+
neuronpedia: qwen3-0.6b/4-transcoder-hp-lowl0
|
|
14679
|
+
- id: layer_5
|
|
14680
|
+
path: layer_5.safetensors
|
|
14681
|
+
neuronpedia: qwen3-0.6b/5-transcoder-hp-lowl0
|
|
14682
|
+
- id: layer_6
|
|
14683
|
+
path: layer_6.safetensors
|
|
14684
|
+
neuronpedia: qwen3-0.6b/6-transcoder-hp-lowl0
|
|
14685
|
+
- id: layer_7
|
|
14686
|
+
path: layer_7.safetensors
|
|
14687
|
+
neuronpedia: qwen3-0.6b/7-transcoder-hp-lowl0
|
|
14688
|
+
- id: layer_8
|
|
14689
|
+
path: layer_8.safetensors
|
|
14690
|
+
neuronpedia: qwen3-0.6b/8-transcoder-hp-lowl0
|
|
14691
|
+
- id: layer_9
|
|
14692
|
+
path: layer_9.safetensors
|
|
14693
|
+
neuronpedia: qwen3-0.6b/9-transcoder-hp-lowl0
|
|
14694
|
+
- id: layer_10
|
|
14695
|
+
path: layer_10.safetensors
|
|
14696
|
+
neuronpedia: qwen3-0.6b/10-transcoder-hp-lowl0
|
|
14697
|
+
- id: layer_11
|
|
14698
|
+
path: layer_11.safetensors
|
|
14699
|
+
neuronpedia: qwen3-0.6b/11-transcoder-hp-lowl0
|
|
14700
|
+
- id: layer_12
|
|
14701
|
+
path: layer_12.safetensors
|
|
14702
|
+
neuronpedia: qwen3-0.6b/12-transcoder-hp-lowl0
|
|
14703
|
+
- id: layer_13
|
|
14704
|
+
path: layer_13.safetensors
|
|
14705
|
+
neuronpedia: qwen3-0.6b/13-transcoder-hp-lowl0
|
|
14706
|
+
- id: layer_14
|
|
14707
|
+
path: layer_14.safetensors
|
|
14708
|
+
neuronpedia: qwen3-0.6b/14-transcoder-hp-lowl0
|
|
14709
|
+
- id: layer_15
|
|
14710
|
+
path: layer_15.safetensors
|
|
14711
|
+
neuronpedia: qwen3-0.6b/15-transcoder-hp-lowl0
|
|
14712
|
+
- id: layer_16
|
|
14713
|
+
path: layer_16.safetensors
|
|
14714
|
+
neuronpedia: qwen3-0.6b/16-transcoder-hp-lowl0
|
|
14715
|
+
- id: layer_17
|
|
14716
|
+
path: layer_17.safetensors
|
|
14717
|
+
neuronpedia: qwen3-0.6b/17-transcoder-hp-lowl0
|
|
14718
|
+
- id: layer_18
|
|
14719
|
+
path: layer_18.safetensors
|
|
14720
|
+
neuronpedia: qwen3-0.6b/18-transcoder-hp-lowl0
|
|
14721
|
+
- id: layer_19
|
|
14722
|
+
path: layer_19.safetensors
|
|
14723
|
+
neuronpedia: qwen3-0.6b/19-transcoder-hp-lowl0
|
|
14724
|
+
- id: layer_20
|
|
14725
|
+
path: layer_20.safetensors
|
|
14726
|
+
neuronpedia: qwen3-0.6b/20-transcoder-hp-lowl0
|
|
14727
|
+
- id: layer_21
|
|
14728
|
+
path: layer_21.safetensors
|
|
14729
|
+
neuronpedia: qwen3-0.6b/21-transcoder-hp-lowl0
|
|
14730
|
+
- id: layer_22
|
|
14731
|
+
path: layer_22.safetensors
|
|
14732
|
+
neuronpedia: qwen3-0.6b/22-transcoder-hp-lowl0
|
|
14733
|
+
- id: layer_23
|
|
14734
|
+
path: layer_23.safetensors
|
|
14735
|
+
neuronpedia: qwen3-0.6b/23-transcoder-hp-lowl0
|
|
14736
|
+
- id: layer_24
|
|
14737
|
+
path: layer_24.safetensors
|
|
14738
|
+
neuronpedia: qwen3-0.6b/24-transcoder-hp-lowl0
|
|
14739
|
+
- id: layer_25
|
|
14740
|
+
path: layer_25.safetensors
|
|
14741
|
+
neuronpedia: qwen3-0.6b/25-transcoder-hp-lowl0
|
|
14742
|
+
- id: layer_26
|
|
14743
|
+
path: layer_26.safetensors
|
|
14744
|
+
neuronpedia: qwen3-0.6b/26-transcoder-hp-lowl0
|
|
14745
|
+
- id: layer_27
|
|
14746
|
+
path: layer_27.safetensors
|
|
14747
|
+
neuronpedia: qwen3-0.6b/27-transcoder-hp-lowl0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sae-lens
|
|
3
|
-
Version: 6.
|
|
3
|
+
Version: 6.6.0
|
|
4
4
|
Summary: Training and Analyzing Sparse Autoencoders (SAEs)
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: deep-learning,sparse-autoencoders,mechanistic-interpretability,PyTorch
|
|
@@ -29,7 +29,7 @@ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
|
29
29
|
Requires-Dist: pyzmq (==26.0.0)
|
|
30
30
|
Requires-Dist: safetensors (>=0.4.2,<0.5.0)
|
|
31
31
|
Requires-Dist: simple-parsing (>=0.1.6,<0.2.0)
|
|
32
|
-
Requires-Dist: transformer-lens (>=2.
|
|
32
|
+
Requires-Dist: transformer-lens (>=2.16.1,<3.0.0)
|
|
33
33
|
Requires-Dist: transformers (>=4.38.1,<5.0.0)
|
|
34
34
|
Requires-Dist: typer (>=0.12.3,<0.13.0)
|
|
35
35
|
Requires-Dist: typing-extensions (>=4.10.0,<5.0.0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
sae_lens/__init__.py,sha256=
|
|
1
|
+
sae_lens/__init__.py,sha256=nZaNNPsvcyXtYBDQj-3dU5cZNmVynuc0d1gpeRUqQq8,3588
|
|
2
2
|
sae_lens/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
sae_lens/analysis/hooked_sae_transformer.py,sha256=
|
|
3
|
+
sae_lens/analysis/hooked_sae_transformer.py,sha256=vRu6JseH1lZaEeILD5bEkQEQ1wYHHDcxD-f2olKmE9Y,14275
|
|
4
4
|
sae_lens/analysis/neuronpedia_integration.py,sha256=MrENqc81Mc2SMbxGjbwHzpkGUCAFKSf0i4EdaUF2Oj4,18707
|
|
5
5
|
sae_lens/cache_activations_runner.py,sha256=L5hhuU2-zPQr2S3L64GMKKLeMQfqXxwDl8NbuOtrybI,12567
|
|
6
6
|
sae_lens/config.py,sha256=IrjbsKBbaZoFXYrsPJ5xBwIqi9uZJIIFXjV_uoErJaE,28176
|
|
@@ -9,10 +9,10 @@ sae_lens/evals.py,sha256=rVmKqdSa4xlZw-78qpgdhDleJIO1EI1QHRiPsObW1Tc,39129
|
|
|
9
9
|
sae_lens/llm_sae_training_runner.py,sha256=exxNX_OEhdiUrlgmBP9bjX9DOf0HUcNQGO4unKeDjKM,13713
|
|
10
10
|
sae_lens/load_model.py,sha256=C8AMykctj6H7tz_xRwB06-EXj6TfW64PtSJZR5Jxn1Y,8649
|
|
11
11
|
sae_lens/loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
sae_lens/loading/pretrained_sae_loaders.py,sha256=
|
|
12
|
+
sae_lens/loading/pretrained_sae_loaders.py,sha256=hAWYByePjRiKdVBShmrdAZZVP4TZ01Nbg4eTAUbey-4,43601
|
|
13
13
|
sae_lens/loading/pretrained_saes_directory.py,sha256=4Vn-Jex6SveD7EbxcSOBv8cx1gkPfUMLU1QOP-ww1ZE,3752
|
|
14
14
|
sae_lens/pretokenize_runner.py,sha256=w0f6SfZLAxbp5eAAKnet8RqUB_DKofZ9RGsoJwFnYbA,7058
|
|
15
|
-
sae_lens/pretrained_saes.yaml,sha256=
|
|
15
|
+
sae_lens/pretrained_saes.yaml,sha256=O_FwoOe7fU9_WLEOnMk1IWXRxD4nwzf1tCfbof1r0D0,598578
|
|
16
16
|
sae_lens/registry.py,sha256=nhy7BPSudSATqW4lo9H_k3Na7sfGHmAf9v-3wpnLL_o,1490
|
|
17
17
|
sae_lens/saes/__init__.py,sha256=jVwazK8Q6dW5J6_zFXPoNAuBvSxgziQ8eMOjGM3t-X8,1475
|
|
18
18
|
sae_lens/saes/batchtopk_sae.py,sha256=CyaFG2hMyyDaEaXXrAMJC8wQDW1JoddTKF5mvxxBQKY,3395
|
|
@@ -33,7 +33,7 @@ sae_lens/training/types.py,sha256=qSjmGzXf3MLalygG0psnVjmhX_mpLmL47MQtZfe7qxg,81
|
|
|
33
33
|
sae_lens/training/upload_saes_to_huggingface.py,sha256=r_WzI1zLtGZ5TzAxuG3xa_8T09j3zXJrWd_vzPsPGkQ,4469
|
|
34
34
|
sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
|
|
35
35
|
sae_lens/util.py,sha256=mCwLAilGMVo8Scm7CIsCafU7GsfmBvCcjwmloI4Ly7Y,1718
|
|
36
|
-
sae_lens-6.
|
|
37
|
-
sae_lens-6.
|
|
38
|
-
sae_lens-6.
|
|
39
|
-
sae_lens-6.
|
|
36
|
+
sae_lens-6.6.0.dist-info/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
|
|
37
|
+
sae_lens-6.6.0.dist-info/METADATA,sha256=hAg3k2VOAwVSyF3KV54z3VBFk9neHz3EON504xMQl58,5555
|
|
38
|
+
sae_lens-6.6.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
39
|
+
sae_lens-6.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|