sae-lens 6.22.3__py3-none-any.whl → 6.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sae_lens/__init__.py +1 -1
- sae_lens/loading/pretrained_sae_loaders.py +20 -5
- sae_lens/pretrained_saes.yaml +78 -0
- {sae_lens-6.22.3.dist-info → sae_lens-6.23.0.dist-info}/METADATA +1 -1
- {sae_lens-6.22.3.dist-info → sae_lens-6.23.0.dist-info}/RECORD +7 -7
- {sae_lens-6.22.3.dist-info → sae_lens-6.23.0.dist-info}/WHEEL +0 -0
- {sae_lens-6.22.3.dist-info → sae_lens-6.23.0.dist-info}/licenses/LICENSE +0 -0
sae_lens/__init__.py
CHANGED
sae_lens/loading/pretrained_sae_loaders.py
CHANGED
|
@@ -753,10 +753,14 @@ def get_dictionary_learning_config_1_from_hf(
|
|
|
753
753
|
activation_fn = "topk" if trainer["dict_class"] == "AutoEncoderTopK" else "relu"
|
|
754
754
|
activation_fn_kwargs = {"k": trainer["k"]} if activation_fn == "topk" else {}
|
|
755
755
|
|
|
756
|
+
architecture = "standard"
|
|
757
|
+
if trainer["dict_class"] == "GatedAutoEncoder":
|
|
758
|
+
architecture = "gated"
|
|
759
|
+
elif trainer["dict_class"] == "MatryoshkaBatchTopKSAE":
|
|
760
|
+
architecture = "jumprelu"
|
|
761
|
+
|
|
756
762
|
return {
|
|
757
|
-
"architecture":
|
|
758
|
-
"gated" if trainer["dict_class"] == "GatedAutoEncoder" else "standard"
|
|
759
|
-
),
|
|
763
|
+
"architecture": architecture,
|
|
760
764
|
"d_in": trainer["activation_dim"],
|
|
761
765
|
"d_sae": trainer["dict_size"],
|
|
762
766
|
"dtype": "float32",
|
|
@@ -905,9 +909,12 @@ def dictionary_learning_sae_huggingface_loader_1(
|
|
|
905
909
|
)
|
|
906
910
|
encoder = torch.load(encoder_path, map_location="cpu")
|
|
907
911
|
|
|
912
|
+
W_enc = encoder["W_enc"] if "W_enc" in encoder else encoder["encoder.weight"].T
|
|
913
|
+
W_dec = encoder["W_dec"] if "W_dec" in encoder else encoder["decoder.weight"].T
|
|
914
|
+
|
|
908
915
|
state_dict = {
|
|
909
|
-
"W_enc":
|
|
910
|
-
"W_dec":
|
|
916
|
+
"W_enc": W_enc,
|
|
917
|
+
"W_dec": W_dec,
|
|
911
918
|
"b_dec": encoder.get(
|
|
912
919
|
"b_dec", encoder.get("bias", encoder.get("decoder_bias", None))
|
|
913
920
|
),
|
|
@@ -915,6 +922,8 @@ def dictionary_learning_sae_huggingface_loader_1(
|
|
|
915
922
|
|
|
916
923
|
if "encoder.bias" in encoder:
|
|
917
924
|
state_dict["b_enc"] = encoder["encoder.bias"]
|
|
925
|
+
if "b_enc" in encoder:
|
|
926
|
+
state_dict["b_enc"] = encoder["b_enc"]
|
|
918
927
|
|
|
919
928
|
if "mag_bias" in encoder:
|
|
920
929
|
state_dict["b_mag"] = encoder["mag_bias"]
|
|
@@ -923,6 +932,12 @@ def dictionary_learning_sae_huggingface_loader_1(
|
|
|
923
932
|
if "r_mag" in encoder:
|
|
924
933
|
state_dict["r_mag"] = encoder["r_mag"]
|
|
925
934
|
|
|
935
|
+
if "threshold" in encoder:
|
|
936
|
+
threshold = encoder["threshold"]
|
|
937
|
+
if threshold.ndim == 0:
|
|
938
|
+
threshold = torch.full((W_enc.size(1),), threshold)
|
|
939
|
+
state_dict["threshold"] = threshold
|
|
940
|
+
|
|
926
941
|
return cfg_dict, state_dict, None
|
|
927
942
|
|
|
928
943
|
|
sae_lens/pretrained_saes.yaml
CHANGED
|
@@ -14959,3 +14959,81 @@ goodfire-llama-3.1-8b-instruct:
|
|
|
14959
14959
|
path: Llama-3.1-8B-Instruct-SAE-l19.pth
|
|
14960
14960
|
l0: 91
|
|
14961
14961
|
neuronpedia: llama3.1-8b-it/19-resid-post-gf
|
|
14962
|
+
|
|
14963
|
+
saebench_gemma-2-2b_width-2pow12_date-0108:
|
|
14964
|
+
conversion_func: dictionary_learning_1
|
|
14965
|
+
links:
|
|
14966
|
+
model: https://huggingface.co/google/gemma-2-2b
|
|
14967
|
+
model: gemma-2-2b
|
|
14968
|
+
repo_id: adamkarvonen/saebench_gemma-2-2b_width-2pow12_date-0108
|
|
14969
|
+
saes:
|
|
14970
|
+
- id: blocks.12.hook_resid_post__trainer_0
|
|
14971
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-4k__trainer_0_step_final
|
|
14972
|
+
path: MatryoshkaBatchTopK_gemma-2-2b__0108/resid_post_layer_12/trainer_0
|
|
14973
|
+
- id: blocks.12.hook_resid_post__trainer_1
|
|
14974
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-4k__trainer_1_step_final
|
|
14975
|
+
path: MatryoshkaBatchTopK_gemma-2-2b__0108/resid_post_layer_12/trainer_1
|
|
14976
|
+
- id: blocks.12.hook_resid_post__trainer_2
|
|
14977
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-4k__trainer_2_step_final
|
|
14978
|
+
path: MatryoshkaBatchTopK_gemma-2-2b__0108/resid_post_layer_12/trainer_2
|
|
14979
|
+
- id: blocks.12.hook_resid_post__trainer_3
|
|
14980
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-4k__trainer_3_step_final
|
|
14981
|
+
path: MatryoshkaBatchTopK_gemma-2-2b__0108/resid_post_layer_12/trainer_3
|
|
14982
|
+
- id: blocks.12.hook_resid_post__trainer_4
|
|
14983
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-4k__trainer_4_step_final
|
|
14984
|
+
path: MatryoshkaBatchTopK_gemma-2-2b__0108/resid_post_layer_12/trainer_4
|
|
14985
|
+
- id: blocks.12.hook_resid_post__trainer_5
|
|
14986
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-4k__trainer_5_step_final
|
|
14987
|
+
path: MatryoshkaBatchTopK_gemma-2-2b__0108/resid_post_layer_12/trainer_5
|
|
14988
|
+
|
|
14989
|
+
saebench_gemma-2-2b_width-2pow14_date-0107:
|
|
14990
|
+
conversion_func: dictionary_learning_1
|
|
14991
|
+
links:
|
|
14992
|
+
model: https://huggingface.co/google/gemma-2-2b
|
|
14993
|
+
model: gemma-2-2b
|
|
14994
|
+
repo_id: canrager/saebench_gemma-2-2b_width-2pow14_date-0107
|
|
14995
|
+
saes:
|
|
14996
|
+
- id: blocks.12.hook_resid_post__trainer_0
|
|
14997
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-16k__trainer_0_step_final
|
|
14998
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0
|
|
14999
|
+
- id: blocks.12.hook_resid_post__trainer_1
|
|
15000
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-16k__trainer_1_step_final
|
|
15001
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1
|
|
15002
|
+
- id: blocks.12.hook_resid_post__trainer_2
|
|
15003
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-16k__trainer_2_step_final
|
|
15004
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2
|
|
15005
|
+
- id: blocks.12.hook_resid_post__trainer_3
|
|
15006
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-16k__trainer_3_step_final
|
|
15007
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3
|
|
15008
|
+
- id: blocks.12.hook_resid_post__trainer_4
|
|
15009
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-16k__trainer_4_step_final
|
|
15010
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4
|
|
15011
|
+
- id: blocks.12.hook_resid_post__trainer_5
|
|
15012
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-16k__trainer_5_step_final
|
|
15013
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5
|
|
15014
|
+
|
|
15015
|
+
saebench_gemma-2-2b_width-2pow16_date-0107:
|
|
15016
|
+
conversion_func: dictionary_learning_1
|
|
15017
|
+
links:
|
|
15018
|
+
model: https://huggingface.co/google/gemma-2-2b
|
|
15019
|
+
model: gemma-2-2b
|
|
15020
|
+
repo_id: canrager/saebench_gemma-2-2b_width-2pow16_date-0107
|
|
15021
|
+
saes:
|
|
15022
|
+
- id: blocks.12.hook_resid_post__trainer_0
|
|
15023
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-65k__trainer_0_step_final
|
|
15024
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow16_date-0107/resid_post_layer_12/trainer_0
|
|
15025
|
+
- id: blocks.12.hook_resid_post__trainer_1
|
|
15026
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-65k__trainer_1_step_final
|
|
15027
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow16_date-0107/resid_post_layer_12/trainer_1
|
|
15028
|
+
- id: blocks.12.hook_resid_post__trainer_2
|
|
15029
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-65k__trainer_2_step_final
|
|
15030
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow16_date-0107/resid_post_layer_12/trainer_2
|
|
15031
|
+
- id: blocks.12.hook_resid_post__trainer_3
|
|
15032
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-65k__trainer_3_step_final
|
|
15033
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow16_date-0107/resid_post_layer_12/trainer_3
|
|
15034
|
+
- id: blocks.12.hook_resid_post__trainer_4
|
|
15035
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-65k__trainer_4_step_final
|
|
15036
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow16_date-0107/resid_post_layer_12/trainer_4
|
|
15037
|
+
- id: blocks.12.hook_resid_post__trainer_5
|
|
15038
|
+
neuronpedia: gemma-2-2b/12-sae_bench-matryoshka-res-65k__trainer_5_step_final
|
|
15039
|
+
path: gemma-2-2b_matryoshka_batch_top_k_width-2pow16_date-0107/resid_post_layer_12/trainer_5
|
|
{sae_lens-6.22.3.dist-info → sae_lens-6.23.0.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sae_lens/__init__.py,sha256
|
|
1
|
+
sae_lens/__init__.py,sha256=XYVTI_d_xGq0CTozbkj8QZA3Mrf4U4lNZyCcE8uv3x8,4033
|
|
2
2
|
sae_lens/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
sae_lens/analysis/hooked_sae_transformer.py,sha256=dQRgGVwce8XwylL2AzJE7l9elhtMRFCs2hdUj-Qyy4g,14038
|
|
4
4
|
sae_lens/analysis/neuronpedia_integration.py,sha256=Gx1W7hUBEuMoasNcnOnZ1wmqbXDd1pSZ1nqKEya1HQc,4962
|
|
@@ -9,10 +9,10 @@ sae_lens/evals.py,sha256=P0NUsJeGzYxFBiVKhbPzd72IFKY4gH40HHlEZ3jEAmg,39598
|
|
|
9
9
|
sae_lens/llm_sae_training_runner.py,sha256=M7BK55gSFYu2qFQKABHX3c8i46P1LfODCeyHFzGGuqU,15196
|
|
10
10
|
sae_lens/load_model.py,sha256=C8AMykctj6H7tz_xRwB06-EXj6TfW64PtSJZR5Jxn1Y,8649
|
|
11
11
|
sae_lens/loading/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
sae_lens/loading/pretrained_sae_loaders.py,sha256=
|
|
12
|
+
sae_lens/loading/pretrained_sae_loaders.py,sha256=_YmZAiFd2Av-mgwEBzvQaylgwlwfY-Aqj62s2-C8SIs,56589
|
|
13
13
|
sae_lens/loading/pretrained_saes_directory.py,sha256=hejNfLUepYCSGPalRfQwxxCEUqMMUPsn1tufwvwct5k,3820
|
|
14
14
|
sae_lens/pretokenize_runner.py,sha256=x-reJzVPFDS9iRFbZtrFYSzNguJYki9gd0pbHjYJ3r4,7085
|
|
15
|
-
sae_lens/pretrained_saes.yaml,sha256=
|
|
15
|
+
sae_lens/pretrained_saes.yaml,sha256=pKfO-KfBU7HlhdukEP9Wdxxoka6LRrDDcsVAI0HYqjg,609596
|
|
16
16
|
sae_lens/registry.py,sha256=nhy7BPSudSATqW4lo9H_k3Na7sfGHmAf9v-3wpnLL_o,1490
|
|
17
17
|
sae_lens/saes/__init__.py,sha256=nTNPnJ7edyfedo1MX96xwn9WOG8504yHbT9LFw9od_0,1778
|
|
18
18
|
sae_lens/saes/batchtopk_sae.py,sha256=x4EbgZl0GUickRPcCmtKNGS2Ra3Uy1Z1OtF2FnrSabQ,5422
|
|
@@ -35,7 +35,7 @@ sae_lens/training/types.py,sha256=1FpLx_Doda9vZpmfm-x1e8wGBYpyhe9Kpb_JuM5nIFM,90
|
|
|
35
35
|
sae_lens/training/upload_saes_to_huggingface.py,sha256=r_WzI1zLtGZ5TzAxuG3xa_8T09j3zXJrWd_vzPsPGkQ,4469
|
|
36
36
|
sae_lens/tutorial/tsea.py,sha256=fd1am_XXsf2KMbByDapJo-2qlxduKaa62Z2qcQZ3QKU,18145
|
|
37
37
|
sae_lens/util.py,sha256=tCovQ-eZa1L7thPpNDL6PGOJrIMML2yLI5e0EHCOpS8,3309
|
|
38
|
-
sae_lens-6.
|
|
39
|
-
sae_lens-6.
|
|
40
|
-
sae_lens-6.
|
|
41
|
-
sae_lens-6.
|
|
38
|
+
sae_lens-6.23.0.dist-info/METADATA,sha256=BJ0kFOaNRgcbMx-bLpI17h42zVugibTBCZPG0JaCDLU,5369
|
|
39
|
+
sae_lens-6.23.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
40
|
+
sae_lens-6.23.0.dist-info/licenses/LICENSE,sha256=DW6e-hDosiu4CfW0-imI57sV1I5f9UEslpviNQcOAKs,1069
|
|
41
|
+
sae_lens-6.23.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|