sae-lens 5.7.1.tar.gz → 6.25.1.tar.gz
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
This release of sae-lens has been flagged as potentially problematic.
- {sae_lens-5.7.1 → sae_lens-6.25.1}/PKG-INFO +31 -31
- {sae_lens-5.7.1 → sae_lens-6.25.1}/README.md +18 -14
- {sae_lens-5.7.1 → sae_lens-6.25.1}/pyproject.toml +20 -20
- sae_lens-6.25.1/sae_lens/__init__.py +141 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/analysis/hooked_sae_transformer.py +29 -25
- sae_lens-6.25.1/sae_lens/analysis/neuronpedia_integration.py +163 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/cache_activations_runner.py +13 -12
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/config.py +254 -271
- sae_lens-6.25.1/sae_lens/constants.py +30 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/evals.py +146 -87
- sae_lens-6.25.1/sae_lens/llm_sae_training_runner.py +429 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/load_model.py +54 -6
- sae_lens-6.25.1/sae_lens/loading/pretrained_sae_loaders.py +1911 -0
- {sae_lens-5.7.1/sae_lens/toolkit → sae_lens-6.25.1/sae_lens/loading}/pretrained_saes_directory.py +17 -3
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/pretokenize_runner.py +8 -4
- sae_lens-6.25.1/sae_lens/pretrained_saes.yaml +41813 -0
- sae_lens-6.25.1/sae_lens/registry.py +49 -0
- sae_lens-6.25.1/sae_lens/saes/__init__.py +81 -0
- sae_lens-6.25.1/sae_lens/saes/batchtopk_sae.py +134 -0
- sae_lens-6.25.1/sae_lens/saes/gated_sae.py +242 -0
- sae_lens-6.25.1/sae_lens/saes/jumprelu_sae.py +367 -0
- sae_lens-6.25.1/sae_lens/saes/matryoshka_batchtopk_sae.py +136 -0
- sae_lens-6.25.1/sae_lens/saes/sae.py +1067 -0
- sae_lens-6.25.1/sae_lens/saes/standard_sae.py +165 -0
- sae_lens-6.25.1/sae_lens/saes/temporal_sae.py +365 -0
- sae_lens-6.25.1/sae_lens/saes/topk_sae.py +538 -0
- sae_lens-6.25.1/sae_lens/saes/transcoder.py +411 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/tokenization_and_batching.py +25 -2
- sae_lens-6.25.1/sae_lens/training/activation_scaler.py +60 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/training/activations_store.py +179 -237
- sae_lens-6.25.1/sae_lens/training/mixing_buffer.py +56 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/training/optim.py +36 -34
- sae_lens-6.25.1/sae_lens/training/sae_trainer.py +455 -0
- sae_lens-6.25.1/sae_lens/training/types.py +5 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/training/upload_saes_to_huggingface.py +17 -6
- sae_lens-6.25.1/sae_lens/util.py +113 -0
- sae_lens-5.7.1/sae_lens/__init__.py +0 -39
- sae_lens-5.7.1/sae_lens/analysis/neuronpedia_integration.py +0 -492
- sae_lens-5.7.1/sae_lens/pretrained_saes.yaml +0 -13961
- sae_lens-5.7.1/sae_lens/sae.py +0 -737
- sae_lens-5.7.1/sae_lens/sae_training_runner.py +0 -251
- sae_lens-5.7.1/sae_lens/toolkit/pretrained_sae_loaders.py +0 -879
- sae_lens-5.7.1/sae_lens/training/geometric_median.py +0 -101
- sae_lens-5.7.1/sae_lens/training/sae_trainer.py +0 -444
- sae_lens-5.7.1/sae_lens/training/training_sae.py +0 -711
- {sae_lens-5.7.1 → sae_lens-6.25.1}/LICENSE +0 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/analysis/__init__.py +0 -0
- {sae_lens-5.7.1/sae_lens/toolkit → sae_lens-6.25.1/sae_lens/loading}/__init__.py +0 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/training/__init__.py +0 -0
- {sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/tutorial/tsea.py +0 -0
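The deepest breaking changes in this release are already visible in the file list: `sae_lens/sae.py` moves to `sae_lens/saes/sae.py`, `sae_lens/toolkit/` becomes `sae_lens/loading/`, and `sae_training_runner.py` is superseded by `llm_sae_training_runner.py`. A minimal sketch of the corresponding import updates, using only paths and names that appear in this diff (top-level `from sae_lens import ...` re-exports are unaffected):

```python
# sae-lens 5.7.1 deep imports (modules removed or moved in 6.x):
#   from sae_lens.sae import SAE
#   from sae_lens.toolkit import pretrained_sae_loaders
#   from sae_lens.sae_training_runner import SAETrainingRunner

# sae-lens 6.25.1 equivalents, per the moves above and the new __init__.py:
from sae_lens.saes.sae import SAE
from sae_lens.loading.pretrained_sae_loaders import (
    PretrainedSaeDiskLoader,
    PretrainedSaeHuggingfaceLoader,
)
from sae_lens.llm_sae_training_runner import (
    LanguageModelSAETrainingRunner,
    SAETrainingRunner,
)
```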
{sae_lens-5.7.1 → sae_lens-6.25.1}/PKG-INFO

````diff
@@ -1,8 +1,9 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: sae-lens
-Version:
+Version: 6.25.1
 Summary: Training and Analyzing Sparse Autoencoders (SAEs)
 License: MIT
+License-File: LICENSE
 Keywords: deep-learning,sparse-autoencoders,mechanistic-interpretability,PyTorch
 Author: Joseph Bloom
 Requires-Python: >=3.10,<4.0
@@ -12,41 +13,36 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Provides-Extra: mamba
-Requires-Dist: automated-interpretability (>=0.0.5,<1.0.0)
 Requires-Dist: babe (>=0.0.7,<0.0.8)
-Requires-Dist: datasets (>=
+Requires-Dist: datasets (>=3.1.0)
 Requires-Dist: mamba-lens (>=0.0.4,<0.0.5) ; extra == "mamba"
-Requires-Dist: matplotlib (>=3.8.3,<4.0.0)
-Requires-Dist: matplotlib-inline (>=0.1.6,<0.2.0)
 Requires-Dist: nltk (>=3.8.1,<4.0.0)
-Requires-Dist: plotly (>=5.19.0
-Requires-Dist: plotly-express (>=0.4.1
-Requires-Dist:
-Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+Requires-Dist: plotly (>=5.19.0)
+Requires-Dist: plotly-express (>=0.4.1)
+Requires-Dist: python-dotenv (>=1.0.1)
 Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
-Requires-Dist:
-Requires-Dist: safetensors (>=0.4.2,<0.5.0)
+Requires-Dist: safetensors (>=0.4.2,<1.0.0)
 Requires-Dist: simple-parsing (>=0.1.6,<0.2.0)
-Requires-Dist:
+Requires-Dist: tenacity (>=9.0.0)
+Requires-Dist: transformer-lens (>=2.16.1,<3.0.0)
 Requires-Dist: transformers (>=4.38.1,<5.0.0)
-Requires-Dist: typer (>=0.12.3,<0.13.0)
 Requires-Dist: typing-extensions (>=4.10.0,<5.0.0)
-
-Project-URL:
-Project-URL: Repository, https://github.com/jbloomAus/SAELens
+Project-URL: Homepage, https://decoderesearch.github.io/SAELens
+Project-URL: Repository, https://github.com/decoderesearch/SAELens
 Description-Content-Type: text/markdown
 
-<img width="1308"
+<img width="1308" height="532" alt="saes_pic" src="https://github.com/user-attachments/assets/2a5d752f-b261-4ee4-ad5d-ebf282321371" />
 
 # SAE Lens
 
 [](https://pypi.org/project/sae-lens/)
 [](https://opensource.org/licenses/MIT)
-[](https://github.com/decoderesearch/SAELens/actions/workflows/build.yml)
+[](https://github.com/decoderesearch/SAELens/actions/workflows/deploy_docs.yml)
+[](https://codecov.io/gh/decoderesearch/SAELens)
 
 SAELens exists to help researchers:
 
@@ -54,7 +50,7 @@ SAELens exists to help researchers:
 - Analyse sparse autoencoders / research mechanistic interpretability.
 - Generate insights which make it easier to create safe and aligned AI systems.
 
-Please refer to the [documentation](https://
+Please refer to the [documentation](https://decoderesearch.github.io/SAELens/) for information on how to:
 
 - Download and Analyse pre-trained sparse autoencoders.
 - Train your own sparse autoencoders.
@@ -62,25 +58,29 @@ Please refer to the [documentation](https://jbloomaus.github.io/SAELens/) for in
 
 SAE Lens is the result of many contributors working collectively to improve humanity's understanding of neural networks, many of whom are motivated by a desire to [safeguard humanity from risks posed by artificial intelligence](https://80000hours.org/problem-profiles/artificial-intelligence/).
 
-This library is maintained by [Joseph Bloom](https://www.
+This library is maintained by [Joseph Bloom](https://www.decoderesearch.com/), [Curt Tigges](https://curttigges.com/), [Anthony Duong](https://github.com/anthonyduong9) and [David Chanin](https://github.com/chanind).
 
 ## Loading Pre-trained SAEs.
 
-Pre-trained SAEs for various models can be imported via SAE Lens. See this [page](https://
+Pre-trained SAEs for various models can be imported via SAE Lens. See this [page](https://decoderesearch.github.io/SAELens/pretrained_saes/) for a list of all SAEs.
+
+## Migrating to SAELens v6
+
+The new v6 update is a major refactor to SAELens and changes the way training code is structured. Check out the [migration guide](https://decoderesearch.github.io/SAELens/latest/migrating/) for more details.
 
 ## Tutorials
 
-- [SAE Lens + Neuronpedia](tutorials/tutorial_2_0.ipynb)[](https://githubtocolab.com/
+- [SAE Lens + Neuronpedia](tutorials/tutorial_2_0.ipynb)[](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/tutorial_2_0.ipynb)
 - [Loading and Analysing Pre-Trained Sparse Autoencoders](tutorials/basic_loading_and_analysing.ipynb)
-  [](https://githubtocolab.com/
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/basic_loading_and_analysing.ipynb)
 - [Understanding SAE Features with the Logit Lens](tutorials/logits_lens_with_features.ipynb)
-  [](https://githubtocolab.com/
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/logits_lens_with_features.ipynb)
 - [Training a Sparse Autoencoder](tutorials/training_a_sparse_autoencoder.ipynb)
-  [](https://githubtocolab.com/
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/training_a_sparse_autoencoder.ipynb)
 
 ## Join the Slack!
 
-Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-
+Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-375zalm04-GFd5tdBU1yLKlu_T_JSqZQ) for support!
 
 ## Citation
 
@@ -89,9 +89,9 @@ Please cite the package as follows:
 ```
 @misc{bloom2024saetrainingcodebase,
     title = {SAELens},
-    author = {Joseph
+    author = {Bloom, Joseph and Tigges, Curt and Duong, Anthony and Chanin, David},
     year = {2024},
-    howpublished = {\url{https://github.com/
+    howpublished = {\url{https://github.com/decoderesearch/SAELens}},
 }
 ```
 
````
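Both the PKG-INFO long description above and README.md below point users at the pretrained SAEs directory and the new v6 migration guide. For orientation, a minimal loading sketch, assuming the v6 convention that `SAE.from_pretrained` returns the SAE itself (in 5.x it returned a `(sae, cfg_dict, sparsity)` tuple); the release and SAE id are illustrative directory entries, not prescriptions:

```python
from sae_lens import SAE

sae = SAE.from_pretrained(
    release="gpt2-small-res-jb",
    sae_id="blocks.8.hook_resid_pre",
)

# v6 nests hook information under cfg.metadata (see the
# hooked_sae_transformer.py hunks later in this diff).
print(sae.cfg.metadata.hook_name)
```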
{sae_lens-5.7.1 → sae_lens-6.25.1}/README.md

````diff
@@ -1,12 +1,12 @@
-<img width="1308"
+<img width="1308" height="532" alt="saes_pic" src="https://github.com/user-attachments/assets/2a5d752f-b261-4ee4-ad5d-ebf282321371" />
 
 # SAE Lens
 
 [](https://pypi.org/project/sae-lens/)
 [](https://opensource.org/licenses/MIT)
-[](https://github.com/decoderesearch/SAELens/actions/workflows/build.yml)
+[](https://github.com/decoderesearch/SAELens/actions/workflows/deploy_docs.yml)
+[](https://codecov.io/gh/decoderesearch/SAELens)
 
 SAELens exists to help researchers:
 
@@ -14,7 +14,7 @@ SAELens exists to help researchers:
 - Analyse sparse autoencoders / research mechanistic interpretability.
 - Generate insights which make it easier to create safe and aligned AI systems.
 
-Please refer to the [documentation](https://
+Please refer to the [documentation](https://decoderesearch.github.io/SAELens/) for information on how to:
 
 - Download and Analyse pre-trained sparse autoencoders.
 - Train your own sparse autoencoders.
@@ -22,25 +22,29 @@ Please refer to the [documentation](https://jbloomaus.github.io/SAELens/) for in
 
 SAE Lens is the result of many contributors working collectively to improve humanity's understanding of neural networks, many of whom are motivated by a desire to [safeguard humanity from risks posed by artificial intelligence](https://80000hours.org/problem-profiles/artificial-intelligence/).
 
-This library is maintained by [Joseph Bloom](https://www.
+This library is maintained by [Joseph Bloom](https://www.decoderesearch.com/), [Curt Tigges](https://curttigges.com/), [Anthony Duong](https://github.com/anthonyduong9) and [David Chanin](https://github.com/chanind).
 
 ## Loading Pre-trained SAEs.
 
-Pre-trained SAEs for various models can be imported via SAE Lens. See this [page](https://
+Pre-trained SAEs for various models can be imported via SAE Lens. See this [page](https://decoderesearch.github.io/SAELens/pretrained_saes/) for a list of all SAEs.
+
+## Migrating to SAELens v6
+
+The new v6 update is a major refactor to SAELens and changes the way training code is structured. Check out the [migration guide](https://decoderesearch.github.io/SAELens/latest/migrating/) for more details.
 
 ## Tutorials
 
-- [SAE Lens + Neuronpedia](tutorials/tutorial_2_0.ipynb)[](https://githubtocolab.com/
+- [SAE Lens + Neuronpedia](tutorials/tutorial_2_0.ipynb)[](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/tutorial_2_0.ipynb)
 - [Loading and Analysing Pre-Trained Sparse Autoencoders](tutorials/basic_loading_and_analysing.ipynb)
-  [](https://githubtocolab.com/
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/basic_loading_and_analysing.ipynb)
 - [Understanding SAE Features with the Logit Lens](tutorials/logits_lens_with_features.ipynb)
-  [](https://githubtocolab.com/
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/logits_lens_with_features.ipynb)
 - [Training a Sparse Autoencoder](tutorials/training_a_sparse_autoencoder.ipynb)
-  [](https://githubtocolab.com/
+  [](https://githubtocolab.com/decoderesearch/SAELens/blob/main/tutorials/training_a_sparse_autoencoder.ipynb)
 
 ## Join the Slack!
 
-Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-
+Feel free to join the [Open Source Mechanistic Interpretability Slack](https://join.slack.com/t/opensourcemechanistic/shared_invite/zt-375zalm04-GFd5tdBU1yLKlu_T_JSqZQ) for support!
 
 ## Citation
 
@@ -49,8 +53,8 @@ Please cite the package as follows:
 ```
 @misc{bloom2024saetrainingcodebase,
     title = {SAELens},
-    author = {Joseph
+    author = {Bloom, Joseph and Tigges, Curt and Duong, Anthony and Chanin, David},
     year = {2024},
-    howpublished = {\url{https://github.com/
+    howpublished = {\url{https://github.com/decoderesearch/SAELens}},
 }
 ```
````
{sae_lens-5.7.1 → sae_lens-6.25.1}/pyproject.toml

````diff
@@ -1,13 +1,13 @@
 [tool.poetry]
 name = "sae-lens"
-version = "
+version = "6.25.1"
 description = "Training and Analyzing Sparse Autoencoders (SAEs)"
 authors = ["Joseph Bloom"]
 readme = "README.md"
 packages = [{ include = "sae_lens" }]
 include = ["pretrained_saes.yaml"]
-repository = "https://github.com/
-homepage = "https://
+repository = "https://github.com/decoderesearch/SAELens"
+homepage = "https://decoderesearch.github.io/SAELens"
 license = "MIT"
 keywords = [
     "deep-learning",
@@ -19,26 +19,20 @@ classifiers = ["Topic :: Scientific/Engineering :: Artificial Intelligence"]
 
 [tool.poetry.dependencies]
 python = "^3.10"
-transformer-lens = "^2.
+transformer-lens = "^2.16.1"
 transformers = "^4.38.1"
-plotly = "
-plotly-express = "
-matplotlib = "^3.8.3"
-matplotlib-inline = "^0.1.6"
-datasets = "^2.17.1"
+plotly = ">=5.19.0"
+plotly-express = ">=0.4.1"
+datasets = ">=3.1.0"
 babe = "^0.0.7"
 nltk = "^3.8.1"
-safetensors = "
-typer = "^0.12.3"
+safetensors = ">=0.4.2,<1.0.0"
 mamba-lens = { version = "^0.0.4", optional = true }
-
-automated-interpretability = ">=0.0.5,<1.0.0"
-python-dotenv = "^1.0.1"
+python-dotenv = ">=1.0.1"
 pyyaml = "^6.0.1"
-pytest-profiling = "^1.7.0"
-zstandard = "^0.22.0"
 typing-extensions = "^4.10.0"
 simple-parsing = "^0.1.6"
+tenacity = ">=9.0.0"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.0.2"
@@ -52,13 +46,17 @@ boto3 = "^1.34.101"
 docstr-coverage = "^2.3.2"
 mkdocs = "^1.6.1"
 mkdocs-material = "^9.5.34"
-mkdocs-autorefs = "^1.
+mkdocs-autorefs = "^1.4.2"
+mkdocs-redirects = "^1.2.1"
 mkdocs-section-index = "^0.3.9"
 mkdocstrings = "^0.25.2"
 mkdocstrings-python = "^1.10.9"
 tabulate = "^0.9.0"
 ruff = "^0.7.4"
-sparsify =
+eai-sparsify = "^1.1.1"
+mike = "^2.0.0"
+trio = "^0.30.0"
+dictionary-learning = "^0.1.0"
 
 [tool.poetry.extras]
 mamba = ["mamba-lens"]
@@ -69,8 +67,9 @@ ignore = ["E203", "E501", "E731", "F722", "E741", "F821", "F403", "ARG002"]
 select = ["UP", "TID", "I", "F", "E", "ARG", "SIM", "RET", "LOG", "T20"]
 
 [tool.ruff.lint.per-file-ignores]
-"benchmark/*" = ["T20"]
+"benchmark/*" = ["T20", "TID251"]
 "scripts/*" = ["T20"]
+"tests/*" = ["TID251"]
 
 [tool.ruff.lint.flake8-tidy-imports.banned-api]
 "typing.Union".msg = "Use `|` instead"
@@ -78,6 +77,7 @@ select = ["UP", "TID", "I", "F", "E", "ARG", "SIM", "RET", "LOG", "T20"]
 "typing.Dict".msg = "Use `dict` instead"
 "typing.Tuple".msg = "Use `tuple` instead"
 "typing.List".msg = "Use `list` instead"
+"tests".msg = "Do not import from tests in the main codebase."
 
 [tool.pyright]
 typeCheckingMode = "strict"
@@ -102,5 +102,5 @@ build-backend = "poetry.core.masonry.api"
 [tool.semantic_release]
 version_variables = ["sae_lens/__init__.py:__version__"]
 version_toml = ["pyproject.toml:tool.poetry.version"]
-branch = "main"
 build_command = "pip install poetry && poetry build"
+branches = { main = { match = "main" }, alpha = { match = "alpha", prerelease = true }, beta = { match = "beta", prerelease = true } }
````
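A recurring pattern in the dependency hunks above is loosened version pins: caret constraints like `python-dotenv = "^1.0.1"` (which Poetry expands to `>=1.0.1,<2.0.0`) become bare lower bounds like `>=1.0.1` that also admit future major versions. A small illustration of the difference, using the third-party `packaging` library (not a dependency added by this diff):

```python
from packaging.specifiers import SpecifierSet

caret = SpecifierSet(">=1.0.1,<2.0.0")  # what Poetry's "^1.0.1" expands to
loosened = SpecifierSet(">=1.0.1")      # the bare bound used in 6.25.1

# A hypothetical future major release is excluded by the caret pin
# but accepted once the upper bound is dropped.
print("2.3.0" in caret)     # False
print("2.3.0" in loosened)  # True
```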
sae_lens-6.25.1/sae_lens/__init__.py (new file)

````diff
@@ -0,0 +1,141 @@
+# ruff: noqa: E402
+__version__ = "6.25.1"
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+from sae_lens.saes import (
+    SAE,
+    BatchTopKTrainingSAE,
+    BatchTopKTrainingSAEConfig,
+    GatedSAE,
+    GatedSAEConfig,
+    GatedTrainingSAE,
+    GatedTrainingSAEConfig,
+    JumpReLUSAE,
+    JumpReLUSAEConfig,
+    JumpReLUSkipTranscoder,
+    JumpReLUSkipTranscoderConfig,
+    JumpReLUTrainingSAE,
+    JumpReLUTrainingSAEConfig,
+    JumpReLUTranscoder,
+    JumpReLUTranscoderConfig,
+    MatryoshkaBatchTopKTrainingSAE,
+    MatryoshkaBatchTopKTrainingSAEConfig,
+    SAEConfig,
+    SkipTranscoder,
+    SkipTranscoderConfig,
+    StandardSAE,
+    StandardSAEConfig,
+    StandardTrainingSAE,
+    StandardTrainingSAEConfig,
+    TemporalSAE,
+    TemporalSAEConfig,
+    TopKSAE,
+    TopKSAEConfig,
+    TopKTrainingSAE,
+    TopKTrainingSAEConfig,
+    TrainingSAE,
+    TrainingSAEConfig,
+    Transcoder,
+    TranscoderConfig,
+)
+
+from .analysis.hooked_sae_transformer import HookedSAETransformer
+from .cache_activations_runner import CacheActivationsRunner
+from .config import (
+    CacheActivationsRunnerConfig,
+    LanguageModelSAERunnerConfig,
+    LoggingConfig,
+    PretokenizeRunnerConfig,
+)
+from .evals import run_evals
+from .llm_sae_training_runner import LanguageModelSAETrainingRunner, SAETrainingRunner
+from .loading.pretrained_sae_loaders import (
+    PretrainedSaeDiskLoader,
+    PretrainedSaeHuggingfaceLoader,
+)
+from .pretokenize_runner import PretokenizeRunner, pretokenize_runner
+from .registry import register_sae_class, register_sae_training_class
+from .training.activations_store import ActivationsStore
+from .training.upload_saes_to_huggingface import upload_saes_to_huggingface
+
+__all__ = [
+    "SAE",
+    "SAEConfig",
+    "TrainingSAE",
+    "TrainingSAEConfig",
+    "HookedSAETransformer",
+    "ActivationsStore",
+    "LanguageModelSAERunnerConfig",
+    "LanguageModelSAETrainingRunner",
+    "CacheActivationsRunnerConfig",
+    "CacheActivationsRunner",
+    "PretokenizeRunnerConfig",
+    "PretokenizeRunner",
+    "pretokenize_runner",
+    "run_evals",
+    "upload_saes_to_huggingface",
+    "PretrainedSaeHuggingfaceLoader",
+    "PretrainedSaeDiskLoader",
+    "register_sae_class",
+    "register_sae_training_class",
+    "StandardSAE",
+    "StandardSAEConfig",
+    "StandardTrainingSAE",
+    "StandardTrainingSAEConfig",
+    "GatedSAE",
+    "GatedSAEConfig",
+    "GatedTrainingSAE",
+    "GatedTrainingSAEConfig",
+    "TopKSAE",
+    "TopKSAEConfig",
+    "TopKTrainingSAE",
+    "TopKTrainingSAEConfig",
+    "JumpReLUSAE",
+    "JumpReLUSAEConfig",
+    "JumpReLUTrainingSAE",
+    "JumpReLUTrainingSAEConfig",
+    "SAETrainingRunner",
+    "LoggingConfig",
+    "BatchTopKTrainingSAE",
+    "BatchTopKTrainingSAEConfig",
+    "Transcoder",
+    "TranscoderConfig",
+    "SkipTranscoder",
+    "SkipTranscoderConfig",
+    "JumpReLUTranscoder",
+    "JumpReLUTranscoderConfig",
+    "JumpReLUSkipTranscoder",
+    "JumpReLUSkipTranscoderConfig",
+    "MatryoshkaBatchTopKTrainingSAE",
+    "MatryoshkaBatchTopKTrainingSAEConfig",
+    "TemporalSAE",
+    "TemporalSAEConfig",
+]
+
+
+register_sae_class("standard", StandardSAE, StandardSAEConfig)
+register_sae_training_class("standard", StandardTrainingSAE, StandardTrainingSAEConfig)
+register_sae_class("gated", GatedSAE, GatedSAEConfig)
+register_sae_training_class("gated", GatedTrainingSAE, GatedTrainingSAEConfig)
+register_sae_class("topk", TopKSAE, TopKSAEConfig)
+register_sae_training_class("topk", TopKTrainingSAE, TopKTrainingSAEConfig)
+register_sae_class("jumprelu", JumpReLUSAE, JumpReLUSAEConfig)
+register_sae_training_class("jumprelu", JumpReLUTrainingSAE, JumpReLUTrainingSAEConfig)
+register_sae_training_class(
+    "batchtopk", BatchTopKTrainingSAE, BatchTopKTrainingSAEConfig
+)
+register_sae_training_class(
+    "matryoshka_batchtopk",
+    MatryoshkaBatchTopKTrainingSAE,
+    MatryoshkaBatchTopKTrainingSAEConfig,
+)
+register_sae_class("transcoder", Transcoder, TranscoderConfig)
+register_sae_class("skip_transcoder", SkipTranscoder, SkipTranscoderConfig)
+register_sae_class("jumprelu_transcoder", JumpReLUTranscoder, JumpReLUTranscoderConfig)
+register_sae_class(
+    "jumprelu_skip_transcoder", JumpReLUSkipTranscoder, JumpReLUSkipTranscoderConfig
+)
+register_sae_class("temporal", TemporalSAE, TemporalSAEConfig)
````
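The new `__init__.py` ends by registering every built-in architecture under a string key via `register_sae_class` / `register_sae_training_class`, and both functions are exported. A hedged sketch of what registering a custom architecture might look like, where `MyCustomSAE` and `MyCustomSAEConfig` are hypothetical names and the abstract encode/decode methods are elided:

```python
from dataclasses import dataclass

from sae_lens import SAE, SAEConfig, register_sae_class


@dataclass
class MyCustomSAEConfig(SAEConfig):  # hypothetical config subclass
    ...


class MyCustomSAE(SAE[MyCustomSAEConfig]):  # hypothetical SAE subclass
    ...  # encode/decode/forward implementations elided


# Mirrors the built-in calls above: configs whose architecture field is
# "my_custom" would resolve to these classes.
register_sae_class("my_custom", MyCustomSAE, MyCustomSAEConfig)
```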
{sae_lens-5.7.1 → sae_lens-6.25.1}/sae_lens/analysis/hooked_sae_transformer.py

````diff
@@ -3,15 +3,15 @@ from contextlib import contextmanager
 from typing import Any, Callable
 
 import torch
-from jaxtyping import Float
 from transformer_lens.ActivationCache import ActivationCache
+from transformer_lens.components.mlps.can_be_used_as_mlp import CanBeUsedAsMLP
 from transformer_lens.hook_points import HookPoint  # Hooking utilities
 from transformer_lens.HookedTransformer import HookedTransformer
 
-from sae_lens.sae import SAE
+from sae_lens.saes.sae import SAE
 
-SingleLoss =
-LossPerToken =
+SingleLoss = torch.Tensor  # Type alias for a single element tensor
+LossPerToken = torch.Tensor
 Loss = SingleLoss | LossPerToken
 
 
@@ -50,6 +50,13 @@ def set_deep_attr(obj: Any, path: str, value: Any):
     setattr(obj, parts[-1], value)
 
 
+def add_hook_in_to_mlp(mlp: CanBeUsedAsMLP):
+    # Temporary hack to add a `mlp.hook_in` hook to mimic what's in circuit-tracer
+    mlp.hook_in = HookPoint()
+    original_forward = mlp.forward
+    mlp.forward = lambda x: original_forward(mlp.hook_in(x))  # type: ignore
+
+
 class HookedSAETransformer(HookedTransformer):
     def __init__(
         self,
@@ -66,9 +73,14 @@ class HookedSAETransformer(HookedTransformer):
             **model_kwargs: Keyword arguments for HookedTransformer initialization
         """
         super().__init__(*model_args, **model_kwargs)
+
+        for block in self.blocks:
+            add_hook_in_to_mlp(block.mlp)  # type: ignore
+        self.setup()
+
         self.acts_to_saes: dict[str, SAE] = {}  # type: ignore
 
-    def add_sae(self, sae: SAE, use_error_term: bool | None = None):
+    def add_sae(self, sae: SAE[Any], use_error_term: bool | None = None):
         """Attaches an SAE to the model
 
         WARNING: This sae will be permanantly attached until you remove it with reset_saes. This function will also overwrite any existing SAE attached to the same hook point.
@@ -77,7 +89,7 @@ class HookedSAETransformer(HookedTransformer):
             sae: SparseAutoencoderBase. The SAE to attach to the model
             use_error_term: (bool | None) If provided, will set the use_error_term attribute of the SAE to this value. Determines whether the SAE returns input or reconstruction. Defaults to None.
         """
-        act_name = sae.cfg.hook_name
+        act_name = sae.cfg.metadata.hook_name
         if (act_name not in self.acts_to_saes) and (act_name not in self.hook_dict):
             logging.warning(
                 f"No hook found for {act_name}. Skipping. Check model.hook_dict for available hooks."
@@ -92,7 +104,7 @@ class HookedSAETransformer(HookedTransformer):
         set_deep_attr(self, act_name, sae)
         self.setup()
 
-    def _reset_sae(self, act_name: str, prev_sae: SAE | None = None):
+    def _reset_sae(self, act_name: str, prev_sae: SAE[Any] | None = None):
         """Resets an SAE that was attached to the model
 
         By default will remove the SAE from that hook_point.
@@ -124,7 +136,7 @@ class HookedSAETransformer(HookedTransformer):
     def reset_saes(
         self,
         act_names: str | list[str] | None = None,
-        prev_saes: list[SAE | None] | None = None,
+        prev_saes: list[SAE[Any] | None] | None = None,
     ):
         """Reset the SAEs attached to the model
 
@@ -154,16 +166,11 @@ class HookedSAETransformer(HookedTransformer):
     def run_with_saes(
         self,
         *model_args: Any,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         **model_kwargs: Any,
-    ) ->
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss]
-    ):
+    ) -> None | torch.Tensor | Loss | tuple[torch.Tensor, Loss]:
         """Wrapper around HookedTransformer forward pass.
 
         Runs the model with the given SAEs attached for one forward pass, then removes them. By default, will reset all SAEs to original state after.
@@ -183,17 +190,14 @@ class HookedSAETransformer(HookedTransformer):
     def run_with_cache_with_saes(
         self,
         *model_args: Any,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
         return_cache_object: bool = True,
         remove_batch_dim: bool = False,
         **kwargs: Any,
     ) -> tuple[
-        None
-        | Float[torch.Tensor, "batch pos d_vocab"]
-        | Loss
-        | tuple[Float[torch.Tensor, "batch pos d_vocab"], Loss],
+        None | torch.Tensor | Loss | tuple[torch.Tensor, Loss],
         ActivationCache | dict[str, torch.Tensor],
     ]:
         """Wrapper around 'run_with_cache' in HookedTransformer.
@@ -225,7 +229,7 @@ class HookedSAETransformer(HookedTransformer):
     def run_with_hooks_with_saes(
         self,
         *model_args: Any,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         fwd_hooks: list[tuple[str | Callable, Callable]] = [],  # type: ignore
         bwd_hooks: list[tuple[str | Callable, Callable]] = [],  # type: ignore
@@ -261,7 +265,7 @@ class HookedSAETransformer(HookedTransformer):
     @contextmanager
     def saes(
         self,
-        saes: SAE | list[SAE] = [],
+        saes: SAE[Any] | list[SAE[Any]] = [],
         reset_saes_end: bool = True,
         use_error_term: bool | None = None,
     ):
@@ -275,7 +279,7 @@ class HookedSAETransformer(HookedTransformer):
         .. code-block:: python
 
             from transformer_lens import HookedSAETransformer
-            from sae_lens.sae import SAE
+            from sae_lens.saes.sae import SAE
 
             model = HookedSAETransformer.from_pretrained('gpt2-small')
             sae_cfg = SAEConfig(...)
@@ -295,8 +299,8 @@ class HookedSAETransformer(HookedTransformer):
             saes = [saes]
         try:
             for sae in saes:
-                act_names_to_reset.append(sae.cfg.hook_name)
-                prev_sae = self.acts_to_saes.get(sae.cfg.hook_name, None)
+                act_names_to_reset.append(sae.cfg.metadata.hook_name)
+                prev_sae = self.acts_to_saes.get(sae.cfg.metadata.hook_name, None)
                 prev_saes.append(prev_sae)
                 self.add_sae(sae, use_error_term=use_error_term)
             yield self
````
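Taken together, these hunks change two things a downstream user sees: `SAE` is now generic (`SAE[Any]`), and the attach point moved from `sae.cfg.hook_name` to `sae.cfg.metadata.hook_name`. A minimal end-to-end sketch under those assumptions; the model name, release, and prompt are illustrative:

```python
from sae_lens import SAE, HookedSAETransformer

model = HookedSAETransformer.from_pretrained("gpt2-small")
sae = SAE.from_pretrained("gpt2-small-res-jb", "blocks.8.hook_resid_pre")

# v6: the attach point lives under cfg.metadata (v5 used sae.cfg.hook_name).
print(sae.cfg.metadata.hook_name)

# One forward pass with the SAE spliced in, then it is detached again.
logits = model.run_with_saes("The cat sat on the mat", saes=[sae])

# Or keep it attached across several calls and reset on exit.
with model.saes(saes=[sae]):
    logits = model("The cat sat on the mat")
```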