mi-crow 1.0.0__tar.gz → 1.0.0.post2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mi_crow-1.0.0.post2/.env.example +9 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/.github/workflows/tests.yml +1 -1
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/.gitignore +41 -1
- mi_crow-1.0.0.post2/.uv-bin/uv +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/PKG-INFO +191 -5
- mi_crow-1.0.0.post2/README.md +268 -0
- mi_crow-1.0.0.post2/coverage.json +1 -0
- mi_crow-1.0.0.post2/docs/api/datasets.md +56 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/best-practices.md +8 -4
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/core-concepts.md +10 -4
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/hooks/advanced.md +5 -5
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/hooks/controllers.md +3 -3
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/hooks/detectors.md +6 -6
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/hooks/fundamentals.md +6 -6
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/hooks/index.md +1 -1
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/hooks/registration.md +8 -8
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/installation.md +7 -1
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/quickstart.md +15 -6
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/troubleshooting.md +24 -6
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/workflows/activation-control.md +7 -7
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/workflows/concept-discovery.md +3 -3
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/workflows/concept-manipulation.md +14 -14
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/workflows/saving-activations.md +38 -1
- mi_crow-1.0.0.post2/docs/index.md +223 -0
- mi_crow-1.0.0.post2/docs/logo.svg +223 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/mkdocs.yml +16 -2
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/pyproject.toml +28 -3
- mi_crow-1.0.0.post2/slurm/prepare_datasets.sh +63 -0
- mi_crow-1.0.0.post2/slurm/run_baseline_guards_cpu.sh +56 -0
- mi_crow-1.0.0.post2/slurm/run_baseline_guards_gpu.sh +57 -0
- mi_crow-1.0.0.post2/slurm/run_debug_bielik_padding_cpu.sh +53 -0
- mi_crow-1.0.0.post2/slurm/run_debug_llama_chat_template_cpu.sh +49 -0
- mi_crow-1.0.0.post2/slurm/run_direct_prompting.sh +105 -0
- mi_crow-1.0.0.post2/slurm/run_direct_prompting_cpu.sh +103 -0
- mi_crow-1.0.0.post2/slurm/save_activations.sh +73 -0
- mi_crow-1.0.0.post2/slurm/test_activation_saving.sh +29 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507262.err +1 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507262.out +3 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507265.err +1 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507265.out +15 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507266.err +67 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507266.out +12 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507271.err +8 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507271.out +13 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507272.err +59 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507272.out +13 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507277.err +20 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507277.out +6 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507283.err +114 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507283.out +6 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507284.err +647 -0
- mi_crow-1.0.0.post2/slurm-logs/sae_save_activations-1507284.out +7 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/datasets/base_dataset.py +71 -1
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/datasets/classification_dataset.py +136 -30
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/datasets/text_dataset.py +165 -24
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/controller.py +12 -7
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/implementations/layer_activation_detector.py +30 -34
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/implementations/model_input_detector.py +87 -87
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/implementations/model_output_detector.py +43 -42
- mi_crow-1.0.0.post2/src/mi_crow/hooks/utils.py +150 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/activations.py +174 -77
- mi_crow-1.0.0.post2/src/mi_crow/language_model/device_manager.py +119 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/inference.py +18 -5
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/initialization.py +10 -6
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/language_model.py +67 -97
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/layers.py +16 -13
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/persistence.py +4 -2
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/utils.py +5 -5
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/concepts/autoencoder_concepts.py +157 -95
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/concepts/concept_dictionary.py +12 -2
- mi_crow-1.0.0.post2/src/mi_crow/mechanistic/sae/concepts/text_heap.py +161 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/modules/topk_sae.py +29 -22
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/sae.py +3 -1
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/sae_trainer.py +362 -29
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/store/local_store.py +11 -5
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/store/store.py +34 -1
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow.egg-info/PKG-INFO +191 -5
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow.egg-info/SOURCES.txt +31 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow.egg-info/requires.txt +1 -0
- mi_crow-1.0.0.post2/uv.lock +5552 -0
- mi_crow-1.0.0/README.md +0 -86
- mi_crow-1.0.0/docs/api/datasets.md +0 -6
- mi_crow-1.0.0/docs/index.md +0 -105
- mi_crow-1.0.0/src/mi_crow/hooks/utils.py +0 -76
- mi_crow-1.0.0/uv.lock +0 -5149
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/.github/workflows/docs.yml +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/.github/workflows/publish.yml +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/.pre-commit-config.yaml +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/.python-version +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/MANIFEST.in +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/api/hooks.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/api/index.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/api/language_model.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/api/sae.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/api/store.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/api.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/experiments/index.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/experiments/slurm-pipeline.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/experiments/verify-sae-training.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/examples.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/index.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/workflows/index.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/docs/guide/workflows/training-sae.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/raport_implementacji.md +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/setup.cfg +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/404.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/api/datasets/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/api/hooks/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/api/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/api/language_model/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/api/sae/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/api/store/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/_mkdocstrings.css +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/images/favicon.png +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/bundle.e71a0d61.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/bundle.e71a0d61.min.js.map +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.ar.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.da.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.de.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.du.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.el.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.es.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.fi.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.fr.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.he.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.hi.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.hu.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.hy.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.it.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.ja.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.jp.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.kn.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.ko.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.multi.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.nl.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.no.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.pt.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.ro.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.ru.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.sa.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.sv.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.ta.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.te.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.th.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.tr.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.vi.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/min/lunr.zh.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/tinyseg.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/lunr/wordcut.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/workers/search.7a47a382.min.js +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/javascripts/workers/search.7a47a382.min.js.map +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/stylesheets/main.618322db.min.css +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/stylesheets/main.618322db.min.css.map +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/stylesheets/palette.ab4e12ef.min.css +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/assets/stylesheets/palette.ab4e12ef.min.css.map +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/index.html +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/objects.inv +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/search/search_index.json +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/sitemap.xml +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/site/sitemap.xml.gz +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/datasets/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/datasets/loading_strategy.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/detector.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/hook.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/implementations/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/hooks/implementations/function_controller.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/context.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/contracts.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/hook_metadata.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/language_model/tokenizer.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/autoencoder_context.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/concepts/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/concepts/concept_models.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/concepts/input_tracker.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/modules/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/modules/l1_sae.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/training/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/mechanistic/sae/training/wandb_logger.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/store/__init__.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/store/store_dataloader.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow/utils.py +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow.egg-info/dependency_links.txt +0 -0
- {mi_crow-1.0.0 → mi_crow-1.0.0.post2}/src/mi_crow.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Copy this file to .env and fill in your actual values
|
|
2
|
+
|
|
3
|
+
# Wandb API Key for experiment tracking
|
|
4
|
+
# Get your API key from https://wandb.ai/settings
|
|
5
|
+
SERVER_WANDB_API_KEY=your_wandb_api_key_here
|
|
6
|
+
|
|
7
|
+
# Default Wandb project name (used if not specified in training config)
|
|
8
|
+
# Can also be set via WANDB_PROJECT environment variable
|
|
9
|
+
SERVER_WANDB_PROJECT=sae-training
|
|
@@ -92,15 +92,29 @@ wandb
|
|
|
92
92
|
outputs
|
|
93
93
|
/store/
|
|
94
94
|
/examples/store/
|
|
95
|
+
/experiments/store/
|
|
95
96
|
/playground/store/
|
|
96
97
|
/tests/unit/store
|
|
97
98
|
/tests/store
|
|
98
99
|
|
|
99
|
-
#
|
|
100
|
+
# Github instructions
|
|
101
|
+
.github/instructions/
|
|
102
|
+
|
|
103
|
+
# Experiments
|
|
100
104
|
.llm_context/
|
|
105
|
+
.local_results/
|
|
101
106
|
results/
|
|
102
107
|
# Exclude data store directories but NOT src/mi_crow/store/ (package code)
|
|
103
108
|
/experiments/*/store/
|
|
109
|
+
/experiments/store/
|
|
110
|
+
|
|
111
|
+
.local_results/
|
|
112
|
+
.local_activations/
|
|
113
|
+
# All csv files from experiments/pl_dataset_curation folder
|
|
114
|
+
/experiments/pl_dataset_curation/*.csv
|
|
115
|
+
/experiments/pl_dataset_curation/*.md
|
|
116
|
+
# Dataset CSV files
|
|
117
|
+
data/
|
|
104
118
|
|
|
105
119
|
# frontend
|
|
106
120
|
src/frontend/node_modules/
|
|
@@ -109,3 +123,29 @@ src/frontend/build/
|
|
|
109
123
|
src/frontend/.next/
|
|
110
124
|
|
|
111
125
|
mi_crow_server
|
|
126
|
+
slurm-logs/
|
|
127
|
+
|
|
128
|
+
# SLURM SAE Pipeline output directories
|
|
129
|
+
experiments/slurm_sae_pipeline/logs/
|
|
130
|
+
experiments/slurm_sae_pipeline/hardware_monitoring_output/
|
|
131
|
+
experiments/slurm_sae_pipeline/inference_results/
|
|
132
|
+
experiments/slurm_sae_pipeline/interactive_naming_results/
|
|
133
|
+
experiments/slurm_sae_pipeline/performance_analysis/
|
|
134
|
+
experiments/slurm_sae_pipeline/results/
|
|
135
|
+
experiments/slurm_sae_pipeline/store/
|
|
136
|
+
experiments/slurm_sae_pipeline/test_inference_output/
|
|
137
|
+
experiments/slurm_sae_pipeline/test_steering_output/
|
|
138
|
+
experiments/slurm_sae_pipeline/test_top_texts/
|
|
139
|
+
|
|
140
|
+
# Generated metrics and reports
|
|
141
|
+
experiments/slurm_sae_pipeline/*_metrics.json
|
|
142
|
+
experiments/slurm_sae_pipeline/dictionaries/**/concepts_report.txt
|
|
143
|
+
experiments/verify_sae_training/*.ipynb
|
|
144
|
+
|
|
145
|
+
# Coverage files
|
|
146
|
+
.coverage.*
|
|
147
|
+
.coverage
|
|
148
|
+
|
|
149
|
+
# uv package manager
|
|
150
|
+
.uv-bin/
|
|
151
|
+
.uv-cache/
|
|
Binary file
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mi-crow
|
|
3
|
-
Version: 1.0.0
|
|
4
|
-
Summary:
|
|
5
|
-
Author
|
|
3
|
+
Version: 1.0.0.post2
|
|
4
|
+
Summary: Python library for mechanistic interpretability research on Large Language Models. Designed for researchers, provides unified interface for SAE training, activation hooks, and concept manipulation.
|
|
5
|
+
Author: Hubert Kowalski
|
|
6
|
+
Author-email: Adam Kaniasty <adam.kaniasty@gmail.com>
|
|
7
|
+
Maintainer: Hubert Kowalski
|
|
8
|
+
Maintainer-email: Adam Kaniasty <adam.kaniasty@gmail.com>
|
|
6
9
|
Requires-Python: >=3.10
|
|
7
10
|
Description-Content-Type: text/markdown
|
|
8
11
|
Requires-Dist: accelerate>=1.10.1
|
|
@@ -19,6 +22,7 @@ Requires-Dist: wandb>=0.22.1
|
|
|
19
22
|
Requires-Dist: pytest>=8.4.2
|
|
20
23
|
Requires-Dist: pytest-xdist>=3.8.0
|
|
21
24
|
Requires-Dist: seaborn>=0.13.2
|
|
25
|
+
Requires-Dist: numpy<2.0,>=1.20.0
|
|
22
26
|
Provides-Extra: dev
|
|
23
27
|
Requires-Dist: pre-commit>=4.3.0; extra == "dev"
|
|
24
28
|
Requires-Dist: ruff>=0.13.2; extra == "dev"
|
|
@@ -36,10 +40,153 @@ Requires-Dist: mkdocs-literate-nav>=0.6; extra == "docs"
|
|
|
36
40
|
Requires-Dist: mkdocs-gen-files>=0.5; extra == "docs"
|
|
37
41
|
Requires-Dist: mike>=2.1; extra == "docs"
|
|
38
42
|
|
|
39
|
-
|
|
43
|
+
<div align="center">
|
|
44
|
+
<img src="docs/logo.svg" alt="Mi-Crow Logo" width="200">
|
|
45
|
+
</div>
|
|
46
|
+
|
|
47
|
+
# Mi-Crow: Mechanistic Interpretability for Large Language Models
|
|
48
|
+
|
|
49
|
+
[](https://github.com/AdamKaniasty/Inzynierka/actions)
|
|
50
|
+
[](https://pypi.org/project/mi-crow/)
|
|
40
51
|
[](https://adamkaniasty.github.io/Inzynierka/)
|
|
41
52
|
|
|
42
|
-
|
|
53
|
+
**Mi-Crow** is a Python library for mechanistic interpretability research on Large Language Models (LLMs). Designed for researchers, it provides a unified interface for analyzing and controlling model behavior through Sparse Autoencoders (SAEs), activation hooks, and concept manipulation.
|
|
54
|
+
|
|
55
|
+
## Features
|
|
56
|
+
|
|
57
|
+
- **Unified Model Interface** - Work with any HuggingFace language model through a consistent API
|
|
58
|
+
- **Sparse Autoencoder Training** - Train SAEs to extract interpretable features from model activations
|
|
59
|
+
- **Hook System** - Intercept and manipulate model activations with minimal performance overhead
|
|
60
|
+
- **Concept Discovery & Manipulation** - Discover and control model behavior through learned concepts
|
|
61
|
+
- **Hierarchical Data Persistence** - Efficient storage and management of large-scale experiment data
|
|
62
|
+
- **Research Focused** - Comprehensive testing (85%+ coverage), extensive documentation, and designed for interpretability research workflows
|
|
63
|
+
|
|
64
|
+
## Installation
|
|
65
|
+
|
|
66
|
+
### Install from PyPI
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install mi-crow
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Install from Source
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
git clone https://github.com/AdamKaniasty/Mi-Crow.git
|
|
76
|
+
cd Mi-Crow
|
|
77
|
+
pip install -e .
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Requirements
|
|
81
|
+
|
|
82
|
+
- **Python 3.12+** (required for modern type hints and features)
|
|
83
|
+
- **PyTorch** - Tensor operations and neural networks
|
|
84
|
+
- **Transformers** - Model loading and tokenization
|
|
85
|
+
- **Accelerate** - Distributed and mixed-precision training
|
|
86
|
+
- **Datasets** - Dataset loading and processing
|
|
87
|
+
- **overcomplete** - SAE implementations
|
|
88
|
+
|
|
89
|
+
## Quick Start
|
|
90
|
+
|
|
91
|
+
### Basic Usage
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from mi_crow.language_model import LanguageModel
|
|
95
|
+
|
|
96
|
+
# Initialize a language model
|
|
97
|
+
lm = LanguageModel(model_id="bielik")
|
|
98
|
+
|
|
99
|
+
# Run inference
|
|
100
|
+
outputs = lm.forwards(["Hello, world!"])
|
|
101
|
+
|
|
102
|
+
# Access activations and outputs
|
|
103
|
+
print(outputs.logits)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Training an SAE
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from mi_crow.language_model import LanguageModel
|
|
110
|
+
from mi_crow.mechanistic.sae import SaeTrainer
|
|
111
|
+
from mi_crow.mechanistic.sae.modules import TopKSae
|
|
112
|
+
|
|
113
|
+
# Load model and collect activations
|
|
114
|
+
lm = LanguageModel(model_id="bielik")
|
|
115
|
+
activations = lm.save_activations(
|
|
116
|
+
dataset=["Your text data here"],
|
|
117
|
+
layers=["transformer_h_0_attn_c_attn"]
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Train SAE
|
|
121
|
+
trainer = SaeTrainer(
|
|
122
|
+
model=lm,
|
|
123
|
+
layer="transformer_h_0_attn_c_attn",
|
|
124
|
+
sae_class=TopKSae,
|
|
125
|
+
hyperparams={"epochs": 10, "batch_size": 256}
|
|
126
|
+
)
|
|
127
|
+
sae = trainer.train(activations)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Concept Manipulation
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
# Load concepts and manipulate model behavior
|
|
134
|
+
concepts = lm.load_concepts(sae_id="your_sae_id")
|
|
135
|
+
concepts.manipulate(neuron_idx=0, scale_factor=1.5)
|
|
136
|
+
|
|
137
|
+
# Run inference with concept manipulation
|
|
138
|
+
outputs = lm.forwards(
|
|
139
|
+
["Your prompt"],
|
|
140
|
+
with_controllers=True,
|
|
141
|
+
concept_config=concepts
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Documentation
|
|
146
|
+
|
|
147
|
+
- **Full Documentation**: [adamkaniasty.github.io/Inzynierka](https://adamkaniasty.github.io/Inzynierka/)
|
|
148
|
+
- **GitHub Repository**: [github.com/AdamKaniasty/Mi-Crow](https://github.com/AdamKaniasty/Mi-Crow/)
|
|
149
|
+
- **Example Notebooks**: See `examples/` directory for Jupyter notebook tutorials
|
|
150
|
+
|
|
151
|
+
## Architecture
|
|
152
|
+
|
|
153
|
+
Mi-Crow follows a modular design with five core components:
|
|
154
|
+
|
|
155
|
+
1. **`language_model/`** - Unified interface for language models
|
|
156
|
+
- Model initialization from HuggingFace Hub or local files
|
|
157
|
+
- Unified inference interface with mixed-precision support
|
|
158
|
+
- Architecture-agnostic layer abstraction
|
|
159
|
+
|
|
160
|
+
2. **`hooks/`** - Flexible hook system for activation interception
|
|
161
|
+
- Detectors for observing activations
|
|
162
|
+
- Controllers for modifying model behavior
|
|
163
|
+
- Support for FORWARD and PRE_FORWARD hooks
|
|
164
|
+
|
|
165
|
+
3. **`mechanistic/`** - SAE training and concept manipulation
|
|
166
|
+
- Sparse Autoencoder training (TopK, L1 variants)
|
|
167
|
+
- Concept dictionary management
|
|
168
|
+
- Concept-based model steering
|
|
169
|
+
|
|
170
|
+
4. **`store/`** - Hierarchical data persistence
|
|
171
|
+
- Efficient tensor storage in safetensors format
|
|
172
|
+
- Batch iteration for large datasets
|
|
173
|
+
- Metadata management
|
|
174
|
+
|
|
175
|
+
5. **`datasets/`** - Dataset loading and processing
|
|
176
|
+
- HuggingFace dataset integration
|
|
177
|
+
- Local file dataset support
|
|
178
|
+
|
|
179
|
+
## Example Workflow
|
|
180
|
+
|
|
181
|
+
See the example notebooks in the `examples/` directory:
|
|
182
|
+
|
|
183
|
+
1. **`01_train_sae_model.ipynb`** - Train an SAE on model activations
|
|
184
|
+
2. **`02_attach_sae_and_save_texts.ipynb`** - Collect top activating texts
|
|
185
|
+
3. **`03_load_concepts.ipynb`** - Load and manipulate concepts
|
|
186
|
+
|
|
187
|
+
## Development
|
|
188
|
+
|
|
189
|
+
### Running Tests
|
|
43
190
|
|
|
44
191
|
The project uses pytest for testing. Tests are organized into unit tests and end-to-end tests.
|
|
45
192
|
|
|
@@ -77,6 +224,45 @@ pytest tests/e2e -q
|
|
|
77
224
|
|
|
78
225
|
The test suite is configured to require at least 85% code coverage. Coverage reports are generated in both terminal and XML formats.
|
|
79
226
|
|
|
227
|
+
The project maintains **85%+ code coverage** requirement.
|
|
228
|
+
|
|
229
|
+
### Code Quality
|
|
230
|
+
|
|
231
|
+
- **Linting**: Ruff for code formatting and linting
|
|
232
|
+
- **Pre-commit Hooks**: Automated quality checks
|
|
233
|
+
- **Type Hints**: Extensive use of Python type annotations
|
|
234
|
+
- **CI/CD**: GitHub Actions for automated testing and deployment
|
|
235
|
+
|
|
236
|
+
## Citation
|
|
237
|
+
|
|
238
|
+
If you use Mi-Crow in your research, please cite:
|
|
239
|
+
|
|
240
|
+
```bibtex
|
|
241
|
+
@thesis{kaniasty2025microw,
|
|
242
|
+
title={Mechanistic Interpretability for Large Language Models: A Production-Ready Framework},
|
|
243
|
+
author={Kaniasty, Adam and Kowalski, Hubert},
|
|
244
|
+
year={2025},
|
|
245
|
+
school={Warsaw University of Technology},
|
|
246
|
+
note={Engineering Thesis}
|
|
247
|
+
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## License
|
|
251
|
+
|
|
252
|
+
See the main repository for license information: [Mi-Crow License](https://github.com/AdamKaniasty/Mi-Crow/)
|
|
253
|
+
|
|
254
|
+
## Contact
|
|
255
|
+
|
|
256
|
+
- **Maintainers**: Adam Kaniasty, Hubert Kowalski
|
|
257
|
+
- **Email**: adam.kaniasty@gmail.com
|
|
258
|
+
- **GitHub**: [@AdamKaniasty](https://github.com/AdamKaniasty)
|
|
259
|
+
|
|
260
|
+
## Acknowledgments
|
|
261
|
+
|
|
262
|
+
This work was developed in collaboration with the **Bielik** team and represents a contribution to the open-source mechanistic interpretability community.
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
80
266
|
## Backend (FastAPI) quickstart
|
|
81
267
|
|
|
82
268
|
Install server-only dependencies (kept out of the core library) with uv:
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="docs/logo.svg" alt="Mi-Crow Logo" width="200">
|
|
3
|
+
</div>
|
|
4
|
+
|
|
5
|
+
# Mi-Crow: Mechanistic Interpretability for Large Language Models
|
|
6
|
+
|
|
7
|
+
[](https://github.com/AdamKaniasty/Inzynierka/actions)
|
|
8
|
+
[](https://pypi.org/project/mi-crow/)
|
|
9
|
+
[](https://adamkaniasty.github.io/Inzynierka/)
|
|
10
|
+
|
|
11
|
+
**Mi-Crow** is a Python library for mechanistic interpretability research on Large Language Models (LLMs). Designed for researchers, it provides a unified interface for analyzing and controlling model behavior through Sparse Autoencoders (SAEs), activation hooks, and concept manipulation.
|
|
12
|
+
|
|
13
|
+
## Features
|
|
14
|
+
|
|
15
|
+
- **Unified Model Interface** - Work with any HuggingFace language model through a consistent API
|
|
16
|
+
- **Sparse Autoencoder Training** - Train SAEs to extract interpretable features from model activations
|
|
17
|
+
- **Hook System** - Intercept and manipulate model activations with minimal performance overhead
|
|
18
|
+
- **Concept Discovery & Manipulation** - Discover and control model behavior through learned concepts
|
|
19
|
+
- **Hierarchical Data Persistence** - Efficient storage and management of large-scale experiment data
|
|
20
|
+
- **Research Focused** - Comprehensive testing (85%+ coverage), extensive documentation, and designed for interpretability research workflows
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
### Install from PyPI
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install mi-crow
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Install from Source
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
git clone https://github.com/AdamKaniasty/Mi-Crow.git
|
|
34
|
+
cd Mi-Crow
|
|
35
|
+
pip install -e .
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Requirements
|
|
39
|
+
|
|
40
|
+
- **Python 3.12+** (required for modern type hints and features)
|
|
41
|
+
- **PyTorch** - Tensor operations and neural networks
|
|
42
|
+
- **Transformers** - Model loading and tokenization
|
|
43
|
+
- **Accelerate** - Distributed and mixed-precision training
|
|
44
|
+
- **Datasets** - Dataset loading and processing
|
|
45
|
+
- **overcomplete** - SAE implementations
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
### Basic Usage
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from mi_crow.language_model import LanguageModel
|
|
53
|
+
|
|
54
|
+
# Initialize a language model
|
|
55
|
+
lm = LanguageModel(model_id="bielik")
|
|
56
|
+
|
|
57
|
+
# Run inference
|
|
58
|
+
outputs = lm.forwards(["Hello, world!"])
|
|
59
|
+
|
|
60
|
+
# Access activations and outputs
|
|
61
|
+
print(outputs.logits)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Training an SAE
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from mi_crow.language_model import LanguageModel
|
|
68
|
+
from mi_crow.mechanistic.sae import SaeTrainer
|
|
69
|
+
from mi_crow.mechanistic.sae.modules import TopKSae
|
|
70
|
+
|
|
71
|
+
# Load model and collect activations
|
|
72
|
+
lm = LanguageModel(model_id="bielik")
|
|
73
|
+
activations = lm.save_activations(
|
|
74
|
+
dataset=["Your text data here"],
|
|
75
|
+
layers=["transformer_h_0_attn_c_attn"]
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Train SAE
|
|
79
|
+
trainer = SaeTrainer(
|
|
80
|
+
model=lm,
|
|
81
|
+
layer="transformer_h_0_attn_c_attn",
|
|
82
|
+
sae_class=TopKSae,
|
|
83
|
+
hyperparams={"epochs": 10, "batch_size": 256}
|
|
84
|
+
)
|
|
85
|
+
sae = trainer.train(activations)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Concept Manipulation
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
# Load concepts and manipulate model behavior
|
|
92
|
+
concepts = lm.load_concepts(sae_id="your_sae_id")
|
|
93
|
+
concepts.manipulate(neuron_idx=0, scale_factor=1.5)
|
|
94
|
+
|
|
95
|
+
# Run inference with concept manipulation
|
|
96
|
+
outputs = lm.forwards(
|
|
97
|
+
["Your prompt"],
|
|
98
|
+
with_controllers=True,
|
|
99
|
+
concept_config=concepts
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Documentation
|
|
104
|
+
|
|
105
|
+
- **Full Documentation**: [adamkaniasty.github.io/Inzynierka](https://adamkaniasty.github.io/Inzynierka/)
|
|
106
|
+
- **GitHub Repository**: [github.com/AdamKaniasty/Mi-Crow](https://github.com/AdamKaniasty/Mi-Crow/)
|
|
107
|
+
- **Example Notebooks**: See `examples/` directory for Jupyter notebook tutorials
|
|
108
|
+
|
|
109
|
+
## Architecture
|
|
110
|
+
|
|
111
|
+
Mi-Crow follows a modular design with five core components:
|
|
112
|
+
|
|
113
|
+
1. **`language_model/`** - Unified interface for language models
|
|
114
|
+
- Model initialization from HuggingFace Hub or local files
|
|
115
|
+
- Unified inference interface with mixed-precision support
|
|
116
|
+
- Architecture-agnostic layer abstraction
|
|
117
|
+
|
|
118
|
+
2. **`hooks/`** - Flexible hook system for activation interception
|
|
119
|
+
- Detectors for observing activations
|
|
120
|
+
- Controllers for modifying model behavior
|
|
121
|
+
- Support for FORWARD and PRE_FORWARD hooks
|
|
122
|
+
|
|
123
|
+
3. **`mechanistic/`** - SAE training and concept manipulation
|
|
124
|
+
- Sparse Autoencoder training (TopK, L1 variants)
|
|
125
|
+
- Concept dictionary management
|
|
126
|
+
- Concept-based model steering
|
|
127
|
+
|
|
128
|
+
4. **`store/`** - Hierarchical data persistence
|
|
129
|
+
- Efficient tensor storage in safetensors format
|
|
130
|
+
- Batch iteration for large datasets
|
|
131
|
+
- Metadata management
|
|
132
|
+
|
|
133
|
+
5. **`datasets/`** - Dataset loading and processing
|
|
134
|
+
- HuggingFace dataset integration
|
|
135
|
+
- Local file dataset support
|
|
136
|
+
|
|
137
|
+
## Example Workflow
|
|
138
|
+
|
|
139
|
+
See the example notebooks in the `examples/` directory:
|
|
140
|
+
|
|
141
|
+
1. **`01_train_sae_model.ipynb`** - Train an SAE on model activations
|
|
142
|
+
2. **`02_attach_sae_and_save_texts.ipynb`** - Collect top activating texts
|
|
143
|
+
3. **`03_load_concepts.ipynb`** - Load and manipulate concepts
|
|
144
|
+
|
|
145
|
+
## Development
|
|
146
|
+
|
|
147
|
+
### Running Tests
|
|
148
|
+
|
|
149
|
+
The project uses pytest for testing. Tests are organized into unit tests and end-to-end tests.
|
|
150
|
+
|
|
151
|
+
### Running All Tests
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pytest
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Running Specific Test Suites
|
|
158
|
+
|
|
159
|
+
Run only unit tests:
|
|
160
|
+
```bash
|
|
161
|
+
pytest --unit -q
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Run only end-to-end tests:
|
|
165
|
+
```bash
|
|
166
|
+
pytest --e2e -q
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
You can also use pytest markers:
|
|
170
|
+
```bash
|
|
171
|
+
pytest -m unit -q
|
|
172
|
+
pytest -m e2e -q
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Or specify the test directory directly:
|
|
176
|
+
```bash
|
|
177
|
+
pytest tests/unit -q
|
|
178
|
+
pytest tests/e2e -q
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Test Coverage
|
|
182
|
+
|
|
183
|
+
The test suite is configured to require at least 85% code coverage. Coverage reports are generated in both terminal and XML formats.
|
|
184
|
+
|
|
185
|
+
The project maintains an **85%+ code coverage** requirement.
|
|
186
|
+
|
|
187
|
+
### Code Quality
|
|
188
|
+
|
|
189
|
+
- **Linting**: Ruff for code formatting and linting
|
|
190
|
+
- **Pre-commit Hooks**: Automated quality checks
|
|
191
|
+
- **Type Hints**: Extensive use of Python type annotations
|
|
192
|
+
- **CI/CD**: GitHub Actions for automated testing and deployment
|
|
193
|
+
|
|
194
|
+
## Citation
|
|
195
|
+
|
|
196
|
+
If you use Mi-Crow in your research, please cite:
|
|
197
|
+
|
|
198
|
+
```bibtex
|
|
199
|
+
@mastersthesis{kaniasty2025microw,
|
|
200
|
+
title={Mechanistic Interpretability for Large Language Models: A Production-Ready Framework},
|
|
201
|
+
author={Kaniasty, Adam and Kowalski, Hubert},
|
|
202
|
+
year={2025},
|
|
203
|
+
school={Warsaw University of Technology},
|
|
204
|
+
note={Engineering Thesis}
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## License
|
|
209
|
+
|
|
210
|
+
See the main repository for license information: [Mi-Crow License](https://github.com/AdamKaniasty/Mi-Crow/)
|
|
211
|
+
|
|
212
|
+
## Contact
|
|
213
|
+
|
|
214
|
+
- **Maintainers**: Adam Kaniasty, Hubert Kowalski
|
|
215
|
+
- **Email**: adam.kaniasty@gmail.com
|
|
216
|
+
- **GitHub**: [@AdamKaniasty](https://github.com/AdamKaniasty)
|
|
217
|
+
|
|
218
|
+
## Acknowledgments
|
|
219
|
+
|
|
220
|
+
This work was developed in collaboration with the **Bielik** team and represents a contribution to the open-source mechanistic interpretability community.
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Backend (FastAPI) quickstart
|
|
225
|
+
|
|
226
|
+
Install server-only dependencies (kept out of the core library) with uv:
|
|
227
|
+
```bash
|
|
228
|
+
uv sync --group server
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Run the API:
|
|
232
|
+
```bash
|
|
233
|
+
uv run --group server uvicorn server.main:app --reload
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
Smoke-test the server endpoints:
|
|
237
|
+
```bash
|
|
238
|
+
uv run --group server pytest tests/server/test_api.py --cov=server --cov-fail-under=0
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### SAE API usage
|
|
242
|
+
|
|
243
|
+
- Configure artifact location (optional): `export SERVER_ARTIFACT_BASE_PATH=/path/to/mi_crow_artifacts` (defaults to `~/.cache/mi_crow_server`)
|
|
244
|
+
- Load a model: `curl -X POST http://localhost:8000/models/load -H "Content-Type: application/json" -d '{"model_id":"bielik"}'`
|
|
245
|
+
- Save activations from dataset (stored in `LocalStore` under `activations/<model>/<run_id>`):
|
|
246
|
+
- HF dataset: `{"dataset":{"type":"hf","name":"ag_news","split":"train","text_field":"text"}}`
|
|
247
|
+
- Local files: `{"dataset":{"type":"local","paths":["/path/to/file.txt"]}}`
|
|
248
|
+
- Example: `curl -X POST http://localhost:8000/sae/activations/save -H "Content-Type: application/json" -d '{"model_id":"bielik","layers":["dummy_root"],"dataset":{"type":"local","paths":["/tmp/data.txt"]},"sample_limit":100,"batch_size":4,"shard_size":64}'` → returns a manifest path, run_id, token counts, and batch metadata.
|
|
249
|
+
- List activation runs: `curl "http://localhost:8000/sae/activations?model_id=bielik"`
|
|
250
|
+
- Start SAE training (async job, uses `SaeTrainer`): `curl -X POST http://localhost:8000/sae/train -H "Content-Type: application/json" -d '{"model_id":"bielik","activations_path":"/path/to/manifest.json","layer":"<layer_name>","sae_class":"TopKSae","hyperparams":{"epochs":1,"batch_size":256}}'` → returns `job_id`
|
|
251
|
+
- Check job status: `curl http://localhost:8000/sae/train/status/<job_id>` (returns `sae_id`, `sae_path`, `metadata_path`, progress, and logs)
|
|
252
|
+
- Cancel a job (best-effort): `curl -X POST http://localhost:8000/sae/train/cancel/<job_id>`
|
|
253
|
+
- Load an SAE: `curl -X POST http://localhost:8000/sae/load -H "Content-Type: application/json" -d '{"model_id":"bielik","sae_path":"/path/to/sae.json"}'`
|
|
254
|
+
- List SAEs: `curl "http://localhost:8000/sae/saes?model_id=bielik"`
|
|
255
|
+
- Run SAE inference (optionally save top texts and apply concept config): `curl -X POST http://localhost:8000/sae/infer -H "Content-Type: application/json" -d '{"model_id":"bielik","sae_id":"<sae_id>","save_top_texts":true,"top_k_neurons":5,"concept_config_path":"/path/to/concepts.json","inputs":[{"prompt":"hi"}]}'` → returns outputs, top neuron summary, sae metadata, and saved top-texts path when requested.
|
|
256
|
+
- Per-token latents: add `"return_token_latents": true` (default off) to include top-k neuron activations per token.
|
|
257
|
+
- List concepts: `curl "http://localhost:8000/sae/concepts?model_id=bielik&sae_id=<sae_id>"`
|
|
258
|
+
- Load concepts from a file (validated against SAE latents): `curl -X POST http://localhost:8000/sae/concepts/load -H "Content-Type: application/json" -d '{"model_id":"bielik","sae_id":"<sae_id>","source_path":"/path/to/concepts.json"}'`
|
|
259
|
+
- Manipulate concepts (saves a config file for inference-time scaling): `curl -X POST http://localhost:8000/sae/concepts/manipulate -H "Content-Type: application/json" -d '{"model_id":"bielik","sae_id":"<sae_id>","edits":{"0":1.2}}'`
|
|
260
|
+
- List concept configs: `curl "http://localhost:8000/sae/concepts/configs?model_id=bielik&sae_id=<sae_id>"`
|
|
261
|
+
- Preview concept config (validate without saving): `curl -X POST http://localhost:8000/sae/concepts/preview -H "Content-Type: application/json" -d '{"model_id":"bielik","sae_id":"<sae_id>","edits":{"0":1.2}}'`
|
|
262
|
+
- Delete activation run or SAE (requires API key if set): `curl -X DELETE "http://localhost:8000/sae/activations/<run_id>?model_id=bielik" -H "X-API-Key: <key>"` and `curl -X DELETE "http://localhost:8000/sae/saes/<sae_id>?model_id=bielik" -H "X-API-Key: <key>"`
|
|
263
|
+
- Health/metrics summary: `curl http://localhost:8000/health/metrics` (in-memory job counts; no persistence, no auth)
|
|
264
|
+
|
|
265
|
+
Notes:
|
|
266
|
+
- Job manager is in-memory/lightweight: jobs disappear on process restart; idempotency is best-effort via payload key.
|
|
267
|
+
- Training/inference currently run in-process threads; add your own resource guards when running heavy models.
|
|
268
|
+
- Optional API key protection: set `SERVER_API_KEY=<value>` to require `X-API-Key` on protected endpoints (delete).
|