mi-crow 0.1.2__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mi_crow-0.1.2 → mi_crow-1.0.0}/.gitignore +7 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/PKG-INFO +1 -1
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/best-practices.md +1 -1
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/examples.md +6 -6
- {mi_crow-0.1.2 → mi_crow-1.0.0}/mkdocs.yml +1 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow.egg-info/PKG-INFO +1 -1
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow.egg-info/SOURCES.txt +0 -7
- mi_crow-0.1.2/.cursor/commands/fix-and-add-unit-tests.md +0 -55
- mi_crow-0.1.2/.cursor/commands/refactor-given-code.md +0 -18
- mi_crow-0.1.2/.cursor/plans/dokumentacja-implementacji-modu-w-i-automatyzacji-83a10087.plan.md +0 -119
- mi_crow-0.1.2/.cursor/plans/server-sae-full-metadata_b869cacf.plan.md +0 -55
- mi_crow-0.1.2/.cursor/rules/coding-rules.mdc +0 -195
- mi_crow-0.1.2/.cursor/rules/comments.mdc +0 -0
- mi_crow-0.1.2/.run/Unit tests.run.xml +0 -17
- {mi_crow-0.1.2 → mi_crow-1.0.0}/.github/workflows/docs.yml +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/.github/workflows/publish.yml +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/.github/workflows/tests.yml +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/.pre-commit-config.yaml +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/.python-version +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/MANIFEST.in +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/README.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api/datasets.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api/hooks.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api/index.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api/language_model.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api/sae.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api/store.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/api.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/experiments/index.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/experiments/slurm-pipeline.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/experiments/verify-sae-training.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/core-concepts.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/hooks/advanced.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/hooks/controllers.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/hooks/detectors.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/hooks/fundamentals.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/hooks/index.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/hooks/registration.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/index.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/installation.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/quickstart.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/troubleshooting.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/workflows/activation-control.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/workflows/concept-discovery.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/workflows/concept-manipulation.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/workflows/index.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/workflows/saving-activations.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/guide/workflows/training-sae.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/docs/index.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/pyproject.toml +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/raport_implementacji.md +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/setup.cfg +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/404.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/api/datasets/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/api/hooks/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/api/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/api/language_model/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/api/sae/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/api/store/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/_mkdocstrings.css +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/images/favicon.png +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/bundle.e71a0d61.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/bundle.e71a0d61.min.js.map +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.ar.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.da.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.de.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.du.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.el.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.es.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.fi.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.fr.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.he.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.hi.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.hu.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.hy.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.it.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.ja.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.jp.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.kn.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.ko.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.multi.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.nl.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.no.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.pt.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.ro.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.ru.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.sa.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.sv.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.ta.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.te.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.th.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.tr.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.vi.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.zh.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/tinyseg.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/wordcut.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/workers/search.7a47a382.min.js +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/workers/search.7a47a382.min.js.map +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/stylesheets/main.618322db.min.css +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/stylesheets/main.618322db.min.css.map +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/stylesheets/palette.ab4e12ef.min.css +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/stylesheets/palette.ab4e12ef.min.css.map +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/index.html +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/objects.inv +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/search/search_index.json +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/sitemap.xml +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/site/sitemap.xml.gz +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/datasets/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/datasets/base_dataset.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/datasets/classification_dataset.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/datasets/loading_strategy.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/datasets/text_dataset.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/controller.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/detector.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/hook.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/implementations/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/implementations/function_controller.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/implementations/layer_activation_detector.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/implementations/model_input_detector.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/implementations/model_output_detector.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/utils.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/activations.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/context.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/contracts.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/hook_metadata.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/inference.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/initialization.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/language_model.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/layers.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/persistence.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/tokenizer.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/language_model/utils.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/autoencoder_context.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/concepts/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/concepts/autoencoder_concepts.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/concepts/concept_dictionary.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/concepts/concept_models.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/concepts/input_tracker.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/modules/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/modules/l1_sae.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/modules/topk_sae.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/sae.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/sae_trainer.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/training/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/training/wandb_logger.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/store/__init__.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/store/local_store.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/store/store.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/store/store_dataloader.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/utils.py +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow.egg-info/dependency_links.txt +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow.egg-info/requires.txt +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow.egg-info/top_level.txt +0 -0
- {mi_crow-0.1.2 → mi_crow-1.0.0}/uv.lock +0 -0
|
@@ -36,12 +36,16 @@ venv.bak/
|
|
|
36
36
|
*.iws
|
|
37
37
|
*.iml
|
|
38
38
|
*.ipr
|
|
39
|
+
.run/
|
|
39
40
|
out/
|
|
40
41
|
|
|
41
42
|
# VS Code
|
|
42
43
|
.vscode/
|
|
43
44
|
*.code-workspace
|
|
44
45
|
|
|
46
|
+
# Cursor IDE
|
|
47
|
+
.cursor/
|
|
48
|
+
|
|
45
49
|
# macOS
|
|
46
50
|
.DS_Store
|
|
47
51
|
.AppleDouble
|
|
@@ -67,6 +71,9 @@ Temporary Items
|
|
|
67
71
|
coverage.xml
|
|
68
72
|
htmlcov/
|
|
69
73
|
|
|
74
|
+
# MkDocs generated site
|
|
75
|
+
site/
|
|
76
|
+
|
|
70
77
|
|
|
71
78
|
# Jupyter
|
|
72
79
|
.ipynb_checkpoints
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mi-crow
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Engineer Thesis: Explaining and modifying LLM responses using SAE and concepts.
|
|
5
5
|
Author-email: Hubert Kowalski <your.email@example.com>, Adam Kaniasty <adam.kaniasty@gmail.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -386,5 +386,5 @@ assert len(top_texts) > 0 # Found some concepts
|
|
|
386
386
|
|
|
387
387
|
- **[Troubleshooting](troubleshooting.md)** - Common issues and solutions
|
|
388
388
|
- **[Examples](examples.md)** - Example code patterns
|
|
389
|
-
- **[Hooks: Advanced](
|
|
389
|
+
- **[Hooks: Advanced](hooks/advanced.md)** - Advanced patterns
|
|
390
390
|
|
|
@@ -77,7 +77,7 @@ The examples directory contains Jupyter notebooks demonstrating mi-crow function
|
|
|
77
77
|
|
|
78
78
|
**Related guides**:
|
|
79
79
|
- [Concept Discovery](workflows/concept-discovery.md)
|
|
80
|
-
- [Hooks: Detectors](
|
|
80
|
+
- [Hooks: Detectors](hooks/detectors.md)
|
|
81
81
|
|
|
82
82
|
---
|
|
83
83
|
|
|
@@ -112,7 +112,7 @@ The examples directory contains Jupyter notebooks demonstrating mi-crow function
|
|
|
112
112
|
**Related guides**:
|
|
113
113
|
- [Concept Manipulation](workflows/concept-manipulation.md)
|
|
114
114
|
- [Activation Control](workflows/activation-control.md)
|
|
115
|
-
- [Hooks: Controllers](
|
|
115
|
+
- [Hooks: Controllers](hooks/controllers.md)
|
|
116
116
|
|
|
117
117
|
---
|
|
118
118
|
|
|
@@ -133,7 +133,7 @@ The examples directory contains Jupyter notebooks demonstrating mi-crow function
|
|
|
133
133
|
|
|
134
134
|
**Related guides**:
|
|
135
135
|
- [Saving Activations](workflows/saving-activations.md)
|
|
136
|
-
- [Hooks: Detectors](
|
|
136
|
+
- [Hooks: Detectors](hooks/detectors.md)
|
|
137
137
|
|
|
138
138
|
---
|
|
139
139
|
|
|
@@ -215,8 +215,8 @@ The examples directory contains Jupyter notebooks demonstrating mi-crow function
|
|
|
215
215
|
|
|
216
216
|
**Related guides**:
|
|
217
217
|
- [Activation Control](workflows/activation-control.md)
|
|
218
|
-
- [Hooks: Fundamentals](
|
|
219
|
-
- [Hooks: Controllers](
|
|
218
|
+
- [Hooks: Fundamentals](hooks/fundamentals.md)
|
|
219
|
+
- [Hooks: Controllers](hooks/controllers.md)
|
|
220
220
|
|
|
221
221
|
---
|
|
222
222
|
|
|
@@ -319,6 +319,6 @@ If you encounter issues:
|
|
|
319
319
|
After working through examples:
|
|
320
320
|
|
|
321
321
|
- **[Workflows](workflows/index.md)** - Detailed workflow guides
|
|
322
|
-
- **[Hooks System](
|
|
322
|
+
- **[Hooks System](hooks/index.md)** - Deep dive into hooks
|
|
323
323
|
- **[Experiments](../experiments/index.md)** - Real-world experiments
|
|
324
324
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mi-crow
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Engineer Thesis: Explaining and modifying LLM responses using SAE and concepts.
|
|
5
5
|
Author-email: Hubert Kowalski <your.email@example.com>, Adam Kaniasty <adam.kaniasty@gmail.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -7,16 +7,9 @@ mkdocs.yml
|
|
|
7
7
|
pyproject.toml
|
|
8
8
|
raport_implementacji.md
|
|
9
9
|
uv.lock
|
|
10
|
-
.cursor/commands/fix-and-add-unit-tests.md
|
|
11
|
-
.cursor/commands/refactor-given-code.md
|
|
12
|
-
.cursor/plans/dokumentacja-implementacji-modu-w-i-automatyzacji-83a10087.plan.md
|
|
13
|
-
.cursor/plans/server-sae-full-metadata_b869cacf.plan.md
|
|
14
|
-
.cursor/rules/coding-rules.mdc
|
|
15
|
-
.cursor/rules/comments.mdc
|
|
16
10
|
.github/workflows/docs.yml
|
|
17
11
|
.github/workflows/publish.yml
|
|
18
12
|
.github/workflows/tests.yml
|
|
19
|
-
.run/Unit tests.run.xml
|
|
20
13
|
docs/api.md
|
|
21
14
|
docs/index.md
|
|
22
15
|
docs/api/datasets.md
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
# fix-and-add-unit-tests
|
|
2
|
-
|
|
3
|
-
Your role:
|
|
4
|
-
You are a Senior Python Engineer responsible for designing and maintaining high-quality unit tests for a production system.
|
|
5
|
-
All tests must meet or exceed 90% coverage and follow industry-grade best practices.
|
|
6
|
-
|
|
7
|
-
When I send you any Python module, class, function, or code fragment:
|
|
8
|
-
1. Analyze the Code
|
|
9
|
-
Identify responsibilities, edge-cases, invariants, and failure modes
|
|
10
|
-
Detect missing validations, ambiguous cases, undefined behaviors
|
|
11
|
-
Identify all branches, paths, and interactions
|
|
12
|
-
Determine which parts are not testable or require refactoring
|
|
13
|
-
2. Produce Senior-Level Unit Tests
|
|
14
|
-
All test code MUST:
|
|
15
|
-
Use pytest style
|
|
16
|
-
Be fully isolated
|
|
17
|
-
Avoid side effects
|
|
18
|
-
Use parametrization extensively
|
|
19
|
-
Use fixtures where appropriate
|
|
20
|
-
Cover success, failure, and boundary conditions
|
|
21
|
-
Include negative tests (exceptions, invalid inputs)
|
|
22
|
-
Mock external systems only when needed
|
|
23
|
-
Be deterministic (no randomness unless seeded)
|
|
24
|
-
Ensure ≥ 90% coverage, including branches and error handling
|
|
25
|
-
Follow clean naming:
|
|
26
|
-
test_<function>_<scenario>_<expected>()
|
|
27
|
-
3. Fix & Improve Existing Tests (if provided)
|
|
28
|
-
Rewrite poor tests
|
|
29
|
-
Remove duplication
|
|
30
|
-
Replace broad mocks with specific ones
|
|
31
|
-
Improve assertions
|
|
32
|
-
Add missing cases
|
|
33
|
-
Ensure readability, clarity, and intent disclosure
|
|
34
|
-
Apply red-green-refactor test discipline
|
|
35
|
-
4. Suggest Improvements to Production Code
|
|
36
|
-
If required to achieve proper testability, propose:
|
|
37
|
-
Dependency injection
|
|
38
|
-
Splitting functions
|
|
39
|
-
Returning richer objects
|
|
40
|
-
Making side effects explicit
|
|
41
|
-
Better error handling
|
|
42
|
-
Removing hidden globals
|
|
43
|
-
Eliminating nondeterministic behaviors
|
|
44
|
-
Never modify production code unless explicitly asked — but always propose improvements.
|
|
45
|
-
5. Output Format
|
|
46
|
-
Always reply with:
|
|
47
|
-
A) Summary of issues found
|
|
48
|
-
Missing coverage areas
|
|
49
|
-
Undetected edge-cases
|
|
50
|
-
Logical gaps
|
|
51
|
-
Suggested refactors
|
|
52
|
-
Anything blocking full testability
|
|
53
|
-
|
|
54
|
-
Ensure tests run with:
|
|
55
|
-
uv run pytest -q --unit -n auto
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
# refactor-given-code
|
|
2
|
-
|
|
3
|
-
You are a Senior Python Engineer specializing in refactoring and code quality.
|
|
4
|
-
When I send you any code file, module, or class, your task is to refactor it for clarity, maintainability, testability, and correctness, while preserving functionality.
|
|
5
|
-
Your responsibilities:
|
|
6
|
-
Apply single responsibility principle and extract private helper functions when needed
|
|
7
|
-
Break large modules/classes into smaller files with clear boundaries
|
|
8
|
-
Improve naming, API clarity, and separation of concerns
|
|
9
|
-
Remove duplication and inline complex logic into clean reusable functions
|
|
10
|
-
Replace implicit behaviors with explicit ones
|
|
11
|
-
Improve error handling and edge-case coverage
|
|
12
|
-
Keep refactoring safe, deterministic, and production-friendly
|
|
13
|
-
Ensure final structure is testable and follows clean code principles
|
|
14
|
-
Output format:
|
|
15
|
-
Short explanation of key improvements
|
|
16
|
-
Optional suggestions for further enhancements
|
|
17
|
-
When ready, say:
|
|
18
|
-
“Send me the code you want to refactor.”
|
mi_crow-0.1.2/.cursor/plans/dokumentacja-implementacji-modu-w-i-automatyzacji-83a10087.plan.md
DELETED
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: Plan raportu implementacji modułów i automatyzacji
|
|
3
|
-
overview: ""
|
|
4
|
-
todos:
|
|
5
|
-
- id: 41a71e17-1a66-482a-927c-8b8739cedd36
|
|
6
|
-
content: Utworzenie pliku docs/automation.md z opisem wszystkich procedur automatyzacji (UV, pre-commit, ruff, pytest, GitHub Actions, MkD
|
|
7
|
-
status: pending
|
|
8
|
-
- id: ab21d40e-3967-484a-8a10-8dd60a424cb5
|
|
9
|
-
content: Utworzenie pliku docs/modules.md z dokumentacją wszystkich modułów, ich parametrów i konfiguracjiAktualizacja mkdocs.yml - dodanie nowych sekcji do nawigacjiAktualizacja docs/index.md - dodanie linków do nowych sekcji dokumentacjiUtworzenie pliku docs/automation.md z opisem wszystkich procedur automatyzacji (UV, pre-commit, ruff, pytest, GitHub Actions, MkDocs)Utworzenie pliku docs/modules.md z dokumentacją wszystkich modułów, ich parametrów i konfiguracjiAktualizacja mkdocs.yml - dodanie nowych sekcji do nawigacjiAktualizacja docs/index.md - dodanie linków do nowych sekcji dokumentacUtworzenie pliku docs/automation.md z opisem wszystkich procedur automatyzacji (UV, pre-commit, ruff, pytest, GitHub Actions, MkDocs)Utworzenie pliku docs/modules.md z dokumentacją wszystkich modułów, ich parametrów i konfiguracjiAktualizacja mkdocs.yml - dodanie nowych sekcji do nawigacjiAktualizacja docs/index.md - dodanie linków do nowych sekcji dokumentacji
|
|
10
|
-
status: pending
|
|
11
|
-
---
|
|
12
|
-
|
|
13
|
-
# Plan raportu implementacji modułów i automatyzacji
|
|
14
|
-
|
|
15
|
-
## Cel
|
|
16
|
-
|
|
17
|
-
Utworzenie jednego pliku Markdown z raportem na zajęcia opisującym:
|
|
18
|
-
|
|
19
|
-
1. Procedury automatyzacji (GitHub Actions, UV, pre-commit, ruff, pytest, środowisko testowe)
|
|
20
|
-
2. Implementację modułów (stan implementacji, parametry, opisy modeli ML)
|
|
21
|
-
|
|
22
|
-
## Struktura raportu
|
|
23
|
-
|
|
24
|
-
Jeden plik Markdown (np. `raport.md` lub `raport_implementacji.md`) w katalogu głównym projektu z następującymi sekcjami:
|
|
25
|
-
|
|
26
|
-
## 1. Procedury automatyzacji
|
|
27
|
-
|
|
28
|
-
### 1.1 Zarządzanie zależnościami (UV)
|
|
29
|
-
|
|
30
|
-
- Opis użycia UV jako menedżera pakietów
|
|
31
|
-
- Konfiguracja w `pyproject.toml`
|
|
32
|
-
- Komendy: `uv sync`, `uv add`, `uv lock`
|
|
33
|
-
- Grupy zależności: `dev`, `docs`
|
|
34
|
-
- Plik `uv.lock` i synchronizacja wersji
|
|
35
|
-
|
|
36
|
-
### 1.2 Pre-commit hooks
|
|
37
|
-
|
|
38
|
-
- Konfiguracja w `.pre-commit-config.yaml`
|
|
39
|
-
- Hooks: `ruff` (linter) i `ruff-format` (formatter)
|
|
40
|
-
- Automatyczne uruchamianie przed commitami
|
|
41
|
-
- Instalacja: `pre-commit install`
|
|
42
|
-
|
|
43
|
-
### 1.3 Ruff - linting i formatowanie
|
|
44
|
-
|
|
45
|
-
- Konfiguracja w `pyproject.toml` sekcja `[tool.ruff]`
|
|
46
|
-
- Ustawienia: line-length=120, target-version, select rules
|
|
47
|
-
- Uruchamianie: `uv run ruff check`, `uv run ruff format`
|
|
48
|
-
|
|
49
|
-
### 1.4 Testy i coverage (pytest)
|
|
50
|
-
|
|
51
|
-
- Konfiguracja w `pyproject.toml` sekcja `[tool.pytest.ini_options]`
|
|
52
|
-
- Wymagane pokrycie: 90% (fail-under=90)
|
|
53
|
-
- Markery testów: `--unit`, `--e2e`
|
|
54
|
-
- Raporty: terminal, XML, HTML
|
|
55
|
-
- Środowisko testowe: fixtures w `conftest.py`, testy jednostkowe i e2e
|
|
56
|
-
|
|
57
|
-
### 1.5 GitHub Actions
|
|
58
|
-
|
|
59
|
-
- Opis workflow CI/CD (jeśli dostępne)
|
|
60
|
-
- Badge CI widoczny w README.md
|
|
61
|
-
- Automatyczne testy i deployment (jeśli skonfigurowane)
|
|
62
|
-
|
|
63
|
-
### 1.6 Dokumentacja (MkDocs)
|
|
64
|
-
|
|
65
|
-
- Konfiguracja w `mkdocs.yml`
|
|
66
|
-
- Theme: Material
|
|
67
|
-
- Plugins: mkdocstrings, search, section-index
|
|
68
|
-
- Deploy: GitHub Pages (https://adamkaniasty.github.io/Inzynierka/)
|
|
69
|
-
|
|
70
|
-
## 2. Implementacja modułów
|
|
71
|
-
|
|
72
|
-
### 2.1 Moduł `mi_crow.datasets`
|
|
73
|
-
|
|
74
|
-
- **BaseDataset**: Klasa abstrakcyjna z LoadingStrategy (MEMORY, DYNAMIC_LOAD, ITERABLE_ONLY)
|
|
75
|
-
- **TextDataset**: Dataset tekstowy z parametrem `text_field`
|
|
76
|
-
- **ClassificationDataset**: Dataset z kategoriami, parametry `text_field`, `category_field` (single/multiple)
|
|
77
|
-
- **LoadingStrategy**: Enum z trzema strategiami ładowania
|
|
78
|
-
|
|
79
|
-
### 2.2 Moduł `mi_crow.hooks`
|
|
80
|
-
|
|
81
|
-
- **Hook**: Klasa abstrakcyjna, parametry: `layer_signature`, `hook_type` (FORWARD/PRE_FORWARD), `hook_id`
|
|
82
|
-
- **Detector**: Hook do wykrywania/zapisywania aktywacji
|
|
83
|
-
- **Controller**: Hook do modyfikacji aktywacji
|
|
84
|
-
- **LayerActivationDetector**: Implementacja zapisywania aktywacji warstw
|
|
85
|
-
- **FunctionController**: Implementacja kontrolera funkcji
|
|
86
|
-
|
|
87
|
-
### 2.3 Moduł `mi_crow.language_model`
|
|
88
|
-
|
|
89
|
-
- **LanguageModel**: Główna klasa wrappera modelu
|
|
90
|
-
- Parametry inicjalizacji: `model`, `tokenizer`, `store`, `model_id`
|
|
91
|
-
- Metody: `tokenize()`, `forwards()`, `generate()`, `save_model()`
|
|
92
|
-
- Factory methods: `from_huggingface()`, `from_local_torch()`, `from_local()`
|
|
93
|
-
- **LanguageModelLayers**: Zarządzanie warstwami i hookami
|
|
94
|
-
- **LanguageModelActivations**: Zarządzanie aktywacjami
|
|
95
|
-
- **LanguageModelTokenizer**: Wrapper tokenizera
|
|
96
|
-
- **InferenceEngine**: Silnik inferencji z parametrami `autocast`, `autocast_dtype`, `with_controllers`
|
|
97
|
-
|
|
98
|
-
### 2.4 Moduł `mi_crow.mechanistic.sae` (moduły uczenia maszynowego)
|
|
99
|
-
|
|
100
|
-
- **Sae**: Klasa abstrakcyjna SAE
|
|
101
|
-
- Parametry: `n_latents`, `n_inputs`, `hook_id`, `device`, `store`
|
|
102
|
-
- Metody abstrakcyjne: `encode()`, `decode()`, `forward()`, `modify_activations()`, `save()`
|
|
103
|
-
- **TopKSae**: Implementacja TopK SAE
|
|
104
|
-
- Dodatkowy parametr: `k` (liczba aktywnych neuronów)
|
|
105
|
-
- Metody: `train()`, `save()`, `load()`
|
|
106
|
-
- **SaeTrainer**: Klasa trenująca SAE
|
|
107
|
-
- **SaeTrainingConfig**: Konfiguracja treningu
|
|
108
|
-
- Parametry treningu: `epochs`, `batch_size`, `lr`, `l1_lambda`, `device`, `dtype`
|
|
109
|
-
- Parametry zaawansowane: `use_amp`, `amp_dtype`, `grad_accum_steps`, `clip_grad`, `monitoring`
|
|
110
|
-
- Parametry wandb: `use_wandb`, `wandb_project`, `wandb_entity`, `wandb_name`, `wandb_tags`, `wandb_mode`
|
|
111
|
-
- **AutoencoderContext**: Kontekst SAE z parametrami: `n_latents`, `n_inputs`, `device`, `text_tracking_enabled`, `text_tracking_k`
|
|
112
|
-
- **AutoencoderConcepts**: Zarządzanie konceptami (multiplication, bias)
|
|
113
|
-
|
|
114
|
-
### 2.5 Moduł `mi_crow.store`
|
|
115
|
-
|
|
116
|
-
- **Store**: Klasa abstrakcyjna do przechowywania tensorów
|
|
117
|
-
- Parametry: `base_path`, `runs_prefix`, `dataset_prefix`, `model_prefix`
|
|
118
|
-
- Organizacja: runs -> batches -> layers -> keys
|
|
119
|
-
- **LocalStore**: Implementacja lokalnego przechowywania (safetensors)
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: server-sae-full-metadata
|
|
3
|
-
overview: "Wire server to mi_crow SAE without touching core library: LocalStore-based activations, real SAE training/inference, concept application, and enriched responses."
|
|
4
|
-
todos: []
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# Server SAE Integration (No Core Library Changes)
|
|
8
|
-
|
|
9
|
-
## Scope
|
|
10
|
-
|
|
11
|
-
b
|
|
12
|
-
|
|
13
|
-
- Only modify server code; reuse mi_crow APIs as-is.
|
|
14
|
-
- Use LocalStore for activations, real SaeTrainer for training, and Sae hook for inference/top-texts.
|
|
15
|
-
- Concept handling via existing mi_crow concept classes; no edits to mi_crow package.
|
|
16
|
-
|
|
17
|
-
## Plan
|
|
18
|
-
|
|
19
|
-
1) **Activation Pipeline → LocalStore**
|
|
20
|
-
|
|
21
|
-
- Update `/sae/activations/save` to write activations into `mi_crow.store.LocalStore` under `artifact_base_path` using `run_id`/batch/layer keys.
|
|
22
|
-
- Keep manifest with run_id, layers, dataset info, token counts, batch indices.
|
|
23
|
-
- Add layer-existence check (already present) and bounds on batch/sample size.
|
|
24
|
-
|
|
25
|
-
2) **SAE Training (real)**
|
|
26
|
-
|
|
27
|
-
- In `/sae/train`, load LocalStore with run_id/layer from manifest; build `SaeTrainingConfig` from payload; call `SaeTrainer.train` on the selected SAE class (chosen by payload or default) without modifying mi_crow code.
|
|
28
|
-
- Save SAE checkpoint via its `save` method alongside training metadata (history, config, metrics) in `artifact_base_path/sae/<model>/<run>/`.
|
|
29
|
-
- Return sae_id/sae_path/metadata_path from the job result.
|
|
30
|
-
|
|
31
|
-
3) **SAE Inference & Top Texts**
|
|
32
|
-
|
|
33
|
-
- In `/sae/infer`, load SAE via `Sae.load`, register the SAE hook on target layer, run the LM forward; capture neuron activations from the SAE detector metadata.
|
|
34
|
-
- Compute top-N neurons per prompt and write top-texts JSON (neuron_id, activation stats, example texts) to `top_texts_dir`.
|
|
35
|
-
- Response: generated text/tokens, optional logits/probs, top-neuron summary + path to top-texts file, sae_id reference.
|
|
36
|
-
|
|
37
|
-
4) **Concepts Integration**
|
|
38
|
-
|
|
39
|
-
- `/sae/concepts/load`: parse user file into mi_crow concepts/dictionary; validate against SAE dims; persist normalized concept file under concepts dir.
|
|
40
|
-
- `/sae/concepts/manipulate`: accept weights/edits; build a concept-config mapping to neuron weights; persist config.
|
|
41
|
-
- `/sae/infer` accepts concept_config to adjust activations/decoder via SAE hook before generation (no mi_crow code changes—use available hook points).
|
|
42
|
-
|
|
43
|
-
5) **Jobs & Observability**
|
|
44
|
-
|
|
45
|
-
- Keep in-memory JobManager; ensure train job result carries sae_id/paths/metadata. Add minimal logging around train start/end and inference timings.
|
|
46
|
-
|
|
47
|
-
6) **Testing & Docs**
|
|
48
|
-
|
|
49
|
-
- Extend server tests with small dummy SAE class implementing `save/load/encode/decode` to cover training/inference flows without touching mi_crow; mock LocalStore I/O.
|
|
50
|
-
- Update README with new expectations: activations in LocalStore, real training path, concept flow, top-text outputs, and job responses.
|
|
51
|
-
|
|
52
|
-
## Notes
|
|
53
|
-
|
|
54
|
-
- No changes to mi_crow core; all integrations via its public classes (LocalStore, SaeTrainer, Sae, concepts).
|
|
55
|
-
- Keep payload schemas mostly stable; only enrich responses with sae/metadata paths and top-text summaries.
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
globs: *.py
|
|
3
|
-
alwaysApply: false
|
|
4
|
-
---
|
|
5
|
-
# Python Programming Rules
|
|
6
|
-
|
|
7
|
-
This file defines coding standards and best practices for Python files in this codebase.
|
|
8
|
-
|
|
9
|
-
## 1. Comments in Docstrings Only
|
|
10
|
-
|
|
11
|
-
- **All comments must be in docstrings** using Google-style format
|
|
12
|
-
- **No inline comments** (`#`) except for type ignore comments (e.g., `# type: ignore`)
|
|
13
|
-
- Docstrings should explain **"why"** not just **"what"**
|
|
14
|
-
- Use docstrings for:
|
|
15
|
-
- Module-level documentation
|
|
16
|
-
- Class documentation
|
|
17
|
-
- Function/method documentation
|
|
18
|
-
- Complex logic explanations
|
|
19
|
-
|
|
20
|
-
### Example
|
|
21
|
-
|
|
22
|
-
```python
|
|
23
|
-
def process_activations(activations: Tensor) -> Tensor:
|
|
24
|
-
"""
|
|
25
|
-
Normalize and scale activations for downstream processing.
|
|
26
|
-
|
|
27
|
-
This normalization is required because raw activations can have
|
|
28
|
-
extreme values that cause numerical instability in the SAE.
|
|
29
|
-
We use layer normalization to ensure consistent scale across batches.
|
|
30
|
-
|
|
31
|
-
Args:
|
|
32
|
-
activations: Raw activation tensor of shape (batch, seq_len, hidden_dim)
|
|
33
|
-
|
|
34
|
-
Returns:
|
|
35
|
-
Normalized activation tensor with same shape
|
|
36
|
-
|
|
37
|
-
Raises:
|
|
38
|
-
ValueError: If activations contain NaN or Inf values
|
|
39
|
-
"""
|
|
40
|
-
if torch.isnan(activations).any() or torch.isinf(activations).any():
|
|
41
|
-
raise ValueError("Activations contain invalid values")
|
|
42
|
-
return self._normalize(activations)
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
## 2. Modular Code with Private Methods
|
|
46
|
-
|
|
47
|
-
- **Prefer small, focused private methods** (prefixed with `_`) over large monolithic functions
|
|
48
|
-
- Break down complex logic into smaller, testable units
|
|
49
|
-
- Public methods should be **thin orchestration layers** that delegate to private methods
|
|
50
|
-
- Methods should have **single responsibility**
|
|
51
|
-
- If a method exceeds ~50 lines, consider breaking it into smaller methods
|
|
52
|
-
- Private methods should be well-named and self-documenting
|
|
53
|
-
|
|
54
|
-
### Example
|
|
55
|
-
|
|
56
|
-
```python
|
|
57
|
-
class ActivationProcessor:
|
|
58
|
-
def process(self, activations: Tensor) -> Tensor:
|
|
59
|
-
"""Process activations through normalization and scaling pipeline."""
|
|
60
|
-
normalized = self._normalize(activations)
|
|
61
|
-
scaled = self._apply_scaling(normalized)
|
|
62
|
-
return self._validate_output(scaled)
|
|
63
|
-
|
|
64
|
-
def _normalize(self, activations: Tensor) -> Tensor:
|
|
65
|
-
"""Apply layer normalization to activations."""
|
|
66
|
-
# Implementation here
|
|
67
|
-
pass
|
|
68
|
-
|
|
69
|
-
def _apply_scaling(self, activations: Tensor) -> Tensor:
|
|
70
|
-
"""Scale activations by learned factor."""
|
|
71
|
-
# Implementation here
|
|
72
|
-
pass
|
|
73
|
-
|
|
74
|
-
def _validate_output(self, activations: Tensor) -> Tensor:
|
|
75
|
-
"""Validate processed activations meet quality criteria."""
|
|
76
|
-
# Implementation here
|
|
77
|
-
pass
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
## 3. Test Refactoring Requirements
|
|
81
|
-
|
|
82
|
-
- **When refactoring code, always refactor corresponding tests**
|
|
83
|
-
- Tests should reflect the new structure and implementation
|
|
84
|
-
- Update test names and assertions to match refactored code
|
|
85
|
-
- Remove tests for implementation details that no longer exist
|
|
86
|
-
- Add tests for new private methods if they expose new behavior
|
|
87
|
-
- Ensure test coverage remains at or above 85%
|
|
88
|
-
|
|
89
|
-
### Example
|
|
90
|
-
|
|
91
|
-
If you refactor a method `process_data()` into `_validate()`, `_transform()`, and `_save()`:
|
|
92
|
-
- Update the test to verify the public interface still works
|
|
93
|
-
- Add tests for the new private methods if they have complex logic
|
|
94
|
-
- Remove tests that were checking internal implementation details
|
|
95
|
-
|
|
96
|
-
## 4. Meaningful Tests Only
|
|
97
|
-
|
|
98
|
-
- Write tests that **verify behavior, not implementation details**
|
|
99
|
-
- Avoid trivial tests that don't add value (e.g., testing simple getters/setters)
|
|
100
|
-
- Focus on:
|
|
101
|
-
- Edge cases and boundary conditions
|
|
102
|
-
- Error conditions and exception handling
|
|
103
|
-
- Integration points between components
|
|
104
|
-
- Complex business logic
|
|
105
|
-
- Tests should be **readable and self-documenting**
|
|
106
|
-
- Use descriptive test names that explain what is being tested
|
|
107
|
-
|
|
108
|
-
### Example
|
|
109
|
-
|
|
110
|
-
```python
|
|
111
|
-
# Good: Tests meaningful behavior
|
|
112
|
-
def test_process_activations_raises_on_invalid_input():
|
|
113
|
-
"""Test that invalid activations raise appropriate errors."""
|
|
114
|
-
processor = ActivationProcessor()
|
|
115
|
-
invalid_activations = torch.tensor([float('nan')])
|
|
116
|
-
with pytest.raises(ValueError, match="invalid values"):
|
|
117
|
-
processor.process(invalid_activations)
|
|
118
|
-
|
|
119
|
-
# Bad: Trivial test that doesn't add value
|
|
120
|
-
def test_get_enabled_returns_boolean():
|
|
121
|
-
"""Test that get_enabled returns a boolean."""
|
|
122
|
-
hook = Hook()
|
|
123
|
-
assert isinstance(hook.enabled, bool) # Too trivial
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
## 5. No Backward Compatibility
|
|
127
|
-
|
|
128
|
-
- **Do not maintain backward compatibility** when refactoring
|
|
129
|
-
- Remove deprecated code paths immediately
|
|
130
|
-
- Update all call sites when changing APIs
|
|
131
|
-
- Breaking changes are acceptable for code quality improvements
|
|
132
|
-
- If an API change is needed, update all usages throughout the codebase
|
|
133
|
-
- Do not add deprecation warnings or compatibility shims
|
|
134
|
-
|
|
135
|
-
### Example
|
|
136
|
-
|
|
137
|
-
If you need to change a method signature:
|
|
138
|
-
```python
|
|
139
|
-
# Old API
|
|
140
|
-
def process(data: Tensor, batch_size: int = 32) -> Tensor:
|
|
141
|
-
pass
|
|
142
|
-
|
|
143
|
-
# New API - just change it, don't keep the old one
|
|
144
|
-
def process(data: Tensor, config: ProcessingConfig) -> Tensor:
|
|
145
|
-
pass
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
Then update all call sites immediately - do not provide both versions.
|
|
149
|
-
|
|
150
|
-
## Additional Best Practices
|
|
151
|
-
|
|
152
|
-
### Type Hints
|
|
153
|
-
|
|
154
|
-
- **Type hints required** for all function signatures
|
|
155
|
-
- Use `from __future__ import annotations` for forward references
|
|
156
|
-
- Prefer `|` union syntax over `Union` (Python 3.10+)
|
|
157
|
-
- Use `TypeAlias` for complex type definitions
|
|
158
|
-
|
|
159
|
-
### Code Style
|
|
160
|
-
|
|
161
|
-
- Follow existing code style (ruff configuration, 120 char line length)
|
|
162
|
-
- Use double quotes for strings (per ruff configuration)
|
|
163
|
-
- Follow PEP 8 naming conventions:
|
|
164
|
-
- Classes: `PascalCase`
|
|
165
|
-
- Functions/methods: `snake_case`
|
|
166
|
-
- Constants: `UPPER_SNAKE_CASE`
|
|
167
|
-
- Private: `_leading_underscore`
|
|
168
|
-
|
|
169
|
-
### Design Principles
|
|
170
|
-
|
|
171
|
-
- **Prefer composition over inheritance**
|
|
172
|
-
- Use abstract base classes (`abc.ABC`) for interfaces
|
|
173
|
-
- Handle errors explicitly with custom exceptions when appropriate
|
|
174
|
-
- Use dependency injection for testability
|
|
175
|
-
- Keep functions pure when possible (no side effects)
|
|
176
|
-
|
|
177
|
-
### Error Handling
|
|
178
|
-
|
|
179
|
-
- Create custom exception classes for domain-specific errors
|
|
180
|
-
- Use exception chaining (`raise ... from e`) when appropriate
|
|
181
|
-
- Provide clear error messages that help with debugging
|
|
182
|
-
|
|
183
|
-
### Performance
|
|
184
|
-
|
|
185
|
-
- Profile before optimizing
|
|
186
|
-
- Use appropriate data structures (e.g., `dict` for O(1) lookups)
|
|
187
|
-
- Consider memory usage for large tensors
|
|
188
|
-
- Use generators for large datasets
|
|
189
|
-
|
|
190
|
-
### Documentation
|
|
191
|
-
|
|
192
|
-
- All public APIs must have docstrings
|
|
193
|
-
- Include Args, Returns, and Raises sections in docstrings
|
|
194
|
-
- Repeat type information in docstrings only if it adds clarity beyond the signature's type hints
|
|
195
|
-
- Keep docstrings up-to-date with code changes
|
|
File without changes
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
<component name="ProjectRunConfigurationManager">
|
|
2
|
-
<configuration default="false" name="Unit tests" type="ShConfigurationType">
|
|
3
|
-
<option name="SCRIPT_TEXT" value="uv run pytest -q --unit -n auto" />
|
|
4
|
-
<option name="INDEPENDENT_SCRIPT_PATH" value="true" />
|
|
5
|
-
<option name="SCRIPT_PATH" value="" />
|
|
6
|
-
<option name="SCRIPT_OPTIONS" value="" />
|
|
7
|
-
<option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
|
|
8
|
-
<option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
|
9
|
-
<option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
|
|
10
|
-
<option name="INTERPRETER_PATH" value="/bin/zsh" />
|
|
11
|
-
<option name="INTERPRETER_OPTIONS" value="" />
|
|
12
|
-
<option name="EXECUTE_IN_TERMINAL" value="true" />
|
|
13
|
-
<option name="EXECUTE_SCRIPT_FILE" value="false" />
|
|
14
|
-
<envs />
|
|
15
|
-
<method v="2" />
|
|
16
|
-
</configuration>
|
|
17
|
-
</component>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mi_crow-0.1.2 → mi_crow-1.0.0}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/hooks/implementations/layer_activation_detector.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mi_crow-0.1.2 → mi_crow-1.0.0}/src/mi_crow/mechanistic/sae/concepts/autoencoder_concepts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|