PyPI - interpkit - Versions diffs - 0.4.0__tar.gz → 0.6.0__tar.gz - Mend

interpkit 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

{interpkit-0.4.0 → interpkit-0.6.0}/PKG-INFO +85 -7
{interpkit-0.4.0 → interpkit-0.6.0}/README.md +79 -5
interpkit-0.6.0/interpkit/__init__.py +84 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/__main__.py +8 -4
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/cli/main.py +506 -11
interpkit-0.6.0/interpkit/core/arch/__init__.py +102 -0
interpkit-0.6.0/interpkit/core/arch/blocks.py +257 -0
interpkit-0.6.0/interpkit/core/arch/family.py +421 -0
interpkit-0.6.0/interpkit/core/arch/heads.py +583 -0
interpkit-0.6.0/interpkit/core/arch/layers.py +462 -0
interpkit-0.6.0/interpkit/core/arch/names.py +60 -0
interpkit-0.6.0/interpkit/core/arch/probe.py +241 -0
interpkit-0.6.0/interpkit/core/arch/residual.py +653 -0
interpkit-0.6.0/interpkit/core/arch/resolve.py +679 -0
interpkit-0.6.0/interpkit/core/arch/tree.py +190 -0
interpkit-0.6.0/interpkit/core/arch/types.py +486 -0
interpkit-0.6.0/interpkit/core/enums.py +121 -0
interpkit-0.6.0/interpkit/core/exceptions.py +83 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/html.py +5 -2
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/inputs.py +70 -8
interpkit-0.6.0/interpkit/core/interventions.py +492 -0
interpkit-0.6.0/interpkit/core/loader.py +704 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/model.py +610 -36
interpkit-0.6.0/interpkit/core/paths.py +88 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/render.py +239 -7
interpkit-0.6.0/interpkit/core/support_matrix.py +698 -0
interpkit-0.6.0/interpkit/core/tl_compat.py +297 -0
interpkit-0.6.0/interpkit/core/topk.py +63 -0
interpkit-0.6.0/interpkit/ops/_atp.py +13 -0
interpkit-0.6.0/interpkit/ops/_hooks.py +272 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/ablate.py +23 -39
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/activations.py +9 -1
interpkit-0.6.0/interpkit/ops/atp.py +230 -0
interpkit-0.6.0/interpkit/ops/attention.py +334 -0
interpkit-0.6.0/interpkit/ops/attribute.py +844 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/circuits.py +219 -108
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/diff.py +22 -2
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/dla.py +309 -190
interpkit-0.6.0/interpkit/ops/eap.py +355 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/find_circuit.py +135 -76
interpkit-0.6.0/interpkit/ops/generate.py +292 -0
interpkit-0.6.0/interpkit/ops/heads.py +282 -0
interpkit-0.6.0/interpkit/ops/lens.py +442 -0
interpkit-0.6.0/interpkit/ops/maxact.py +347 -0
interpkit-0.6.0/interpkit/ops/patch.py +328 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/probe.py +14 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/sae.py +142 -22
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/steer.py +16 -24
interpkit-0.6.0/interpkit/ops/trace.py +456 -0
interpkit-0.6.0/interpkit/ops/tuned_lens.py +437 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit.egg-info/PKG-INFO +85 -7
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit.egg-info/SOURCES.txt +41 -2
interpkit-0.6.0/interpkit.egg-info/entry_points.txt +2 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit.egg-info/requires.txt +6 -1
{interpkit-0.4.0 → interpkit-0.6.0}/pyproject.toml +30 -4
interpkit-0.6.0/tests/test_archinfo_serialization.py +61 -0
interpkit-0.6.0/tests/test_atp.py +68 -0
interpkit-0.6.0/tests/test_attention.py +112 -0
interpkit-0.6.0/tests/test_audit_regressions.py +1891 -0
interpkit-0.6.0/tests/test_cache_invalidation.py +66 -0
interpkit-0.6.0/tests/test_capabilities.py +227 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_cli.py +210 -1
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_discovery.py +1 -1
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_discovery_units.py +21 -21
interpkit-0.6.0/tests/test_eap.py +138 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_error_handling.py +11 -0
interpkit-0.6.0/tests/test_generate.py +186 -0
interpkit-0.6.0/tests/test_interventions.py +241 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_invariants.py +22 -8
interpkit-0.6.0/tests/test_lens.py +53 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_load_params.py +12 -2
interpkit-0.6.0/tests/test_maxact.py +149 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_multi_arch.py +12 -5
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_ops.py +6 -1
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_param_variants.py +4 -2
interpkit-0.6.0/tests/test_phase3_regressions.py +121 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_regressions.py +5 -2
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_render_internals.py +34 -6
interpkit-0.6.0/tests/test_resolver.py +268 -0
interpkit-0.6.0/tests/test_resolver_golden.py +131 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_robustness_audit.py +56 -29
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_sae.py +6 -1
interpkit-0.6.0/tests/test_seq2seq_contract.py +119 -0
interpkit-0.6.0/tests/test_topk.py +58 -0
interpkit-0.6.0/tests/test_trace.py +76 -0
interpkit-0.6.0/tests/test_tuned_lens.py +140 -0
interpkit-0.6.0/tests/test_validation.py +130 -0
interpkit-0.4.0/interpkit/__init__.py +0 -27
interpkit-0.4.0/interpkit/core/discovery.py +0 -810
interpkit-0.4.0/interpkit/core/loader.py +0 -322
interpkit-0.4.0/interpkit/core/tl_compat.py +0 -174
interpkit-0.4.0/interpkit/ops/attention.py +0 -365
interpkit-0.4.0/interpkit/ops/attribute.py +0 -377
interpkit-0.4.0/interpkit/ops/heads.py +0 -175
interpkit-0.4.0/interpkit/ops/lens.py +0 -243
interpkit-0.4.0/interpkit/ops/patch.py +0 -261
interpkit-0.4.0/interpkit/ops/trace.py +0 -349
interpkit-0.4.0/interpkit.egg-info/entry_points.txt +0 -2
interpkit-0.4.0/tests/test_attention.py +0 -44
interpkit-0.4.0/tests/test_lens.py +0 -25
interpkit-0.4.0/tests/test_trace.py +0 -35
{interpkit-0.4.0 → interpkit-0.6.0}/LICENSE +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/cli/__init__.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/__init__.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/cache.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/plot.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/registry.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/core/theme.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/__init__.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/batch.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/inspect.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/report.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/ops/scan.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit.egg-info/dependency_links.txt +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/interpkit.egg-info/top_level.txt +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/setup.cfg +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_ablate.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_activations.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_architectures.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_attribute.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_cache.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_chat.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_diff.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_html.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_inputs.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_inspect.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_patch.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_plot_internals.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_plots.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_probe.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_registry.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_steer.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_tl_compat.py +0 -0
{interpkit-0.4.0 → interpkit-0.6.0}/tests/test_tl_ops.py +0 -0

{interpkit-0.4.0 → interpkit-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: interpkit
-Version: 0.4.0
+Version: 0.6.0
 Summary: Mech interp for any HuggingFace model.
 Author: Davide Zani
 License-Expression: MIT
@@ -20,7 +20,8 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch>=2.1
-Requires-Dist: transformers>=4.36
+Requires-Dist: numpy>=1.24
+Requires-Dist: transformers<6,>=4.36
 Requires-Dist: safetensors>=0.4
 Requires-Dist: rich>=13.0
 Requires-Dist: rich-gradient>=0.3
@@ -28,10 +29,13 @@ Requires-Dist: typer>=0.9
 Requires-Dist: Pillow>=10.0
 Requires-Dist: matplotlib>=3.8
 Requires-Dist: huggingface-hub>=0.20
+Requires-Dist: sentencepiece>=0.1.99
 Provides-Extra: vision
 Requires-Dist: torchvision>=0.16; extra == "vision"
 Provides-Extra: probe
 Requires-Dist: scikit-learn>=1.3; extra == "probe"
+Provides-Extra: data
+Requires-Dist: datasets>=2.14; extra == "data"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0; extra == "dev"
 Requires-Dist: pytest-timeout>=2.2; extra == "dev"
@@ -60,27 +64,55 @@ Dynamic: license-file
 Mechanistic interpretability tooling today is fragmented. Each library supports a narrow set of architectures, and moving to a different model family usually means rewriting hook code from scratch.
-InterpKit provides a single, consistent interface for mech interp operations across any HuggingFace model — transformers, SSMs, vision models, and more — with zero annotation required.
+InterpKit provides a single, consistent interface for mech interp operations across a wide range of HuggingFace models — transformers, SSMs, vision models, and more — with automatic architecture discovery and little to no manual setup.
 ---
 ## Install
+We strongly recommend installing into an isolated environment so InterpKit's pinned dependencies (e.g. `typer`, `rich`, `transformers`) don't clash with whatever you already have installed globally.
+Using [uv](https://docs.astral.sh/uv/) (recommended — fast, handles Python versions for you):
 ```bash
+uv venv --python 3.11
+source .venv/bin/activate
+uv pip install interpkit
+# For linear probe support:
+uv pip install "interpkit[probe]"
+```
+Or with plain `venv` + `pip`:
+```bash
+python3.11 -m venv .venv
+source .venv/bin/activate
 pip install interpkit
 # For linear probe support:
-pip install interpkit[probe]
+pip install "interpkit[probe]"
+```
+Or with `conda`:
+```bash
+conda create -n interpkit python=3.11 -y
+conda activate interpkit
+pip install interpkit
 ```
-Or install from source for development:
+Installing from source for development:
 ```bash
 git clone https://github.com/z4nix/interpkit.git
 cd interpkit
-pip install -e ".[dev]"
+uv venv --python 3.11 && source .venv/bin/activate
+uv pip install -e ".[dev]"
 ```
+> Python 3.10+ is required. If you must install into your system Python, use `pip install --user interpkit` and be aware that conflicting versions of `typer`, `rich`, or `transformers` already on your machine can break the CLI.
 ---
 ## Quickstart
@@ -156,7 +188,13 @@ See [examples/10_chat_models.ipynb](examples/10_chat_models.ipynb) for a full wa
 | **`ov_scores`** | OV circuit analysis — W_OV matrix per head | Transformers |
 | **`qk_scores`** | QK circuit analysis — W_QK matrix per head | Transformers |
 | **`composition`** | Q/K/V composition scores between heads in two layers | Transformers |
-| **`find_circuit`** | Automated circuit discovery via iterative ablation | Transformers |
+| **`find_circuit`** | Automated circuit discovery — iterative ablation or EAP-based selection with causal verification | Transformers |
+| **`generate`** | Generation with interventions active across every decode step + per-token lens capture | Generative LMs |
+| **`intervene`** | Context manager applying steer/ablate/patch interventions to any op | Any model |
+| **`atp`** | Attribution Patching — first-order patch-effect scores for all modules in 3 passes | Any model |
+| **`eap`** | Edge Attribution Patching — gradient-based component → residual-stream edge scores (EAP-IG via `ig_steps`) | Causal LMs |
+| **`train_tuned_lens`** | Train per-layer tuned-lens translators (Belrose et al. 2023); use via `lens(kind="tuned")` | LMs |
+| **`max_activating`** | Scan a corpus for the examples that most activate a neuron / SAE feature / head | Any model |
 | **`batch`** | Run any operation over a dataset with result aggregation | Any model |
 ---
@@ -452,6 +490,20 @@ interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae jb
 interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae ./my_sae.safetensors
 interpkit dla gpt2 "The capital of France is" --sae jbloom/GPT2-Small-SAEs-Reformatted --sae-at transformer.h.11.attn
+# Generation-time interventions + per-token lens trajectories
+interpkit generate gpt2 "I feel" --positive " joy" --negative " fear" --at transformer.h.6 --scale 8
+interpkit generate gpt2 "The capital of France is" --capture lens
+# Gradient-based circuit discovery
+interpkit atp gpt2 --clean "The capital of France is" --corrupted "The capital of Germany is"
+interpkit eap gpt2 --clean "..." --corrupted "..." --ig-steps 5
+interpkit find-circuit gpt2 --clean "..." --corrupted "..." --method eap --threshold 0.3
+# Tuned lens + max-activating examples
+interpkit train-tuned-lens gpt2 --corpus-file texts.txt --save lens_dir/
+interpkit lens gpt2 "The capital of France is" --tuned-lens lens_dir/
+interpkit maxact gpt2 --at transformer.h.6.mlp --neuron 42 --texts-file corpus.txt
 # Chat / instruct models — applies the tokenizer's chat template automatically
 interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "Write a haiku about cats." --max-new-tokens 64
 interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "What is 2+2?" --system "You are terse." --show-prompt
@@ -522,6 +574,30 @@ model.trace(input_a, input_b, top_k=10)
 ---
+## Known limitations
+- **DeBERTa-v3 (DisentangledSelfAttention).** A known broadcast bug in
+  HuggingFace transformers' relative-position-bias path triggers on
+  forward hooks for any DeBERTa-v3 model (e.g.
+  `microsoft/deberta-v3-small`). interpkit detects this at load time
+  and gates `trace`, `decompose`, `attribute`, `head_activations`,
+  `steer`, `probe`, `diff`, `ov_scores`, `qk_scores` with a clean
+  `OperationNotSupportedForArchitecture` rather than the cryptic
+  upstream `RuntimeError: tensor (512) must match (7)`. `lens` and
+  `attention` still work. Use `bert`, `roberta`, `electra`, or
+  `albert` for the gated ops on encoder-only inputs.
+- **Integrated-gradients completeness on some modern decoders.** On
+  Qwen2/Qwen2.5/Qwen3 and SmolLM-family models, the trapezoidal Riemann
+  sum does not converge to model-output completeness even at large
+  `n_steps` (the P0b/N-008 empirical finding). Per-token IG scores remain
+  reliable as a token-importance **ranking** but cannot be interpreted as
+  additive contribution **magnitudes** on these models. `attribute()`
+  reports this programmatically: `result["interpretation"]` is
+  `"ranking_only"` in that case (and for `gradient` / `gradient_x_input`,
+  which are saliency methods), versus `"quantitative"` when IG completeness
+  holds. Branch on that field rather than parsing the warning text.
 ## Examples
 See the [`examples/`](examples/) directory for Jupyter notebooks:
@@ -538,6 +614,8 @@ See the [`examples/`](examples/) directory for Jupyter notebooks:
 | `08_dla_and_circuits` | DLA, head activations, residual decomposition, OV/QK analysis, composition, circuit discovery |
 | `09_scan_and_batch` | Auto-scan, batch operations, dataset workflows |
 | `10_chat_models` | Chat-template handling, `model.chat()`, message-list inputs, chat-style steering |
+| `11_generation_interventions` | Steering/ablation active across every decode step, per-token lens trajectories, positional interventions, `model.intervene()` |
+| `12_circuit_discovery_and_lenses` | Attribution Patching, Edge Attribution Patching, EAP-driven `find_circuit`, tuned lens, max-activating examples |
 ---

{interpkit-0.4.0 → interpkit-0.6.0}/README.md RENAMED Viewed

@@ -12,27 +12,55 @@
 Mechanistic interpretability tooling today is fragmented. Each library supports a narrow set of architectures, and moving to a different model family usually means rewriting hook code from scratch.
-InterpKit provides a single, consistent interface for mech interp operations across any HuggingFace model — transformers, SSMs, vision models, and more — with zero annotation required.
+InterpKit provides a single, consistent interface for mech interp operations across a wide range of HuggingFace models — transformers, SSMs, vision models, and more — with automatic architecture discovery and little to no manual setup.
 ---
 ## Install
+We strongly recommend installing into an isolated environment so InterpKit's pinned dependencies (e.g. `typer`, `rich`, `transformers`) don't clash with whatever you already have installed globally.
+Using [uv](https://docs.astral.sh/uv/) (recommended — fast, handles Python versions for you):
 ```bash
+uv venv --python 3.11
+source .venv/bin/activate
+uv pip install interpkit
+# For linear probe support:
+uv pip install "interpkit[probe]"
+```
+Or with plain `venv` + `pip`:
+```bash
+python3.11 -m venv .venv
+source .venv/bin/activate
 pip install interpkit
 # For linear probe support:
-pip install interpkit[probe]
+pip install "interpkit[probe]"
+```
+Or with `conda`:
+```bash
+conda create -n interpkit python=3.11 -y
+conda activate interpkit
+pip install interpkit
 ```
-Or install from source for development:
+Installing from source for development:
 ```bash
 git clone https://github.com/z4nix/interpkit.git
 cd interpkit
-pip install -e ".[dev]"
+uv venv --python 3.11 && source .venv/bin/activate
+uv pip install -e ".[dev]"
 ```
+> Python 3.10+ is required. If you must install into your system Python, use `pip install --user interpkit` and be aware that conflicting versions of `typer`, `rich`, or `transformers` already on your machine can break the CLI.
 ---
 ## Quickstart
@@ -108,7 +136,13 @@ See [examples/10_chat_models.ipynb](examples/10_chat_models.ipynb) for a full wa
 | **`ov_scores`** | OV circuit analysis — W_OV matrix per head | Transformers |
 | **`qk_scores`** | QK circuit analysis — W_QK matrix per head | Transformers |
 | **`composition`** | Q/K/V composition scores between heads in two layers | Transformers |
-| **`find_circuit`** | Automated circuit discovery via iterative ablation | Transformers |
+| **`find_circuit`** | Automated circuit discovery — iterative ablation or EAP-based selection with causal verification | Transformers |
+| **`generate`** | Generation with interventions active across every decode step + per-token lens capture | Generative LMs |
+| **`intervene`** | Context manager applying steer/ablate/patch interventions to any op | Any model |
+| **`atp`** | Attribution Patching — first-order patch-effect scores for all modules in 3 passes | Any model |
+| **`eap`** | Edge Attribution Patching — gradient-based component → residual-stream edge scores (EAP-IG via `ig_steps`) | Causal LMs |
+| **`train_tuned_lens`** | Train per-layer tuned-lens translators (Belrose et al. 2023); use via `lens(kind="tuned")` | LMs |
+| **`max_activating`** | Scan a corpus for the examples that most activate a neuron / SAE feature / head | Any model |
 | **`batch`** | Run any operation over a dataset with result aggregation | Any model |
 ---
@@ -404,6 +438,20 @@ interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae jb
 interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae ./my_sae.safetensors
 interpkit dla gpt2 "The capital of France is" --sae jbloom/GPT2-Small-SAEs-Reformatted --sae-at transformer.h.11.attn
+# Generation-time interventions + per-token lens trajectories
+interpkit generate gpt2 "I feel" --positive " joy" --negative " fear" --at transformer.h.6 --scale 8
+interpkit generate gpt2 "The capital of France is" --capture lens
+# Gradient-based circuit discovery
+interpkit atp gpt2 --clean "The capital of France is" --corrupted "The capital of Germany is"
+interpkit eap gpt2 --clean "..." --corrupted "..." --ig-steps 5
+interpkit find-circuit gpt2 --clean "..." --corrupted "..." --method eap --threshold 0.3
+# Tuned lens + max-activating examples
+interpkit train-tuned-lens gpt2 --corpus-file texts.txt --save lens_dir/
+interpkit lens gpt2 "The capital of France is" --tuned-lens lens_dir/
+interpkit maxact gpt2 --at transformer.h.6.mlp --neuron 42 --texts-file corpus.txt
 # Chat / instruct models — applies the tokenizer's chat template automatically
 interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "Write a haiku about cats." --max-new-tokens 64
 interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "What is 2+2?" --system "You are terse." --show-prompt
@@ -474,6 +522,30 @@ model.trace(input_a, input_b, top_k=10)
 ---
+## Known limitations
+- **DeBERTa-v3 (DisentangledSelfAttention).** A known broadcast bug in
+  HuggingFace transformers' relative-position-bias path triggers on
+  forward hooks for any DeBERTa-v3 model (e.g.
+  `microsoft/deberta-v3-small`). interpkit detects this at load time
+  and gates `trace`, `decompose`, `attribute`, `head_activations`,
+  `steer`, `probe`, `diff`, `ov_scores`, `qk_scores` with a clean
+  `OperationNotSupportedForArchitecture` rather than the cryptic
+  upstream `RuntimeError: tensor (512) must match (7)`. `lens` and
+  `attention` still work. Use `bert`, `roberta`, `electra`, or
+  `albert` for the gated ops on encoder-only inputs.
+- **Integrated-gradients completeness on some modern decoders.** On
+  Qwen2/Qwen2.5/Qwen3 and SmolLM-family models, the trapezoidal Riemann
+  sum does not converge to model-output completeness even at large
+  `n_steps` (the P0b/N-008 empirical finding). Per-token IG scores remain
+  reliable as a token-importance **ranking** but cannot be interpreted as
+  additive contribution **magnitudes** on these models. `attribute()`
+  reports this programmatically: `result["interpretation"]` is
+  `"ranking_only"` in that case (and for `gradient` / `gradient_x_input`,
+  which are saliency methods), versus `"quantitative"` when IG completeness
+  holds. Branch on that field rather than parsing the warning text.
 ## Examples
 See the [`examples/`](examples/) directory for Jupyter notebooks:
@@ -490,6 +562,8 @@ See the [`examples/`](examples/) directory for Jupyter notebooks:
 | `08_dla_and_circuits` | DLA, head activations, residual decomposition, OV/QK analysis, composition, circuit discovery |
 | `09_scan_and_batch` | Auto-scan, batch operations, dataset workflows |
 | `10_chat_models` | Chat-template handling, `model.chat()`, message-list inputs, chat-style steering |
+| `11_generation_interventions` | Steering/ablation active across every decode step, per-token lens trajectories, positional interventions, `model.intervene()` |
+| `12_circuit_discovery_and_lenses` | Attribution Patching, Edge Attribution Patching, EAP-driven `find_circuit`, tuned lens, max-activating examples |
 ---

interpkit-0.6.0/interpkit/__init__.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""interpkit — mech interp for any HuggingFace model."""
+from interpkit.core.arch import (
+    ArchFamily,
+    ArchInfo,
+    BlockSpec,
+    LayerInfo,
+    ModuleInfo,
+    resolve_arch,
+)
+from interpkit.core.exceptions import (
+    ArchitectureNotSupported,
+    AttentionBackendUnavailable,
+    InterpkitError,
+    LensPipelineMismatch,
+    OperationNotSupportedForArchitecture,
+    WrongInputType,
+)
+from interpkit.core.interventions import (
+    AblateIntervention,
+    CaptureProbe,
+    FnIntervention,
+    GenerationContext,
+    Intervention,
+    PatchIntervention,
+    SteerIntervention,
+    apply_interventions,
+)
+from interpkit.core.loader import load, load_module
+from interpkit.core.model import Model
+from interpkit.core.registry import register
+from interpkit.core.tl_compat import (
+    list_roundtrippable_hooks,
+    list_tl_hooks,
+    to_native_name,
+    to_tl_name,
+)
+def diff(model_a, model_b, input_data, *, save=None):
+    """Compare activations between two models on the same input."""
+    from interpkit.ops.diff import run_diff
+    return run_diff(model_a, model_b, input_data, save=save)
+__all__ = [
+    # Loaders
+    "load",
+    "load_module",
+    "Model",
+    # Architecture types
+    "ArchInfo",
+    "ArchFamily",
+    "BlockSpec",
+    "resolve_arch",
+    # Per-layer structural types
+    "LayerInfo",
+    "ModuleInfo",
+    # Exception types
+    "InterpkitError",
+    "ArchitectureNotSupported",
+    "AttentionBackendUnavailable",
+    "LensPipelineMismatch",
+    "OperationNotSupportedForArchitecture",
+    "WrongInputType",
+    # Interventions
+    "Intervention",
+    "SteerIntervention",
+    "AblateIntervention",
+    "PatchIntervention",
+    "FnIntervention",
+    "CaptureProbe",
+    "GenerationContext",
+    "apply_interventions",
+    # Operations
+    "register",
+    "diff",
+    # TL compat
+    "to_tl_name",
+    "to_native_name",
+    "list_tl_hooks",
+    "list_roundtrippable_hooks",
+]

{interpkit-0.4.0 → interpkit-0.6.0}/interpkit/__main__.py RENAMED Viewed

@@ -1,18 +1,22 @@
 """Entry point so ``python -m interpkit`` invokes the Typer CLI.
-Mirrors the ``[project.scripts] interpkit = "interpkit.cli.main:app"``
+Mirrors the ``[project.scripts] interpkit = "interpkit.cli.main:run"``
 console script declared in :file:`pyproject.toml`, so users without the
 console script on their ``$PATH`` (e.g. just-installed in a fresh
 environment, vendored copies, ad-hoc subprocess invocations) can still
 reach every CLI command via ``python -m interpkit ...``.
 """
-from interpkit.cli.main import app
+from interpkit.cli.main import run
 def main() -> None:
-    """Invoke the Typer app — separate function makes patching easier in tests."""
-    app()
+    """Invoke the CLI — separate function makes patching easier in tests.
+    Uses ``run`` (not ``app`` directly) so interpkit's fail-loud errors are
+    rendered as clean one-line messages instead of tracebacks.
+    """
+    run()
 if __name__ == "__main__":

interpkit 0.4.0__tar.gz → 0.6.0__tar.gz

interpkit 0.4.0__tar.gz → 0.6.0__tar.gz