PyPI - interpkit - Versions diffs - 0.3.0__tar.gz → 0.5.0__tar.gz - Mend

interpkit 0.3.0.tar.gz → 0.5.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (123) hide show

{interpkit-0.3.0 → interpkit-0.5.0}/PKG-INFO +100 -9
{interpkit-0.3.0 → interpkit-0.5.0}/README.md +96 -7
interpkit-0.5.0/interpkit/__init__.py +65 -0
interpkit-0.5.0/interpkit/__main__.py +23 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/cli/main.py +274 -10
interpkit-0.5.0/interpkit/core/arch/__init__.py +102 -0
interpkit-0.5.0/interpkit/core/arch/blocks.py +257 -0
interpkit-0.5.0/interpkit/core/arch/family.py +421 -0
interpkit-0.5.0/interpkit/core/arch/heads.py +583 -0
interpkit-0.5.0/interpkit/core/arch/layers.py +462 -0
interpkit-0.5.0/interpkit/core/arch/names.py +60 -0
interpkit-0.5.0/interpkit/core/arch/probe.py +241 -0
interpkit-0.5.0/interpkit/core/arch/residual.py +653 -0
interpkit-0.5.0/interpkit/core/arch/resolve.py +679 -0
interpkit-0.5.0/interpkit/core/arch/tree.py +190 -0
interpkit-0.5.0/interpkit/core/arch/types.py +486 -0
interpkit-0.5.0/interpkit/core/enums.py +105 -0
interpkit-0.5.0/interpkit/core/exceptions.py +83 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/html.py +5 -2
interpkit-0.5.0/interpkit/core/inputs.py +447 -0
interpkit-0.5.0/interpkit/core/loader.py +704 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/model.py +537 -38
interpkit-0.5.0/interpkit/core/paths.py +71 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/render.py +74 -18
interpkit-0.5.0/interpkit/core/support_matrix.py +690 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/theme.py +11 -8
interpkit-0.5.0/interpkit/core/tl_compat.py +297 -0
interpkit-0.5.0/interpkit/ops/_atp.py +182 -0
interpkit-0.5.0/interpkit/ops/_hooks.py +233 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/ablate.py +14 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/activations.py +9 -1
interpkit-0.5.0/interpkit/ops/attention.py +334 -0
interpkit-0.5.0/interpkit/ops/attribute.py +844 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/batch.py +4 -4
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/circuits.py +221 -110
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/diff.py +22 -2
interpkit-0.5.0/interpkit/ops/dla.py +628 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/find_circuit.py +15 -17
interpkit-0.5.0/interpkit/ops/heads.py +282 -0
interpkit-0.5.0/interpkit/ops/lens.py +397 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/patch.py +113 -22
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/probe.py +14 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/report.py +55 -10
interpkit-0.5.0/interpkit/ops/sae.py +739 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/scan.py +28 -6
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/steer.py +59 -2
interpkit-0.5.0/interpkit/ops/trace.py +502 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit.egg-info/PKG-INFO +100 -9
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit.egg-info/SOURCES.txt +31 -2
interpkit-0.5.0/interpkit.egg-info/entry_points.txt +2 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit.egg-info/requires.txt +3 -1
{interpkit-0.3.0 → interpkit-0.5.0}/pyproject.toml +27 -4
interpkit-0.5.0/tests/test_archinfo_serialization.py +61 -0
interpkit-0.5.0/tests/test_attention.py +112 -0
interpkit-0.5.0/tests/test_audit_regressions.py +1891 -0
interpkit-0.5.0/tests/test_cache_invalidation.py +66 -0
interpkit-0.5.0/tests/test_capabilities.py +227 -0
interpkit-0.5.0/tests/test_chat.py +217 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_cli.py +77 -1
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_discovery.py +1 -1
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_discovery_units.py +21 -21
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_error_handling.py +11 -0
interpkit-0.5.0/tests/test_inputs.py +251 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_invariants.py +22 -8
interpkit-0.5.0/tests/test_lens.py +53 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_load_params.py +12 -2
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_multi_arch.py +12 -5
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_ops.py +6 -1
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_param_variants.py +4 -2
interpkit-0.5.0/tests/test_phase3_regressions.py +121 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_regressions.py +5 -2
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_render_internals.py +34 -6
interpkit-0.5.0/tests/test_resolver.py +268 -0
interpkit-0.5.0/tests/test_resolver_golden.py +131 -0
interpkit-0.5.0/tests/test_robustness_audit.py +790 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_sae.py +161 -1
interpkit-0.5.0/tests/test_seq2seq_contract.py +119 -0
interpkit-0.5.0/tests/test_steer.py +91 -0
interpkit-0.5.0/tests/test_trace.py +76 -0
interpkit-0.5.0/tests/test_validation.py +130 -0
interpkit-0.3.0/interpkit/__init__.py +0 -27
interpkit-0.3.0/interpkit/core/discovery.py +0 -810
interpkit-0.3.0/interpkit/core/inputs.py +0 -130
interpkit-0.3.0/interpkit/core/loader.py +0 -292
interpkit-0.3.0/interpkit/core/tl_compat.py +0 -174
interpkit-0.3.0/interpkit/ops/attention.py +0 -365
interpkit-0.3.0/interpkit/ops/attribute.py +0 -308
interpkit-0.3.0/interpkit/ops/dla.py +0 -488
interpkit-0.3.0/interpkit/ops/heads.py +0 -175
interpkit-0.3.0/interpkit/ops/lens.py +0 -243
interpkit-0.3.0/interpkit/ops/sae.py +0 -439
interpkit-0.3.0/interpkit/ops/trace.py +0 -349
interpkit-0.3.0/interpkit.egg-info/entry_points.txt +0 -2
interpkit-0.3.0/tests/test_attention.py +0 -44
interpkit-0.3.0/tests/test_lens.py +0 -25
interpkit-0.3.0/tests/test_steer.py +0 -30
interpkit-0.3.0/tests/test_trace.py +0 -35
{interpkit-0.3.0 → interpkit-0.5.0}/LICENSE +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/cli/__init__.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/__init__.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/cache.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/plot.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/core/registry.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/__init__.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit/ops/inspect.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit.egg-info/dependency_links.txt +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/interpkit.egg-info/top_level.txt +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/setup.cfg +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_ablate.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_activations.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_architectures.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_attribute.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_cache.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_diff.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_html.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_inspect.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_patch.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_plot_internals.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_plots.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_probe.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_registry.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_tl_compat.py +0 -0
{interpkit-0.3.0 → interpkit-0.5.0}/tests/test_tl_ops.py +0 -0

{interpkit-0.3.0 → interpkit-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: interpkit
-Version: 0.3.0
+Version: 0.5.0
 Summary: Mech interp for any HuggingFace model.
 Author: Davide Zani
 License-Expression: MIT
@@ -20,7 +20,8 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch>=2.1
-Requires-Dist: transformers>=4.36
+Requires-Dist: numpy>=1.24
+Requires-Dist: transformers<6,>=4.36
 Requires-Dist: safetensors>=0.4
 Requires-Dist: rich>=13.0
 Requires-Dist: rich-gradient>=0.3
@@ -28,6 +29,7 @@ Requires-Dist: typer>=0.9
 Requires-Dist: Pillow>=10.0
 Requires-Dist: matplotlib>=3.8
 Requires-Dist: huggingface-hub>=0.20
+Requires-Dist: sentencepiece>=0.1.99
 Provides-Extra: vision
 Requires-Dist: torchvision>=0.16; extra == "vision"
 Provides-Extra: probe
@@ -60,27 +62,55 @@ Dynamic: license-file
 Mechanistic interpretability tooling today is fragmented. Each library supports a narrow set of architectures, and moving to a different model family usually means rewriting hook code from scratch.
-InterpKit provides a single, consistent interface for mech interp operations across any HuggingFace model — transformers, SSMs, vision models, and more — with zero annotation required.
+InterpKit provides a single, consistent interface for mech interp operations across a wide range of HuggingFace models — transformers, SSMs, vision models, and more — with automatic architecture discovery and little to no manual setup.
 ---
 ## Install
+We strongly recommend installing into an isolated environment so InterpKit's pinned dependencies (e.g. `typer`, `rich`, `transformers`) don't clash with whatever you already have installed globally.
+Using [uv](https://docs.astral.sh/uv/) (recommended — fast, handles Python versions for you):
+```bash
+uv venv --python 3.11
+source .venv/bin/activate
+uv pip install interpkit
+# For linear probe support:
+uv pip install "interpkit[probe]"
+```
+Or with plain `venv` + `pip`:
 ```bash
+python3.11 -m venv .venv
+source .venv/bin/activate
 pip install interpkit
 # For linear probe support:
-pip install interpkit[probe]
+pip install "interpkit[probe]"
 ```
-Or install from source for development:
+Or with `conda`:
+```bash
+conda create -n interpkit python=3.11 -y
+conda activate interpkit
+pip install interpkit
+```
+Installing from source for development:
 ```bash
 git clone https://github.com/z4nix/interpkit.git
 cd interpkit
-pip install -e ".[dev]"
+uv venv --python 3.11 && source .venv/bin/activate
+uv pip install -e ".[dev]"
 ```
+> Python 3.10+ is required. If you must install into your system Python, use `pip install --user interpkit` and be aware that conflicting versions of `typer`, `rich`, or `transformers` already on your machine can break the CLI.
 ---
 ## Quickstart
@@ -111,6 +141,25 @@ model = interpkit.load("google/vit-base-patch16-224")
 model = interpkit.load("bert-base-uncased")
 ```
+### Chat models
+Instruction-tuned models work too — interpkit applies the tokenizer's chat template automatically.
+```python
+chat = interpkit.load("HuggingFaceTB/SmolLM2-360M-Instruct")
+result = chat.chat("Write a haiku about cats.", max_new_tokens=64)
+print(result["response"])
+# Run any other op on the templated prompt
+chat.dla(result["prompt"])
+# Or pass a message list directly to any op
+chat.dla([{"role": "user", "content": "Capital of France?"}])
+```
+See [examples/10_chat_models.ipynb](examples/10_chat_models.ipynb) for a full walkthrough including chat-style steering.
 ---
 ## Operations
@@ -118,6 +167,7 @@ model = interpkit.load("bert-base-uncased")
 | Operation | What it does | Works on |
 |-----------|-------------|----------|
 | **`scan`** | One-command model overview: runs DLA, lens, attention, attribution and surfaces key findings | LMs |
+| **`chat`** | Send a message through the tokenizer's chat template and generate a reply | Chat / instruct LMs |
 | **`dla`** | Direct Logit Attribution — decompose output logits by head and MLP contribution; optionally decompose through an SAE into per-feature attributions | LMs |
 | `inspect` | Module tree with types, param counts, shapes | Any model |
 | `patch` | Activation patching at a module, head, or position | Any model |
@@ -328,10 +378,12 @@ results = model.dla_batch(["The capital of France is", "The CEO of Apple is"])
 ## Steering
 ```python
-vector = model.steer_vector("Love", "Hate", at="transformer.h.8")
+vector = model.steer_vector(" love", " hate", at="transformer.h.8")
 model.steer("The weather today is", vector=vector, at="transformer.h.8", scale=2.0)
 ```
+> Note the leading spaces. BPE tokenizers (GPT-2, Llama, ...) treat `" love"` and `"love"` as different tokens, and the leading-space variant is the one the model actually sees in normal text. interpkit prints a warning if you forget.
 ## Linear Probe
 ```python
@@ -422,7 +474,7 @@ interpkit lens gpt2 "The capital of France is"
 interpkit lens gpt2 "The capital of France is" --position -1
 interpkit attention gpt2 "The capital of France is" --layer 8 --save attention.png
 interpkit attribute gpt2 "The capital of France is"
-interpkit steer gpt2 "The weather is" --positive Love --negative Hate --at transformer.h.8
+interpkit steer gpt2 "The weather is" --positive " love" --negative " hate" --at transformer.h.8
 interpkit ablate gpt2 "The capital of France is" --at transformer.h.8.mlp
 interpkit decompose gpt2 "The capital of France is"
 interpkit diff gpt2 my-finetuned-gpt2 "The capital of France is" --save diff.png
@@ -430,6 +482,10 @@ interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae jb
 interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae ./my_sae.safetensors
 interpkit dla gpt2 "The capital of France is" --sae jbloom/GPT2-Small-SAEs-Reformatted --sae-at transformer.h.11.attn
+# Chat / instruct models — applies the tokenizer's chat template automatically
+interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "Write a haiku about cats." --max-new-tokens 64
+interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "What is 2+2?" --system "You are terse." --show-prompt
 # Interactive HTML output
 interpkit attention gpt2 "hello world" --html attention.html
 interpkit trace gpt2 --clean "...Paris..." --corrupted "...Rome..." --html trace.html
@@ -439,7 +495,17 @@ interpkit attribute gpt2 "The capital of France is" --html attribution.html
 interpkit attribute microsoft/resnet-50 cat.jpg --target 281
 ```
-Run `interpkit` with no arguments for a full command reference.
+Run `interpkit` with no arguments for a full command reference, or
+`interpkit --extensive` for a beginner-friendly walkthrough of every command.
+If the `interpkit` console script isn't on your `PATH` (e.g. fresh
+environments, sandboxed installs, or running from a checkout without
+re-installing), every command also works as `python -m interpkit ...`:
+```bash
+python -m interpkit scan gpt2 "The capital of France is"
+python -m interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "Hello!"
+```
 ---
@@ -486,6 +552,30 @@ model.trace(input_a, input_b, top_k=10)
 ---
+## Known limitations
+- **DeBERTa-v3 (DisentangledSelfAttention).** A known broadcast bug in
+  HuggingFace transformers' relative-position-bias path triggers on
+  forward hooks for any DeBERTa-v3 model (e.g.
+  `microsoft/deberta-v3-small`). interpkit detects this at load time
+  and gates `trace`, `decompose`, `attribute`, `head_activations`,
+  `steer`, `probe`, `diff`, `ov_scores`, `qk_scores` with a clean
+  `OperationNotSupportedForArchitecture` rather than the cryptic
+  upstream `RuntimeError: tensor (512) must match (7)`. `lens` and
+  `attention` still work. Use `bert`, `roberta`, `electra`, or
+  `albert` for the gated ops on encoder-only inputs.
+- **Integrated-gradients completeness on some modern decoders.** On
+  Qwen2/Qwen2.5/Qwen3 and SmolLM-family models, the trapezoidal Riemann
+  sum does not converge to model-output completeness even at large
+  `n_steps` (the P0b/N-008 empirical finding). Per-token IG scores remain
+  reliable as a token-importance **ranking** but cannot be interpreted as
+  additive contribution **magnitudes** on these models. `attribute()`
+  reports this programmatically: `result["interpretation"]` is
+  `"ranking_only"` in that case (and for `gradient` / `gradient_x_input`,
+  which are saliency methods), versus `"quantitative"` when IG completeness
+  holds. Branch on that field rather than parsing the warning text.
 ## Examples
 See the [`examples/`](examples/) directory for Jupyter notebooks:
@@ -501,6 +591,7 @@ See the [`examples/`](examples/) directory for Jupyter notebooks:
 | `07_vision_models` | ResNet/ViT attribution, ablation, activations |
 | `08_dla_and_circuits` | DLA, head activations, residual decomposition, OV/QK analysis, composition, circuit discovery |
 | `09_scan_and_batch` | Auto-scan, batch operations, dataset workflows |
+| `10_chat_models` | Chat-template handling, `model.chat()`, message-list inputs, chat-style steering |
 ---

{interpkit-0.3.0 → interpkit-0.5.0}/README.md RENAMED Viewed

@@ -12,27 +12,55 @@
 Mechanistic interpretability tooling today is fragmented. Each library supports a narrow set of architectures, and moving to a different model family usually means rewriting hook code from scratch.
-InterpKit provides a single, consistent interface for mech interp operations across any HuggingFace model — transformers, SSMs, vision models, and more — with zero annotation required.
+InterpKit provides a single, consistent interface for mech interp operations across a wide range of HuggingFace models — transformers, SSMs, vision models, and more — with automatic architecture discovery and little to no manual setup.
 ---
 ## Install
+We strongly recommend installing into an isolated environment so InterpKit's pinned dependencies (e.g. `typer`, `rich`, `transformers`) don't clash with whatever you already have installed globally.
+Using [uv](https://docs.astral.sh/uv/) (recommended — fast, handles Python versions for you):
+```bash
+uv venv --python 3.11
+source .venv/bin/activate
+uv pip install interpkit
+# For linear probe support:
+uv pip install "interpkit[probe]"
+```
+Or with plain `venv` + `pip`:
 ```bash
+python3.11 -m venv .venv
+source .venv/bin/activate
 pip install interpkit
 # For linear probe support:
-pip install interpkit[probe]
+pip install "interpkit[probe]"
 ```
-Or install from source for development:
+Or with `conda`:
+```bash
+conda create -n interpkit python=3.11 -y
+conda activate interpkit
+pip install interpkit
+```
+Installing from source for development:
 ```bash
 git clone https://github.com/z4nix/interpkit.git
 cd interpkit
-pip install -e ".[dev]"
+uv venv --python 3.11 && source .venv/bin/activate
+uv pip install -e ".[dev]"
 ```
+> Python 3.10+ is required. If you must install into your system Python, use `pip install --user interpkit` and be aware that conflicting versions of `typer`, `rich`, or `transformers` already on your machine can break the CLI.
 ---
 ## Quickstart
@@ -63,6 +91,25 @@ model = interpkit.load("google/vit-base-patch16-224")
 model = interpkit.load("bert-base-uncased")
 ```
+### Chat models
+Instruction-tuned models work too — interpkit applies the tokenizer's chat template automatically.
+```python
+chat = interpkit.load("HuggingFaceTB/SmolLM2-360M-Instruct")
+result = chat.chat("Write a haiku about cats.", max_new_tokens=64)
+print(result["response"])
+# Run any other op on the templated prompt
+chat.dla(result["prompt"])
+# Or pass a message list directly to any op
+chat.dla([{"role": "user", "content": "Capital of France?"}])
+```
+See [examples/10_chat_models.ipynb](examples/10_chat_models.ipynb) for a full walkthrough including chat-style steering.
 ---
 ## Operations
@@ -70,6 +117,7 @@ model = interpkit.load("bert-base-uncased")
 | Operation | What it does | Works on |
 |-----------|-------------|----------|
 | **`scan`** | One-command model overview: runs DLA, lens, attention, attribution and surfaces key findings | LMs |
+| **`chat`** | Send a message through the tokenizer's chat template and generate a reply | Chat / instruct LMs |
 | **`dla`** | Direct Logit Attribution — decompose output logits by head and MLP contribution; optionally decompose through an SAE into per-feature attributions | LMs |
 | `inspect` | Module tree with types, param counts, shapes | Any model |
 | `patch` | Activation patching at a module, head, or position | Any model |
@@ -280,10 +328,12 @@ results = model.dla_batch(["The capital of France is", "The CEO of Apple is"])
 ## Steering
 ```python
-vector = model.steer_vector("Love", "Hate", at="transformer.h.8")
+vector = model.steer_vector(" love", " hate", at="transformer.h.8")
 model.steer("The weather today is", vector=vector, at="transformer.h.8", scale=2.0)
 ```
+> Note the leading spaces. BPE tokenizers (GPT-2, Llama, ...) treat `" love"` and `"love"` as different tokens, and the leading-space variant is the one the model actually sees in normal text. interpkit prints a warning if you forget.
 ## Linear Probe
 ```python
@@ -374,7 +424,7 @@ interpkit lens gpt2 "The capital of France is"
 interpkit lens gpt2 "The capital of France is" --position -1
 interpkit attention gpt2 "The capital of France is" --layer 8 --save attention.png
 interpkit attribute gpt2 "The capital of France is"
-interpkit steer gpt2 "The weather is" --positive Love --negative Hate --at transformer.h.8
+interpkit steer gpt2 "The weather is" --positive " love" --negative " hate" --at transformer.h.8
 interpkit ablate gpt2 "The capital of France is" --at transformer.h.8.mlp
 interpkit decompose gpt2 "The capital of France is"
 interpkit diff gpt2 my-finetuned-gpt2 "The capital of France is" --save diff.png
@@ -382,6 +432,10 @@ interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae jb
 interpkit features gpt2 "The capital of France is" --at transformer.h.8 --sae ./my_sae.safetensors
 interpkit dla gpt2 "The capital of France is" --sae jbloom/GPT2-Small-SAEs-Reformatted --sae-at transformer.h.11.attn
+# Chat / instruct models — applies the tokenizer's chat template automatically
+interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "Write a haiku about cats." --max-new-tokens 64
+interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "What is 2+2?" --system "You are terse." --show-prompt
 # Interactive HTML output
 interpkit attention gpt2 "hello world" --html attention.html
 interpkit trace gpt2 --clean "...Paris..." --corrupted "...Rome..." --html trace.html
@@ -391,7 +445,17 @@ interpkit attribute gpt2 "The capital of France is" --html attribution.html
 interpkit attribute microsoft/resnet-50 cat.jpg --target 281
 ```
-Run `interpkit` with no arguments for a full command reference.
+Run `interpkit` with no arguments for a full command reference, or
+`interpkit --extensive` for a beginner-friendly walkthrough of every command.
+If the `interpkit` console script isn't on your `PATH` (e.g. fresh
+environments, sandboxed installs, or running from a checkout without
+re-installing), every command also works as `python -m interpkit ...`:
+```bash
+python -m interpkit scan gpt2 "The capital of France is"
+python -m interpkit chat HuggingFaceTB/SmolLM2-360M-Instruct "Hello!"
+```
 ---
@@ -438,6 +502,30 @@ model.trace(input_a, input_b, top_k=10)
 ---
+## Known limitations
+- **DeBERTa-v3 (DisentangledSelfAttention).** A known broadcast bug in
+  HuggingFace transformers' relative-position-bias path triggers on
+  forward hooks for any DeBERTa-v3 model (e.g.
+  `microsoft/deberta-v3-small`). interpkit detects this at load time
+  and gates `trace`, `decompose`, `attribute`, `head_activations`,
+  `steer`, `probe`, `diff`, `ov_scores`, `qk_scores` with a clean
+  `OperationNotSupportedForArchitecture` rather than the cryptic
+  upstream `RuntimeError: tensor (512) must match (7)`. `lens` and
+  `attention` still work. Use `bert`, `roberta`, `electra`, or
+  `albert` for the gated ops on encoder-only inputs.
+- **Integrated-gradients completeness on some modern decoders.** On
+  Qwen2/Qwen2.5/Qwen3 and SmolLM-family models, the trapezoidal Riemann
+  sum does not converge to model-output completeness even at large
+  `n_steps` (the P0b/N-008 empirical finding). Per-token IG scores remain
+  reliable as a token-importance **ranking** but cannot be interpreted as
+  additive contribution **magnitudes** on these models. `attribute()`
+  reports this programmatically: `result["interpretation"]` is
+  `"ranking_only"` in that case (and for `gradient` / `gradient_x_input`,
+  which are saliency methods), versus `"quantitative"` when IG completeness
+  holds. Branch on that field rather than parsing the warning text.
 ## Examples
 See the [`examples/`](examples/) directory for Jupyter notebooks:
@@ -453,6 +541,7 @@ See the [`examples/`](examples/) directory for Jupyter notebooks:
 | `07_vision_models` | ResNet/ViT attribution, ablation, activations |
 | `08_dla_and_circuits` | DLA, head activations, residual decomposition, OV/QK analysis, composition, circuit discovery |
 | `09_scan_and_batch` | Auto-scan, batch operations, dataset workflows |
+| `10_chat_models` | Chat-template handling, `model.chat()`, message-list inputs, chat-style steering |
 ---

interpkit-0.5.0/interpkit/__init__.py ADDED Viewed

@@ -0,0 +1,65 @@
+"""interpkit — mech interp for any HuggingFace model."""
+from interpkit.core.arch import (
+    ArchFamily,
+    ArchInfo,
+    BlockSpec,
+    LayerInfo,
+    ModuleInfo,
+    resolve_arch,
+)
+from interpkit.core.exceptions import (
+    ArchitectureNotSupported,
+    AttentionBackendUnavailable,
+    InterpkitError,
+    LensPipelineMismatch,
+    OperationNotSupportedForArchitecture,
+    WrongInputType,
+)
+from interpkit.core.loader import load, load_module
+from interpkit.core.model import Model
+from interpkit.core.registry import register
+from interpkit.core.tl_compat import (
+    list_roundtrippable_hooks,
+    list_tl_hooks,
+    to_native_name,
+    to_tl_name,
+)
+def diff(model_a, model_b, input_data, *, save=None):
+    """Compare activations between two models on the same input."""
+    from interpkit.ops.diff import run_diff
+    return run_diff(model_a, model_b, input_data, save=save)
+__all__ = [
+    # Loaders
+    "load",
+    "load_module",
+    "Model",
+    # Architecture types
+    "ArchInfo",
+    "ArchFamily",
+    "BlockSpec",
+    "resolve_arch",
+    # Per-layer structural types
+    "LayerInfo",
+    "ModuleInfo",
+    # Exception types
+    "InterpkitError",
+    "ArchitectureNotSupported",
+    "AttentionBackendUnavailable",
+    "LensPipelineMismatch",
+    "OperationNotSupportedForArchitecture",
+    "WrongInputType",
+    # Operations
+    "register",
+    "diff",
+    # TL compat
+    "to_tl_name",
+    "to_native_name",
+    "list_tl_hooks",
+    "list_roundtrippable_hooks",
+]

interpkit-0.5.0/interpkit/__main__.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""Entry point so ``python -m interpkit`` invokes the Typer CLI.
+Mirrors the ``[project.scripts] interpkit = "interpkit.cli.main:run"``
+console script declared in :file:`pyproject.toml`, so users without the
+console script on their ``$PATH`` (e.g. just-installed in a fresh
+environment, vendored copies, ad-hoc subprocess invocations) can still
+reach every CLI command via ``python -m interpkit ...``.
+"""
+from interpkit.cli.main import run
+def main() -> None:
+    """Invoke the CLI — separate function makes patching easier in tests.
+    Uses ``run`` (not ``app`` directly) so interpkit's fail-loud errors are
+    rendered as clean one-line messages instead of tracebacks.
+    """
+    run()
+if __name__ == "__main__":
+    main()

interpkit 0.3.0__tar.gz → 0.5.0__tar.gz

interpkit 0.3.0.tar.gz → 0.5.0.tar.gz