interpkit 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. {interpkit-0.4.0 → interpkit-0.5.0}/PKG-INFO +60 -6
  2. {interpkit-0.4.0 → interpkit-0.5.0}/README.md +56 -4
  3. interpkit-0.5.0/interpkit/__init__.py +65 -0
  4. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/__main__.py +8 -4
  5. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/cli/main.py +164 -4
  6. interpkit-0.5.0/interpkit/core/arch/__init__.py +102 -0
  7. interpkit-0.5.0/interpkit/core/arch/blocks.py +257 -0
  8. interpkit-0.5.0/interpkit/core/arch/family.py +421 -0
  9. interpkit-0.5.0/interpkit/core/arch/heads.py +583 -0
  10. interpkit-0.5.0/interpkit/core/arch/layers.py +462 -0
  11. interpkit-0.5.0/interpkit/core/arch/names.py +60 -0
  12. interpkit-0.5.0/interpkit/core/arch/probe.py +241 -0
  13. interpkit-0.5.0/interpkit/core/arch/residual.py +653 -0
  14. interpkit-0.5.0/interpkit/core/arch/resolve.py +679 -0
  15. interpkit-0.5.0/interpkit/core/arch/tree.py +190 -0
  16. interpkit-0.5.0/interpkit/core/arch/types.py +486 -0
  17. interpkit-0.5.0/interpkit/core/enums.py +105 -0
  18. interpkit-0.5.0/interpkit/core/exceptions.py +83 -0
  19. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/html.py +5 -2
  20. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/inputs.py +44 -0
  21. interpkit-0.5.0/interpkit/core/loader.py +704 -0
  22. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/model.py +388 -34
  23. interpkit-0.5.0/interpkit/core/paths.py +71 -0
  24. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/render.py +63 -7
  25. interpkit-0.5.0/interpkit/core/support_matrix.py +690 -0
  26. interpkit-0.5.0/interpkit/core/tl_compat.py +297 -0
  27. interpkit-0.5.0/interpkit/ops/_atp.py +182 -0
  28. interpkit-0.5.0/interpkit/ops/_hooks.py +233 -0
  29. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/ablate.py +14 -0
  30. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/activations.py +9 -1
  31. interpkit-0.5.0/interpkit/ops/attention.py +334 -0
  32. interpkit-0.5.0/interpkit/ops/attribute.py +844 -0
  33. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/circuits.py +219 -108
  34. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/diff.py +22 -2
  35. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/dla.py +309 -190
  36. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/find_circuit.py +6 -12
  37. interpkit-0.5.0/interpkit/ops/heads.py +282 -0
  38. interpkit-0.5.0/interpkit/ops/lens.py +397 -0
  39. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/patch.py +113 -22
  40. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/probe.py +14 -0
  41. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/sae.py +142 -22
  42. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/steer.py +11 -0
  43. interpkit-0.5.0/interpkit/ops/trace.py +502 -0
  44. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit.egg-info/PKG-INFO +60 -6
  45. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit.egg-info/SOURCES.txt +27 -2
  46. interpkit-0.5.0/interpkit.egg-info/entry_points.txt +2 -0
  47. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit.egg-info/requires.txt +3 -1
  48. {interpkit-0.4.0 → interpkit-0.5.0}/pyproject.toml +27 -4
  49. interpkit-0.5.0/tests/test_archinfo_serialization.py +61 -0
  50. interpkit-0.5.0/tests/test_attention.py +112 -0
  51. interpkit-0.5.0/tests/test_audit_regressions.py +1891 -0
  52. interpkit-0.5.0/tests/test_cache_invalidation.py +66 -0
  53. interpkit-0.5.0/tests/test_capabilities.py +227 -0
  54. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_cli.py +77 -1
  55. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_discovery.py +1 -1
  56. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_discovery_units.py +21 -21
  57. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_error_handling.py +11 -0
  58. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_invariants.py +22 -8
  59. interpkit-0.5.0/tests/test_lens.py +53 -0
  60. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_load_params.py +12 -2
  61. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_multi_arch.py +12 -5
  62. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_ops.py +6 -1
  63. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_param_variants.py +4 -2
  64. interpkit-0.5.0/tests/test_phase3_regressions.py +121 -0
  65. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_regressions.py +5 -2
  66. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_render_internals.py +34 -6
  67. interpkit-0.5.0/tests/test_resolver.py +268 -0
  68. interpkit-0.5.0/tests/test_resolver_golden.py +131 -0
  69. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_robustness_audit.py +56 -29
  70. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_sae.py +6 -1
  71. interpkit-0.5.0/tests/test_seq2seq_contract.py +119 -0
  72. interpkit-0.5.0/tests/test_trace.py +76 -0
  73. interpkit-0.5.0/tests/test_validation.py +130 -0
  74. interpkit-0.4.0/interpkit/__init__.py +0 -27
  75. interpkit-0.4.0/interpkit/core/discovery.py +0 -810
  76. interpkit-0.4.0/interpkit/core/loader.py +0 -322
  77. interpkit-0.4.0/interpkit/core/tl_compat.py +0 -174
  78. interpkit-0.4.0/interpkit/ops/attention.py +0 -365
  79. interpkit-0.4.0/interpkit/ops/attribute.py +0 -377
  80. interpkit-0.4.0/interpkit/ops/heads.py +0 -175
  81. interpkit-0.4.0/interpkit/ops/lens.py +0 -243
  82. interpkit-0.4.0/interpkit/ops/trace.py +0 -349
  83. interpkit-0.4.0/interpkit.egg-info/entry_points.txt +0 -2
  84. interpkit-0.4.0/tests/test_attention.py +0 -44
  85. interpkit-0.4.0/tests/test_lens.py +0 -25
  86. interpkit-0.4.0/tests/test_trace.py +0 -35
  87. {interpkit-0.4.0 → interpkit-0.5.0}/LICENSE +0 -0
  88. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/cli/__init__.py +0 -0
  89. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/__init__.py +0 -0
  90. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/cache.py +0 -0
  91. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/plot.py +0 -0
  92. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/registry.py +0 -0
  93. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/core/theme.py +0 -0
  94. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/__init__.py +0 -0
  95. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/batch.py +0 -0
  96. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/inspect.py +0 -0
  97. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/report.py +0 -0
  98. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit/ops/scan.py +0 -0
  99. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit.egg-info/dependency_links.txt +0 -0
  100. {interpkit-0.4.0 → interpkit-0.5.0}/interpkit.egg-info/top_level.txt +0 -0
  101. {interpkit-0.4.0 → interpkit-0.5.0}/setup.cfg +0 -0
  102. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_ablate.py +0 -0
  103. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_activations.py +0 -0
  104. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_architectures.py +0 -0
  105. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_attribute.py +0 -0
  106. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_cache.py +0 -0
  107. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_chat.py +0 -0
  108. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_diff.py +0 -0
  109. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_html.py +0 -0
  110. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_inputs.py +0 -0
  111. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_inspect.py +0 -0
  112. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_patch.py +0 -0
  113. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_plot_internals.py +0 -0
  114. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_plots.py +0 -0
  115. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_probe.py +0 -0
  116. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_registry.py +0 -0
  117. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_steer.py +0 -0
  118. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_tl_compat.py +0 -0
  119. {interpkit-0.4.0 → interpkit-0.5.0}/tests/test_tl_ops.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: interpkit
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Mech interp for any HuggingFace model.
5
5
  Author: Davide Zani
6
6
  License-Expression: MIT
@@ -20,7 +20,8 @@ Requires-Python: >=3.10
20
20
  Description-Content-Type: text/markdown
21
21
  License-File: LICENSE
22
22
  Requires-Dist: torch>=2.1
23
- Requires-Dist: transformers>=4.36
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: transformers<6,>=4.36
24
25
  Requires-Dist: safetensors>=0.4
25
26
  Requires-Dist: rich>=13.0
26
27
  Requires-Dist: rich-gradient>=0.3
@@ -28,6 +29,7 @@ Requires-Dist: typer>=0.9
28
29
  Requires-Dist: Pillow>=10.0
29
30
  Requires-Dist: matplotlib>=3.8
30
31
  Requires-Dist: huggingface-hub>=0.20
32
+ Requires-Dist: sentencepiece>=0.1.99
31
33
  Provides-Extra: vision
32
34
  Requires-Dist: torchvision>=0.16; extra == "vision"
33
35
  Provides-Extra: probe
@@ -60,27 +62,55 @@ Dynamic: license-file
60
62
 
61
63
  Mechanistic interpretability tooling today is fragmented. Each library supports a narrow set of architectures, and moving to a different model family usually means rewriting hook code from scratch.
62
64
 
63
- InterpKit provides a single, consistent interface for mech interp operations across any HuggingFace model — transformers, SSMs, vision models, and more — with zero annotation required.
65
+ InterpKit provides a single, consistent interface for mech interp operations across a wide range of HuggingFace models — transformers, SSMs, vision models, and more — with automatic architecture discovery and little to no manual setup.
64
66
 
65
67
  ---
66
68
 
67
69
  ## Install
68
70
 
71
+ We strongly recommend installing into an isolated environment so InterpKit's pinned dependencies (e.g. `typer`, `rich`, `transformers`) don't clash with whatever you already have installed globally.
72
+
73
+ Using [uv](https://docs.astral.sh/uv/) (recommended — fast, handles Python versions for you):
74
+
75
+ ```bash
76
+ uv venv --python 3.11
77
+ source .venv/bin/activate
78
+ uv pip install interpkit
79
+
80
+ # For linear probe support:
81
+ uv pip install "interpkit[probe]"
82
+ ```
83
+
84
+ Or with plain `venv` + `pip`:
85
+
69
86
  ```bash
87
+ python3.11 -m venv .venv
88
+ source .venv/bin/activate
70
89
  pip install interpkit
71
90
 
72
91
  # For linear probe support:
73
- pip install interpkit[probe]
92
+ pip install "interpkit[probe]"
74
93
  ```
75
94
 
76
- Or install from source for development:
95
+ Or with `conda`:
96
+
97
+ ```bash
98
+ conda create -n interpkit python=3.11 -y
99
+ conda activate interpkit
100
+ pip install interpkit
101
+ ```
102
+
103
+ Installing from source for development:
77
104
 
78
105
  ```bash
79
106
  git clone https://github.com/z4nix/interpkit.git
80
107
  cd interpkit
81
- pip install -e ".[dev]"
108
+ uv venv --python 3.11 && source .venv/bin/activate
109
+ uv pip install -e ".[dev]"
82
110
  ```
83
111
 
112
+ > Python 3.10+ is required. If you must install into your system Python, use `pip install --user interpkit` and be aware that conflicting versions of `typer`, `rich`, or `transformers` already on your machine can break the CLI.
113
+
84
114
  ---
85
115
 
86
116
  ## Quickstart
@@ -522,6 +552,30 @@ model.trace(input_a, input_b, top_k=10)
522
552
 
523
553
  ---
524
554
 
555
+ ## Known limitations
556
+
557
+ - **DeBERTa-v3 (DisentangledSelfAttention).** A known broadcast bug in
558
+ HuggingFace transformers' relative-position-bias path triggers on
559
+ forward hooks for any DeBERTa-v3 model (e.g.
560
+ `microsoft/deberta-v3-small`). interpkit detects this at load time
561
+ and gates `trace`, `decompose`, `attribute`, `head_activations`,
562
+ `steer`, `probe`, `diff`, `ov_scores`, `qk_scores` with a clean
563
+ `OperationNotSupportedForArchitecture` rather than the cryptic
564
+ upstream `RuntimeError: tensor (512) must match (7)`. `lens` and
565
+ `attention` still work. Use `bert`, `roberta`, `electra`, or
566
+ `albert` for the gated ops on encoder-only inputs.
567
+
568
+ - **Integrated-gradients completeness on some modern decoders.** On
569
+ Qwen2/Qwen2.5/Qwen3 and SmolLM-family models, the trapezoidal Riemann
570
+ sum does not converge to model-output completeness even at large
571
+ `n_steps` (the P0b/N-008 empirical finding). Per-token IG scores remain
572
+ reliable as a token-importance **ranking** but cannot be interpreted as
573
+ additive contribution **magnitudes** on these models. `attribute()`
574
+ reports this programmatically: `result["interpretation"]` is
575
+ `"ranking_only"` in that case (and for `gradient` / `gradient_x_input`,
576
+ which are saliency methods), versus `"quantitative"` when IG completeness
577
+ holds. Branch on that field rather than parsing the warning text.
578
+
525
579
  ## Examples
526
580
 
527
581
  See the [`examples/`](examples/) directory for Jupyter notebooks:
@@ -12,27 +12,55 @@
12
12
 
13
13
  Mechanistic interpretability tooling today is fragmented. Each library supports a narrow set of architectures, and moving to a different model family usually means rewriting hook code from scratch.
14
14
 
15
- InterpKit provides a single, consistent interface for mech interp operations across any HuggingFace model — transformers, SSMs, vision models, and more — with zero annotation required.
15
+ InterpKit provides a single, consistent interface for mech interp operations across a wide range of HuggingFace models — transformers, SSMs, vision models, and more — with automatic architecture discovery and little to no manual setup.
16
16
 
17
17
  ---
18
18
 
19
19
  ## Install
20
20
 
21
+ We strongly recommend installing into an isolated environment so InterpKit's pinned dependencies (e.g. `typer`, `rich`, `transformers`) don't clash with whatever you already have installed globally.
22
+
23
+ Using [uv](https://docs.astral.sh/uv/) (recommended — fast, handles Python versions for you):
24
+
25
+ ```bash
26
+ uv venv --python 3.11
27
+ source .venv/bin/activate
28
+ uv pip install interpkit
29
+
30
+ # For linear probe support:
31
+ uv pip install "interpkit[probe]"
32
+ ```
33
+
34
+ Or with plain `venv` + `pip`:
35
+
21
36
  ```bash
37
+ python3.11 -m venv .venv
38
+ source .venv/bin/activate
22
39
  pip install interpkit
23
40
 
24
41
  # For linear probe support:
25
- pip install interpkit[probe]
42
+ pip install "interpkit[probe]"
26
43
  ```
27
44
 
28
- Or install from source for development:
45
+ Or with `conda`:
46
+
47
+ ```bash
48
+ conda create -n interpkit python=3.11 -y
49
+ conda activate interpkit
50
+ pip install interpkit
51
+ ```
52
+
53
+ Installing from source for development:
29
54
 
30
55
  ```bash
31
56
  git clone https://github.com/z4nix/interpkit.git
32
57
  cd interpkit
33
- pip install -e ".[dev]"
58
+ uv venv --python 3.11 && source .venv/bin/activate
59
+ uv pip install -e ".[dev]"
34
60
  ```
35
61
 
62
+ > Python 3.10+ is required. If you must install into your system Python, use `pip install --user interpkit` and be aware that conflicting versions of `typer`, `rich`, or `transformers` already on your machine can break the CLI.
63
+
36
64
  ---
37
65
 
38
66
  ## Quickstart
@@ -474,6 +502,30 @@ model.trace(input_a, input_b, top_k=10)
474
502
 
475
503
  ---
476
504
 
505
+ ## Known limitations
506
+
507
+ - **DeBERTa-v3 (DisentangledSelfAttention).** A known broadcast bug in
508
+ HuggingFace transformers' relative-position-bias path triggers on
509
+ forward hooks for any DeBERTa-v3 model (e.g.
510
+ `microsoft/deberta-v3-small`). interpkit detects this at load time
511
+ and gates `trace`, `decompose`, `attribute`, `head_activations`,
512
+ `steer`, `probe`, `diff`, `ov_scores`, `qk_scores` with a clean
513
+ `OperationNotSupportedForArchitecture` rather than the cryptic
514
+ upstream `RuntimeError: tensor (512) must match (7)`. `lens` and
515
+ `attention` still work. Use `bert`, `roberta`, `electra`, or
516
+ `albert` for the gated ops on encoder-only inputs.
517
+
518
+ - **Integrated-gradients completeness on some modern decoders.** On
519
+ Qwen2/Qwen2.5/Qwen3 and SmolLM-family models, the trapezoidal Riemann
520
+ sum does not converge to model-output completeness even at large
521
+ `n_steps` (the P0b/N-008 empirical finding). Per-token IG scores remain
522
+ reliable as a token-importance **ranking** but cannot be interpreted as
523
+ additive contribution **magnitudes** on these models. `attribute()`
524
+ reports this programmatically: `result["interpretation"]` is
525
+ `"ranking_only"` in that case (and for `gradient` / `gradient_x_input`,
526
+ which are saliency methods), versus `"quantitative"` when IG completeness
527
+ holds. Branch on that field rather than parsing the warning text.
528
+
477
529
  ## Examples
478
530
 
479
531
  See the [`examples/`](examples/) directory for Jupyter notebooks:
@@ -0,0 +1,65 @@
1
+ """interpkit — mech interp for any HuggingFace model."""
2
+
3
+ from interpkit.core.arch import (
4
+ ArchFamily,
5
+ ArchInfo,
6
+ BlockSpec,
7
+ LayerInfo,
8
+ ModuleInfo,
9
+ resolve_arch,
10
+ )
11
+ from interpkit.core.exceptions import (
12
+ ArchitectureNotSupported,
13
+ AttentionBackendUnavailable,
14
+ InterpkitError,
15
+ LensPipelineMismatch,
16
+ OperationNotSupportedForArchitecture,
17
+ WrongInputType,
18
+ )
19
+ from interpkit.core.loader import load, load_module
20
+ from interpkit.core.model import Model
21
+ from interpkit.core.registry import register
22
+ from interpkit.core.tl_compat import (
23
+ list_roundtrippable_hooks,
24
+ list_tl_hooks,
25
+ to_native_name,
26
+ to_tl_name,
27
+ )
28
+
29
+
30
+ def diff(model_a, model_b, input_data, *, save=None):
31
+ """Compare activations between two models on the same input."""
32
+ from interpkit.ops.diff import run_diff
33
+
34
+ return run_diff(model_a, model_b, input_data, save=save)
35
+
36
+
37
+ __all__ = [
38
+ # Loaders
39
+ "load",
40
+ "load_module",
41
+ "Model",
42
+ # Architecture types
43
+ "ArchInfo",
44
+ "ArchFamily",
45
+ "BlockSpec",
46
+ "resolve_arch",
47
+ # Per-layer structural types
48
+ "LayerInfo",
49
+ "ModuleInfo",
50
+ # Exception types
51
+ "InterpkitError",
52
+ "ArchitectureNotSupported",
53
+ "AttentionBackendUnavailable",
54
+ "LensPipelineMismatch",
55
+ "OperationNotSupportedForArchitecture",
56
+ "WrongInputType",
57
+ # Operations
58
+ "register",
59
+ "diff",
60
+ # TL compat
61
+ "to_tl_name",
62
+ "to_native_name",
63
+ "list_tl_hooks",
64
+ "list_roundtrippable_hooks",
65
+ ]
@@ -1,18 +1,22 @@
1
1
  """Entry point so ``python -m interpkit`` invokes the Typer CLI.
2
2
 
3
- Mirrors the ``[project.scripts] interpkit = "interpkit.cli.main:app"``
3
+ Mirrors the ``[project.scripts] interpkit = "interpkit.cli.main:run"``
4
4
  console script declared in :file:`pyproject.toml`, so users without the
5
5
  console script on their ``$PATH`` (e.g. just-installed in a fresh
6
6
  environment, vendored copies, ad-hoc subprocess invocations) can still
7
7
  reach every CLI command via ``python -m interpkit ...``.
8
8
  """
9
9
 
10
- from interpkit.cli.main import app
10
+ from interpkit.cli.main import run
11
11
 
12
12
 
13
13
  def main() -> None:
14
- """Invoke the Typer app — separate function makes patching easier in tests."""
15
- app()
14
+ """Invoke the CLI — separate function makes patching easier in tests.
15
+
16
+ Uses ``run`` (not ``app`` directly) so interpkit's fail-loud errors are
17
+ rendered as clean one-line messages instead of tracebacks.
18
+ """
19
+ run()
16
20
 
17
21
 
18
22
  if __name__ == "__main__":
@@ -1,8 +1,17 @@
1
- """CLI entry point — Typer app with all interpkit commands."""
1
+ """CLI entry point — Typer app with all interpkit commands.
2
+
3
+ When ``--format json`` is set, all status / progress output (rich panels,
4
+ load progress bars, tqdm) is silenced or routed to stderr (F-023). The
5
+ stdout stream stays clean JSON for programmatic consumers — pre-1.0
6
+ ``--format json`` interleaved rich panels and tqdm bars with the JSON
7
+ block, breaking ``json.loads(p.stdout)`` for every CLI invocation.
8
+ """
2
9
 
3
10
  from __future__ import annotations
4
11
 
5
12
  import json as _json
13
+ import os as _os
14
+ import sys as _sys
6
15
  from importlib.metadata import version as _pkg_version
7
16
 
8
17
  import typer
@@ -33,11 +42,85 @@ app = typer.Typer(
33
42
  no_args_is_help=False,
34
43
  add_completion=False,
35
44
  rich_markup_mode="rich",
45
+ # interpkit's own errors (OperationNotSupportedForArchitecture,
46
+ # WrongInputType, LensPipelineMismatch, …) are deliberate, well-messaged,
47
+ # user-facing failures — not bugs. Disable Typer's rich-traceback so they
48
+ # don't reach the user as a scary stack trace; ``run()`` renders them as a
49
+ # clean one-line error instead.
50
+ pretty_exceptions_enable=False,
36
51
  )
52
+ # F-023: console object — production code should call _make_console() so
53
+ # JSON-mode stderr routing happens uniformly. The module-level singleton
54
+ # is reassigned by main() once --format is parsed.
37
55
  console = Console()
38
56
 
39
57
  _output_format: str = "rich"
40
58
 
59
+
60
+ def _make_console() -> Console:
61
+ """Construct a Console that respects the active output format.
62
+
63
+ In ``json`` mode, status / progress output goes to stderr so stdout
64
+ remains clean JSON. In ``rich`` mode, behaves identically to the
65
+ pre-1.0 module-level singleton.
66
+ """
67
+ if _output_format == "json":
68
+ return Console(file=_sys.stderr)
69
+ return Console()
70
+
71
+
72
+ def _silence_third_party_loaders() -> None:
73
+ """Mute transformers / tqdm / huggingface chatter in JSON mode.
74
+
75
+ Pre-1.0 ``--format json`` had model-loading tqdm bars and the
76
+ "Loaded ... on cpu" rich line interleaved with the actual JSON
77
+ payload (F-023). Programmatic consumers couldn't json.loads(stdout).
78
+
79
+ Also re-binds every op-module console to write to stderr so rich
80
+ op-level rendering doesn't pollute the JSON stream.
81
+ """
82
+ if _output_format != "json":
83
+ return
84
+ # Silence HF transformers progress / warnings to stderr-only.
85
+ try:
86
+ from transformers import logging as _hf_logging
87
+ _hf_logging.set_verbosity_error()
88
+ _hf_logging.disable_progress_bar()
89
+ except (ImportError, AttributeError):
90
+ pass
91
+ # Silence transformers logging, raw tqdm, and HF Hub progress bars via environment variables.
92
+ _os.environ["TRANSFORMERS_VERBOSITY"] = "error"
93
+ _os.environ["TQDM_DISABLE"] = "1"
94
+ _os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
95
+
96
+ # Re-bind op-module consoles to stderr so renders don't pollute stdout.
97
+ import importlib
98
+
99
+ _stderr_console = Console(file=_sys.stderr)
100
+ for mod_name in (
101
+ "interpkit.core.render",
102
+ "interpkit.core.plot",
103
+ "interpkit.ops.attention",
104
+ "interpkit.ops.attribute",
105
+ "interpkit.ops.batch",
106
+ "interpkit.ops.circuits",
107
+ "interpkit.ops.diff",
108
+ "interpkit.ops.find_circuit",
109
+ "interpkit.ops.lens",
110
+ "interpkit.ops.probe",
111
+ "interpkit.ops.report",
112
+ "interpkit.ops.sae",
113
+ "interpkit.ops.scan",
114
+ "interpkit.ops.steer",
115
+ "interpkit.ops.trace",
116
+ ):
117
+ try:
118
+ mod = importlib.import_module(mod_name)
119
+ if hasattr(mod, "console"):
120
+ mod.console = _stderr_console # type: ignore[attr-defined]
121
+ except ImportError:
122
+ continue
123
+
41
124
  _VERSION = _pkg_version("interpkit")
42
125
 
43
126
 
@@ -63,8 +146,17 @@ def _load_model(
63
146
  ):
64
147
  from interpkit.core.model import load
65
148
 
149
+ # F-007 fix: don't forward dtype=None — load() now requires explicit
150
+ # dtype. Defer to its built-in default (fp32) when the CLI user didn't
151
+ # specify --dtype.
152
+ kwargs: dict = {"device": device}
153
+ if dtype is not None:
154
+ kwargs["dtype"] = dtype
155
+ if device_map is not None:
156
+ kwargs["device_map"] = device_map
157
+
66
158
  with console.status(f" Loading [bold]{model_name}[/bold]..."):
67
- m = load(model_name, device=device, dtype=dtype, device_map=device_map)
159
+ m = load(model_name, **kwargs)
68
160
  console.print(f" [bold green]Loaded[/bold green] [{ACCENT}]{model_name}[/{ACCENT}] on [bold]{m._device}[/bold]")
69
161
  return m
70
162
 
@@ -417,8 +509,11 @@ def main(
417
509
  ),
418
510
  ) -> None:
419
511
  """Mech interp for any HuggingFace model."""
420
- global _output_format
512
+ global _output_format, console
421
513
  _output_format = fmt
514
+ # F-023: re-bind module-level console so it routes to stderr in JSON mode.
515
+ console = _make_console()
516
+ _silence_third_party_loaders()
422
517
  if ctx.invoked_subcommand is not None:
423
518
  return
424
519
  if extensive:
@@ -526,6 +621,38 @@ def inspect(
526
621
  ) -> None:
527
622
  """Print the model's module tree with types, param counts, and detected roles."""
528
623
  m = _load_model(model_name, device=device, dtype=dtype, device_map=device_map)
624
+ if _output_format == "json":
625
+ # F-023: inspect previously ignored --format json. Now emits a
626
+ # structured JSON description of the architecture.
627
+ arch = m.arch_info
628
+ result = {
629
+ "model": model_name,
630
+ "family": arch.family.value if hasattr(arch.family, "value") else str(arch.family),
631
+ "arch_family": arch.arch_family,
632
+ "device": m.device,
633
+ "dtype": str(m.dtype),
634
+ "num_layers": arch.num_layers,
635
+ "hidden_size": arch.hidden_size,
636
+ "num_attention_heads": arch.num_attention_heads,
637
+ "vocab_size": arch.vocab_size,
638
+ "is_encoder_decoder": arch.is_encoder_decoder,
639
+ "spatial": arch.spatial,
640
+ "head_path": arch.head_path,
641
+ "embed_path": arch.embed_path,
642
+ "pre_head_path": arch.pre_head_path,
643
+ "project_out_path": arch.project_out_path,
644
+ "blocks": [
645
+ {"path": b.path, "stage": b.stage,
646
+ "has_attention": b.has_attention, "has_residual": b.has_residual}
647
+ for b in arch.blocks
648
+ ],
649
+ "modules": [
650
+ {"name": m.name, "type": m.type_name, "param_count": m.param_count, "role": m.role}
651
+ for m in arch.modules
652
+ ],
653
+ }
654
+ _json_dump(result)
655
+ return
529
656
  with console.status(" Inspecting model..."):
530
657
  m.inspect()
531
658
 
@@ -1063,5 +1190,38 @@ def chat(
1063
1190
  _json_dump({k: v for k, v in result.items() if k not in {"input_ids", "output_ids"}})
1064
1191
 
1065
1192
 
1193
+ def run() -> None:
1194
+ """CLI entry point that renders interpkit's intentional errors cleanly.
1195
+
1196
+ The ``InterpkitError`` family (e.g. ``OperationNotSupportedForArchitecture``,
1197
+ ``WrongInputType``, ``LensPipelineMismatch``) is the project's fail-loud
1198
+ contract — these are clear, actionable, user-facing messages, not crashes.
1199
+ Presenting them as a Python traceback undermines that, so we catch them at
1200
+ the boundary and print a single clean line (JSON object in ``--format json``)
1201
+ + exit non-zero. Unexpected exceptions still propagate as a normal traceback.
1202
+ """
1203
+ from interpkit.core.exceptions import InterpkitError
1204
+
1205
+ try:
1206
+ app()
1207
+ except (InterpkitError, ValueError, KeyError, IndexError) as exc:
1208
+ # interpkit's user-facing validation failures: unsupported op / wrong
1209
+ # input type (InterpkitError family), empty input (ValueError), unknown
1210
+ # module path (KeyError with a "did you mean" hint), out-of-range
1211
+ # position (ValueError / IndexError). These are clear, actionable
1212
+ # messages — render one line, not a traceback. Genuine internal bugs
1213
+ # raise other types (RuntimeError, TypeError, …) and still surface a
1214
+ # full traceback. ``KeyError.__str__`` wraps the message in quotes, so
1215
+ # pull ``args[0]`` for it.
1216
+ msg = exc.args[0] if (isinstance(exc, KeyError) and exc.args) else str(exc)
1217
+ if _output_format == "json":
1218
+ import json as _json
1219
+
1220
+ print(_json.dumps({"error": type(exc).__name__, "message": str(msg)}))
1221
+ else:
1222
+ Console(file=_sys.stderr).print(f"[bold red]Error:[/bold red] {msg}")
1223
+ raise SystemExit(1) from None
1224
+
1225
+
1066
1226
  if __name__ == "__main__":
1067
- app()
1227
+ run()
@@ -0,0 +1,102 @@
1
+ """Architecture resolution: one cohesive package.
2
+
3
+ Consolidates what used to be three entangled modules — ``discovery``,
4
+ the ``resolve`` package, and ``residual`` — into a single
5
+ ``interpkit.core.arch`` package with one :class:`ArchInfo` contract.
6
+
7
+ Submodule layout:
8
+
9
+ - ``names`` — module-name vocabulary + regexes.
10
+ - ``types`` — ``ArchInfo``, ``ArchFamily``, ``BlockSpec``, ``LayerInfo``, ``ModuleInfo``.
11
+ - ``tree`` — static module-tree primitives + weight extraction.
12
+ - ``probe`` — runtime forward-hook probes.
13
+ - ``family`` — family classification, topology, config parsing.
14
+ - ``blocks`` — block / decoder-block discovery.
15
+ - ``layers`` — per-layer attn/mlp/qkv resolution + role assignment.
16
+ - ``heads`` — head / unembedding / project-out / MLM / pre-head discovery.
17
+ - ``resolve`` — ``resolve_arch`` orchestrator + ``discover`` + overrides.
18
+ - ``residual`` — residual-stream decomposition schemas.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from interpkit.core.arch.names import (
24
+ ALL_QKV_NAMES,
25
+ ATTN_NAMES,
26
+ ATTN_RE,
27
+ FUSED_QKV_NAMES,
28
+ K_PROJ_NAMES,
29
+ MLP_NAMES,
30
+ MLP_RE,
31
+ O_PROJ_NAMES,
32
+ Q_PROJ_NAMES,
33
+ V_PROJ_NAMES,
34
+ names_to_regex,
35
+ )
36
+ from interpkit.core.arch.residual import (
37
+ Component,
38
+ PostLNResidual,
39
+ PreLNResidual,
40
+ ResidualSchema,
41
+ Seq2seqResidual,
42
+ SharedLayerResidual,
43
+ residual_schema_for,
44
+ )
45
+ from interpkit.core.arch.resolve import (
46
+ ARCH_OVERRIDES,
47
+ apply_overrides,
48
+ discover,
49
+ resolve_arch,
50
+ )
51
+ from interpkit.core.arch.tree import (
52
+ canonical_linear_weight,
53
+ extract_proj_weight,
54
+ get_weight,
55
+ module_at_path,
56
+ )
57
+ from interpkit.core.arch.types import (
58
+ ArchFamily,
59
+ ArchInfo,
60
+ BlockSpec,
61
+ LayerInfo,
62
+ ModuleInfo,
63
+ )
64
+
65
+ __all__ = [
66
+ # Types
67
+ "ArchInfo",
68
+ "ArchFamily",
69
+ "BlockSpec",
70
+ "LayerInfo",
71
+ "ModuleInfo",
72
+ # Resolution
73
+ "resolve_arch",
74
+ "discover",
75
+ "apply_overrides",
76
+ "ARCH_OVERRIDES",
77
+ # Tree / weight helpers
78
+ "module_at_path",
79
+ "get_weight",
80
+ "extract_proj_weight",
81
+ "canonical_linear_weight",
82
+ # Module-name vocabulary
83
+ "ATTN_NAMES",
84
+ "MLP_NAMES",
85
+ "FUSED_QKV_NAMES",
86
+ "Q_PROJ_NAMES",
87
+ "K_PROJ_NAMES",
88
+ "V_PROJ_NAMES",
89
+ "ALL_QKV_NAMES",
90
+ "O_PROJ_NAMES",
91
+ "ATTN_RE",
92
+ "MLP_RE",
93
+ "names_to_regex",
94
+ # Residual schemas
95
+ "Component",
96
+ "ResidualSchema",
97
+ "PreLNResidual",
98
+ "PostLNResidual",
99
+ "SharedLayerResidual",
100
+ "Seq2seqResidual",
101
+ "residual_schema_for",
102
+ ]