llm-token-heatmap 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_token_heatmap-0.1.0/LICENSE +21 -0
- llm_token_heatmap-0.1.0/PKG-INFO +155 -0
- llm_token_heatmap-0.1.0/README.md +109 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/__init__.py +108 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/activation_probe.py +613 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/activation_serializer.py +124 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/adaptive_probe.py +132 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/attention_probe.py +434 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/attention_serializer.py +303 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/attention_stats.py +308 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/cli.py +1495 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/commands/__init__.py +12 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/commands/_util.py +33 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/commands/hpc.py +564 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/commands/web.py +70 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/diff.py +298 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/direct_logit_attribution.py +272 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/export.py +132 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/generation.py +207 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/intervention.py +246 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/logit_lens.py +335 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/manifold.py +285 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/neuron_attribution.py +136 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/plotting.py +859 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/probe.py +261 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/runner.py +365 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/sampling.py +104 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap/trace_payload.py +372 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap.egg-info/PKG-INFO +155 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap.egg-info/SOURCES.txt +55 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap.egg-info/dependency_links.txt +1 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap.egg-info/entry_points.txt +2 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap.egg-info/requires.txt +22 -0
- llm_token_heatmap-0.1.0/llm_token_heatmap.egg-info/top_level.txt +1 -0
- llm_token_heatmap-0.1.0/pyproject.toml +109 -0
- llm_token_heatmap-0.1.0/setup.cfg +4 -0
- llm_token_heatmap-0.1.0/tests/test_activation_e2e.py +540 -0
- llm_token_heatmap-0.1.0/tests/test_activation_probe.py +284 -0
- llm_token_heatmap-0.1.0/tests/test_activation_schema.py +122 -0
- llm_token_heatmap-0.1.0/tests/test_activation_serializer.py +288 -0
- llm_token_heatmap-0.1.0/tests/test_adaptive_probe.py +151 -0
- llm_token_heatmap-0.1.0/tests/test_attention_probe.py +236 -0
- llm_token_heatmap-0.1.0/tests/test_attention_serializer.py +306 -0
- llm_token_heatmap-0.1.0/tests/test_attention_stats.py +285 -0
- llm_token_heatmap-0.1.0/tests/test_cli.py +765 -0
- llm_token_heatmap-0.1.0/tests/test_diff.py +257 -0
- llm_token_heatmap-0.1.0/tests/test_direct_logit_attribution.py +202 -0
- llm_token_heatmap-0.1.0/tests/test_export.py +221 -0
- llm_token_heatmap-0.1.0/tests/test_generation.py +429 -0
- llm_token_heatmap-0.1.0/tests/test_intervention.py +146 -0
- llm_token_heatmap-0.1.0/tests/test_logit_lens.py +348 -0
- llm_token_heatmap-0.1.0/tests/test_manifold.py +216 -0
- llm_token_heatmap-0.1.0/tests/test_neuron_attribution.py +93 -0
- llm_token_heatmap-0.1.0/tests/test_plotting.py +274 -0
- llm_token_heatmap-0.1.0/tests/test_probe.py +119 -0
- llm_token_heatmap-0.1.0/tests/test_sampling.py +120 -0
- llm_token_heatmap-0.1.0/tests/test_trace_payload.py +175 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jiucheng Zang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-token-heatmap
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Analyze and visualize how an LLM picks each next token: probabilities, attention, logit-lens, activations, manifold geometry — CLI + web app.
|
|
5
|
+
Author-email: Jiucheng Zang <git.jiucheng@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/zangjiucheng/Token-Heatmap
|
|
8
|
+
Project-URL: Repository, https://github.com/zangjiucheng/Token-Heatmap
|
|
9
|
+
Project-URL: Issues, https://github.com/zangjiucheng/Token-Heatmap/issues
|
|
10
|
+
Keywords: llm,interpretability,mechanistic-interpretability,transformers,pytorch,visualization,attention,logit-lens,token-probability
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: torch>=2.1
|
|
27
|
+
Requires-Dist: torchvision>=0.16
|
|
28
|
+
Requires-Dist: transformers>=5.0
|
|
29
|
+
Requires-Dist: accelerate>=0.30
|
|
30
|
+
Requires-Dist: pandas>=2.0
|
|
31
|
+
Requires-Dist: numpy>=1.24
|
|
32
|
+
Requires-Dist: matplotlib>=3.8
|
|
33
|
+
Requires-Dist: pyyaml>=5.1
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
39
|
+
Requires-Dist: jsonschema>=4.18; extra == "dev"
|
|
40
|
+
Provides-Extra: models
|
|
41
|
+
Requires-Dist: tiktoken>=0.6; extra == "models"
|
|
42
|
+
Requires-Dist: einops>=0.7; extra == "models"
|
|
43
|
+
Provides-Extra: docs
|
|
44
|
+
Requires-Dist: mkdocs>=1.6; extra == "docs"
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
# LLM Token Heatmap
|
|
48
|
+
|
|
49
|
+
A PyTorch toolkit for analyzing and visualizing how a causal language model
|
|
50
|
+
picks each next token during generation. Captures per-step probability
|
|
51
|
+
distributions, attention, logit-lens, and activations; **decomposes each token's
|
|
52
|
+
logit into per-layer — and per attention head — contributions (direct logit
|
|
53
|
+
attribution)**; analyzes the activation geometry (PCA / intrinsic dimension /
|
|
54
|
+
a TWERA-style neuron ranking); exports to CSV / JSON; renders static heatmaps;
|
|
55
|
+
and ships with an interactive React web app — a **lens workspace** that is a
|
|
56
|
+
static, file-based **trace viewer** — for drill-down exploration. Works with any
|
|
57
|
+
HuggingFace causal LM (Qwen, Llama, Mistral, Gemma, Phi, …).
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
prompt + previous tokens → logits → probabilities → next token
|
|
61
|
+
│
|
|
62
|
+
└─▶ recorded per step:
|
|
63
|
+
top tokens, prob, logprob, rank,
|
|
64
|
+
entropy, k_used, selected token
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## What you get
|
|
68
|
+
|
|
69
|
+
- **`llm_token_heatmap`** — Python library: `AdaptiveTokenProbe`, a manual generation loop, sampling helpers, CSV/JSON/DataFrame export, attention + logit-lens + activation probes, a self-contained model-architecture summary, TWERA-style neuron attribution, **direct logit attribution (per-layer + per-head)**, component / head ablation primitives, post-hoc manifold geometry, matplotlib heatmaps, and activation diff. Loads on GPU in **bf16** (with optional **`--load-in-4bit`** NF4 for big models).
|
|
70
|
+
- **`token-heatmap`** — CLI that takes a model + prompt (or a YAML config) and writes a full trace bundle to disk. Its core subcommands are `trace` (generate + capture) and `manifold` (analyze the activation clouds). Pass `--serve` to `trace` to view the result in the browser.
|
|
71
|
+
- **`web/frontend`** — React + Vite SPA: a static, file-based **trace viewer** — a **lens workspace** with a grouped lens rail (**Generation / Internals / Geometry**), a persistent generation spine (token strip + entropy / selected-probability timelines), and a resizable inspector. Lenses: **Token Heatmap**, **Model** (architecture overview), **Output** (complete generated-text render), **Attention**, **Logit Lens**, **Activations** (per-step ↔ whole-trace **TWERA** ranking toggle), **Attribution** (**direct logit attribution** — each token's logit split by layer, expandable to **per head**), **Graph** (the same attribution as a pruned node-link **attribution graph**), **Manifold** (3-D rotatable cloud + probe/helix readouts); plus step detail, CSV/PNG export, and a diff view. It loads traces from a dropped file, a `?trace=<url>` URL, or the bundled sample — no backend server. (Interactive ablation will return later via the CLI precomputing ablations into the trace.)
|
|
72
|
+
- **`token-heatmap web build`** — builds the static viewer for deployment on servers without Node.js.
|
|
73
|
+
- **Desktop app** — the viewer packaged as a native [Tauri](https://tauri.app/) app (no browser/Node.js needed). Grab an installer from the [Releases page](https://github.com/zangjiucheng/Token-Heatmap/releases), or build it with `cd web/frontend && npm run app:build`. See [`docs/web-app.md`](docs/web-app.md#desktop-app-tauri).
|
|
74
|
+
- **`token-heatmap hpc run <config>`** — one command from your laptop: do the GPU compute on an HPC (Slurm), then rsync the whole run back so viewing needs no GPU. Companions: `token-heatmap hpc setup` (build the GPU venv) and `token-heatmap hpc serve` (SSH tunnel + remote file server).
|
|
75
|
+
|
|
76
|
+
## Documentation
|
|
77
|
+
|
|
78
|
+
| Topic | Page |
|
|
79
|
+
| ---------------------------------- | ---------------------------------------------------- |
|
|
80
|
+
| Setting up the environment | [`docs/installation.md`](docs/installation.md) |
|
|
81
|
+
| The `token-heatmap` CLI | [`docs/cli.md`](docs/cli.md) |
|
|
82
|
+
| Using the Python library | [`docs/python-api.md`](docs/python-api.md) |
|
|
83
|
+
| Running the web app | [`docs/web-app.md`](docs/web-app.md) |
|
|
84
|
+
| Trace JSON schema | [`docs/schema.md`](docs/schema.md) |
|
|
85
|
+
| Interpreting the metrics and plots | [`docs/interpreting.md`](docs/interpreting.md) |
|
|
86
|
+
| Manifold / counting reproduction (+ GPU & HPC guide) | [`docs/manifold-reproduction.md`](docs/manifold-reproduction.md) |
|
|
87
|
+
| Common issues | [`docs/troubleshooting.md`](docs/troubleshooting.md) |
|
|
88
|
+
|
|
89
|
+
The docs index lives at [`docs/README.md`](docs/README.md).
|
|
90
|
+
|
|
91
|
+
## Quick start
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
git clone https://github.com/zangjiucheng/Token-Heatmap.git llm-token-heatmap
|
|
95
|
+
cd llm-token-heatmap
|
|
96
|
+
|
|
97
|
+
# Option A — pip + venv
|
|
98
|
+
./scripts/setup.sh
|
|
99
|
+
source .venv/bin/activate
|
|
100
|
+
|
|
101
|
+
# Option B — conda (no Node.js required on this machine)
|
|
102
|
+
conda env create -f environment.yml
|
|
103
|
+
conda activate token-heatmap
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
# CLI: generate a trace bundle
|
|
108
|
+
token-heatmap trace \
|
|
109
|
+
--model Qwen/Qwen2.5-0.5B-Instruct \
|
|
110
|
+
--prompt "Explain diffusion models." \
|
|
111
|
+
--max-new-tokens 80 \
|
|
112
|
+
--out outputs/
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Or use a YAML config file (see `configs/example.yaml`):
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
token-heatmap trace --config configs/example.yaml
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Any HuggingFace causal LM works — swap `--model` for a Llama / Mistral / Gemma /
|
|
122
|
+
Phi id (gated repos need an `HF_TOKEN`). For large models on a single GPU add
|
|
123
|
+
`--load-in-4bit`. To capture everything the web app can show, add
|
|
124
|
+
`--capture-attention --capture-logit-lens --capture-activations
|
|
125
|
+
--capture-full-activations`, then `token-heatmap manifold --trace <trace>.json`.
|
|
126
|
+
|
|
127
|
+
Full CLI flags: [`docs/cli.md`](docs/cli.md). Python equivalent: [`docs/python-api.md`](docs/python-api.md).
|
|
128
|
+
|
|
129
|
+
## Viewing the trace
|
|
130
|
+
|
|
131
|
+
The viewer is a static, file-based React app — no backend. The quickest path lets
|
|
132
|
+
the CLI generate a trace and open the viewer in one command:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
token-heatmap trace --config configs/example.yaml --serve --frontend
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Compute on a GPU cluster and view locally with no GPU or tunnel:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
token-heatmap hpc setup # one-time: build the GPU venv
|
|
142
|
+
token-heatmap hpc run configs/example.yaml # runs on Slurm, rsyncs results back
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Prefer a native window? Grab a desktop installer from the
|
|
146
|
+
[Releases page](https://github.com/zangjiucheng/Token-Heatmap/releases), or build it with
|
|
147
|
+
`cd web/frontend && npm run app:build`.
|
|
148
|
+
|
|
149
|
+
Full guide — manual file drop, `?trace=<url>`, HPC port-forward, prebuilt `dist/`,
|
|
150
|
+
desktop app: [`docs/web-app.md`](docs/web-app.md). Slurm/qos notes:
|
|
151
|
+
[`docs/manifold-reproduction.md`](docs/manifold-reproduction.md).
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
Released under the MIT License — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# LLM Token Heatmap
|
|
2
|
+
|
|
3
|
+
A PyTorch toolkit for analyzing and visualizing how a causal language model
|
|
4
|
+
picks each next token during generation. Captures per-step probability
|
|
5
|
+
distributions, attention, logit-lens, and activations; **decomposes each token's
|
|
6
|
+
logit into per-layer — and per attention head — contributions (direct logit
|
|
7
|
+
attribution)**; analyzes the activation geometry (PCA / intrinsic dimension /
|
|
8
|
+
a TWERA-style neuron ranking); exports to CSV / JSON; renders static heatmaps;
|
|
9
|
+
and ships with an interactive React web app — a **lens workspace** that is a
|
|
10
|
+
static, file-based **trace viewer** — for drill-down exploration. Works with any
|
|
11
|
+
HuggingFace causal LM (Qwen, Llama, Mistral, Gemma, Phi, …).
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
prompt + previous tokens → logits → probabilities → next token
|
|
15
|
+
│
|
|
16
|
+
└─▶ recorded per step:
|
|
17
|
+
top tokens, prob, logprob, rank,
|
|
18
|
+
entropy, k_used, selected token
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## What you get
|
|
22
|
+
|
|
23
|
+
- **`llm_token_heatmap`** — Python library: `AdaptiveTokenProbe`, a manual generation loop, sampling helpers, CSV/JSON/DataFrame export, attention + logit-lens + activation probes, a self-contained model-architecture summary, TWERA-style neuron attribution, **direct logit attribution (per-layer + per-head)**, component / head ablation primitives, post-hoc manifold geometry, matplotlib heatmaps, and activation diff. Loads on GPU in **bf16** (with optional **`--load-in-4bit`** NF4 for big models).
|
|
24
|
+
- **`token-heatmap`** — CLI that takes a model + prompt (or a YAML config) and writes a full trace bundle to disk. Its core subcommands are `trace` (generate + capture) and `manifold` (analyze the activation clouds). Pass `--serve` to `trace` to view the result in the browser.
|
|
25
|
+
- **`web/frontend`** — React + Vite SPA: a static, file-based **trace viewer** — a **lens workspace** with a grouped lens rail (**Generation / Internals / Geometry**), a persistent generation spine (token strip + entropy / selected-probability timelines), and a resizable inspector. Lenses: **Token Heatmap**, **Model** (architecture overview), **Output** (complete generated-text render), **Attention**, **Logit Lens**, **Activations** (per-step ↔ whole-trace **TWERA** ranking toggle), **Attribution** (**direct logit attribution** — each token's logit split by layer, expandable to **per head**), **Graph** (the same attribution as a pruned node-link **attribution graph**), **Manifold** (3-D rotatable cloud + probe/helix readouts); plus step detail, CSV/PNG export, and a diff view. It loads traces from a dropped file, a `?trace=<url>` URL, or the bundled sample — no backend server. (Interactive ablation will return later via the CLI precomputing ablations into the trace.)
|
|
26
|
+
- **`token-heatmap web build`** — builds the static viewer for deployment on servers without Node.js.
|
|
27
|
+
- **Desktop app** — the viewer packaged as a native [Tauri](https://tauri.app/) app (no browser/Node.js needed). Grab an installer from the [Releases page](https://github.com/zangjiucheng/Token-Heatmap/releases), or build it with `cd web/frontend && npm run app:build`. See [`docs/web-app.md`](docs/web-app.md#desktop-app-tauri).
|
|
28
|
+
- **`token-heatmap hpc run <config>`** — one command from your laptop: do the GPU compute on an HPC (Slurm), then rsync the whole run back so viewing needs no GPU. Companions: `token-heatmap hpc setup` (build the GPU venv) and `token-heatmap hpc serve` (SSH tunnel + remote file server).
|
|
29
|
+
|
|
30
|
+
## Documentation
|
|
31
|
+
|
|
32
|
+
| Topic | Page |
|
|
33
|
+
| ---------------------------------- | ---------------------------------------------------- |
|
|
34
|
+
| Setting up the environment | [`docs/installation.md`](docs/installation.md) |
|
|
35
|
+
| The `token-heatmap` CLI | [`docs/cli.md`](docs/cli.md) |
|
|
36
|
+
| Using the Python library | [`docs/python-api.md`](docs/python-api.md) |
|
|
37
|
+
| Running the web app | [`docs/web-app.md`](docs/web-app.md) |
|
|
38
|
+
| Trace JSON schema | [`docs/schema.md`](docs/schema.md) |
|
|
39
|
+
| Interpreting the metrics and plots | [`docs/interpreting.md`](docs/interpreting.md) |
|
|
40
|
+
| Manifold / counting reproduction (+ GPU & HPC guide) | [`docs/manifold-reproduction.md`](docs/manifold-reproduction.md) |
|
|
41
|
+
| Common issues | [`docs/troubleshooting.md`](docs/troubleshooting.md) |
|
|
42
|
+
|
|
43
|
+
The docs index lives at [`docs/README.md`](docs/README.md).
|
|
44
|
+
|
|
45
|
+
## Quick start
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
git clone https://github.com/zangjiucheng/Token-Heatmap.git llm-token-heatmap
|
|
49
|
+
cd llm-token-heatmap
|
|
50
|
+
|
|
51
|
+
# Option A — pip + venv
|
|
52
|
+
./scripts/setup.sh
|
|
53
|
+
source .venv/bin/activate
|
|
54
|
+
|
|
55
|
+
# Option B — conda (no Node.js required on this machine)
|
|
56
|
+
conda env create -f environment.yml
|
|
57
|
+
conda activate token-heatmap
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# CLI: generate a trace bundle
|
|
62
|
+
token-heatmap trace \
|
|
63
|
+
--model Qwen/Qwen2.5-0.5B-Instruct \
|
|
64
|
+
--prompt "Explain diffusion models." \
|
|
65
|
+
--max-new-tokens 80 \
|
|
66
|
+
--out outputs/
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Or use a YAML config file (see `configs/example.yaml`):
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
token-heatmap trace --config configs/example.yaml
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Any HuggingFace causal LM works — swap `--model` for a Llama / Mistral / Gemma /
|
|
76
|
+
Phi id (gated repos need an `HF_TOKEN`). For large models on a single GPU add
|
|
77
|
+
`--load-in-4bit`. To capture everything the web app can show, add
|
|
78
|
+
`--capture-attention --capture-logit-lens --capture-activations
|
|
79
|
+
--capture-full-activations`, then `token-heatmap manifold --trace <trace>.json`.
|
|
80
|
+
|
|
81
|
+
Full CLI flags: [`docs/cli.md`](docs/cli.md). Python equivalent: [`docs/python-api.md`](docs/python-api.md).
|
|
82
|
+
|
|
83
|
+
## Viewing the trace
|
|
84
|
+
|
|
85
|
+
The viewer is a static, file-based React app — no backend. The quickest path lets
|
|
86
|
+
the CLI generate a trace and open the viewer in one command:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
token-heatmap trace --config configs/example.yaml --serve --frontend
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Compute on a GPU cluster and view locally with no GPU or tunnel:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
token-heatmap hpc setup # one-time: build the GPU venv
|
|
96
|
+
token-heatmap hpc run configs/example.yaml # runs on Slurm, rsyncs results back
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Prefer a native window? Grab a desktop installer from the
|
|
100
|
+
[Releases page](https://github.com/zangjiucheng/Token-Heatmap/releases), or build it with
|
|
101
|
+
`cd web/frontend && npm run app:build`.
|
|
102
|
+
|
|
103
|
+
Full guide — manual file drop, `?trace=<url>`, HPC port-forward, prebuilt `dist/`,
|
|
104
|
+
desktop app: [`docs/web-app.md`](docs/web-app.md). Slurm/qos notes:
|
|
105
|
+
[`docs/manifold-reproduction.md`](docs/manifold-reproduction.md).
|
|
106
|
+
|
|
107
|
+
## License
|
|
108
|
+
|
|
109
|
+
Released under the MIT License — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""LLM Token Heatmap.
|
|
2
|
+
|
|
3
|
+
A PyTorch component for analyzing LLM inference-time token probability
|
|
4
|
+
distributions, with adaptive top-k tracing, CSV export, and heatmap
|
|
5
|
+
visualization.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from llm_token_heatmap.activation_probe import (
|
|
9
|
+
ActivationFullStats,
|
|
10
|
+
ActivationLayerEntry,
|
|
11
|
+
ActivationProbe,
|
|
12
|
+
ActivationProbeConfig,
|
|
13
|
+
ActivationProbeError,
|
|
14
|
+
TopNeuron,
|
|
15
|
+
tokenizer_fingerprint,
|
|
16
|
+
)
|
|
17
|
+
from llm_token_heatmap.adaptive_probe import AdaptiveProbeConfig, AdaptiveTokenProbe
|
|
18
|
+
from llm_token_heatmap.attention_probe import (
|
|
19
|
+
AttentionProbe,
|
|
20
|
+
AttentionProbeConfig,
|
|
21
|
+
AttentionProbeError,
|
|
22
|
+
AttentionStats,
|
|
23
|
+
)
|
|
24
|
+
from llm_token_heatmap.attention_serializer import (
|
|
25
|
+
attention_stats_to_payload,
|
|
26
|
+
read_sidecar,
|
|
27
|
+
write_sidecar,
|
|
28
|
+
)
|
|
29
|
+
from llm_token_heatmap.attention_stats import (
|
|
30
|
+
AttentionDerivedStats,
|
|
31
|
+
AttentionHeadStats,
|
|
32
|
+
AttentionLayerAggregates,
|
|
33
|
+
AttentionLayerDerivedStats,
|
|
34
|
+
compute_attention_stats,
|
|
35
|
+
)
|
|
36
|
+
from llm_token_heatmap.diff import DIFF_SCHEMA_VERSION, compare_activations
|
|
37
|
+
from llm_token_heatmap.export import trace_to_dataframe
|
|
38
|
+
from llm_token_heatmap.generation import generate_with_adaptive_probe
|
|
39
|
+
from llm_token_heatmap.logit_lens import (
|
|
40
|
+
LogitLens,
|
|
41
|
+
LogitLensConfig,
|
|
42
|
+
LogitLensError,
|
|
43
|
+
LogitLensLayerStats,
|
|
44
|
+
LogitLensStats,
|
|
45
|
+
)
|
|
46
|
+
from llm_token_heatmap.plotting import (
|
|
47
|
+
plot_activation_delta,
|
|
48
|
+
plot_adaptive_heatmap,
|
|
49
|
+
plot_attention_layer_head_grid,
|
|
50
|
+
plot_attention_pattern,
|
|
51
|
+
plot_entropy,
|
|
52
|
+
plot_logit_lens,
|
|
53
|
+
plot_logit_lens_selected_rank,
|
|
54
|
+
plot_raw_vs_processed_heatmap,
|
|
55
|
+
plot_raw_vs_processed_selected_prob,
|
|
56
|
+
plot_selected_probability,
|
|
57
|
+
)
|
|
58
|
+
from llm_token_heatmap.sampling import apply_sampling_filters, sample_next_token
|
|
59
|
+
|
|
60
|
+
SCHEMA_VERSION = "2.0.0"
|
|
61
|
+
|
|
62
|
+
__all__ = [
|
|
63
|
+
"ActivationFullStats",
|
|
64
|
+
"ActivationLayerEntry",
|
|
65
|
+
"ActivationProbe",
|
|
66
|
+
"ActivationProbeConfig",
|
|
67
|
+
"ActivationProbeError",
|
|
68
|
+
"AdaptiveProbeConfig",
|
|
69
|
+
"AdaptiveTokenProbe",
|
|
70
|
+
"AttentionDerivedStats",
|
|
71
|
+
"AttentionHeadStats",
|
|
72
|
+
"AttentionLayerAggregates",
|
|
73
|
+
"AttentionLayerDerivedStats",
|
|
74
|
+
"AttentionProbe",
|
|
75
|
+
"AttentionProbeConfig",
|
|
76
|
+
"AttentionProbeError",
|
|
77
|
+
"AttentionStats",
|
|
78
|
+
"DIFF_SCHEMA_VERSION",
|
|
79
|
+
"LogitLens",
|
|
80
|
+
"LogitLensConfig",
|
|
81
|
+
"LogitLensError",
|
|
82
|
+
"LogitLensLayerStats",
|
|
83
|
+
"LogitLensStats",
|
|
84
|
+
"SCHEMA_VERSION",
|
|
85
|
+
"TopNeuron",
|
|
86
|
+
"apply_sampling_filters",
|
|
87
|
+
"attention_stats_to_payload",
|
|
88
|
+
"compare_activations",
|
|
89
|
+
"compute_attention_stats",
|
|
90
|
+
"generate_with_adaptive_probe",
|
|
91
|
+
"plot_activation_delta",
|
|
92
|
+
"plot_adaptive_heatmap",
|
|
93
|
+
"plot_attention_layer_head_grid",
|
|
94
|
+
"plot_attention_pattern",
|
|
95
|
+
"plot_entropy",
|
|
96
|
+
"plot_logit_lens",
|
|
97
|
+
"plot_logit_lens_selected_rank",
|
|
98
|
+
"plot_raw_vs_processed_heatmap",
|
|
99
|
+
"plot_raw_vs_processed_selected_prob",
|
|
100
|
+
"plot_selected_probability",
|
|
101
|
+
"read_sidecar",
|
|
102
|
+
"sample_next_token",
|
|
103
|
+
"tokenizer_fingerprint",
|
|
104
|
+
"trace_to_dataframe",
|
|
105
|
+
"write_sidecar",
|
|
106
|
+
]
|
|
107
|
+
|
|
108
|
+
__version__ = "0.1.0"
|