opd-viz 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ .venv/
6
+
7
+ # Testing
8
+ .pytest_cache/
9
+ .coverage
10
+ htmlcov/
11
+
12
+ # Linting
13
+ .ruff_cache/
14
+ .mypy_cache/
15
+
16
+ # Build
17
+ dist/
18
+ build/
19
+ *.egg-info/
20
+
21
+ # IDE
22
+ .idea/
23
+ .vscode/
opd_viz-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Construct Labs GmbH
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
opd_viz-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: opd-viz
3
+ Version: 0.1.0
4
+ Summary: Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model
5
+ Project-URL: Homepage, https://constructlabs.com
6
+ Author-email: Construct Labs GmbH <hello@constructlabs.com>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Keywords: ai,distillation,kl-divergence,machine-learning,reinforcement-learning,sdpo,visualization,vllm
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: jinja2>=3.1
23
+ Requires-Dist: mcp>=1.2
24
+ Requires-Dist: numpy>=1.24
25
+ Requires-Dist: transformers>=4.44
26
+ Provides-Extra: dev
27
+ Requires-Dist: build>=1.0.0; extra == 'dev'
28
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
29
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
30
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
31
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # opd-viz
35
+
36
+ Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model.
37
+
38
+ `opd-viz` visualizes the gold-hint SDPO per-token distillation loss: for each rollout
39
+ token it compares the student-policy prediction against the gold-procedure-reprompted
40
+ teacher prediction as a top-k KL, and colors that divergence onto the realized tokens in a
41
+ browser heatmap. It is model-agnostic (you pick the endpoint, model, and tokenizer at
42
+ runtime from the browser) and bring-your-own-MCP (register any MCP server for the agentic
43
+ generate loop).
44
+
45
+ ## Install / run
46
+
47
+ Zero-checkout, via uv:
48
+
49
+ ```bash
50
+ uvx --from opd-viz opd-viz --port 8088
51
+ ```
52
+
53
+ or into an environment:
54
+
55
+ ```bash
56
+ pip install opd-viz
57
+ opd-viz --port 8088
58
+ ```
59
+
60
+ Then open `http://127.0.0.1:8088`. The server binds loopback and is single-user; there is
61
+ no auth on the opd-viz server itself.
62
+
63
+ Flags: `--host` (default `127.0.0.1`), `--port` (default `8088`). Everything else is set in
64
+ the browser.
65
+
66
+ ## Requirements
67
+
68
+ - A served, OpenAI-compatible **vLLM** endpoint. It **must** be started with
69
+ `--max-logprobs 100` so the teacher-forcing pass can return top-100 `prompt_logprobs`;
70
+ `opd-viz` preflights this on Connect and reports a clear error if it is missing.
71
+ - The tokenizer matching your model is pulled from the Hugging Face Hub by the model id at
72
+ Connect time. For gated repos, set `HF_TOKEN` in the environment before launching:
73
+
74
+ ```bash
75
+ HF_TOKEN=<paste your hf_… token> uvx --from opd-viz opd-viz
76
+ ```
77
+
78
+ (Reference a secret manager rather than pasting tokens into shared shells.)
79
+
80
+ ## Use it (connect, generate, recompute)
81
+
82
+ 1. **Connect.** Fill in the vLLM endpoint (e.g. `http://host:port/v1`), optional API key,
83
+ the model id (e.g. `org/Model-Name`), an optional system prompt, and sampling params
84
+ (temperature plus any of top_p / top_k / min_p / presence_penalty / repetition_penalty;
85
+ blank fields are not sent). Click **Connect** — this loads the matching tokenizer, runs
86
+ the vLLM preflight, and runs a chat-template self-check, surfacing any warnings. Changing
87
+ the model and clicking Connect again re-pulls the matching tokenizer and re-preflights;
88
+ no restart needed.
89
+ 2. **Generate.** Type a question and click **Generate rollout**. Without tools this is a
90
+ single assistant turn (the student = its own tokens, so `gen == score` holds). The
91
+ generation logprobs are the student distribution.
92
+ 3. **Recompute.** Paste the gold hindsight (and optionally tweak the reprompt template),
93
+ then click **Recompute & visualize**. The exact generated token ids are teacher-forced
94
+ under the gold-hint prompt via vLLM `prompt_logprobs`, and the per-token KL (JSD / reverse
95
+ / forward) is colored onto the realized tokens. Hover any token for the student/teacher
96
+ top-k and the top-1 (dis)agreement.
97
+
98
+ A rollout is stamped with the config that produced it; if you reconnect with a different
99
+ model/config and then score a stale rollout, the server refuses (regenerate instead of
100
+ mis-coloring).
101
+
102
+ ## MCP tools (optional, agentic)
103
+
104
+ Register MCP servers in the Connect panel to make their tools available to an agentic
105
+ generate loop:
106
+
107
+ - **remote**: a URL (e.g. `https://host/mcp`) plus an optional auth header.
108
+ - **stdio**: a command + args (+ optional `KEY=val` env), e.g. `uvx some-mcp-server`.
109
+
110
+ After connecting, the **use tools** toggle enables once at least one MCP tool is discovered.
111
+ With it on, Generate runs the model through tool calls (executed live) until it answers
112
+ without a tool call; scoring colors each assistant turn and greys the tool results.
113
+
114
+ `opd-viz` contacts only your vLLM endpoint, the MCP servers you register, and the HF Hub
115
+ (for the tokenizer). Nothing else.
@@ -0,0 +1,82 @@
1
+ # opd-viz
2
+
3
+ Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model.
4
+
5
+ `opd-viz` visualizes the gold-hint SDPO per-token distillation loss: for each rollout
6
+ token it compares the student-policy prediction against the gold-procedure-reprompted
7
+ teacher prediction as a top-k KL, and colors that divergence onto the realized tokens in a
8
+ browser heatmap. It is model-agnostic (you pick the endpoint, model, and tokenizer at
9
+ runtime from the browser) and bring-your-own-MCP (register any MCP server for the agentic
10
+ generate loop).
11
+
12
+ ## Install / run
13
+
14
+ Zero-checkout, via uv:
15
+
16
+ ```bash
17
+ uvx --from opd-viz opd-viz --port 8088
18
+ ```
19
+
20
+ or into an environment:
21
+
22
+ ```bash
23
+ pip install opd-viz
24
+ opd-viz --port 8088
25
+ ```
26
+
27
+ Then open `http://127.0.0.1:8088`. The server binds loopback and is single-user; there is
28
+ no auth on the opd-viz server itself.
29
+
30
+ Flags: `--host` (default `127.0.0.1`), `--port` (default `8088`). Everything else is set in
31
+ the browser.
32
+
33
+ ## Requirements
34
+
35
+ - A served, OpenAI-compatible **vLLM** endpoint. It **must** be started with
36
+ `--max-logprobs 100` so the teacher-forcing pass can return top-100 `prompt_logprobs`;
37
+ `opd-viz` preflights this on Connect and reports a clear error if it is missing.
38
+ - The tokenizer matching your model is pulled from the Hugging Face Hub by the model id at
39
+ Connect time. For gated repos, set `HF_TOKEN` in the environment before launching:
40
+
41
+ ```bash
42
+ HF_TOKEN=<paste your hf_… token> uvx --from opd-viz opd-viz
43
+ ```
44
+
45
+ (Reference a secret manager rather than pasting tokens into shared shells.)
46
+
47
+ ## Use it (connect, generate, recompute)
48
+
49
+ 1. **Connect.** Fill in the vLLM endpoint (e.g. `http://host:port/v1`), optional API key,
50
+ the model id (e.g. `org/Model-Name`), an optional system prompt, and sampling params
51
+ (temperature plus any of top_p / top_k / min_p / presence_penalty / repetition_penalty;
52
+ blank fields are not sent). Click **Connect** — this loads the matching tokenizer, runs
53
+ the vLLM preflight, and runs a chat-template self-check, surfacing any warnings. Changing
54
+ the model and clicking Connect again re-pulls the matching tokenizer and re-preflights;
55
+ no restart needed.
56
+ 2. **Generate.** Type a question and click **Generate rollout**. Without tools this is a
57
+ single assistant turn (the student = its own tokens, so `gen == score` holds). The
58
+ generation logprobs are the student distribution.
59
+ 3. **Recompute.** Paste the gold hindsight (and optionally tweak the reprompt template),
60
+ then click **Recompute & visualize**. The exact generated token ids are teacher-forced
61
+ under the gold-hint prompt via vLLM `prompt_logprobs`, and the per-token KL (JSD / reverse
62
+ / forward) is colored onto the realized tokens. Hover any token for the student/teacher
63
+ top-k and the top-1 (dis)agreement.
64
+
65
+ A rollout is stamped with the config that produced it; if you reconnect with a different
66
+ model/config and then score a stale rollout, the server refuses (regenerate instead of
67
+ mis-coloring).
68
+
69
+ ## MCP tools (optional, agentic)
70
+
71
+ Register MCP servers in the Connect panel to make their tools available to an agentic
72
+ generate loop:
73
+
74
+ - **remote**: a URL (e.g. `https://host/mcp`) plus an optional auth header.
75
+ - **stdio**: a command + args (+ optional `KEY=val` env), e.g. `uvx some-mcp-server`.
76
+
77
+ After connecting, the **use tools** toggle enables once at least one MCP tool is discovered.
78
+ With it on, Generate runs the model through tool calls (executed live) until it answers
79
+ without a tool call; scoring colors each assistant turn and greys the tool results.
80
+
81
+ `opd-viz` contacts only your vLLM endpoint, the MCP servers you register, and the HF Hub
82
+ (for the tokenizer). Nothing else.
@@ -0,0 +1,104 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "opd-viz"
7
+ version = "0.1.0"
8
+ description = "Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Construct Labs GmbH", email = "hello@constructlabs.com" }
14
+ ]
15
+ keywords = [
16
+ "sdpo",
17
+ "distillation",
18
+ "kl-divergence",
19
+ "vllm",
20
+ "visualization",
21
+ "reinforcement-learning",
22
+ "ai",
23
+ "machine-learning",
24
+ ]
25
+ classifiers = [
26
+ "Development Status :: 4 - Beta",
27
+ "Intended Audience :: Developers",
28
+ "Intended Audience :: Science/Research",
29
+ "License :: OSI Approved :: MIT License",
30
+ "Operating System :: OS Independent",
31
+ "Programming Language :: Python :: 3",
32
+ "Programming Language :: Python :: 3.10",
33
+ "Programming Language :: Python :: 3.11",
34
+ "Programming Language :: Python :: 3.12",
35
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
36
+ "Typing :: Typed",
37
+ ]
38
+ dependencies = [
39
+ "transformers>=4.44",
40
+ "jinja2>=3.1", # apply_chat_template (render path) needs it; transformers leaves it optional
41
+ "numpy>=1.24", # Phase 2 (KL math); transformers already pulls it
42
+ "mcp>=1.2", # Phase 4 (generic MCP client)
43
+ ]
44
+
45
+ [project.scripts]
46
+ opd-viz = "opd_viz.server:main"
47
+
48
+ [project.optional-dependencies]
49
+ dev = [
50
+ "pytest>=7.0.0",
51
+ "pytest-asyncio>=0.21.0",
52
+ "ruff>=0.1.0",
53
+ "mypy>=1.0.0",
54
+ "build>=1.0.0",
55
+ ]
56
+
57
+ [project.urls]
58
+ Homepage = "https://constructlabs.com"
59
+
60
+ [tool.hatch.build.targets.sdist]
61
+ include = [
62
+ "/src",
63
+ "/README.md",
64
+ "/LICENSE",
65
+ ]
66
+
67
+ [tool.hatch.build.targets.wheel]
68
+ # hatchling ships every file under the package dir (incl. assets/viz_client.js and
69
+ # py.typed), so no explicit force-include is needed; a force-include here would
70
+ # double-add the asset and fail the build. The Phase 5 smoke test guards inclusion.
71
+ packages = ["src/opd_viz"]
72
+
73
+ [tool.ruff]
74
+ line-length = 88
75
+ target-version = "py310"
76
+
77
+ [tool.ruff.lint]
78
+ select = [
79
+ "E", # pycodestyle errors
80
+ "W", # pycodestyle warnings
81
+ "F", # Pyflakes
82
+ "I", # isort
83
+ "B", # flake8-bugbear
84
+ "C4", # flake8-comprehensions
85
+ "UP", # pyupgrade
86
+ ]
87
+ ignore = [
88
+ "E501", # line too long (handled by formatter)
89
+ "E401", # multiple imports on one line (kept from the original loose scripts)
90
+ "E701", # multiple statements on one line, colon (kept from the original loose scripts)
91
+ "E702", # multiple statements on one line, semicolon (kept from the original loose scripts)
92
+ ]
93
+
94
+ [tool.ruff.lint.isort]
95
+ known-first-party = ["opd_viz"]
96
+
97
+ [tool.mypy]
98
+ python_version = "3.10"
99
+ ignore_missing_imports = true
100
+
101
+ [tool.pytest.ini_options]
102
+ testpaths = ["tests"]
103
+ python_files = ["test_*.py"]
104
+ asyncio_mode = "auto"
@@ -0,0 +1,18 @@
1
+ """opd-viz - live per-token distillation-loss (SDPO) heatmap server.
2
+
3
+ A self-contained, model-agnostic server that visualizes the gold-hint SDPO per-token
4
+ distillation loss against any vLLM-served chat model: for each rollout token, the
5
+ student-policy prediction vs the gold-procedure-reprompted teacher prediction, as a
6
+ top-k KL colored onto the realized tokens in a browser heatmap.
7
+
8
+ For licensing and support, contact hello@constructlabs.com
9
+ """
10
+
11
+ from importlib.metadata import PackageNotFoundError, version
12
+
13
+ try:
14
+ __version__ = version("opd-viz")
15
+ except PackageNotFoundError:
16
+ __version__ = "0.0.0+dev"
17
+
18
+ __all__ = ["__version__"]
@@ -0,0 +1,210 @@
1
+ // Shared SDPO heatmap client. Pure browser JS, no build step. Inlined verbatim by
2
+ // BOTH the static builder (build_multi_viz.py) and the live server (serve_viz.py).
3
+ // All per-token interactivity recomputes client-side from the stored primitives
4
+ // (loss variant, kl_clip, importance multiply, top-1-disagree, KDE, hover).
5
+ //
6
+ // State is a mutable record array. setData() (static bootstrap) / addRecord() (live
7
+ // recompute) repopulate the sample dropdown and re-render. CAP (the fixed colour
8
+ // reference = 95th pct of kl_jsd over scored tokens) is passed in by the static
9
+ // builder, or computed client-side in live mode.
10
+ let DATA = [], CAP = 6, cur = 0;
11
+ const sel = document.getElementById('sel');
12
+
13
+ // top-1 agreement, computed client-side from the stored top-k. The two scorers order
14
+ // their top-k differently (Qwen3.6: sorted by teacher prob; verl-dump 8B: teacher probs
15
+ // in STUDENT rank order), so take each side's argmax BY PROBABILITY rather than [0].
16
+ // Both top-k are restricted to the student's top-k candidate set, so this is
17
+ // teacher-argmax-within-set vs student-argmax. Prefer a stored top1_agree if present.
18
/**
 * Token with the highest probability in a top-k list.
 * Ties keep the earliest entry (strict `>` comparison).
 * @param {Array<[string, number]>} rows - [token, probability] pairs, in any order.
 * @returns {?string} the argmax token, or null when `rows` is missing or empty.
 */
function amaxTok(rows){
  if(!rows || !rows.length) return null;
  let best=0;
  for(let i=1;i<rows.length;i++){ if(rows[i][1]>rows[best][1]) best=i; }
  return rows[best][0];
}

/**
 * Teacher's top-1 token by probability.
 * @param {object} t - token record; reads `t.t_top`.
 * @returns {?string} null when the teacher top-k is missing or empty.
 */
function teacherTop1(t){ return amaxTok(t.t_top); }

/**
 * Whether student and teacher agree on their top-1 token.
 * A stored `top1_agree` wins; otherwise the two argmaxes are compared. When either
 * top-k list is missing or empty there is nothing to compare and the token counts
 * as agreeing.
 * @param {object} t - token record; reads `top1_agree`, `s_top`, `t_top`.
 * @returns {boolean}
 */
function agree(t){
  if(t.top1_agree!=null) return t.top1_agree;
  const comparable = t.s_top && t.t_top && t.s_top.length && t.t_top.length;
  return comparable ? amaxTok(t.s_top)===amaxTok(t.t_top) : true;
}
22
// gap = t_chosen - s_chosen, the detached student-teacher logprob gap (SDAR gate input,
// AntiSD's PMI, SERL magnitude). 0 when either chosen logprob is missing.
function gapOf(t){
  return (t.t_chosen!=null && t.s_chosen!=null) ? (t.t_chosen - t.s_chosen) : 0;
}

// Base per-token scalar selected by the #signal control. Every branch falls back to 0
// when the underlying field is missing, so a sparse record never injects NaN/undefined
// into the downstream colour, sort and mean computations.
function baseSignal(t){
  const kind=document.getElementById('signal').value;
  switch(kind){
    case 'gap': return gapOf(t);
    case 'abs_gap': return Math.abs(gapOf(t));
    case 'vopd': return t.kl_rev!=null ? -t.kl_rev : 0; // vOPD value baseline -D_KL(pi||pi_T) ~ -kl_rev
    case 'entropy': return t.entropy!=null ? t.entropy : 0;
    default: { // kl_jsd | kl_rev | kl_fwd
      const v=t[kind];
      return v!=null ? v : 0;
    }
  }
}

// the single token-to-scalar chokepoint. Returns a SIGNED scalar (sign preserved through
// clip); set-level stages (mask, top-p, cap, KDE, strip) all consume this. Transform order:
// base -> gate -> importance -> clip.
function signalOf(t){
  let x = baseSignal(t);
  if(document.getElementById('gate').checked){
    x *= 1/(1+Math.exp(-5*gapOf(t))); // SDAR sigmoid gate beta=5
  }
  if(document.getElementById('isclip').checked){
    const rec=DATA[cur];
    const ic=rec && rec.is_clip;
    // Only weight when both the record's clip bound and the token's ratio exist; a token
    // without a finite is_ratio is left unweighted instead of turning the signal into NaN.
    if(ic && Number.isFinite(t.is_ratio)) x *= Math.min(t.is_ratio, ic);
  }
  const clip=+document.getElementById('clip').value;
  if(clip>0) x = Math.sign(x) * Math.min(Math.abs(x), clip); // clip magnitude, keep sign
  return x;
}
45
// Draw a Gaussian kernel-density estimate of `vals` into the #kde SVG, plus a dashed
// vertical marker at `cap`. The x axis runs from 0 to max(cap, max(vals)); the curve is
// sampled at M points and normalised to its own peak. Clears the SVG when vals is empty.
//   vals: array of numbers (render passes signal magnitudes)
//   cap:  colour reference value marked on the x axis
function drawKDE(vals,cap){
  const svg=document.getElementById('kde'),W=190,H=46,pad=3;
  if(!vals.length){svg.innerHTML='';return;}
  const n=vals.length;
  // reduce rather than Math.max(...vals): spreading a very long token list into call
  // arguments can exceed the engine's argument limit and throw a RangeError.
  const vmax=vals.reduce((m,v)=>v>m?v:m,-Infinity);
  const xmax=Math.max(cap,vmax)||1;
  const mean=vals.reduce((a,b)=>a+b,0)/n;
  const sd=Math.sqrt(vals.reduce((a,b)=>a+(b-mean)*(b-mean),0)/n)||0.1;
  // bandwidth 1.06*sd*n^(-1/5), floored at 0.05 so a near-constant sample still draws
  const bw=Math.max(0.05,1.06*sd*Math.pow(n,-0.2)),M=80;
  const ys=[]; let ymax=0;
  for(let i=0;i<M;i++){
    const x=xmax*i/(M-1); let d=0;
    for(const v of vals){const z=(x-v)/bw;d+=Math.exp(-0.5*z*z);}
    d/=(n*bw*Math.sqrt(2*Math.PI)); ys.push(d); if(d>ymax)ymax=d;
  }
  ymax=ymax||1;
  const capx=pad+(W-2*pad)*Math.min(cap,xmax)/xmax;
  let p=`M ${pad} ${H-pad}`;
  for(let i=0;i<M;i++){
    const px=pad+(W-2*pad)*i/(M-1), py=H-pad-(H-2*pad)*ys[i]/ymax;
    p+=` L ${px.toFixed(1)} ${py.toFixed(1)}`;
  }
  p+=` L ${W-pad} ${H-pad} Z`;
  svg.innerHTML=`<path d="${p}" fill="#ff6b3566" stroke="#ff8a4a" stroke-width="1"/>`+
    `<line x1="${capx.toFixed(1)}" y1="2" x2="${capx.toFixed(1)}" y2="${H-2}" stroke="#ff3b3088" stroke-width="1" stroke-dasharray="2,2"/>`;
}
63
// split the flat token stream into assistant turns at the unscored tool-result separator
// (the only client-derivable boundary; drift tokens are unscored but keep the real token).
// Separator tokens themselves are dropped; empty turns are never emitted.
function turnsOf(tokens){
  const turns=[];
  let turn=[]; // named `turn` so it does not shadow the module-level `cur` index
  for(const t of tokens){
    if(!t.scored && /↳ tool result:/.test(t.tok)){
      if(turn.length){ turns.push(turn); turn=[]; }
      continue;
    }
    turn.push(t);
  }
  if(turn.length) turns.push(turn);
  return turns;
}

// one bar per assistant turn, height = mean |displayed signal| over that turn's scored tokens.
// Auto-hide for single-turn rollouts (no degenerate single bar).
function drawStrip(tokens){
  const svg=document.getElementById('strip'); if(!svg) return;
  const means=turnsOf(tokens).map(ts=>{
    const v=ts.filter(t=>t.scored).map(t=>Math.abs(signalOf(t)));
    return v.length ? v.reduce((a,b)=>a+b,0)/v.length : 0;
  });
  if(means.length<2){ svg.innerHTML=''; svg.style.display='none'; return; } // single-turn: degrade
  svg.style.display='';
  const W=150,H=46,pad=4, n=means.length, bw=(W-2*pad)/n;
  const ymax=means.reduce((m,x)=>x>m?x:m,0)||1;
  // 1px gutter on each side of a bar while there is room for it; once slots get narrower
  // than 2px the gutter is dropped so the rect width can never go negative.
  const gap = bw>2 ? 1 : 0;
  svg.innerHTML=means.map((m,i)=>{
    const h=(H-2*pad)*m/ymax, x=pad+i*bw+gap, y=H-pad-h;
    return `<rect x="${x.toFixed(1)}" y="${y.toFixed(1)}" width="${(bw-2*gap).toFixed(1)}" height="${h.toFixed(1)}" fill="#ff8a4a" opacity="0.8"/>`;
  }).join('');
}
90
// diverging map: intensity from |x|/cap; warm hue for x>=0 (teacher-favored / positive),
// cool for x<0 (student-favored / negative); outline at the cap in the matching hue.
// Returns an inline CSS declaration string. A non-positive cap (e.g. every displayed
// signal is 0) or a non-finite x yields a fully transparent, un-outlined background
// instead of NaN CSS and an outline on every token.
function color(x,cap){
  const mag=Math.abs(x);
  const usable = cap>0 && mag>0; // false for cap<=0, x==0 and NaN
  const a = usable ? Math.min(1,Math.sqrt(mag/cap)) : 0;
  const warm = x>=0;
  const hue = warm ? 35-35*a : 210;
  const light = 55-12*a;
  const ol = (usable && mag>=cap)
    ? (';outline:1.5px solid '+(warm?'#ff3b30':'#3b82f6')+';outline-offset:-1px')
    : '';
  return `background:hsla(${hue},85%,${light}%,${a.toFixed(3)})${ol}`;
}
98
// HTML-escape a value for insertion via innerHTML. Accepts any value (coerced with
// String(); null/undefined become ''), and escapes quotes as well as & < > so the
// result is safe in both text and attribute positions.
function esc(s){
  return String(s==null ? '' : s)
    .replace(/&/g,'&amp;')
    .replace(/</g,'&lt;')
    .replace(/>/g,'&gt;')
    .replace(/"/g,'&quot;')
    .replace(/'/g,'&#39;');
}
// Format a top-k list of [token, prob] pairs as "tok=prob" joined by middots; a missing
// list renders as the empty string.
function tops(rows){
  return (rows||[]).map(([t,p])=>`${esc(t)}=${p}`).join(' &middot; ');
}
100
// position the hover tooltip; flip it ABOVE the cursor when it would overflow the viewport bottom.
//   tip: the tooltip element (its style.display/left/top are written, offsetHeight is read)
//   e:   the mouse event (clientX / clientY are read)
// The left edge is kept at least 8px inside the viewport: on a window narrower than the
// 540px right-hand reserve, innerWidth-540 is negative and would push the tip off-screen.
function placeTip(tip,e){
  tip.style.display='block';
  const left=Math.min(e.clientX+14, innerWidth-540);
  tip.style.left=Math.max(8,left)+'px';
  const th=tip.offsetHeight||140; // fall back to 140 when the height is not measurable yet
  const below=e.clientY+18;
  tip.style.top=(below+th>innerHeight ? Math.max(8,e.clientY-th-14) : below)+'px';
}
107
// Re-render the current record (DATA[cur]); no-op when there is none. Steps:
//   1. sync the dropdown and the clip / top-p read-outs,
//   2. apply the top-1-disagree and not-top-1 masks, then the top-p selection on |signal|,
//   3. derive the colour cap from the surviving tokens and redraw the KDE + per-turn strip,
//   4. rebuild the header line and the token stream with hover tooltips.
function render(){
  const r=DATA[cur]; if(!r){return;}
  sel.value=String(cur);
  const byId=(id)=>document.getElementById(id);
  const clip=+byId('clip').value;
  byId('clipv').textContent=clip>0?clip:'off';
  const dis=byId('disagree').checked;
  const ntop1=byId('nottop1').checked;
  const masked = t => (dis && agree(t)) || (ntop1 && t.realized_top1); // mask -> excluded

  // signalOf() re-reads every control from the DOM on each call. The controls cannot change
  // during one render pass, so memoise per token instead of re-reading inside the sort
  // comparator and the cumulative-sum loops.
  const sigCache=new Map();
  const sig=(t)=>{ if(!sigCache.has(t)) sigCache.set(t,signalOf(t)); return sigCache.get(t); };
  const mag=(t)=>Math.abs(sig(t));

  const allScored=r.tokens.filter(t=>t.scored);
  const nDis=allScored.filter(t=>!agree(t)).length;
  let scored=r.tokens.filter(t=>t.scored && !masked(t));

  // top-p selection on |signal|: keep the smallest set whose cumulative |signal| >= p*total
  const topp=+byId('topp').value;
  const tv=byId('toppv'); if(tv) tv.textContent=topp>0?topp:'off';
  if(topp>0 && topp<1 && scored.length){
    const ranked=[...scored].sort((a,b)=>mag(b)-mag(a));
    const total=ranked.reduce((s,t)=>s+mag(t),0)||1;
    let acc=0; const keep=new Set();
    for(const t of ranked){ keep.add(t); acc+=mag(t); if(acc>=topp*total) break; }
    scored=scored.filter(t=>keep.has(t));
  }
  const shown=new Set(scored); // survivors get colored; mask/top-p exclusions stay transparent text

  // colour reference = MAX of |displayed signal| over survivors, so intensity spans the full
  // range and the high end stays discriminable. The KDE is over magnitude; sign lives in color().
  // Falls back to the corpus-level CAP when there are no survivors OR every survivor's signal
  // is 0, so color() is never handed a zero cap.
  const sls=scored.map(t=>sig(t)), abs=sls.map(v=>Math.abs(v));
  const mean=sls.reduce((a,b)=>a+b,0)/(sls.length||1);
  const peak=abs.reduce((m,x)=>x>m?x:m,0);
  const cap=peak>0 ? peak : CAP;
  const fid=100*allScored.filter(t=>t.realized_top1).length/(allScored.length||1);
  drawKDE(abs,cap);
  drawStrip(r.tokens);

  const rw = r.reward==null ? '' : ` &middot; <b style="color:${r.reward>=1?'#7ee787':'#ff7b72'}">${r.reward>=1?'SOLVED':'failed'} (${r.reward})</b>`;
  const src = r.backend ? ` &middot; <span class=lab>${esc(r.backend)} &middot; top-${r.k||'?'}${r.k&&r.k<100?' <span style="color:#d8a14a">(coarse tail)</span>':''}</span>` : '';
  const sel_active = dis || ntop1 || (topp>0 && topp<1);
  const disLab = sel_active ? `${scored.length} shown / ${r.n_scored} scored toks &middot; mean signal ${mean.toFixed(3)} &middot; ${nDis} top-1 disagree` : `${r.n_scored} scored / ${r.n_total} toks &middot; mean signal ${mean.toFixed(3)} &middot; ${nDis} top-1 disagree`;
  const legend = `<span class=u>grey = tool/unscored</span>` + (sel_active?` &middot; <span class=lab>uncolored = masked/excluded (signal 0)</span>`:``);
  // r.sample is free text; escape it before it goes through innerHTML.
  byId('hdr').innerHTML=`<b>${esc(String(r.sample))}</b>${rw}${src} &middot; &alpha;=${r.alpha} &middot; ${disLab} &middot; realized==student-top1: <b>${fid.toFixed(0)}%</b> &middot; ${legend}`;

  // Show the full picture in one continuous view: the reprompt template (question +
  // hindsight injected) as a grey lead-in, then the generated completion colored by KL.
  const st=byId('stream'); st.innerHTML='';
  if(r.hint_prompt){
    const pre=document.createElement('div');
    pre.className='hint-prompt';
    pre.textContent=r.hint_prompt;
    st.appendChild(pre);
  }
  const hideTip=()=>{ byId('tip').style.display='none'; };
  r.tokens.forEach((tk)=>{
    const sp=document.createElement('span');
    sp.textContent=tk.tok.replace(/\n/g,'↵');
    if(!tk.scored){
      sp.className='u'; // only tool-response / unscored tokens are greyed
      sp.onmousemove=(e)=>{
        const tip=byId('tip');
        tip.innerHTML=`<span class=lab>tool response / unscored (response_mask=0, no loss)</span>`;
        placeTip(tip,e);
      };
      sp.onmouseleave=hideTip;
      st.appendChild(sp);
      return;
    }
    // masked / top-p-excluded tokens stay readable text with transparent background, not greyed
    const vis = shown.has(tk);
    const L = vis ? sig(tk) : 0;
    sp.className='t';
    sp.style.cssText=color(L,cap);
    sp.onmousemove=(e)=>{
      const tip=byId('tip');
      const agr = agree(tk)
        ? ` <span class=s>top-1 AGREE</span>`
        : ` <span class=tt>top-1 DISAGREE &rarr; teacher wants ${esc(teacherTop1(tk)||'?')}</span>`;
      const g = gapOf(tk), favs = g>=0 ? '<span class=tt>teacher-favored</span>' : '<span class=s>student-favored</span>';
      tip.innerHTML=
        `<b class=kl>signal ${signalOf(tk).toFixed(3)}</b> <span class=lab>(${byId('signal').value})</span>${agr}<br>`+
        `<span class=lab>gap t&minus;s:</span> <b>${g>=0?'+':''}${g.toFixed(3)}</b> ${favs} `+
        `<span class=lab>&middot; s_chosen ${tk.s_chosen} &middot; t_chosen ${tk.t_chosen}</span><br>`+
        `<span class=lab>kl jsd ${tk.kl_jsd} rev ${tk.kl_rev} fwd ${tk.kl_fwd} &middot; H ${tk.entropy} &middot; eff ${tk.eff_support}</span><br>`+
        `<span class=lab>realized:</span> ${esc(tk.tok)} ${tk.realized_top1?'<span class=s>(=student top-1)</span>':'<span class=tt>(NOT top-1)</span>'}<br>`+
        // a token without stored top-k lists still gets a tooltip instead of a TypeError
        `<span class=s>student:</span> ${tops(tk.s_top||[])}<br><span class=tt>teacher:</span> ${tops(tk.t_top||[])}`;
      placeTip(tip,e);
    };
    sp.onmouseleave=hideTip;
    st.appendChild(sp);
  });
}
176
// Step the current record index by d (wrapping around at both ends) and re-render.
// Does nothing while no records are loaded.
function nav(d){
  const count=DATA.length;
  if(count===0){ return; }
  cur=(cur+d+count)%count;
  render();
}
177
// Corpus-level colour cap: the 95th percentile of kl_jsd over all scored tokens, rounded
// to 2 decimals; 6.0 when there is nothing to measure. Scored tokens whose kl_jsd is
// missing or non-finite are skipped (a NaN would make the numeric sort comparator
// inconsistent and could surface as a NaN cap), as are records without a tokens array.
function computeCap(recs){
  const kls=[];
  for(const r of recs){
    for(const t of (r.tokens||[])){
      if(t.scored && Number.isFinite(t.kl_jsd)) kls.push(t.kl_jsd);
    }
  }
  if(!kls.length) return 6.0;
  kls.sort((a,b)=>a-b);
  const p95=kls[Math.floor(0.95*(kls.length-1))];
  return Math.round(p95*100)/100;
}
182
// Rebuild the sample dropdown from DATA: one <wa-option> per record, valued by its index
// and labelled "<position>/<count> <sample>".
function repopulate(){
  sel.innerHTML='';
  for(const [idx,rec] of DATA.entries()){
    const opt=document.createElement('wa-option');
    opt.value=String(idx);
    opt.textContent=`${idx+1}/${DATA.length} ${rec.sample}`;
    sel.appendChild(opt);
  }
}
186
// static bootstrap: load a fixed corpus with a precomputed CAP.
//   recs: record array (null/undefined is treated as empty)
//   cap:  colour reference; computed from recs when omitted
// The current index is clamped into the new range. With an empty corpus the token stream
// and header are blanked instead of rendered.
function setData(recs, cap){
  DATA = recs || [];
  CAP = (cap != null) ? cap : computeCap(DATA);
  const lastIdx = Math.max(0, DATA.length-1);
  cur = Math.min(cur, lastIdx);
  repopulate();
  if(DATA.length){
    render();
    return;
  }
  document.getElementById('stream').innerHTML='';
  document.getElementById('hdr').innerHTML='';
}
193
// live mode: append one freshly-scored record, recompute CAP over the whole set, select
// the new record, then refresh the dropdown and the view.
function addRecord(rec){
  DATA.push(rec);
  CAP = computeCap(DATA);
  cur = DATA.length - 1;
  repopulate();
  render();
}
195
// Wire the loss-control bar. Web Awesome's wa-select / wa-slider / wa-switch emit
// NATIVE change/input events, so plain addEventListener works (no wa- prefix).
// Controls that are absent from the page are skipped silently.
function _wire(id,ev,fn){
  const el=document.getElementById(id);
  if(el) el.addEventListener(ev,fn);
}
_wire('sel','change',function(){cur=+this.value;render();});
_wire('signal','change',render);
_wire('clip','input',render);
_wire('gate','change',render);
_wire('isclip','change',render);
_wire('topp','input',render);
_wire('disagree','change',render);
_wire('nottop1','change',render);

// Left/Right arrows step through samples, unless the key event comes from a control that
// uses the arrow keys itself: text fields (caret movement), sliders (value change),
// selects (option change) and contenteditable regions. Without the slider/select entries
// a keyboard adjustment of a control would also flip the displayed sample.
document.onkeydown=(e)=>{
  const t=e.target, tag=t&&t.tagName;
  const ownsArrows=['WA-INPUT','WA-TEXTAREA','INPUT','TEXTAREA','WA-SLIDER','WA-SELECT','SELECT'];
  if(ownsArrows.includes(tag) || (t&&t.isContentEditable)) return; // don't hijack typing / control keys
  if(e.key==='ArrowLeft') nav(-1);
  else if(e.key==='ArrowRight') nav(1);
};