opd-viz 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opd_viz-0.1.0/.gitignore +23 -0
- opd_viz-0.1.0/LICENSE +21 -0
- opd_viz-0.1.0/PKG-INFO +115 -0
- opd_viz-0.1.0/README.md +82 -0
- opd_viz-0.1.0/pyproject.toml +104 -0
- opd_viz-0.1.0/src/opd_viz/__init__.py +18 -0
- opd_viz-0.1.0/src/opd_viz/assets/viz_client.js +210 -0
- opd_viz-0.1.0/src/opd_viz/backends.py +414 -0
- opd_viz-0.1.0/src/opd_viz/config.py +187 -0
- opd_viz-0.1.0/src/opd_viz/core.py +173 -0
- opd_viz-0.1.0/src/opd_viz/mcp.py +185 -0
- opd_viz-0.1.0/src/opd_viz/py.typed +0 -0
- opd_viz-0.1.0/src/opd_viz/server.py +460 -0
- opd_viz-0.1.0/src/opd_viz/viewer.py +270 -0
opd_viz-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
.venv/
|
|
6
|
+
|
|
7
|
+
# Testing
|
|
8
|
+
.pytest_cache/
|
|
9
|
+
.coverage
|
|
10
|
+
htmlcov/
|
|
11
|
+
|
|
12
|
+
# Linting
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
.mypy_cache/
|
|
15
|
+
|
|
16
|
+
# Build
|
|
17
|
+
dist/
|
|
18
|
+
build/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
|
|
21
|
+
# IDE
|
|
22
|
+
.idea/
|
|
23
|
+
.vscode/
|
opd_viz-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Construct Labs GmbH
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
opd_viz-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: opd-viz
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model
|
|
5
|
+
Project-URL: Homepage, https://constructlabs.com
|
|
6
|
+
Author-email: Construct Labs GmbH <hello@constructlabs.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: ai,distillation,kl-divergence,machine-learning,reinforcement-learning,sdpo,visualization,vllm
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: jinja2>=3.1
|
|
23
|
+
Requires-Dist: mcp>=1.2
|
|
24
|
+
Requires-Dist: numpy>=1.24
|
|
25
|
+
Requires-Dist: transformers>=4.44
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# opd-viz
|
|
35
|
+
|
|
36
|
+
Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model.
|
|
37
|
+
|
|
38
|
+
`opd-viz` visualizes the gold-hint SDPO per-token distillation loss: for each rollout
|
|
39
|
+
token it compares the student-policy prediction against the gold-procedure-reprompted
|
|
40
|
+
teacher prediction as a top-k KL, and colors that divergence onto the realized tokens in a
|
|
41
|
+
browser heatmap. It is model-agnostic (you pick the endpoint, model, and tokenizer at
|
|
42
|
+
runtime from the browser) and bring-your-own-MCP (register any MCP server for the agentic
|
|
43
|
+
generate loop).
|
|
44
|
+
|
|
45
|
+
## Install / run
|
|
46
|
+
|
|
47
|
+
Zero-checkout, via uv:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uvx --from opd-viz opd-viz --port 8088
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
or into an environment:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install opd-viz
|
|
57
|
+
opd-viz --port 8088
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Then open `http://127.0.0.1:8088`. The server binds loopback and is single-user; there is
|
|
61
|
+
no auth on the opd-viz server itself.
|
|
62
|
+
|
|
63
|
+
Flags: `--host` (default `127.0.0.1`), `--port` (default `8088`). Everything else is set in
|
|
64
|
+
the browser.
|
|
65
|
+
|
|
66
|
+
## Requirements
|
|
67
|
+
|
|
68
|
+
- A served, OpenAI-compatible **vLLM** endpoint. It **must** be started with
|
|
69
|
+
`--max-logprobs 100` so the teacher-forcing pass can return top-100 `prompt_logprobs`;
|
|
70
|
+
`opd-viz` preflights this on Connect and reports a clear error if it is missing.
|
|
71
|
+
- The tokenizer matching your model is pulled from the Hugging Face Hub by the model id at
|
|
72
|
+
Connect time. For gated repos, set `HF_TOKEN` in the environment before launching:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
HF_TOKEN=<paste your hf_… token> uvx --from opd-viz opd-viz
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
(Reference a secret manager rather than pasting tokens into shared shells.)
|
|
79
|
+
|
|
80
|
+
## Use it (connect, generate, recompute)
|
|
81
|
+
|
|
82
|
+
1. **Connect.** Fill in the vLLM endpoint (e.g. `http://host:port/v1`), optional API key,
|
|
83
|
+
the model id (e.g. `org/Model-Name`), an optional system prompt, and sampling params
|
|
84
|
+
(temperature plus any of top_p / top_k / min_p / presence_penalty / repetition_penalty;
|
|
85
|
+
blank fields are not sent). Click **Connect** — this loads the matching tokenizer, runs
|
|
86
|
+
the vLLM preflight, and runs a chat-template self-check, surfacing any warnings. Changing
|
|
87
|
+
the model and clicking Connect again re-pulls the matching tokenizer and re-preflights;
|
|
88
|
+
no restart needed.
|
|
89
|
+
2. **Generate.** Type a question and click **Generate rollout**. Without tools this is a
|
|
90
|
+
single assistant turn (the student = its own tokens, so `gen == score` holds). The
|
|
91
|
+
generation logprobs are the student distribution.
|
|
92
|
+
3. **Recompute.** Paste the gold hindsight (and optionally tweak the reprompt template),
|
|
93
|
+
then click **Recompute & visualize**. The exact generated token ids are teacher-forced
|
|
94
|
+
under the gold-hint prompt via vLLM `prompt_logprobs`, and the per-token KL (JSD / reverse
|
|
95
|
+
/ forward) is colored onto the realized tokens. Hover any token for the student/teacher
|
|
96
|
+
top-k and the top-1 (dis)agreement.
|
|
97
|
+
|
|
98
|
+
A rollout is stamped with the config that produced it; if you reconnect with a different
|
|
99
|
+
model/config and then score a stale rollout, the server refuses (regenerate instead of
|
|
100
|
+
mis-coloring).
|
|
101
|
+
|
|
102
|
+
## MCP tools (optional, agentic)
|
|
103
|
+
|
|
104
|
+
Register MCP servers in the Connect panel to make their tools available to an agentic
|
|
105
|
+
generate loop:
|
|
106
|
+
|
|
107
|
+
- **remote**: a URL (e.g. `https://host/mcp`) plus an optional auth header.
|
|
108
|
+
- **stdio**: a command + args (+ optional `KEY=val` env), e.g. `uvx some-mcp-server`.
|
|
109
|
+
|
|
110
|
+
After connecting, the **use tools** toggle enables once at least one MCP tool is discovered.
|
|
111
|
+
With it on, Generate runs the model through tool calls (executed live) until it answers
|
|
112
|
+
without a tool call; scoring colors each assistant turn and greys the tool results.
|
|
113
|
+
|
|
114
|
+
`opd-viz` contacts only your vLLM endpoint, the MCP servers you register, and the HF Hub
|
|
115
|
+
(for the tokenizer). Nothing else.
|
opd_viz-0.1.0/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# opd-viz
|
|
2
|
+
|
|
3
|
+
Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model.
|
|
4
|
+
|
|
5
|
+
`opd-viz` visualizes the gold-hint SDPO per-token distillation loss: for each rollout
|
|
6
|
+
token it compares the student-policy prediction against the gold-procedure-reprompted
|
|
7
|
+
teacher prediction as a top-k KL, and colors that divergence onto the realized tokens in a
|
|
8
|
+
browser heatmap. It is model-agnostic (you pick the endpoint, model, and tokenizer at
|
|
9
|
+
runtime from the browser) and bring-your-own-MCP (register any MCP server for the agentic
|
|
10
|
+
generate loop).
|
|
11
|
+
|
|
12
|
+
## Install / run
|
|
13
|
+
|
|
14
|
+
Zero-checkout, via uv:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
uvx --from opd-viz opd-viz --port 8088
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
or into an environment:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install opd-viz
|
|
24
|
+
opd-viz --port 8088
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Then open `http://127.0.0.1:8088`. The server binds loopback and is single-user; there is
|
|
28
|
+
no auth on the opd-viz server itself.
|
|
29
|
+
|
|
30
|
+
Flags: `--host` (default `127.0.0.1`), `--port` (default `8088`). Everything else is set in
|
|
31
|
+
the browser.
|
|
32
|
+
|
|
33
|
+
## Requirements
|
|
34
|
+
|
|
35
|
+
- A served, OpenAI-compatible **vLLM** endpoint. It **must** be started with
|
|
36
|
+
`--max-logprobs 100` so the teacher-forcing pass can return top-100 `prompt_logprobs`;
|
|
37
|
+
`opd-viz` preflights this on Connect and reports a clear error if it is missing.
|
|
38
|
+
- The tokenizer matching your model is pulled from the Hugging Face Hub by the model id at
|
|
39
|
+
Connect time. For gated repos, set `HF_TOKEN` in the environment before launching:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
HF_TOKEN=<paste your hf_… token> uvx --from opd-viz opd-viz
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
(Reference a secret manager rather than pasting tokens into shared shells.)
|
|
46
|
+
|
|
47
|
+
## Use it (connect, generate, recompute)
|
|
48
|
+
|
|
49
|
+
1. **Connect.** Fill in the vLLM endpoint (e.g. `http://host:port/v1`), optional API key,
|
|
50
|
+
the model id (e.g. `org/Model-Name`), an optional system prompt, and sampling params
|
|
51
|
+
(temperature plus any of top_p / top_k / min_p / presence_penalty / repetition_penalty;
|
|
52
|
+
blank fields are not sent). Click **Connect** — this loads the matching tokenizer, runs
|
|
53
|
+
the vLLM preflight, and runs a chat-template self-check, surfacing any warnings. Changing
|
|
54
|
+
the model and clicking Connect again re-pulls the matching tokenizer and re-preflights;
|
|
55
|
+
no restart needed.
|
|
56
|
+
2. **Generate.** Type a question and click **Generate rollout**. Without tools this is a
|
|
57
|
+
single assistant turn (the student = its own tokens, so `gen == score` holds). The
|
|
58
|
+
generation logprobs are the student distribution.
|
|
59
|
+
3. **Recompute.** Paste the gold hindsight (and optionally tweak the reprompt template),
|
|
60
|
+
then click **Recompute & visualize**. The exact generated token ids are teacher-forced
|
|
61
|
+
under the gold-hint prompt via vLLM `prompt_logprobs`, and the per-token KL (JSD / reverse
|
|
62
|
+
/ forward) is colored onto the realized tokens. Hover any token for the student/teacher
|
|
63
|
+
top-k and the top-1 (dis)agreement.
|
|
64
|
+
|
|
65
|
+
A rollout is stamped with the config that produced it; if you reconnect with a different
|
|
66
|
+
model/config and then score a stale rollout, the server refuses (regenerate instead of
|
|
67
|
+
mis-coloring).
|
|
68
|
+
|
|
69
|
+
## MCP tools (optional, agentic)
|
|
70
|
+
|
|
71
|
+
Register MCP servers in the Connect panel to make their tools available to an agentic
|
|
72
|
+
generate loop:
|
|
73
|
+
|
|
74
|
+
- **remote**: a URL (e.g. `https://host/mcp`) plus an optional auth header.
|
|
75
|
+
- **stdio**: a command + args (+ optional `KEY=val` env), e.g. `uvx some-mcp-server`.
|
|
76
|
+
|
|
77
|
+
After connecting, the **use tools** toggle enables once at least one MCP tool is discovered.
|
|
78
|
+
With it on, Generate runs the model through tool calls (executed live) until it answers
|
|
79
|
+
without a tool call; scoring colors each assistant turn and greys the tool results.
|
|
80
|
+
|
|
81
|
+
`opd-viz` contacts only your vLLM endpoint, the MCP servers you register, and the HF Hub
|
|
82
|
+
(for the tokenizer). Nothing else.
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "opd-viz"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Live per-token distillation-loss (SDPO) heatmap server for any vLLM-served chat model"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Construct Labs GmbH", email = "hello@constructlabs.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"sdpo",
|
|
17
|
+
"distillation",
|
|
18
|
+
"kl-divergence",
|
|
19
|
+
"vllm",
|
|
20
|
+
"visualization",
|
|
21
|
+
"reinforcement-learning",
|
|
22
|
+
"ai",
|
|
23
|
+
"machine-learning",
|
|
24
|
+
]
|
|
25
|
+
classifiers = [
|
|
26
|
+
"Development Status :: 4 - Beta",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
"Intended Audience :: Science/Research",
|
|
29
|
+
"License :: OSI Approved :: MIT License",
|
|
30
|
+
"Operating System :: OS Independent",
|
|
31
|
+
"Programming Language :: Python :: 3",
|
|
32
|
+
"Programming Language :: Python :: 3.10",
|
|
33
|
+
"Programming Language :: Python :: 3.11",
|
|
34
|
+
"Programming Language :: Python :: 3.12",
|
|
35
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
36
|
+
"Typing :: Typed",
|
|
37
|
+
]
|
|
38
|
+
dependencies = [
|
|
39
|
+
"transformers>=4.44",
|
|
40
|
+
"jinja2>=3.1", # apply_chat_template (render path) needs it; transformers leaves it optional
|
|
41
|
+
"numpy>=1.24", # Phase 2 (KL math); transformers already pulls it
|
|
42
|
+
"mcp>=1.2", # Phase 4 (generic MCP client)
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.scripts]
|
|
46
|
+
opd-viz = "opd_viz.server:main"
|
|
47
|
+
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
dev = [
|
|
50
|
+
"pytest>=7.0.0",
|
|
51
|
+
"pytest-asyncio>=0.21.0",
|
|
52
|
+
"ruff>=0.1.0",
|
|
53
|
+
"mypy>=1.0.0",
|
|
54
|
+
"build>=1.0.0",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[project.urls]
|
|
58
|
+
Homepage = "https://constructlabs.com"
|
|
59
|
+
|
|
60
|
+
[tool.hatch.build.targets.sdist]
|
|
61
|
+
include = [
|
|
62
|
+
"/src",
|
|
63
|
+
"/README.md",
|
|
64
|
+
"/LICENSE",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
[tool.hatch.build.targets.wheel]
|
|
68
|
+
# hatchling ships every file under the package dir (incl. assets/viz_client.js and
|
|
69
|
+
# py.typed), so no explicit force-include is needed; a force-include here would
|
|
70
|
+
# double-add the asset and fail the build. The Phase 5 smoke test guards inclusion.
|
|
71
|
+
packages = ["src/opd_viz"]
|
|
72
|
+
|
|
73
|
+
[tool.ruff]
|
|
74
|
+
line-length = 88
|
|
75
|
+
target-version = "py310"
|
|
76
|
+
|
|
77
|
+
[tool.ruff.lint]
|
|
78
|
+
select = [
|
|
79
|
+
"E", # pycodestyle errors
|
|
80
|
+
"W", # pycodestyle warnings
|
|
81
|
+
"F", # Pyflakes
|
|
82
|
+
"I", # isort
|
|
83
|
+
"B", # flake8-bugbear
|
|
84
|
+
"C4", # flake8-comprehensions
|
|
85
|
+
"UP", # pyupgrade
|
|
86
|
+
]
|
|
87
|
+
ignore = [
|
|
88
|
+
"E501", # line too long (handled by formatter)
|
|
89
|
+
"E401", # multiple imports on one line (kept from the original loose scripts)
|
|
90
|
+
"E701", # multiple statements on one line, colon (kept from the original loose scripts)
|
|
91
|
+
"E702", # multiple statements on one line, semicolon (kept from the original loose scripts)
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
[tool.ruff.lint.isort]
|
|
95
|
+
known-first-party = ["opd_viz"]
|
|
96
|
+
|
|
97
|
+
[tool.mypy]
|
|
98
|
+
python_version = "3.10"
|
|
99
|
+
ignore_missing_imports = true
|
|
100
|
+
|
|
101
|
+
[tool.pytest.ini_options]
|
|
102
|
+
testpaths = ["tests"]
|
|
103
|
+
python_files = ["test_*.py"]
|
|
104
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""opd-viz - live per-token distillation-loss (SDPO) heatmap server.
|
|
2
|
+
|
|
3
|
+
A self-contained, model-agnostic server that visualizes the gold-hint SDPO per-token
|
|
4
|
+
distillation loss against any vLLM-served chat model: for each rollout token, the
|
|
5
|
+
student-policy prediction vs the gold-procedure-reprompted teacher prediction, as a
|
|
6
|
+
top-k KL colored onto the realized tokens in a browser heatmap.
|
|
7
|
+
|
|
8
|
+
For licensing and support, contact hello@constructlabs.com
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
__version__ = version("opd-viz")
|
|
15
|
+
except PackageNotFoundError:
|
|
16
|
+
__version__ = "0.0.0+dev"
|
|
17
|
+
|
|
18
|
+
__all__ = ["__version__"]
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
// Shared SDPO heatmap client. Pure browser JS, no build step. Inlined verbatim by
|
|
2
|
+
// BOTH the static builder (build_multi_viz.py) and the live server (serve_viz.py).
|
|
3
|
+
// All per-token interactivity recomputes client-side from the stored primitives
|
|
4
|
+
// (loss variant, kl_clip, importance multiply, top-1-disagree, KDE, hover).
|
|
5
|
+
//
|
|
6
|
+
// State is a mutable record array. setData() (static bootstrap) / addRecord() (live
|
|
7
|
+
// recompute) repopulate the sample dropdown and re-render. CAP (the fixed colour
|
|
8
|
+
// reference = 95th pct of kl_jsd over scored tokens) is passed in by the static
|
|
9
|
+
// builder, or computed client-side in live mode.
|
|
10
|
+
// Module state shared by every function in this client.
let DATA = [];   // records currently loaded (one per sample)
let CAP = 6;     // fallback colour reference, used when no token survives selection
let cur = 0;     // index into DATA of the record being displayed
// Sample picker element; looked up once when the script loads.
const sel = document.getElementById('sel');
|
|
12
|
+
|
|
13
|
+
// top-1 agreement, computed client-side from the stored top-k. The two scorers order
|
|
14
|
+
// their top-k differently (Qwen3.6: sorted by teacher prob; verl-dump 8B: teacher probs
|
|
15
|
+
// in STUDENT rank order), so take each side's argmax BY PROBABILITY rather than [0].
|
|
16
|
+
// Both top-k are restricted to the student's top-k candidate set, so this is
|
|
17
|
+
// teacher-argmax-within-set vs student-argmax. Prefer a stored top1_agree if present.
|
|
18
|
+
// Return the token (element 0) of the [token, prob] row whose probability (element 1)
// is highest. Ties keep the earliest row. Throws on an empty array, so callers must
// check the length first.
function amaxTok(rows){
  let best=rows[0];
  for(const row of rows.slice(1)){
    if(row[1]>best[1]) best=row;
  }
  return best[0];
}
|
|
19
|
+
// Highest-probability token in the record's teacher top-k (t.t_top), or null when the
// list is missing or empty.
function teacherTop1(t){
  const rows=t.t_top;
  if(!rows||!rows.length) return null;
  return amaxTok(rows);
}
|
|
20
|
+
// Top-1 agreement between student and teacher for one token record.
// A stored t.top1_agree wins when present (returned as-is). Otherwise the two argmax
// tokens are compared, and a record missing either top-k list counts as agreeing.
function agree(t){
  if(t.top1_agree!=null) return t.top1_agree;
  const haveBoth=t.s_top&&t.t_top&&t.s_top.length&&t.t_top.length;
  if(!haveBoth) return true;
  return amaxTok(t.s_top)===amaxTok(t.t_top);
}
|
|
22
|
+
// gap = t_chosen - s_chosen, the detached student-teacher logprob gap (SDAR gate input,
|
|
23
|
+
// AntiSD's PMI, SERL magnitude). 0 when either chosen logprob is missing.
|
|
24
|
+
// Teacher-minus-student logprob of the chosen token (t_chosen - s_chosen).
// Returns 0 when either value is null/undefined.
function gapOf(t){
  if(t.t_chosen==null||t.s_chosen==null) return 0;
  return t.t_chosen-t.s_chosen;
}
|
|
25
|
+
// Raw per-token scalar chosen by the #signal control, before any gate/importance/clip.
//   gap      -> t_chosen - s_chosen
//   abs_gap  -> |gap|
//   vopd     -> -kl_rev
//   entropy  -> t.entropy, or 0 when it is missing
//   other    -> the token field of that name (kl_jsd | kl_rev | kl_fwd)
function baseSignal(t){
  const which=document.getElementById('signal').value;
  if(which==='gap') return gapOf(t);
  if(which==='abs_gap') return Math.abs(gapOf(t));
  if(which==='vopd') return -t.kl_rev;
  if(which==='entropy') return t.entropy!=null ? t.entropy : 0;
  return t[which];
}
|
|
34
|
+
// the single token-to-scalar chokepoint. Returns a SIGNED scalar (sign preserved through
|
|
35
|
+
// clip); set-level stages (mask, top-p, cap, KDE, strip) all consume this. Transform order:
|
|
36
|
+
// base -> gate -> importance -> clip.
|
|
37
|
+
// Token -> signed display scalar. Stages are applied in this order:
//   base signal -> optional sigmoid gate on the gap (slope 5) -> optional importance
//   multiply, min(is_ratio, record is_clip) -> optional magnitude clip (sign kept).
// Every control is read from the DOM on each call.
function signalOf(t){
  const byId=(id)=>document.getElementById(id);
  let value=baseSignal(t);
  if(byId('gate').checked){
    value*=1/(1+Math.exp(-5*gapOf(t)));
  }
  if(byId('isclip').checked){
    const ratioCap=DATA[cur].is_clip;
    // a missing or zero is_clip on the record disables the importance multiply
    if(ratioCap) value*=Math.min(t.is_ratio,ratioCap);
  }
  const limit=+byId('clip').value;
  if(limit>0) value=Math.sign(value)*Math.min(Math.abs(value),limit);
  return value;
}
|
|
45
|
+
// Draw a Gaussian kernel-density curve of `vals` into the #kde SVG, plus a dashed
// vertical marker at `cap`.
//   vals: array of numbers (render passes signal magnitudes)
//   cap:  colour reference; the marker is pinned to the right edge when cap >= x-range
// An empty `vals` clears the SVG. The bandwidth is 1.06 * sd * n^(-1/5), floored at 0.05.
function drawKDE(vals,cap){
  const svg=document.getElementById('kde'),W=190,H=46,pad=3;
  if(!vals.length){svg.innerHTML='';return;}
  const n=vals.length;
  // Fold instead of Math.max(...vals): spreading a very large array as call arguments
  // can exceed the engine's argument limit and throw a RangeError.
  const vmax=vals.reduce((m,v)=>Math.max(m,v),-Infinity);
  const xmax=Math.max(cap,vmax)||1;
  const mean=vals.reduce((a,b)=>a+b,0)/n;
  const sd=Math.sqrt(vals.reduce((a,b)=>a+(b-mean)*(b-mean),0)/n)||0.1;
  const bw=Math.max(0.05,1.06*sd*Math.pow(n,-0.2)),M=80;
  // evaluate the density at M evenly spaced x positions in [0, xmax]
  const ys=[]; let ymax=0;
  for(let i=0;i<M;i++){
    const x=xmax*i/(M-1); let d=0;
    for(const v of vals){ const z=(x-v)/bw; d+=Math.exp(-0.5*z*z); }
    d/=(n*bw*Math.sqrt(2*Math.PI));
    ys.push(d); if(d>ymax) ymax=d;
  }
  ymax=ymax||1;
  const capx=pad+(W-2*pad)*Math.min(cap,xmax)/xmax;
  // closed area path: baseline left -> curve -> baseline right
  let p=`M ${pad} ${H-pad}`;
  for(let i=0;i<M;i++){
    const px=pad+(W-2*pad)*i/(M-1), py=H-pad-(H-2*pad)*ys[i]/ymax;
    p+=` L ${px.toFixed(1)} ${py.toFixed(1)}`;
  }
  p+=` L ${W-pad} ${H-pad} Z`;
  svg.innerHTML=`<path d="${p}" fill="#ff6b3566" stroke="#ff8a4a" stroke-width="1"/>`+
    `<line x1="${capx.toFixed(1)}" y1="2" x2="${capx.toFixed(1)}" y2="${H-2}" stroke="#ff3b3088" stroke-width="1" stroke-dasharray="2,2"/>`;
}
|
|
63
|
+
// split the flat token stream into assistant turns at the unscored tool-result separator
|
|
64
|
+
// (the only client-derivable boundary; drift tokens are unscored but keep the real token).
|
|
65
|
+
// Group the flat token list into assistant turns. An UNSCORED token whose text contains
// the tool-result marker ends the turn in progress and is itself dropped; every other
// token (scored or not) is appended to the turn in progress. Empty turns are not emitted.
function turnsOf(tokens){
  const turns=[];
  let open=[];
  for(const tok of tokens){
    const isSeparator=!tok.scored && /↳ tool result:/.test(tok.tok);
    if(!isSeparator){ open.push(tok); continue; }
    if(open.length){ turns.push(open); open=[]; }
  }
  if(open.length) turns.push(open);
  return turns;
}
|
|
74
|
+
// one bar per assistant turn, height = mean |displayed signal| over that turn's scored tokens.
|
|
75
|
+
// Auto-hide for single-turn rollouts (no degenerate single bar).
|
|
76
|
+
// Per-turn bar strip in the #strip SVG: one bar per assistant turn, with height
// proportional to the mean |signalOf| over that turn's scored tokens (0 when the turn
// has none). Does nothing if #strip is absent; clears and hides it for fewer than two turns.
function drawStrip(tokens){
  const svg=document.getElementById('strip');
  if(!svg) return;
  const means=[];
  for(const turn of turnsOf(tokens)){
    const mags=turn.filter(t=>t.scored).map(t=>Math.abs(signalOf(t)));
    means.push(mags.length ? mags.reduce((a,b)=>a+b,0)/mags.length : 0);
  }
  if(means.length<2){
    svg.innerHTML='';
    svg.style.display='none';
    return;
  }
  svg.style.display='';
  const W=150,H=46,pad=4;
  const ymax=Math.max(...means)||1;
  const slot=(W-2*pad)/means.length;   // horizontal space per bar, incl. 1px gutters
  let html='';
  means.forEach((m,i)=>{
    const h=(H-2*pad)*m/ymax;
    const x=pad+i*slot+1;
    const y=H-pad-h;
    html+=`<rect x="${x.toFixed(1)}" y="${y.toFixed(1)}" width="${(slot-2).toFixed(1)}" height="${h.toFixed(1)}" fill="#ff8a4a" opacity="0.8"/>`;
  });
  svg.innerHTML=html;
}
|
|
90
|
+
// diverging map: intensity from |x|/cap; warm hue for x>=0 (teacher-favored / positive),
|
|
91
|
+
// cool for x<0 (student-favored / negative); outline at the cap in the matching hue.
|
|
92
|
+
// Inline CSS for one token: a diverging background whose opacity is sqrt(|x|/cap)
// (capped at 1). x >= 0 uses a warm hue that shifts from 35 toward 0 with intensity;
// x < 0 uses a fixed cool hue (210). Tokens at or beyond the cap get an outline in the
// matching colour.
//   x:   signed display signal
//   cap: colour reference; must be > 0 to colour anything
// A non-positive or NaN cap (e.g. every displayed signal is 0) or a NaN x yields a
// fully transparent background with no outline, instead of `hsla(NaN,...)` plus an
// outline on every token.
function color(x,cap){
  const ratio=Math.abs(x)/cap;
  const usable=cap>0 && !Number.isNaN(ratio);
  const a=usable ? Math.min(1,Math.sqrt(ratio)) : 0;
  const warm=x>=0;
  const hue=warm?35-35*a:210, light=55-12*a;
  const atCap=usable && Math.abs(x)>=cap;
  const ol=atCap?(';outline:1.5px solid '+(warm?'#ff3b30':'#3b82f6')+';outline-offset:-1px'):'';
  return `background:hsla(${hue},85%,${light}%,${a.toFixed(3)})${ol}`;
}
|
|
98
|
+
// Escape text for insertion into innerHTML in an element-content position.
// `&` is replaced first so the entities produced for `<` and `>` are not double-escaped.
// Non-string input is stringified rather than throwing.
// Quotes are not escaped, so do not use this inside attribute values.
function esc(s){
  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
}
|
|
99
|
+
// Format a top-k list of [token, prob] rows as "tok=prob · tok=prob" for the hover
// tooltip, HTML-escaping each token. A missing or empty list yields '' so the tooltip
// still renders for tokens that carry no top-k (agree/teacherTop1 tolerate that too).
function tops(rows){
  if(!rows||!rows.length) return '';
  return rows.map(([t,p])=>`${esc(t)}=${p}`).join(' · ');
}
|
|
100
|
+
// position the hover tooltip; flip it ABOVE the cursor when it would overflow the viewport bottom
|
|
101
|
+
// Show the hover tooltip next to the pointer.
//   tip: tooltip element (style.display/left/top are written; offsetHeight is read)
//   e:   mouse event (clientX / clientY)
// Horizontally the tip sits 14px right of the pointer, pulled left so a 540px-wide tip
// stays inside the viewport, and never closer than 8px to the left edge (without the
// floor a viewport narrower than 540px pushed the tip off-screen to the left).
// Vertically it sits 18px below the pointer, or above it when it would overflow the
// bottom, again never above 8px.
function placeTip(tip,e){
  tip.style.display='block';
  const left=Math.min(e.clientX+14,innerWidth-540);
  tip.style.left=Math.max(8,left)+'px';
  const th=tip.offsetHeight||140;   // 140 is the fallback height used before layout
  const below=e.clientY+18;
  tip.style.top=(below+th>innerHeight ? Math.max(8,e.clientY-th-14) : below)+'px';
}
|
|
107
|
+
// Re-render the current record (DATA[cur]): header summary, KDE, per-turn strip and the
// token stream with hover tooltips. All controls are read from the DOM on every call.
// Does nothing when there is no current record.
//
// Record fields interpolated into innerHTML (sample, reward, alpha, k, backend) go
// through esc(String(...)) so a value containing markup cannot inject HTML.
function render(){
  const r=DATA[cur]; if(!r){return;} sel.value=String(cur);
  const byId=(id)=>document.getElementById(id);
  const clip=+byId('clip').value; byId('clipv').textContent=clip>0?clip:'off';
  const dis=byId('disagree').checked;
  const ntop1=byId('nottop1').checked;
  const masked = t => (dis && agree(t)) || (ntop1 && t.realized_top1); // mask -> excluded
  const allScored=r.tokens.filter(t=>t.scored);
  const nDis=allScored.filter(t=>!agree(t)).length;
  let scored=r.tokens.filter(t=>t.scored && !masked(t));
  // top-p selection on |signal|: keep the smallest set whose cumulative |signal| >= p*total
  const topp=+byId('topp').value;
  const tv=byId('toppv'); if(tv) tv.textContent=topp>0?topp:'off';
  if(topp>0 && topp<1 && scored.length){
    // compute each magnitude once; signalOf reads several DOM controls per call
    const mag=new Map(scored.map(t=>[t,Math.abs(signalOf(t))]));
    const ranked=[...scored].sort((a,b)=>mag.get(b)-mag.get(a));
    const total=ranked.reduce((s,t)=>s+mag.get(t),0)||1; let acc=0; const keep=new Set();
    for(const t of ranked){ keep.add(t); acc+=mag.get(t); if(acc>=topp*total) break; }
    scored=scored.filter(t=>keep.has(t));
  }
  const shown=new Set(scored); // survivors get colored; mask/top-p exclusions stay transparent text
  // colour reference = MAX of |displayed signal| over survivors, so intensity spans the full
  // range and the high end stays discriminable. The KDE is over magnitude; sign lives in color().
  const sls=scored.map(t=>signalOf(t)), abs=sls.map(v=>Math.abs(v));
  const mean=sls.reduce((a,b)=>a+b,0)/(sls.length||1);
  const cap=abs.length ? abs.reduce((m,x)=>x>m?x:m,0) : CAP;
  const fid=100*allScored.filter(t=>t.realized_top1).length/(allScored.length||1);
  drawKDE(abs,cap);
  drawStrip(r.tokens);
  const rw = r.reward==null ? '' : ` · <b style="color:${r.reward>=1?'#7ee787':'#ff7b72'}">${r.reward>=1?'SOLVED':'failed'} (${esc(String(r.reward))})</b>`;
  const src = r.backend ? ` · <span class=lab>${esc(String(r.backend))} · top-${esc(String(r.k||'?'))}${r.k&&r.k<100?' <span style="color:#d8a14a">(coarse tail)</span>':''}</span>` : '';
  const sel_active = dis || ntop1 || (topp>0 && topp<1);
  const disLab = sel_active ? `${scored.length} shown / ${r.n_scored} scored toks · mean signal ${mean.toFixed(3)} · ${nDis} top-1 disagree` : `${r.n_scored} scored / ${r.n_total} toks · mean signal ${mean.toFixed(3)} · ${nDis} top-1 disagree`;
  const legend = `<span class=u>grey = tool/unscored</span>` + (sel_active?` · <span class=lab>uncolored = masked/excluded (signal 0)</span>`:``);
  byId('hdr').innerHTML=`<b>${esc(String(r.sample))}</b>${rw}${src} · α=${esc(String(r.alpha))} · ${disLab} · realized==student-top1: <b>${fid.toFixed(0)}%</b> · ${legend}`;
  // Show the full picture in one continuous view: the reprompt template (question +
  // hindsight injected) as a grey lead-in, then the generated completion colored by KL.
  const st=byId('stream'); st.innerHTML='';
  if(r.hint_prompt){
    const pre=document.createElement('div');
    pre.className='hint-prompt';
    pre.textContent=r.hint_prompt;
    st.appendChild(pre);
  }
  const hideTip=()=>{ byId('tip').style.display='none'; };
  r.tokens.forEach((tk)=>{
    const sp=document.createElement('span');
    sp.textContent=tk.tok.replace(/\n/g,'↵');
    if(!tk.scored){
      sp.className='u'; // only tool-response / unscored tokens are greyed
      sp.onmousemove=(e)=>{
        const tip=byId('tip');
        tip.innerHTML=`<span class=lab>tool response / unscored (response_mask=0, no loss)</span>`;
        placeTip(tip,e);
      };
      sp.onmouseleave=hideTip;
      st.appendChild(sp);
      return;
    }
    // masked / top-p-excluded tokens stay readable text with transparent background, not greyed
    const vis = shown.has(tk);
    const L = vis ? signalOf(tk) : 0; sp.className='t'; sp.style.cssText=color(L,cap);
    sp.onmousemove=(e)=>{
      const tip=byId('tip');
      const agr = agree(tk)
        ? ` <span class=s>top-1 AGREE</span>`
        : ` <span class=tt>top-1 DISAGREE → teacher wants ${esc(teacherTop1(tk)||'?')}</span>`;
      const g = gapOf(tk), favs = g>=0 ? '<span class=tt>teacher-favored</span>' : '<span class=s>student-favored</span>';
      // `|| []` keeps the tooltip working for tokens that carry no top-k list
      tip.innerHTML=
        `<b class=kl>signal ${signalOf(tk).toFixed(3)}</b> <span class=lab>(${byId('signal').value})</span>${agr}<br>`+
        `<span class=lab>gap t−s:</span> <b>${g>=0?'+':''}${g.toFixed(3)}</b> ${favs} `+
        `<span class=lab>· s_chosen ${tk.s_chosen} · t_chosen ${tk.t_chosen}</span><br>`+
        `<span class=lab>kl jsd ${tk.kl_jsd} rev ${tk.kl_rev} fwd ${tk.kl_fwd} · H ${tk.entropy} · eff ${tk.eff_support}</span><br>`+
        `<span class=lab>realized:</span> ${esc(tk.tok)} ${tk.realized_top1?'<span class=s>(=student top-1)</span>':'<span class=tt>(NOT top-1)</span>'}<br>`+
        `<span class=s>student:</span> ${tops(tk.s_top||[])}<br><span class=tt>teacher:</span> ${tops(tk.t_top||[])}`;
      placeTip(tip,e);
    };
    sp.onmouseleave=hideTip;
    st.appendChild(sp);
  });
}
|
|
176
|
+
// Step the current record index by d (wrapping at both ends) and re-render.
// Does nothing when no records are loaded.
function nav(d){
  const count=DATA.length;
  if(!count) return;
  cur=(cur+d+count)%count;
  render();
}
|
|
177
|
+
// Colour reference for a set of records: the 95th-percentile kl_jsd over all scored
// tokens (index floor(0.95*(n-1)) of the ascending sort), rounded to two decimals.
// Returns 6.0 when there are no scored tokens.
function computeCap(recs){
  const kls=recs.flatMap(r=>r.tokens.filter(t=>t.scored).map(t=>t.kl_jsd));
  if(!kls.length) return 6.0;
  kls.sort((a,b)=>a-b);
  const p95=kls[Math.floor(0.95*(kls.length-1))];
  return Math.round(p95*100)/100;
}
|
|
182
|
+
// Rebuild the sample picker: one <wa-option> per record, valued by its index and
// labelled "<position>/<count> <sample>".
function repopulate(){
  sel.innerHTML='';
  DATA.forEach((rec,idx)=>{
    const opt=document.createElement('wa-option');
    opt.value=String(idx);
    opt.textContent=`${idx+1}/${DATA.length} ${rec.sample}`;
    sel.appendChild(opt);
  });
}
|
|
186
|
+
// static bootstrap: load a fixed corpus with a precomputed CAP
|
|
187
|
+
// Static bootstrap: replace the whole corpus.
//   recs: array of records (null/undefined is treated as empty)
//   cap:  precomputed colour reference; when null/undefined it is derived via computeCap
// Clamps the current index into range, rebuilds the picker, then renders — or, when
// the corpus is empty, clears the stream and header.
function setData(recs, cap){
  DATA=recs||[];
  CAP=(cap!=null)?cap:computeCap(DATA);
  const last=Math.max(0, DATA.length-1);
  if(cur>last) cur=last;
  repopulate();
  if(!DATA.length){
    for(const id of ['stream','hdr']) document.getElementById(id).innerHTML='';
    return;
  }
  render();
}
|
|
193
|
+
// live mode: append one freshly-scored record, recompute CAP, jump to it
|
|
194
|
+
// Live mode: append one record, recompute the colour reference over the whole corpus,
// select the new record, then rebuild the picker and render.
function addRecord(rec){
  cur=DATA.push(rec)-1;   // push returns the new length, so this is the last index
  CAP=computeCap(DATA);
  repopulate();
  render();
}
|
|
195
|
+
// Wire the loss-control bar. Web Awesome's wa-select / wa-slider / wa-switch emit
|
|
196
|
+
// NATIVE change/input events, so plain addEventListener works (no wa- prefix).
|
|
197
|
+
// Attach listener `fn` for event `ev` to the element with the given id.
// Silently does nothing when the element is not on the page.
function _wire(id,ev,fn){
  const target=document.getElementById(id);
  if(!target) return;
  target.addEventListener(ev,fn);
}
|
|
198
|
+
// Sample picker: jump to the chosen record (its value is the record index).
_wire('sel','change',function(){cur=+this.value;render();});
// Every loss control simply re-renders; sliders fire 'input', the rest fire 'change'.
for(const [id,ev] of [
  ['signal','change'],
  ['clip','input'],
  ['gate','change'],
  ['isclip','change'],
  ['topp','input'],
  ['disagree','change'],
  ['nottop1','change'],
]){
  _wire(id,ev,render);
}
|
|
206
|
+
// Left/Right arrow keys step through the samples, except while focus is in a control
// that uses those keys itself: text inputs, sliders, select boxes and contenteditable
// regions. Otherwise nudging the clip / top-p slider with the keyboard would also
// switch samples.
document.onkeydown=(e)=>{
  const t=e.target, tag=t&&t.tagName;
  const ownsArrows=['WA-INPUT','WA-TEXTAREA','INPUT','TEXTAREA','WA-SLIDER','WA-SELECT','SELECT'];
  if(ownsArrows.includes(tag)||(t&&t.isContentEditable)) return; // don't hijack typing / control keys
  if(e.key==='ArrowLeft') nav(-1);
  else if(e.key==='ArrowRight') nav(1);
};
|