@adia-ai/a2ui-mcp 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +75 -0
- package/evals/compose-from-chunks-holdout.jsonl +20 -0
- package/package.json +6 -5
- package/scripts/dogfood-test.mjs +1 -1
- package/scripts/eval-chunk-synthesis.mjs +1 -1
- package/scripts/eval-compose-from-chunks.mjs +264 -0
- package/scripts/eval-diff.mjs +2 -2
- package/scripts/eval-fix.mjs +1 -1
- package/scripts/eval-refine-synthesis.mjs +4 -4
- package/scripts/generate.mjs +3 -3
- package/scripts/multi-turn-test.mjs +1 -1
- package/scripts/smoke-engine-registry.mjs +2 -2
- package/scripts/smoke-issues.mjs +2 -2
- package/scripts/smoke-refine.mjs +3 -3
- package/scripts/smoke-register-engine.mjs +1 -1
- package/scripts/smoke-state-cache.mjs +1 -1
- package/scripts/smoke-synthesis.mjs +1 -1
- package/scripts/test-a2ui.mjs +5 -5
- package/scripts/test-chunks.mjs +2 -2
- package/scripts/test-evals.mjs +1 -1
- package/scripts/visual-validate.mjs +1 -1
- package/server.js +10 -10
package/CHANGELOG.md
CHANGED
|
@@ -9,6 +9,81 @@ zettel strategies.
|
|
|
9
9
|
|
|
10
10
|
## [Unreleased]
|
|
11
11
|
|
|
12
|
+
_No pending changes._
|
|
13
|
+
|
|
14
|
+
## [0.2.0] - 2026-05-02
|
|
15
|
+
|
|
16
|
+
**Lockstep cut + corpus caret-lock fix.** All 8 published `@adia-ai/*`
|
|
17
|
+
packages now share one version, governed by
|
|
18
|
+
[`docs/specs/package-architecture.md` § 15 (Versioning Policy)](../../../docs/specs/package-architecture.md#15-versioning-policy).
|
|
19
|
+
|
|
20
|
+
This release **fixes a live shipping bug** in 0.1.3: the `dependencies`
|
|
21
|
+
range `"@adia-ai/a2ui-corpus": "^0.0.6"` locked to *exactly* `0.0.6`
|
|
22
|
+
under npm pre-1.0 semver, so every fresh `npm i @adia-ai/a2ui-mcp`
|
|
23
|
+
since the corpus 0.0.7 cut was installing a stale corpus — missing the
|
|
24
|
+
§§22-25 nav consolidation, the new chunks, and the catalog regen. The
|
|
25
|
+
new range `^0.2.0` works correctly. Same class fixed for the other 3
|
|
26
|
+
internal deps in this package.
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- **`a2ui-mcp@0.1.3 → corpus@0.0.6` install bug.** The new `^0.2.0`
|
|
31
|
+
ranges resolve to the actually-current versions instead of the
|
|
32
|
+
caret-locked old ones. Consumers running `npx adiaui-mcp` from npm
|
|
33
|
+
now see the post-§§22-25 corpus + the post-rename component catalog.
|
|
34
|
+
|
|
35
|
+
### Changed
|
|
36
|
+
|
|
37
|
+
- `version`: `0.1.3` → `0.2.0`.
|
|
38
|
+
- `dependencies["@adia-ai/a2ui-compose"]`: `^0.1.0` → `^0.2.0`.
|
|
39
|
+
- `dependencies["@adia-ai/a2ui-retrieval"]`: `^0.0.1` → `^0.2.0`.
|
|
40
|
+
- `dependencies["@adia-ai/a2ui-validator"]`: `^0.0.1` → `^0.2.0`.
|
|
41
|
+
- `dependencies["@adia-ai/a2ui-corpus"]`: `^0.0.6` → `^0.2.0`.
|
|
42
|
+
|
|
43
|
+
### No source changes
|
|
44
|
+
|
|
45
|
+
`server.js`, tools, scripts, evals, personas — all byte-identical to
|
|
46
|
+
0.1.3. The cut bumps version + the 4 internal dep ranges only.
|
|
47
|
+
Consumers who want the substantive 0.1.3 work (`compose_from_chunks`
|
|
48
|
+
eval runner + 20-intent hold-out set) get it via either 0.1.3 or
|
|
49
|
+
0.2.0 — and 0.2.0 additionally installs the correct corpus version.
|
|
50
|
+
|
|
51
|
+
## [0.1.3] - 2026-05-02
|
|
52
|
+
|
|
53
|
+
Additive — `compose_from_chunks` eval runner + hold-out set. No
|
|
54
|
+
BREAKING changes.
|
|
55
|
+
|
|
56
|
+
### Added
|
|
57
|
+
|
|
58
|
+
- **`compose_from_chunks` eval runner + hold-out set**. Closes the
|
|
59
|
+
spec at
|
|
60
|
+
[`docs/specs/compose-from-chunks-eval.md`](../../../docs/specs/compose-from-chunks-eval.md):
|
|
61
|
+
- `evals/compose-from-chunks-holdout.jsonl` — 20 intents (10
|
|
62
|
+
single-turn compose + 10 multi-turn refine) across 10
|
|
63
|
+
categories (data-display, forms, layout, data, data-viz,
|
|
64
|
+
agent, overlay, chat, settings, display).
|
|
65
|
+
- `scripts/eval-compose-from-chunks.mjs` — runner with `--stub`
|
|
66
|
+
(default; retrieval-only, no API calls) and `--real-llm`
|
|
67
|
+
(wires Anthropic SDK) modes. Composite scoring (structural
|
|
68
|
+
30 + coverage 20 + retrieval 20 + render 30, with null-
|
|
69
|
+
pro-rata redistribution when render is deferred). `--json`
|
|
70
|
+
+ `--report-file` outputs. Threshold 80; exits 1 if avg < 80.
|
|
71
|
+
- `npm run eval:compose-from-chunks` exposes the runner.
|
|
72
|
+
- Stub baseline: 9 of 20 intents retrievable directly (avg
|
|
73
|
+
~50-70 each); 11 need synthesis (correctly warn without
|
|
74
|
+
`--real-llm`). Aggregate ~26 / 100 stub. Real-LLM mode is
|
|
75
|
+
the gating signal for the `chunk-zettel` engine promotion.
|
|
76
|
+
|
|
77
|
+
Render-fidelity component is DEFERRED — needs Playwright
|
|
78
|
+
headless render + console-error capture wiring; current
|
|
79
|
+
composite drops the render weight pro-rata across
|
|
80
|
+
structural/coverage/retrieval until that lands.
|
|
81
|
+
|
|
82
|
+
### Changed
|
|
83
|
+
|
|
84
|
+
- `package.json` `files:` array now includes `evals/` so the hold-out
|
|
85
|
+
set ships in the published tarball.
|
|
86
|
+
|
|
12
87
|
---
|
|
13
88
|
|
|
14
89
|
## [0.1.2] - 2026-05-01
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{"id":"intent-001","kind":"compose","category":"data-display","intent":"kpi grid with 4 stat cards: users, revenue, sessions, churn","expected_components":["Card","Stat","Grid"],"expected_chunk":"kpi-grid-4-card"}
|
|
2
|
+
{"id":"intent-002","kind":"compose","category":"forms","intent":"sign-in form with email + password + 'forgot password' link","expected_components":["Card","Input","Button","Field"],"expected_chunk":"auth-sign-in"}
|
|
3
|
+
{"id":"intent-003","kind":"compose","category":"layout","intent":"settings page with three tabs (general, integrations, billing)","expected_components":["Tabs","Tab","Card","Section"],"expected_chunk":"settings-tabs-3"}
|
|
4
|
+
{"id":"intent-004","kind":"compose","category":"data","intent":"data table of users with role badge + last-active timestamp","expected_components":["Table","Badge"],"expected_chunk":"users-table"}
|
|
5
|
+
{"id":"intent-005","kind":"compose","category":"data-viz","intent":"conversion funnel chart over 6 stages, with drop-off labels","expected_components":["Chart","Card","ChartLegend"],"expected_chunk":"conversion-funnel"}
|
|
6
|
+
{"id":"intent-006","kind":"compose","category":"agent","intent":"agent activity feed with reasoning steps + final artifact","expected_components":["AgentTrace","AgentReasoning","AgentArtifact"],"expected_chunk":"agent-activity-feed"}
|
|
7
|
+
{"id":"intent-007","kind":"compose","category":"layout","intent":"split-pane editor: code on the left, preview on the right","expected_components":["EditorShell","Pane","Code"],"expected_chunk":"editor-split"}
|
|
8
|
+
{"id":"intent-008","kind":"compose","category":"overlay","intent":"command palette modal with grouped results (recent, suggestions)","expected_components":["Command","Modal"],"expected_chunk":"command-grouped"}
|
|
9
|
+
{"id":"intent-009","kind":"compose","category":"forms","intent":"registration step 2 of 5 — profile setup with 4 fields","expected_components":["Card","StepProgress","Field","Input"],"expected_chunk":"reg-step-shell"}
|
|
10
|
+
{"id":"intent-010","kind":"compose","category":"layout","intent":"404 error page with breadcrumb + back-to-home link","expected_components":["Card","Breadcrumb","Button"],"expected_chunk":"error-404"}
|
|
11
|
+
{"id":"intent-011","kind":"refine","category":"data-display","intent":"dashboard for project metrics","refine":"add a date-range filter at the top","expected_components":["Card","Stat","Select"],"expected_chunk":"project-dashboard"}
|
|
12
|
+
{"id":"intent-012","kind":"refine","category":"display","intent":"user profile card","refine":"make the email editable inline","expected_components":["Card","Avatar","Input"],"expected_chunk":"user-profile-card"}
|
|
13
|
+
{"id":"intent-013","kind":"refine","category":"data","intent":"kanban board with 3 columns","refine":"add a count badge to each column header","expected_components":["Card","Badge","Header"],"expected_chunk":"kanban-3col"}
|
|
14
|
+
{"id":"intent-014","kind":"refine","category":"chat","intent":"chat surface with streaming reply","refine":"add a stop button while streaming","expected_components":["ChatShell","Button","ChatInput"],"expected_chunk":"chat-streaming"}
|
|
15
|
+
{"id":"intent-015","kind":"refine","category":"forms","intent":"sign-up form with email + password","refine":"add password strength meter","expected_components":["Card","Input","Progress"],"expected_chunk":"auth-sign-up"}
|
|
16
|
+
{"id":"intent-016","kind":"refine","category":"settings","intent":"settings tab for notifications","refine":"split email + push into separate sections","expected_components":["Card","Section","Switch"],"expected_chunk":"settings-notifications"}
|
|
17
|
+
{"id":"intent-017","kind":"refine","category":"data","intent":"table of orders","refine":"add a bulk-action toolbar above the table","expected_components":["Table","TableToolbar","Button"],"expected_chunk":"orders-table"}
|
|
18
|
+
{"id":"intent-018","kind":"refine","category":"agent","intent":"agent reasoning panel","refine":"collapse intermediate steps by default, expandable","expected_components":["AgentReasoning","Accordion"],"expected_chunk":"agent-reasoning-collapsed"}
|
|
19
|
+
{"id":"intent-019","kind":"refine","category":"overlay","intent":"modal confirming destructive action","refine":"require typing the resource name to confirm","expected_components":["Modal","Input","Button"],"expected_chunk":"destructive-confirm"}
|
|
20
|
+
{"id":"intent-020","kind":"refine","category":"display","intent":"marketing landing hero","refine":"add a secondary 'see demo' CTA","expected_components":["Card","Heading","Button"],"expected_chunk":"marketing-hero"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adia-ai/a2ui-mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "AdiaUI A2UI MCP server. Exposes the compose engine over MCP with an engine selector for monolithic + zettel strategies.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
"server.js",
|
|
11
11
|
"tools/",
|
|
12
12
|
"scripts/",
|
|
13
|
+
"evals/",
|
|
13
14
|
"personas/",
|
|
14
15
|
"README.md",
|
|
15
16
|
"CHANGELOG.md"
|
|
@@ -26,10 +27,10 @@
|
|
|
26
27
|
},
|
|
27
28
|
"dependencies": {
|
|
28
29
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
29
|
-
"@adia-ai/a2ui-compose": "^0.
|
|
30
|
-
"@adia-ai/a2ui-retrieval": "^0.0
|
|
31
|
-
"@adia-ai/a2ui-validator": "^0.0
|
|
32
|
-
"@adia-ai/a2ui-corpus": "^0.0
|
|
30
|
+
"@adia-ai/a2ui-compose": "^0.2.0",
|
|
31
|
+
"@adia-ai/a2ui-retrieval": "^0.2.0",
|
|
32
|
+
"@adia-ai/a2ui-validator": "^0.2.0",
|
|
33
|
+
"@adia-ai/a2ui-corpus": "^0.2.0",
|
|
33
34
|
"zod": "^3.24.0"
|
|
34
35
|
}
|
|
35
36
|
}
|
package/scripts/dogfood-test.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Dogfood test: Run 20 diverse intents through A2UI instant mode.
|
|
3
3
|
*/
|
|
4
|
-
const { generateUI } = await import('../../compose/
|
|
4
|
+
const { generateUI } = await import('../../compose/core/generator.js');
|
|
5
5
|
|
|
6
6
|
const intents = [
|
|
7
7
|
'user registration form with name, email, password',
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
21
|
import '../../../../scripts/load-env.mjs';
|
|
22
|
-
import { composeFromIntent } from '../../compose/
|
|
22
|
+
import { composeFromIntent } from '../../compose/strategies/zettel/chunk-synthesizer.js';
|
|
23
23
|
import { createAdapter } from '../../compose/llm/llm-bridge.js';
|
|
24
24
|
|
|
25
25
|
// Hold-out intents — chosen to NOT have a 1:1 chunk match (so synthesis path
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* eval-compose-from-chunks.mjs — Hold-out eval for the chunk-aware
|
|
4
|
+
* synthesizer. Per `docs/specs/compose-from-chunks-eval.md`.
|
|
5
|
+
*
|
|
6
|
+
* Reads `packages/a2ui/mcp/evals/compose-from-chunks-holdout.jsonl`,
|
|
7
|
+
* runs each intent through `composeFromIntent`, and emits a per-intent
|
|
8
|
+
* + aggregate report.
|
|
9
|
+
*
|
|
10
|
+
* Two modes:
|
|
11
|
+
* --stub (default) — retrieval-only path, no LLM calls. Fast;
|
|
12
|
+
* produces a baseline that exercises the chunk-corpus +
|
|
13
|
+
* retrieval scoring without spending API budget. Use to
|
|
14
|
+
* verify the runner shape + the corpus surface.
|
|
15
|
+
* --real-llm — wires the LLM bridge for synthesis-tier composition.
|
|
16
|
+
* Costs ~$2 per full eval at Claude Sonnet 4.6 pricing.
|
|
17
|
+
* Requires `ANTHROPIC_API_KEY` in env.
|
|
18
|
+
*
|
|
19
|
+
* Scoring (composite, 0-100):
|
|
20
|
+
* - Structural (30%) — composition emitted (html non-null OR plan
|
|
21
|
+
* non-null).
|
|
22
|
+
* - Coverage (20%) — fraction of the intent's
|
|
23
|
+
* `expected_components` that appear in the emitted markup.
|
|
24
|
+
* - Retrieval (20%) — top-k retrieved chunks include the intent's
|
|
25
|
+
* `expected_chunk` (when set). Soft-asserted; absence flags but
|
|
26
|
+
* doesn't fail.
|
|
27
|
+
* - Render fidelity (30%) — DEFERRED. Real implementation needs
|
|
28
|
+
* Playwright headless render + console-error capture; this runner
|
|
29
|
+
* emits a placeholder pending the render-fidelity smoke wiring.
|
|
30
|
+
* The composite re-distributes its weight pro-rata across the
|
|
31
|
+
* three remaining components when render is null.
|
|
32
|
+
*
|
|
33
|
+
* Exit:
|
|
34
|
+
* 0 if avg ≥ 80 (passes the chunk-zettel promotion gate threshold)
|
|
35
|
+
* 1 if avg < 80
|
|
36
|
+
*
|
|
37
|
+
* Usage:
|
|
38
|
+
* npm run eval:compose-from-chunks # stub mode
|
|
39
|
+
* npm run eval:compose-from-chunks -- --real-llm # real-LLM
|
|
40
|
+
* npm run eval:compose-from-chunks -- --limit 5 # first 5 intents
|
|
41
|
+
* npm run eval:compose-from-chunks -- --json # JSON report
|
|
42
|
+
* npm run eval:compose-from-chunks -- --report-file # write to docs/reports/
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
import '../../../../scripts/load-env.mjs';
|
|
46
|
+
|
|
47
|
+
import fs from 'node:fs';
|
|
48
|
+
import path from 'node:path';
|
|
49
|
+
import { fileURLToPath } from 'node:url';
|
|
50
|
+
|
|
51
|
+
import { composeFromIntent } from '../../compose/strategies/zettel/chunk-synthesizer.js';
|
|
52
|
+
import { searchChunksAsync } from '../../corpus/scripts/chunk-library.js';
|
|
53
|
+
|
|
54
|
+
// Directory that contains this script.
const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url));

// Four levels above the script directory; used as the base for `docs/reports`.
const REPO_ROOT = path.resolve(SCRIPT_DIR, '../../../..');

// The hold-out set lives in the package's sibling `evals/` directory. Resolving
// it relative to this script (instead of REPO_ROOT + a hard-coded monorepo path)
// yields the same file when the script sits at `packages/a2ui/mcp/scripts`, and
// keeps working when only the package directory itself is present.
const HOLDOUT = path.resolve(SCRIPT_DIR, '../evals/compose-from-chunks-holdout.jsonl');

// Minimum average composite score (0-100) for a passing run.
const PASS_THRESHOLD = 80;

// CLI flags (everything after `node <script>`).
const args = process.argv.slice(2);
const FLAG_REAL_LLM = args.includes('--real-llm');
const FLAG_JSON = args.includes('--json');
const FLAG_REPORT = args.includes('--report-file');

// `--limit N` keeps only the first N intents. A missing, non-numeric, zero or
// negative value means "no limit" (null) rather than leaking NaN or a negative
// number into the later `slice` call.
const limitIdx = args.indexOf('--limit');
const parsedLimit = limitIdx >= 0 ? Number.parseInt(args[limitIdx + 1], 10) : Number.NaN;
const LIMIT = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : null;
|
|
64
|
+
|
|
65
|
+
// ─────────────────────────────────────────────────────────────────
|
|
66
|
+
// Hold-out loader
|
|
67
|
+
// ─────────────────────────────────────────────────────────────────
|
|
68
|
+
|
|
69
|
+
/**
 * Read the hold-out JSONL file at `HOLDOUT` and parse one intent per line.
 *
 * Lines are trimmed and blank lines are skipped. When `LIMIT` is truthy only
 * the first `LIMIT` intents are returned.
 *
 * @returns {object[]} Parsed intent records, in file order.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 */
function loadHoldOut() {
  const intents = [];
  for (const rawLine of fs.readFileSync(HOLDOUT, 'utf8').split('\n')) {
    const line = rawLine.trim();
    if (line) {
      intents.push(JSON.parse(line));
    }
  }
  if (LIMIT) {
    return intents.slice(0, LIMIT);
  }
  return intents;
}
|
|
75
|
+
|
|
76
|
+
// ─────────────────────────────────────────────────────────────────
|
|
77
|
+
// LLM adapter
|
|
78
|
+
// ─────────────────────────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
/**
 * Build the adapter object handed to `composeFromIntent`.
 *
 * Returns `null` unless `--real-llm` was passed. In real-LLM mode the process
 * exits with code 2 when `ANTHROPIC_API_KEY` is not set; otherwise the
 * Anthropic SDK is imported lazily and wrapped in an object exposing a single
 * `complete()` method.
 *
 * @returns {Promise<null | {complete: Function}>}
 */
async function buildLLMAdapter() {
  if (!FLAG_REAL_LLM) {
    return null;
  }

  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    console.error('--real-llm requires ANTHROPIC_API_KEY in env.');
    process.exit(2);
  }

  // Imported lazily so stub mode never loads the SDK.
  const sdk = await import('@anthropic-ai/sdk');
  const Anthropic = sdk.default;
  const client = new Anthropic({ apiKey });

  return {
    /**
     * Send one system + user prompt and return the concatenated text blocks
     * of the response as `{ text }`.
     */
    async complete({ system, user, model = 'claude-sonnet-4-6', maxTokens = 2048 }) {
      const request = {
        model,
        max_tokens: maxTokens,
        system,
        messages: [{ role: 'user', content: user }],
      };
      const resp = await client.messages.create(request);
      const textBlocks = resp.content.filter((block) => block.type === 'text');
      const text = textBlocks.map((block) => block.text).join('');
      return { text };
    },
  };
}
|
|
105
|
+
|
|
106
|
+
// ─────────────────────────────────────────────────────────────────
|
|
107
|
+
// Scoring
|
|
108
|
+
// ─────────────────────────────────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
/**
 * Structural score: 100 when the compose result carries a truthy `html` or a
 * truthy `plan`, otherwise 0.
 *
 * @param {{html?: unknown, plan?: unknown}} result
 * @returns {number} 0 or 100.
 */
function scoreStructural(result) {
  const emittedSomething = Boolean(result.html) || Boolean(result.plan);
  return emittedSomething ? 100 : 0;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Coverage score: the percentage (0-100, rounded) of `expected` component
 * names that appear as an opening `<name-ui` tag in `result.html`.
 *
 * Each name is lower-cased and a leading `ui` is stripped before the `-ui`
 * suffix is appended, and matching is case-insensitive. An empty or missing
 * `expected` list scores 100.
 *
 * @param {{html?: unknown}} result Compose result; only `html` is read.
 * @param {string[] | null | undefined} expected Expected component names.
 * @returns {number} Integer in [0, 100].
 */
function scoreCoverage(result, expected) {
  if (!expected || expected.length === 0) return 100;
  const html = String(result.html || '');

  const hits = expected.filter((tag) => {
    const base = String(tag).toLowerCase().replace(/^ui/, '');
    // Escape regex metacharacters so a name such as "C++" or "a.b" is matched
    // literally instead of throwing a SyntaxError or matching the wrong tag.
    const escaped = base.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`<${escaped}-ui[\\s>]`, 'i').test(html);
  }).length;

  return Math.round((hits / expected.length) * 100);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Retrieval score: does a 5-result chunk search for the intent text return a
 * hit whose `name` equals `expectedChunk`?
 *
 * @param {string} intent Intent text passed to `searchChunksAsync`.
 * @param {string | null | undefined} expectedChunk Expected chunk name.
 * @returns {Promise<number | null>} 100 when found, 0 when not, and `null`
 *   when no expected chunk is given (the search is then skipped).
 */
async function scoreRetrieval(intent, expectedChunk) {
  if (!expectedChunk) {
    return null;
  }
  const topChunks = await searchChunksAsync(intent, { limit: 5 });
  const names = topChunks.map((chunk) => chunk.name);
  return names.includes(expectedChunk) ? 100 : 0;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Render-fidelity score placeholder.
 *
 * DEFERRED: the real implementation needs a Playwright headless render plus
 * console-error capture (see spec § Out-of-band for the follow-up). Until
 * then this always returns `null`, which `compositeScore` leaves out of the
 * weighting.
 *
 * @param {unknown} _result Compose result (currently unused).
 * @returns {null}
 */
function scoreRenderFidelity(_result) {
  const notYetMeasured = null;
  return notYetMeasured;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Combine the component scores into one weighted 0-100 composite.
 *
 * Weights: structural 30, coverage 20, retrieval 20, render 30. A component
 * whose value is `null` or `undefined` is left out and the remaining weights
 * are renormalised (pro-rata redistribution). With no usable component the
 * composite is 0.
 *
 * @param {{structural?: number|null, coverage?: number|null,
 *          retrieval?: number|null, render?: number|null}} scores
 * @returns {number} Rounded weighted average in [0, 100].
 */
function compositeScore({ structural, coverage, retrieval, render }) {
  const weighted = [
    [structural, 30],
    [coverage, 20],
    [retrieval, 20],
    [render, 30],
  ];

  let totalWeight = 0;
  let weightedSum = 0;
  for (const [value, weight] of weighted) {
    // `== null` skips both null and undefined; letting undefined through
    // would turn the whole composite into NaN.
    if (value == null) continue;
    totalWeight += weight;
    weightedSum += value * weight;
  }

  return totalWeight > 0 ? Math.round(weightedSum / totalWeight) : 0;
}
|
|
152
|
+
|
|
153
|
+
// ─────────────────────────────────────────────────────────────────
|
|
154
|
+
// Eval loop
|
|
155
|
+
// ─────────────────────────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
/**
 * Run one hold-out intent through `composeFromIntent` and score the result.
 *
 * Only the compose call is timed; the retrieval search runs after the timer
 * has stopped.
 *
 * NOTE(review): only `intent.intent` is sent to the composer. The `refine`
 * text carried by `kind: "refine"` records is not used here — confirm whether
 * the refine turn is meant to be exercised by this runner.
 *
 * @param {object} intent Hold-out record (`id`, `kind`, `intent`,
 *   `expected_components`, `expected_chunk`).
 * @param {object | null} llmAdapter Adapter from `buildLLMAdapter`, or null.
 * @returns {Promise<object>} Per-intent report row.
 */
async function evalIntent(intent, llmAdapter) {
  const {
    id,
    kind,
    intent: prompt,
    expected_components: expectedComponents,
    expected_chunk: expectedChunk,
  } = intent;

  const startedAt = performance.now();
  const result = await composeFromIntent({ intent: prompt, llmAdapter, maxAttempts: 2 });
  const elapsedMs = Math.round(performance.now() - startedAt);

  const scores = {
    structural: scoreStructural(result),
    coverage: scoreCoverage(result, expectedComponents),
    retrieval: await scoreRetrieval(prompt, expectedChunk),
    render: scoreRenderFidelity(result),
  };

  return {
    id,
    kind,
    intent: prompt,
    source: result.source,
    elapsedMs,
    ...scores,
    score: compositeScore(scores),
    warnings: result.warnings ?? [],
  };
}
|
|
186
|
+
|
|
187
|
+
// ─────────────────────────────────────────────────────────────────
|
|
188
|
+
// Reporter
|
|
189
|
+
// ─────────────────────────────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
/**
 * Aggregate figures shared by both reporters.
 *
 * @param {{score: number}[]} results Per-intent rows.
 * @param {number} threshold Score at or above which a row counts as passing.
 * @returns {{avg: number, passing: number, total: number}} `avg` is the
 *   rounded mean score, or 0 for an empty run (avoids a 0/0 NaN).
 */
function summarizeResults(results, threshold) {
  const total = results.length;
  const sum = results.reduce((acc, row) => acc + row.score, 0);
  return {
    avg: total > 0 ? Math.round(sum / total) : 0,
    passing: results.filter((row) => row.score >= threshold).length,
    total,
  };
}

/**
 * Render the eval results as a Markdown report: heading, aggregate line,
 * per-intent table and, when any row has warnings, a warnings list.
 *
 * @param {object[]} results Rows produced by `evalIntent`.
 * @param {string} mode `'real-llm'` or anything else (reported as stub).
 * @param {number} [threshold=PASS_THRESHOLD] Passing score.
 * @returns {string} Markdown text without a trailing newline.
 */
function reportText(results, mode, threshold = PASS_THRESHOLD) {
  const { avg, passing, total } = summarizeResults(results, threshold);
  const out = [];
  out.push(`# compose_from_chunks eval — ${mode === 'real-llm' ? 'real LLM' : 'stub (retrieval only)'}`);
  out.push('');
  out.push(`Aggregate: avg **${avg}**, passing **${passing} / ${total}** (threshold ${threshold}).`);
  out.push('');
  out.push('| ID | Kind | Source | Struct | Cov | Retr | Render | Score | ms |');
  out.push('|---|---|---|---:|---:|---:|---:|---:|---:|');
  for (const r of results) {
    // Null/undefined cells (no source, skipped retrieval, deferred render) print as an em dash.
    out.push(`| ${r.id} | ${r.kind} | ${r.source ?? '—'} | ${r.structural} | ${r.coverage} | ${r.retrieval ?? '—'} | ${r.render ?? '—'} | **${r.score}** | ${r.elapsedMs} |`);
  }

  const withWarnings = results.filter((r) => r.warnings.length > 0);
  if (withWarnings.length > 0) {
    out.push('');
    out.push('## Warnings');
    for (const r of withWarnings) {
      out.push(`- **${r.id}**: ${r.warnings.join('; ')}`);
    }
  }
  return out.join('\n');
}

/**
 * Render the eval results as pretty-printed JSON.
 *
 * @param {object[]} results Rows produced by `evalIntent`.
 * @param {string} mode Run mode, copied into the report.
 * @param {number} [threshold=PASS_THRESHOLD] Passing score.
 * @returns {string} JSON with `mode`, `threshold`, `avg`, `passing`, `total`
 *   and `results`.
 */
function reportJSON(results, mode, threshold = PASS_THRESHOLD) {
  const { avg, passing, total } = summarizeResults(results, threshold);
  return JSON.stringify({
    mode,
    threshold,
    avg,
    passing,
    total,
    results,
  }, null, 2);
}
|
|
227
|
+
|
|
228
|
+
// ─────────────────────────────────────────────────────────────────
|
|
229
|
+
// Main
|
|
230
|
+
// ─────────────────────────────────────────────────────────────────
|
|
231
|
+
|
|
232
|
+
/**
 * Entry point: load the hold-out set, evaluate every intent sequentially,
 * emit the report (stdout, or a dated file under `docs/reports` with
 * `--report-file`), then exit 0 when the average score reaches
 * `PASS_THRESHOLD` and 1 otherwise.
 *
 * @returns {Promise<void>} Never resolves normally; the process exits.
 * @throws {Error} When the hold-out set yields no intents.
 */
async function main() {
  const intents = loadHoldOut();
  if (intents.length === 0) {
    // An empty run would average 0/0 = NaN and exit 1 as if the gate had
    // failed; surface it as a setup error (exit 2 via the handler below).
    throw new Error(`no intents loaded from ${HOLDOUT}`);
  }

  const llmAdapter = await buildLLMAdapter();
  const mode = FLAG_REAL_LLM ? 'real-llm' : 'stub';

  const results = [];
  for (const intent of intents) {
    // Progress goes to stderr so stdout stays a clean report.
    process.stderr.write(`▶ ${intent.id} ${intent.kind.padEnd(7)} ${intent.intent.slice(0, 50)}...\n`);
    results.push(await evalIntent(intent, llmAdapter));
  }

  const output = FLAG_JSON ? reportJSON(results, mode) : reportText(results, mode);

  if (FLAG_REPORT) {
    const date = new Date().toISOString().slice(0, 10);
    const dir = path.join(REPO_ROOT, 'docs/reports');
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(dir, { recursive: true });
    // Match the extension to the content: `--json --report-file` used to
    // write JSON into a `.md` file.
    const ext = FLAG_JSON ? 'json' : 'md';
    const file = path.join(dir, `eval-compose-from-chunks-${date}.${ext}`);
    fs.writeFileSync(file, output + '\n');
    console.error(`\nReport written to ${path.relative(REPO_ROOT, file)}`);
  } else {
    console.log(output);
  }

  // The gate uses the unrounded mean, so 79.6 fails even though the report
  // prints a rounded 80.
  const avg = results.reduce((s, r) => s + r.score, 0) / results.length;
  process.exit(avg >= PASS_THRESHOLD ? 0 : 1);
}

// Any unexpected failure is a runner error (exit 2), not a failed gate (exit 1).
main().catch((err) => {
  console.error('eval-compose-from-chunks failed:', err.message);
  process.exit(2);
});
|
package/scripts/eval-diff.mjs
CHANGED
|
@@ -28,8 +28,8 @@ import { mkdir, writeFile } from 'node:fs/promises';
|
|
|
28
28
|
import { join, dirname } from 'node:path';
|
|
29
29
|
import { fileURLToPath } from 'node:url';
|
|
30
30
|
|
|
31
|
-
import { generateUI } from '../../compose/
|
|
32
|
-
import { generateZettel } from '../../compose/
|
|
31
|
+
import { generateUI } from '../../compose/core/generator.js';
|
|
32
|
+
import { generateZettel } from '../../compose/strategies/zettel/generator-adapter.js';
|
|
33
33
|
import { runHarnessV2 } from '../../compose/evals/harness.mjs';
|
|
34
34
|
import { validateSemantics } from '../../validator/semantic/index.js';
|
|
35
35
|
|
package/scripts/eval-fix.mjs
CHANGED
|
@@ -50,7 +50,7 @@ const MAX_ITER = parseInt([...args].find(a => a.startsWith('--max-iter='))?.spli
|
|
|
50
50
|
|
|
51
51
|
// ── Load modules ──
|
|
52
52
|
|
|
53
|
-
const { generateUI } = await import('../../compose/
|
|
53
|
+
const { generateUI } = await import('../../compose/core/generator.js');
|
|
54
54
|
const { validateSchema } = await import('../../validator/validator.js');
|
|
55
55
|
const { getPattern, searchPatterns } = await import('../../retrieval/pattern-library.js');
|
|
56
56
|
const { createTicket, formatTicket, formatTicketList } = await import('../../../../.tickets/tickets.js');
|
|
@@ -23,10 +23,10 @@ import '../../../../scripts/load-env.mjs';
|
|
|
23
23
|
import {
|
|
24
24
|
refineFromIntent,
|
|
25
25
|
applyOps,
|
|
26
|
-
} from '../../compose/
|
|
27
|
-
import { mintStateId } from '../../compose/
|
|
28
|
-
import { createIssueAccumulator } from '../../compose/
|
|
29
|
-
import { composeFromPlan } from '../../compose/
|
|
26
|
+
} from '../../compose/strategies/zettel/chunk-refiner.js';
|
|
27
|
+
import { mintStateId } from '../../compose/strategies/zettel/state-cache.js';
|
|
28
|
+
import { createIssueAccumulator } from '../../compose/strategies/zettel/issue-reporter.js';
|
|
29
|
+
import { composeFromPlan } from '../../compose/strategies/zettel/chunk-composer.js';
|
|
30
30
|
import { listChunksByKind, getChunk } from '../../corpus/scripts/chunk-library.js';
|
|
31
31
|
import { createAdapter } from '../../compose/llm/llm-bridge.js';
|
|
32
32
|
|
package/scripts/generate.mjs
CHANGED
|
@@ -34,7 +34,7 @@ const mode = flags.has('--thinking') ? 'thinking' : flags.has('--pro') ? 'pro' :
|
|
|
34
34
|
const JSON_OUT = flags.has('--json');
|
|
35
35
|
const HTML_OUT = flags.has('--html');
|
|
36
36
|
|
|
37
|
-
const { generateUI, generateUIStream } = await import('../../compose/
|
|
37
|
+
const { generateUI, generateUIStream } = await import('../../compose/core/generator.js');
|
|
38
38
|
|
|
39
39
|
console.error(`Mode: ${mode} | Intent: "${intent}"`);
|
|
40
40
|
console.error('─'.repeat(50));
|
|
@@ -138,7 +138,7 @@ function componentsToHTML(comps) {
|
|
|
138
138
|
Stat: 'stat-ui', Table: 'table-ui', Chart: 'chart-ui',
|
|
139
139
|
List: 'list-ui', Pagination: 'pagination-ui',
|
|
140
140
|
// Navigation
|
|
141
|
-
Tabs: 'tabs-ui', Tab: 'tab-ui', Nav: 'nav-
|
|
141
|
+
Tabs: 'tabs-ui', Tab: 'tab-ui', Nav: 'nav-ui',
|
|
142
142
|
Breadcrumb: 'breadcrumb-ui', SegmentedControl: 'segmented-ui', Segment: 'segment-ui',
|
|
143
143
|
// Overlay
|
|
144
144
|
Modal: 'modal-ui', Drawer: 'drawer-ui', Popover: 'popover-ui',
|
|
@@ -170,7 +170,7 @@ function componentsToHTML(comps) {
|
|
|
170
170
|
return `${indent}<${tag}${attrStr} nomargin>${c.textContent || ''}</${tag}>`;
|
|
171
171
|
}
|
|
172
172
|
|
|
173
|
-
const tag = TAG_MAP[c.component] || c.component.toLowerCase() + '-
|
|
173
|
+
const tag = TAG_MAP[c.component] || c.component.toLowerCase() + '-ui';
|
|
174
174
|
const skip = new Set(['id', 'component', 'children', 'textContent']);
|
|
175
175
|
const attrs = Object.entries(c)
|
|
176
176
|
.filter(([k]) => !skip.has(k))
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import '../../../../scripts/load-env.mjs';
|
|
16
|
-
import { generateUI } from '../../compose/
|
|
16
|
+
import { generateUI } from '../../compose/core/generator.js';
|
|
17
17
|
import { validateSchema } from '../../validator/validator.js';
|
|
18
18
|
|
|
19
19
|
// ── Test scenarios ──────────────────────────────────────────────────
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
* - shape invariants hold on both paths
|
|
9
9
|
*/
|
|
10
10
|
import '../../../../scripts/load-env.mjs';
|
|
11
|
-
import { pick, listEngines, ENGINES } from '../../compose/
|
|
12
|
-
import { generateUI } from '../../compose/
|
|
11
|
+
import { pick, listEngines, ENGINES } from '../../compose/strategies/registry.js';
|
|
12
|
+
import { generateUI } from '../../compose/core/generator.js';
|
|
13
13
|
|
|
14
14
|
console.log('[smoke] engines registered:', listEngines().join(', '));
|
|
15
15
|
|
package/scripts/smoke-issues.mjs
CHANGED
|
@@ -8,8 +8,8 @@ import {
|
|
|
8
8
|
attachTrace,
|
|
9
9
|
createIssueAccumulator,
|
|
10
10
|
AUTO_FIRE_POLICY,
|
|
11
|
-
} from '../../compose/
|
|
12
|
-
import { StateCache } from '../../compose/
|
|
11
|
+
} from '../../compose/strategies/zettel/issue-reporter.js';
|
|
12
|
+
import { StateCache } from '../../compose/strategies/zettel/state-cache.js';
|
|
13
13
|
import { mkdtemp, readFile, rm, stat } from 'node:fs/promises';
|
|
14
14
|
import { tmpdir } from 'node:os';
|
|
15
15
|
import { join } from 'node:path';
|
package/scripts/smoke-refine.mjs
CHANGED
|
@@ -7,13 +7,13 @@ import {
|
|
|
7
7
|
applyOps,
|
|
8
8
|
validateOps,
|
|
9
9
|
opsToA2UI,
|
|
10
|
-
} from '../../compose/
|
|
10
|
+
} from '../../compose/strategies/zettel/chunk-refiner.js';
|
|
11
11
|
import {
|
|
12
12
|
StateCache,
|
|
13
13
|
mintStateId,
|
|
14
14
|
mintNextStateId,
|
|
15
|
-
} from '../../compose/
|
|
16
|
-
import { createIssueAccumulator } from '../../compose/
|
|
15
|
+
} from '../../compose/strategies/zettel/state-cache.js';
|
|
16
|
+
import { createIssueAccumulator } from '../../compose/strategies/zettel/issue-reporter.js';
|
|
17
17
|
import {
|
|
18
18
|
getChunk,
|
|
19
19
|
listChunksByKind,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// Smoke test for OD-5 plugin engine registry.
|
|
2
|
-
import { registerEngine, unregisterEngine, pick, listEngines, ENGINES } from '../../compose/
|
|
2
|
+
import { registerEngine, unregisterEngine, pick, listEngines, ENGINES } from '../../compose/strategies/registry.js';
|
|
3
3
|
|
|
4
4
|
let pass = 0, fail = 0;
|
|
5
5
|
const t = (label, ok, detail = '') => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Direct test of zettel generator with LLM adapter — no HTTP server needed.
|
|
2
2
|
import '../../../../scripts/load-env.mjs';
|
|
3
|
-
import { generateZettel, clearSession, getTurns } from '../../compose/
|
|
3
|
+
import { generateZettel, clearSession, getTurns } from '../../compose/strategies/zettel/generator-adapter.js';
|
|
4
4
|
|
|
5
5
|
// Minimal Anthropic adapter inline (mirrors what server.js makes).
|
|
6
6
|
function makeAdapter() {
|
package/scripts/test-a2ui.mjs
CHANGED
|
@@ -76,7 +76,7 @@ try {
|
|
|
76
76
|
|
|
77
77
|
console.log('\n2. Pattern library');
|
|
78
78
|
|
|
79
|
-
const { searchBlocks, listPatterns, lookupDomain } = await import('../../compose/
|
|
79
|
+
const { searchBlocks, listPatterns, lookupDomain } = await import('../../compose/core/reference.js');
|
|
80
80
|
|
|
81
81
|
const allPatterns = listPatterns();
|
|
82
82
|
const withTemplates = allPatterns.filter(p => p.template && Array.isArray(p.template));
|
|
@@ -176,7 +176,7 @@ for (const intent of passTests) {
|
|
|
176
176
|
|
|
177
177
|
console.log('\n4. Instant mode generation');
|
|
178
178
|
|
|
179
|
-
const { generateUI } = await import('../../compose/
|
|
179
|
+
const { generateUI } = await import('../../compose/core/generator.js');
|
|
180
180
|
|
|
181
181
|
const instantTests = [
|
|
182
182
|
{ intent: 'login form', minComponents: 3 },
|
|
@@ -270,9 +270,9 @@ try {
|
|
|
270
270
|
console.log('\n7. Multi-turn refinement (Phase A)');
|
|
271
271
|
|
|
272
272
|
try {
|
|
273
|
-
const { StateCache, mintStateId, mintNextStateId } = await import('../../compose/
|
|
274
|
-
const { createIssueAccumulator } = await import('../../compose/
|
|
275
|
-
const { refineFromIntent, applyOps, opsToA2UI, validateOps } = await import('../../compose/
|
|
273
|
+
const { StateCache, mintStateId, mintNextStateId } = await import('../../compose/strategies/zettel/state-cache.js');
|
|
274
|
+
const { createIssueAccumulator } = await import('../../compose/strategies/zettel/issue-reporter.js');
|
|
275
|
+
const { refineFromIntent, applyOps, opsToA2UI, validateOps } = await import('../../compose/strategies/zettel/chunk-refiner.js');
|
|
276
276
|
const { listChunksByKind } = await import('../../corpus/scripts/chunk-library.js');
|
|
277
277
|
|
|
278
278
|
// 7a. State cache + state-id chain
|
package/scripts/test-chunks.mjs
CHANGED
|
@@ -94,8 +94,8 @@ assert('lookup_chunk(article) finds dashboard-admin-page',
|
|
|
94
94
|
articleChunks.some((c) => c.name === 'dashboard-admin-page'));
|
|
95
95
|
|
|
96
96
|
// ── chunk-composer + chunk-synthesizer ──
|
|
97
|
-
import { composeFromPlan, validatePlan } from '../../compose/
|
|
98
|
-
import { composeFromIntent } from '../../compose/
|
|
97
|
+
import { composeFromPlan, validatePlan } from '../../compose/strategies/zettel/chunk-composer.js';
|
|
98
|
+
import { composeFromIntent } from '../../compose/strategies/zettel/chunk-synthesizer.js';
|
|
99
99
|
|
|
100
100
|
console.log('\n── validatePlan ──');
|
|
101
101
|
|
package/scripts/test-evals.mjs
CHANGED
|
@@ -42,7 +42,7 @@ if (ONLY) evalCases = evalCases.filter(e => String(e.id) === ONLY);
|
|
|
42
42
|
|
|
43
43
|
// ── Load generator ──
|
|
44
44
|
|
|
45
|
-
const { generateUI } = await import('../../compose/
|
|
45
|
+
const { generateUI } = await import('../../compose/core/generator.js');
|
|
46
46
|
const { validateSchema } = await import('../../validator/validator.js');
|
|
47
47
|
|
|
48
48
|
// ── Scoring functions ──
|
|
@@ -22,7 +22,7 @@ const PRO = args.has('--pro');
|
|
|
22
22
|
const OPEN = args.has('--open');
|
|
23
23
|
const OUTPUT = join(__dirname, '..', 'visual-validation.html');
|
|
24
24
|
|
|
25
|
-
const { generateUI } = await import('../../compose/
|
|
25
|
+
const { generateUI } = await import('../../compose/core/generator.js');
|
|
26
26
|
const { validateSchema } = await import('../../validator/validator.js');
|
|
27
27
|
|
|
28
28
|
const TEST_INTENTS = [
|
package/server.js
CHANGED
|
@@ -26,7 +26,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
|
26
26
|
import { z } from 'zod';
|
|
27
27
|
|
|
28
28
|
// ── Import from a2ui ──
|
|
29
|
-
import { generateUI } from '../compose/
|
|
29
|
+
import { generateUI } from '../compose/core/generator.js';
|
|
30
30
|
import { validateSchema } from '../validator/validator.js';
|
|
31
31
|
import { validateMessages as validateCatalogMessages } from '../validator/catalog-validator.js';
|
|
32
32
|
import {
|
|
@@ -52,11 +52,11 @@ import {
|
|
|
52
52
|
getAllCompositions as getAllZettelCompositions,
|
|
53
53
|
searchAll as searchZettelAll,
|
|
54
54
|
getGraph as getZettelGraph,
|
|
55
|
-
} from '../compose/
|
|
55
|
+
} from '../compose/strategies/zettel/fragment-library.js';
|
|
56
56
|
import {
|
|
57
57
|
resolveComposition as resolveZettelComposition,
|
|
58
58
|
templateToMessages as zettelTemplateToMessages,
|
|
59
|
-
} from '../compose/
|
|
59
|
+
} from '../compose/strategies/zettel/composer.js';
|
|
60
60
|
// Zettel bootstrap is still needed for get_fragment/resolve_composition tools;
|
|
61
61
|
// the generate_ui tool now dispatches through the unified registry in gen-ui.
|
|
62
62
|
|
|
@@ -312,7 +312,7 @@ server.tool(
|
|
|
312
312
|
// ── Pattern & Feedback Tools ──
|
|
313
313
|
|
|
314
314
|
import { registerPattern } from '../retrieval/pattern-library.js';
|
|
315
|
-
import { FeedbackCollector } from '../retrieval/feedback.js';
|
|
315
|
+
import { FeedbackCollector } from '../retrieval/feedback/feedback.js';
|
|
316
316
|
|
|
317
317
|
const feedbackCollector = new FeedbackCollector();
|
|
318
318
|
|
|
@@ -378,7 +378,7 @@ server.tool(
|
|
|
378
378
|
|
|
379
379
|
// ── Quality metrics tool ──
|
|
380
380
|
|
|
381
|
-
import { feedbackStore } from '../retrieval/feedback-store.js';
|
|
381
|
+
import { feedbackStore } from '../retrieval/feedback/feedback-store.js';
|
|
382
382
|
|
|
383
383
|
server.tool(
|
|
384
384
|
'get_quality_metrics',
|
|
@@ -664,8 +664,8 @@ Pair with \`get_chunk\` to fetch full records for any of the returned names.`,
|
|
|
664
664
|
// Spec: docs/specs/genui-chunk-marker.md (§ "Harvester contract", future:
|
|
665
665
|
// composition reasoning). Plan: docs/plans/training-pipeline-chunk-harvest-2026-04-27.md.
|
|
666
666
|
|
|
667
|
-
import { composeFromIntent as composeFromChunksImpl } from '../compose/
|
|
668
|
-
import { composeFromPlan, validatePlan } from '../compose/
|
|
667
|
+
import { composeFromIntent as composeFromChunksImpl } from '../compose/strategies/zettel/chunk-synthesizer.js';
|
|
668
|
+
import { composeFromPlan, validatePlan } from '../compose/strategies/zettel/chunk-composer.js';
|
|
669
669
|
import { createAdapter as createLLMAdapter } from '../compose/llm/llm-bridge.js';
|
|
670
670
|
|
|
671
671
|
// ── Multi-turn architecture (Phase A) ────────────────────────────────
|
|
@@ -676,18 +676,18 @@ import {
|
|
|
676
676
|
getStateCache,
|
|
677
677
|
mintStateId,
|
|
678
678
|
mintNextStateId,
|
|
679
|
-
} from '../compose/
|
|
679
|
+
} from '../compose/strategies/zettel/state-cache.js';
|
|
680
680
|
import {
|
|
681
681
|
reportIssue as reportIssueImpl,
|
|
682
682
|
autoReport,
|
|
683
683
|
createIssueAccumulator,
|
|
684
|
-
} from '../compose/
|
|
684
|
+
} from '../compose/strategies/zettel/issue-reporter.js';
|
|
685
685
|
import {
|
|
686
686
|
refineFromIntent,
|
|
687
687
|
applyOps,
|
|
688
688
|
opsToA2UI,
|
|
689
689
|
validateOps,
|
|
690
|
-
} from '../compose/
|
|
690
|
+
} from '../compose/strategies/zettel/chunk-refiner.js';
|
|
691
691
|
|
|
692
692
|
const stateCache = getStateCache();
|
|
693
693
|
|