@adia-ai/a2ui-mcp 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -11,6 +11,18 @@ zettel strategies.
|
|
|
11
11
|
|
|
12
12
|
_No pending changes._
|
|
13
13
|
|
|
14
|
+
## [0.4.8] - 2026-05-12
|
|
15
|
+
|
|
16
|
+
### Changed — eval threshold rebaseline + smoke-script alignment (§87, v0.4.8)
|
|
17
|
+
|
|
18
|
+
Companion to the §87 zettel `ensureBooted()` race fix in `@adia-ai/a2ui-compose` (`[Unreleased]`).
|
|
19
|
+
|
|
20
|
+
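- **`mcp/scripts/eval-diff.mjs`** — thresholds rebaselined to the honest post-§72 floor (~5%; they were still pinned to the stale v0.4.6 baseline). The eval coverage gate now reflects the chunks-only retrieval shape left after the patterns retirement. Also adds `free-form` as an `--engine` value (included in `all`); its results are written to `free-form.json`.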
|
|
21
|
+
- **`mcp/scripts/smoke-engine-registry.mjs`** — retrieval-quality probes updated to match the post-§72 + §88 retrieval surface (composition-match strategy on canonical intents).
|
|
22
|
+
- **`mcp/scripts/smoke-register-engine.mjs`** — engine-registry coverage adjusted; still 11/11.
|
|
23
|
+
|
|
24
|
+
See the `[Unreleased]` section of the root [CHANGELOG.md](../../../CHANGELOG.md) for the cross-cutting arc.
|
|
25
|
+
|
|
14
26
|
## [0.4.7] - 2026-05-12
|
|
15
27
|
|
|
16
28
|
### Changed — smoke + test scripts aligned to post-§72 retrieval surface
|
package/package.json
CHANGED
package/scripts/eval-diff.mjs
CHANGED
|
@@ -71,14 +71,15 @@ if (gateMode === 'combined' && !semanticEnabled) {
|
|
|
71
71
|
process.exit(2);
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
if (!['mcp', 'zettel', 'chunk-zettel', 'all'].includes(engine)) {
|
|
75
|
-
console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | all (got: ${engine})`);
|
|
74
|
+
if (!['mcp', 'zettel', 'chunk-zettel', 'free-form', 'all'].includes(engine)) {
|
|
75
|
+
console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | free-form | all (got: ${engine})`);
|
|
76
76
|
process.exit(2);
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
const runMcp = engine === 'mcp' || engine === 'all';
|
|
80
80
|
const runZettel = engine === 'zettel' || engine === 'all';
|
|
81
81
|
const runChunkZettel = engine === 'chunk-zettel' || engine === 'all';
|
|
82
|
+
const runFreeForm = engine === 'free-form' || engine === 'all';
|
|
82
83
|
|
|
83
84
|
// ── MCP adapter: use the top-level patternName exposed by generateInstant ──
|
|
84
85
|
// Shadow-mode capture: when --semantic is set, remember the emitted messages
|
|
@@ -119,10 +120,23 @@ async function generateChunkZettelCapture({ intent, mode }) {
|
|
|
119
120
|
return result;
|
|
120
121
|
}
|
|
121
122
|
|
|
123
|
+
async function generateFreeFormCapture({ intent }) {
|
|
124
|
+
// Free-form requires an LLM adapter. generateUI auto-resolves one via
|
|
125
|
+
// createAdapter — same env-resolution path as monolithic-pro. Without
|
|
126
|
+
// a key set, the strategy returns `free-form-no-llm` + empty messages
|
|
127
|
+
// (coverage 0%); that's the honest signal.
|
|
128
|
+
const result = await generateUI({ intent, engine: 'free-form' });
|
|
129
|
+
if (semanticEnabled && Array.isArray(result.messages) && result.messages.length > 0) {
|
|
130
|
+
capturedMessages.set(`free-form:${intent}`, result.messages);
|
|
131
|
+
}
|
|
132
|
+
return result;
|
|
133
|
+
}
|
|
134
|
+
|
|
122
135
|
// ── Run ──
|
|
123
136
|
let mcp = null;
|
|
124
137
|
let zettel = null;
|
|
125
138
|
let chunkZettel = null;
|
|
139
|
+
let freeForm = null;
|
|
126
140
|
|
|
127
141
|
if (runMcp) {
|
|
128
142
|
console.error(`[eval-diff] running mcp (monolithic) harness…`);
|
|
@@ -160,6 +174,18 @@ if (runChunkZettel) {
|
|
|
160
174
|
console.error(` coverage=${chunkZettel.coverage}% emitted=${chunkZettel.emitted}/${chunkZettel.total} avgScore=${chunkZettel.avgScoreWhenEmitted}`);
|
|
161
175
|
}
|
|
162
176
|
|
|
177
|
+
if (runFreeForm) {
|
|
178
|
+
console.error(`[eval-diff] running free-form (LLM-driven chunk-vocabulary composer) harness…`);
|
|
179
|
+
freeForm = await runHarnessV2({
|
|
180
|
+
generate: generateFreeFormCapture,
|
|
181
|
+
domain,
|
|
182
|
+
limit,
|
|
183
|
+
mode: 'instant',
|
|
184
|
+
label: 'free-form',
|
|
185
|
+
});
|
|
186
|
+
console.error(` coverage=${freeForm.coverage}% emitted=${freeForm.emitted}/${freeForm.total} avgScore=${freeForm.avgScoreWhenEmitted}`);
|
|
187
|
+
}
|
|
188
|
+
|
|
163
189
|
// ── Shadow-mode semantic validation (Phase 1) ──
|
|
164
190
|
// Opt-in via --semantic. Annotates per-intent rows + aggregates with
|
|
165
191
|
// semanticScore/verdict/combinedScore. DOES NOT affect row.pass, passRate,
|
|
@@ -266,6 +292,7 @@ await mkdir(outDir, { recursive: true });
|
|
|
266
292
|
if (mcp) await writeFile(join(outDir, 'mcp.json'), JSON.stringify(mcp, null, 2));
|
|
267
293
|
if (zettel) await writeFile(join(outDir, 'zettel.json'), JSON.stringify(zettel, null, 2));
|
|
268
294
|
if (chunkZettel) await writeFile(join(outDir, 'chunk-zettel.json'), JSON.stringify(chunkZettel, null, 2));
|
|
295
|
+
if (freeForm) await writeFile(join(outDir, 'free-form.json'), JSON.stringify(freeForm, null, 2));
|
|
269
296
|
|
|
270
297
|
// ── Build diff.md ──
|
|
271
298
|
function fmt(v) { return v == null ? '—' : String(v); }
|
|
@@ -278,10 +305,10 @@ function winner(a, b) {
|
|
|
278
305
|
}
|
|
279
306
|
|
|
280
307
|
let md = '';
|
|
281
|
-
md += `# Engine Eval ${[mcp, zettel, chunkZettel].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
|
|
308
|
+
md += `# Engine Eval ${[mcp, zettel, chunkZettel, freeForm].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
|
|
282
309
|
md += `- Run: \`${stamp}\`\n`;
|
|
283
310
|
md += `- Engine(s): ${engine}\n`;
|
|
284
|
-
md += `- Intents: ${(mcp || zettel || chunkZettel).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
|
|
311
|
+
md += `- Intents: ${(mcp || zettel || chunkZettel || freeForm).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
|
|
285
312
|
md += `- Mode: instant\n`;
|
|
286
313
|
if (semanticEnabled) {
|
|
287
314
|
md += `- Semantic: ${gateMode === 'combined' ? `gating (threshold=${gateThreshold})` : 'shadow'}\n`;
|
|
@@ -303,8 +330,8 @@ if (mcp && zettel) {
|
|
|
303
330
|
}
|
|
304
331
|
md += `| retrieval MRR | ${fmt(mcp.retrievalMRR)} | ${fmt(zettel.retrievalMRR)} |\n\n`;
|
|
305
332
|
} else {
|
|
306
|
-
const e = mcp || zettel || chunkZettel;
|
|
307
|
-
const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
|
|
333
|
+
const e = mcp || zettel || chunkZettel || freeForm;
|
|
334
|
+
const label = mcp ? 'mcp' : zettel ? 'zettel' : chunkZettel ? 'chunk-zettel' : 'free-form';
|
|
308
335
|
md += `| metric | ${label} |\n|---|---:|\n`;
|
|
309
336
|
md += `| coverage % | ${e.coverage} |\n`;
|
|
310
337
|
md += `| emitted | ${e.emitted}/${e.total} |\n`;
|
|
@@ -352,8 +379,8 @@ if (mcp && zettel) {
|
|
|
352
379
|
console.error(` ties: ${counts.tie || 0}`);
|
|
353
380
|
console.error(` both missed: ${counts['both-miss'] || 0}`);
|
|
354
381
|
} else {
|
|
355
|
-
const e = mcp || zettel || chunkZettel;
|
|
356
|
-
const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
|
|
382
|
+
const e = mcp || zettel || chunkZettel || freeForm;
|
|
383
|
+
const label = mcp ? 'mcp' : zettel ? 'zettel' : chunkZettel ? 'chunk-zettel' : 'free-form';
|
|
357
384
|
md += `## Strategy breakdown\n\n`;
|
|
358
385
|
md += `**${label}**: ` + Object.entries(e.strategyBreakdown).map(([k, v]) => `${k}=${v}`).join(', ') + `\n\n`;
|
|
359
386
|
md += `## Per-intent\n\n`;
|
|
@@ -17,9 +17,11 @@ console.log('[smoke] engines registered:', listEngines().join(', '));
|
|
|
17
17
|
const monoInstant = pick({ engine: 'monolithic', mode: 'instant' });
|
|
18
18
|
const monoPro = pick({ engine: 'monolithic', mode: 'pro' });
|
|
19
19
|
const zettel = pick({ engine: 'zettel' });
|
|
20
|
+
const freeForm = pick({ engine: 'free-form' });
|
|
20
21
|
console.log('[smoke] pick monolithic/instant:', monoInstant === ENGINES['monolithic-instant'] ? 'ok' : 'FAIL');
|
|
21
22
|
console.log('[smoke] pick monolithic/pro: ', monoPro === ENGINES['monolithic-pro'] ? 'ok' : 'FAIL');
|
|
22
23
|
console.log('[smoke] pick zettel: ', zettel === ENGINES.zettel ? 'ok' : 'FAIL');
|
|
24
|
+
console.log('[smoke] pick free-form: ', freeForm === ENGINES['free-form'] ? 'ok' : 'FAIL');
|
|
23
25
|
console.log('[smoke] pick unknown → fallback:', pick({ engine: 'xxx', mode: 'xxx' }) === ENGINES['monolithic-instant'] ? 'ok' : 'FAIL');
|
|
24
26
|
|
|
25
27
|
const intent = 'login form with email and password';
|
|
@@ -34,10 +36,19 @@ const t2 = Date.now();
|
|
|
34
36
|
const r2 = await generateUI({ intent, engine: 'zettel' });
|
|
35
37
|
console.log(`[zettel] ${Date.now() - t2}ms msgs=${r2.messages?.length} valid=${r2.validation?.valid} score=${r2.validation?.score} strategy=${r2.strategy} engine=${r2.engine}`);
|
|
36
38
|
|
|
39
|
+
// Free-form (LLM-driven; runs against the env-resolved adapter via
|
|
40
|
+
// generateUI). With an LLM key → `free-form-composed`; without →
|
|
41
|
+
// `free-form-no-llm`. Smoke verifies the dispatch + shape, not the
|
|
42
|
+
// strategy outcome (which depends on env).
|
|
43
|
+
const t3 = Date.now();
|
|
44
|
+
const r3 = await generateUI({ intent, engine: 'free-form' });
|
|
45
|
+
console.log(`[free-form] ${Date.now() - t3}ms msgs=${r3.messages?.length} strategy=${r3.strategy} engine=${r3.engine}`);
|
|
46
|
+
|
|
37
47
|
// Shape invariants
|
|
38
48
|
const ok =
|
|
39
49
|
Array.isArray(r1.messages) && r1.executionId && r1.validation &&
|
|
40
|
-
Array.isArray(r2.messages) && r2.validation
|
|
50
|
+
Array.isArray(r2.messages) && r2.validation &&
|
|
51
|
+
Array.isArray(r3.messages) && r3.validation && r3.engine === 'free-form';
|
|
41
52
|
console.log(`\n[smoke] shape invariants: ${ok ? 'ok' : 'FAIL'}`);
|
|
42
53
|
|
|
43
54
|
// Retrieval-quality probe — for each canonical intent, the generated
|
|
@@ -7,8 +7,9 @@ const t = (label, ok, detail = '') => {
|
|
|
7
7
|
else { console.log(` ✗ ${label} ${detail}`); fail++; }
|
|
8
8
|
};
|
|
9
9
|
|
|
10
|
-
// Baseline
|
|
11
|
-
|
|
10
|
+
// Baseline — 6 built-ins post-§88 (monolithic-instant, monolithic-pro,
|
|
11
|
+
// monolithic-thinking, zettel, chunk-zettel, free-form).
|
|
12
|
+
t('six built-ins registered', listEngines().length === 6);
|
|
12
13
|
|
|
13
14
|
// Happy path
|
|
14
15
|
let customCalled = null;
|