@adia-ai/a2ui-mcp 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -11,6 +11,18 @@ zettel strategies.
11
11
 
12
12
  _No pending changes._
13
13
 
14
+ ## [0.4.8] - 2026-05-12
15
+
16
+ ### Changed — eval threshold rebaseline + smoke-script alignment (§87, v0.4.8)
17
+
18
+ Companion to `@adia-ai/a2ui-compose@[Unreleased]` §87 zettel `ensureBooted()` race fix.
19
+
20
+ - **`mcp/scripts/eval-diff.mjs`** — thresholds rebaselined to the honest post-§72 floor (~5%; was holding a stale v0.4.6 baseline). Eval coverage gate now reflects the chunks-only retrieval shape after the patterns retirement.
21
+ - **`mcp/scripts/smoke-engine-registry.mjs`** — retrieval-quality probes updated to match the post-§72 + §88 retrieval surface (composition-match strategy on canonical intents).
22
+ - **`mcp/scripts/smoke-register-engine.mjs`** — engine-registry coverage adjusted; still 11/11.
23
+
24
+ See root [CHANGELOG.md `[Unreleased]`](../../../CHANGELOG.md) for the cross-cutting arc.
25
+
14
26
  ## [0.4.7] - 2026-05-12
15
27
 
16
28
  ### Changed — smoke + test scripts aligned to post-§72 retrieval surface
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adia-ai/a2ui-mcp",
3
- "version": "0.4.7",
3
+ "version": "0.4.8",
4
4
  "description": "AdiaUI A2UI MCP server. Exposes the compose engine over MCP with an engine selector for monolithic + zettel strategies.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -71,14 +71,15 @@ if (gateMode === 'combined' && !semanticEnabled) {
71
71
  process.exit(2);
72
72
  }
73
73
 
74
- if (!['mcp', 'zettel', 'chunk-zettel', 'all'].includes(engine)) {
75
- console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | all (got: ${engine})`);
74
+ if (!['mcp', 'zettel', 'chunk-zettel', 'free-form', 'all'].includes(engine)) {
75
+ console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | free-form | all (got: ${engine})`);
76
76
  process.exit(2);
77
77
  }
78
78
 
79
79
  const runMcp = engine === 'mcp' || engine === 'all';
80
80
  const runZettel = engine === 'zettel' || engine === 'all';
81
81
  const runChunkZettel = engine === 'chunk-zettel' || engine === 'all';
82
+ const runFreeForm = engine === 'free-form' || engine === 'all';
82
83
 
83
84
  // ── MCP adapter: use the top-level patternName exposed by generateInstant ──
84
85
  // Shadow-mode capture: when --semantic is set, remember the emitted messages
@@ -119,10 +120,23 @@ async function generateChunkZettelCapture({ intent, mode }) {
119
120
  return result;
120
121
  }
121
122
 
123
+ async function generateFreeFormCapture({ intent }) {
124
+ // Free-form requires an LLM adapter. generateUI auto-resolves one via
125
+ // createAdapter — same env-resolution path as monolithic-pro. Without
126
+ // a key set, the strategy returns `free-form-no-llm` + empty messages
127
+ // (coverage 0%); that's the honest signal.
128
+ const result = await generateUI({ intent, engine: 'free-form' });
129
+ if (semanticEnabled && Array.isArray(result.messages) && result.messages.length > 0) {
130
+ capturedMessages.set(`free-form:${intent}`, result.messages);
131
+ }
132
+ return result;
133
+ }
134
+
122
135
  // ── Run ──
123
136
  let mcp = null;
124
137
  let zettel = null;
125
138
  let chunkZettel = null;
139
+ let freeForm = null;
126
140
 
127
141
  if (runMcp) {
128
142
  console.error(`[eval-diff] running mcp (monolithic) harness…`);
@@ -160,6 +174,18 @@ if (runChunkZettel) {
160
174
  console.error(` coverage=${chunkZettel.coverage}% emitted=${chunkZettel.emitted}/${chunkZettel.total} avgScore=${chunkZettel.avgScoreWhenEmitted}`);
161
175
  }
162
176
 
177
+ if (runFreeForm) {
178
+ console.error(`[eval-diff] running free-form (LLM-driven chunk-vocabulary composer) harness…`);
179
+ freeForm = await runHarnessV2({
180
+ generate: generateFreeFormCapture,
181
+ domain,
182
+ limit,
183
+ mode: 'instant',
184
+ label: 'free-form',
185
+ });
186
+ console.error(` coverage=${freeForm.coverage}% emitted=${freeForm.emitted}/${freeForm.total} avgScore=${freeForm.avgScoreWhenEmitted}`);
187
+ }
188
+
163
189
  // ── Shadow-mode semantic validation (Phase 1) ──
164
190
  // Opt-in via --semantic. Annotates per-intent rows + aggregates with
165
191
  // semanticScore/verdict/combinedScore. DOES NOT affect row.pass, passRate,
@@ -266,6 +292,7 @@ await mkdir(outDir, { recursive: true });
266
292
  if (mcp) await writeFile(join(outDir, 'mcp.json'), JSON.stringify(mcp, null, 2));
267
293
  if (zettel) await writeFile(join(outDir, 'zettel.json'), JSON.stringify(zettel, null, 2));
268
294
  if (chunkZettel) await writeFile(join(outDir, 'chunk-zettel.json'), JSON.stringify(chunkZettel, null, 2));
295
+ if (freeForm) await writeFile(join(outDir, 'free-form.json'), JSON.stringify(freeForm, null, 2));
269
296
 
270
297
  // ── Build diff.md ──
271
298
  function fmt(v) { return v == null ? '—' : String(v); }
@@ -278,10 +305,10 @@ function winner(a, b) {
278
305
  }
279
306
 
280
307
  let md = '';
281
- md += `# Engine Eval ${[mcp, zettel, chunkZettel].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
308
+ md += `# Engine Eval ${[mcp, zettel, chunkZettel, freeForm].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
282
309
  md += `- Run: \`${stamp}\`\n`;
283
310
  md += `- Engine(s): ${engine}\n`;
284
- md += `- Intents: ${(mcp || zettel || chunkZettel).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
311
+ md += `- Intents: ${(mcp || zettel || chunkZettel || freeForm).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
285
312
  md += `- Mode: instant\n`;
286
313
  if (semanticEnabled) {
287
314
  md += `- Semantic: ${gateMode === 'combined' ? `gating (threshold=${gateThreshold})` : 'shadow'}\n`;
@@ -303,8 +330,8 @@ if (mcp && zettel) {
303
330
  }
304
331
  md += `| retrieval MRR | ${fmt(mcp.retrievalMRR)} | ${fmt(zettel.retrievalMRR)} |\n\n`;
305
332
  } else {
306
- const e = mcp || zettel || chunkZettel;
307
- const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
333
+ const e = mcp || zettel || chunkZettel || freeForm;
334
+ const label = mcp ? 'mcp' : zettel ? 'zettel' : chunkZettel ? 'chunk-zettel' : 'free-form';
308
335
  md += `| metric | ${label} |\n|---|---:|\n`;
309
336
  md += `| coverage % | ${e.coverage} |\n`;
310
337
  md += `| emitted | ${e.emitted}/${e.total} |\n`;
@@ -352,8 +379,8 @@ if (mcp && zettel) {
352
379
  console.error(` ties: ${counts.tie || 0}`);
353
380
  console.error(` both missed: ${counts['both-miss'] || 0}`);
354
381
  } else {
355
- const e = mcp || zettel || chunkZettel;
356
- const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
382
+ const e = mcp || zettel || chunkZettel || freeForm;
383
+ const label = mcp ? 'mcp' : zettel ? 'zettel' : chunkZettel ? 'chunk-zettel' : 'free-form';
357
384
  md += `## Strategy breakdown\n\n`;
358
385
  md += `**${label}**: ` + Object.entries(e.strategyBreakdown).map(([k, v]) => `${k}=${v}`).join(', ') + `\n\n`;
359
386
  md += `## Per-intent\n\n`;
@@ -17,9 +17,11 @@ console.log('[smoke] engines registered:', listEngines().join(', '));
17
17
  const monoInstant = pick({ engine: 'monolithic', mode: 'instant' });
18
18
  const monoPro = pick({ engine: 'monolithic', mode: 'pro' });
19
19
  const zettel = pick({ engine: 'zettel' });
20
+ const freeForm = pick({ engine: 'free-form' });
20
21
  console.log('[smoke] pick monolithic/instant:', monoInstant === ENGINES['monolithic-instant'] ? 'ok' : 'FAIL');
21
22
  console.log('[smoke] pick monolithic/pro: ', monoPro === ENGINES['monolithic-pro'] ? 'ok' : 'FAIL');
22
23
  console.log('[smoke] pick zettel: ', zettel === ENGINES.zettel ? 'ok' : 'FAIL');
24
+ console.log('[smoke] pick free-form: ', freeForm === ENGINES['free-form'] ? 'ok' : 'FAIL');
23
25
  console.log('[smoke] pick unknown → fallback:', pick({ engine: 'xxx', mode: 'xxx' }) === ENGINES['monolithic-instant'] ? 'ok' : 'FAIL');
24
26
 
25
27
  const intent = 'login form with email and password';
@@ -34,10 +36,19 @@ const t2 = Date.now();
34
36
  const r2 = await generateUI({ intent, engine: 'zettel' });
35
37
  console.log(`[zettel] ${Date.now() - t2}ms msgs=${r2.messages?.length} valid=${r2.validation?.valid} score=${r2.validation?.score} strategy=${r2.strategy} engine=${r2.engine}`);
36
38
 
39
+ // Free-form (LLM-driven; runs against the env-resolved adapter via
40
+ // generateUI). With an LLM key → `free-form-composed`; without →
41
+ // `free-form-no-llm`. Smoke verifies the dispatch + shape, not the
42
+ // strategy outcome (which depends on env).
43
+ const t3 = Date.now();
44
+ const r3 = await generateUI({ intent, engine: 'free-form' });
45
+ console.log(`[free-form] ${Date.now() - t3}ms msgs=${r3.messages?.length} strategy=${r3.strategy} engine=${r3.engine}`);
46
+
37
47
  // Shape invariants
38
48
  const ok =
39
49
  Array.isArray(r1.messages) && r1.executionId && r1.validation &&
40
- Array.isArray(r2.messages) && r2.validation;
50
+ Array.isArray(r2.messages) && r2.validation &&
51
+ Array.isArray(r3.messages) && r3.validation && r3.engine === 'free-form';
41
52
  console.log(`\n[smoke] shape invariants: ${ok ? 'ok' : 'FAIL'}`);
42
53
 
43
54
  // Retrieval-quality probe — for each canonical intent, the generated
@@ -7,8 +7,9 @@ const t = (label, ok, detail = '') => {
7
7
  else { console.log(` ✗ ${label} ${detail}`); fail++; }
8
8
  };
9
9
 
10
- // Baseline
11
- t('five built-ins registered', listEngines().length === 5);
10
+ // Baseline — 6 built-ins post-§88 (monolithic-instant, monolithic-pro,
11
+ // monolithic-thinking, zettel, chunk-zettel, free-form).
12
+ t('six built-ins registered', listEngines().length === 6);
12
13
 
13
14
  // Happy path
14
15
  let customCalled = null;