@adia-ai/a2ui-mcp 0.3.5 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +6 -6
- package/scripts/eval-diff.mjs +33 -8
package/CHANGELOG.md
CHANGED
|
@@ -11,6 +11,20 @@ zettel strategies.
|
|
|
11
11
|
|
|
12
12
|
_No pending changes._
|
|
13
13
|
|
|
14
|
+
## [0.4.0] - 2026-05-10
|
|
15
|
+
|
|
16
|
+
### Ride-along (no source changes)
|
|
17
|
+
|
|
18
|
+
Lockstep MINOR cut alongside `@adia-ai/web-modules@0.4.0` (ADR-0024 legacy shell shapes retired). Source byte-identical to v0.3.6.
|
|
19
|
+
|
|
20
|
+
Internal `@adia-ai/*` dep ranges bumped from `^0.3.0` to `^0.4.0`. See root [CHANGELOG.md `## [0.4.0]`](../../../CHANGELOG.md) for the cut narrative.
|
|
21
|
+
|
|
22
|
+
## [0.3.6] - 2026-05-10
|
|
23
|
+
|
|
24
|
+
### Ride-along (no source changes)
|
|
25
|
+
|
|
26
|
+
Lockstep version bump only — source byte-identical to v0.3.5. Internal `@adia-ai/*` dep ranges remain at `^0.3.0`. See root [CHANGELOG.md `## [0.3.6]`](../../../CHANGELOG.md) for the cut narrative.
|
|
27
|
+
|
|
14
28
|
## [0.3.5] - 2026-05-07
|
|
15
29
|
|
|
16
30
|
### Ride-along (no source changes)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adia-ai/a2ui-mcp",
|
|
3
|
-
"version": "0.3.5",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "AdiaUI A2UI MCP server. Exposes the compose engine over MCP with an engine selector for monolithic + zettel strategies.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -26,11 +26,11 @@
|
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
28
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
29
|
-
"@adia-ai/a2ui-compose": "^0.3.0",
|
|
30
|
-
"@adia-ai/a2ui-retrieval": "^0.3.0",
|
|
31
|
-
"@adia-ai/a2ui-validator": "^0.3.0",
|
|
32
|
-
"@adia-ai/a2ui-corpus": "^0.3.0",
|
|
33
|
-
"@adia-ai/llm": "^0.3.0",
|
|
29
|
+
"@adia-ai/a2ui-compose": "^0.4.0",
|
|
30
|
+
"@adia-ai/a2ui-retrieval": "^0.4.0",
|
|
31
|
+
"@adia-ai/a2ui-validator": "^0.4.0",
|
|
32
|
+
"@adia-ai/a2ui-corpus": "^0.4.0",
|
|
33
|
+
"@adia-ai/llm": "^0.4.0",
|
|
34
34
|
"zod": "^3.24.0"
|
|
35
35
|
}
|
|
36
36
|
}
|
package/scripts/eval-diff.mjs
CHANGED
|
@@ -30,6 +30,7 @@ import { fileURLToPath } from 'node:url';
|
|
|
30
30
|
|
|
31
31
|
import { generateUI } from '../../compose/core/generator.js';
|
|
32
32
|
import { generateZettel } from '../../compose/strategies/zettel/generator-adapter.js';
|
|
33
|
+
import { ENGINES } from '../../compose/strategies/registry.js';
|
|
33
34
|
import { runHarnessV2 } from '../../compose/evals/harness.mjs';
|
|
34
35
|
import { validateSemantics } from '../../validator/semantic/index.js';
|
|
35
36
|
|
|
@@ -70,13 +71,14 @@ if (gateMode === 'combined' && !semanticEnabled) {
|
|
|
70
71
|
process.exit(2);
|
|
71
72
|
}
|
|
72
73
|
|
|
73
|
-
if (!['mcp', 'zettel', 'all'].includes(engine)) {
|
|
74
|
-
console.error(`[eval-diff] --engine must be one of: mcp | zettel | all (got: ${engine})`);
|
|
74
|
+
if (!['mcp', 'zettel', 'chunk-zettel', 'all'].includes(engine)) {
|
|
75
|
+
console.error(`[eval-diff] --engine must be one of: mcp | zettel | chunk-zettel | all (got: ${engine})`);
|
|
75
76
|
process.exit(2);
|
|
76
77
|
}
|
|
77
78
|
|
|
78
79
|
const runMcp = engine === 'mcp' || engine === 'all';
|
|
79
80
|
const runZettel = engine === 'zettel' || engine === 'all';
|
|
81
|
+
const runChunkZettel = engine === 'chunk-zettel' || engine === 'all';
|
|
80
82
|
|
|
81
83
|
// ── MCP adapter: use the top-level patternName exposed by generateInstant ──
|
|
82
84
|
// Shadow-mode capture: when --semantic is set, remember the emitted messages
|
|
@@ -109,9 +111,18 @@ async function generateZettelCapture({ intent, mode }) {
|
|
|
109
111
|
return result;
|
|
110
112
|
}
|
|
111
113
|
|
|
114
|
+
async function generateChunkZettelCapture({ intent, mode }) {
|
|
115
|
+
const result = await ENGINES['chunk-zettel']({ intent, mode: mode || 'instant' });
|
|
116
|
+
if (semanticEnabled && Array.isArray(result.messages) && result.messages.length > 0) {
|
|
117
|
+
capturedMessages.set(`chunk-zettel:${intent}`, result.messages);
|
|
118
|
+
}
|
|
119
|
+
return result;
|
|
120
|
+
}
|
|
121
|
+
|
|
112
122
|
// ── Run ──
|
|
113
123
|
let mcp = null;
|
|
114
124
|
let zettel = null;
|
|
125
|
+
let chunkZettel = null;
|
|
115
126
|
|
|
116
127
|
if (runMcp) {
|
|
117
128
|
console.error(`[eval-diff] running mcp (monolithic) harness…`);
|
|
@@ -137,6 +148,18 @@ if (runZettel) {
|
|
|
137
148
|
console.error(` coverage=${zettel.coverage}% emitted=${zettel.emitted}/${zettel.total} avgScore=${zettel.avgScoreWhenEmitted}`);
|
|
138
149
|
}
|
|
139
150
|
|
|
151
|
+
if (runChunkZettel) {
|
|
152
|
+
console.error(`[eval-diff] running chunk-zettel (chunk-corpus) harness…`);
|
|
153
|
+
chunkZettel = await runHarnessV2({
|
|
154
|
+
generate: generateChunkZettelCapture,
|
|
155
|
+
domain,
|
|
156
|
+
limit,
|
|
157
|
+
mode: 'instant',
|
|
158
|
+
label: 'chunk-zettel',
|
|
159
|
+
});
|
|
160
|
+
console.error(` coverage=${chunkZettel.coverage}% emitted=${chunkZettel.emitted}/${chunkZettel.total} avgScore=${chunkZettel.avgScoreWhenEmitted}`);
|
|
161
|
+
}
|
|
162
|
+
|
|
140
163
|
// ── Shadow-mode semantic validation (Phase 1) ──
|
|
141
164
|
// Opt-in via --semantic. Annotates per-intent rows + aggregates with
|
|
142
165
|
// semanticScore/verdict/combinedScore. DOES NOT affect row.pass, passRate,
|
|
@@ -232,6 +255,7 @@ if (semanticEnabled) {
|
|
|
232
255
|
console.error(`[eval-diff] running semantic validator (${modeNote})…`);
|
|
233
256
|
if (mcp) await annotateSemantic(mcp, 'mcp');
|
|
234
257
|
if (zettel) await annotateSemantic(zettel, 'zettel');
|
|
258
|
+
if (chunkZettel) await annotateSemantic(chunkZettel, 'chunk-zettel');
|
|
235
259
|
}
|
|
236
260
|
}
|
|
237
261
|
|
|
@@ -241,6 +265,7 @@ const outDir = join(repoRoot, 'evals', 'mcp', 'runs', stamp);
|
|
|
241
265
|
await mkdir(outDir, { recursive: true });
|
|
242
266
|
if (mcp) await writeFile(join(outDir, 'mcp.json'), JSON.stringify(mcp, null, 2));
|
|
243
267
|
if (zettel) await writeFile(join(outDir, 'zettel.json'), JSON.stringify(zettel, null, 2));
|
|
268
|
+
if (chunkZettel) await writeFile(join(outDir, 'chunk-zettel.json'), JSON.stringify(chunkZettel, null, 2));
|
|
244
269
|
|
|
245
270
|
// ── Build diff.md ──
|
|
246
271
|
function fmt(v) { return v == null ? '—' : String(v); }
|
|
@@ -253,10 +278,10 @@ function winner(a, b) {
|
|
|
253
278
|
}
|
|
254
279
|
|
|
255
280
|
let md = '';
|
|
256
|
-
md += `# Engine Eval ${mcp && zettel ? 'Diff' : 'Report'}\n\n`;
|
|
281
|
+
md += `# Engine Eval ${[mcp, zettel, chunkZettel].filter(Boolean).length > 1 ? 'Diff' : 'Report'}\n\n`;
|
|
257
282
|
md += `- Run: \`${stamp}\`\n`;
|
|
258
283
|
md += `- Engine(s): ${engine}\n`;
|
|
259
|
-
md += `- Intents: ${(mcp || zettel).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
|
|
284
|
+
md += `- Intents: ${(mcp || zettel || chunkZettel).total}${domain ? ` (domain: ${domain})` : ''}${limit ? ` (limit: ${limit})` : ''}\n`;
|
|
260
285
|
md += `- Mode: instant\n`;
|
|
261
286
|
if (semanticEnabled) {
|
|
262
287
|
md += `- Semantic: ${gateMode === 'combined' ? `gating (threshold=${gateThreshold})` : 'shadow'}\n`;
|
|
@@ -278,8 +303,8 @@ if (mcp && zettel) {
|
|
|
278
303
|
}
|
|
279
304
|
md += `| retrieval MRR | ${fmt(mcp.retrievalMRR)} | ${fmt(zettel.retrievalMRR)} |\n\n`;
|
|
280
305
|
} else {
|
|
281
|
-
const e = mcp || zettel;
|
|
282
|
-
const label = mcp ? 'mcp' : 'zettel';
|
|
306
|
+
const e = mcp || zettel || chunkZettel;
|
|
307
|
+
const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
|
|
283
308
|
md += `| metric | ${label} |\n|---|---:|\n`;
|
|
284
309
|
md += `| coverage % | ${e.coverage} |\n`;
|
|
285
310
|
md += `| emitted | ${e.emitted}/${e.total} |\n`;
|
|
@@ -327,8 +352,8 @@ if (mcp && zettel) {
|
|
|
327
352
|
console.error(` ties: ${counts.tie || 0}`);
|
|
328
353
|
console.error(` both missed: ${counts['both-miss'] || 0}`);
|
|
329
354
|
} else {
|
|
330
|
-
const e = mcp || zettel;
|
|
331
|
-
const label = mcp ? 'mcp' : 'zettel';
|
|
355
|
+
const e = mcp || zettel || chunkZettel;
|
|
356
|
+
const label = mcp ? 'mcp' : zettel ? 'zettel' : 'chunk-zettel';
|
|
332
357
|
md += `## Strategy breakdown\n\n`;
|
|
333
358
|
md += `**${label}**: ` + Object.entries(e.strategyBreakdown).map(([k, v]) => `${k}=${v}`).join(', ') + `\n\n`;
|
|
334
359
|
md += `## Per-intent\n\n`;
|