@adia-ai/a2ui-mcp 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +131 -0
- package/package.json +3 -3
- package/scripts/eval-refine-synthesis.mjs +270 -0
- package/scripts/smoke-issues.mjs +266 -0
- package/scripts/smoke-refine.mjs +374 -0
- package/scripts/smoke-state-cache.mjs +130 -0
- package/scripts/test-a2ui.mjs +121 -14
- package/server.js +313 -10
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Smoke test: state-cache (LRU + state-id minting).
|
|
3
|
+
// Spec: docs/specs/genui-multiturn-architecture.md §2.2 + §4.4.
|
|
4
|
+
import {
|
|
5
|
+
StateCache,
|
|
6
|
+
mintStateId,
|
|
7
|
+
mintNextStateId,
|
|
8
|
+
getStateCache,
|
|
9
|
+
resetStateCache,
|
|
10
|
+
} from '../../compose/engines/zettel/state-cache.js';
|
|
11
|
+
|
|
12
|
+
// Running tallies for the summary line and the process exit code.
let pass = 0;
let fail = 0;

/**
 * Record one smoke-test assertion and print a one-line verdict.
 *
 * @param {string} label - Human-readable name of the check.
 * @param {*} ok - Outcome of the check; any truthy value counts as a pass.
 * @param {string} [detail=''] - Extra context appended to the failure line only.
 * @returns {boolean} `true` when the check passed, `false` otherwise.
 */
const t = (label, ok, detail = '') => {
  if (ok) {
    console.log(` ✓ ${label}`);
    pass++;
    return true;
  }
  // Only append the detail when there is one, so failure lines carry no trailing space.
  console.log(detail ? ` ✗ ${label} ${detail}` : ` ✗ ${label}`);
  fail++;
  return false;
};
|
|
17
|
+
|
|
18
|
+
console.log('=== State-cache LRU ===');

// A capacity of 3 keeps every eviction scenario below small enough to trace by hand.
const cache = new StateCache({ maxSize: 3 });
t('empty cache size 0', cache.size() === 0);
t('miss returns null', cache.get('nope') === null);
t('has returns false on miss', cache.has('nope') === false);

cache.set('a', { intent: 'a', html: '<a/>' });
cache.set('b', { intent: 'b', html: '<b/>' });
cache.set('c', { intent: 'c', html: '<c/>' });
t('size at capacity', cache.size() === 3);
t('list returns insertion order', JSON.stringify(cache.list()) === JSON.stringify(['a', 'b', 'c']));
t('has returns true on hit', cache.has('a') && cache.has('b') && cache.has('c'));

// Eviction: 4th set drops oldest
cache.set('d', { intent: 'd', html: '<d/>' });
t('LRU evicts oldest on overflow', cache.size() === 3 && !cache.has('a') && cache.has('d'));

// Recency touch via get: bump 'b' to most-recent, then set 'e'.
// Order before: [b, c, d]. After get('b'): [c, d, b]. After set('e'): evict c → [d, b, e].
cache.get('b');
cache.set('e', { intent: 'e', html: '<e/>' });
t('get touches recency', cache.has('b') && cache.has('e') && !cache.has('c'));

// peek does NOT touch recency.
// Order: [d, b, e]. peek('b') → unchanged [d, b, e]. set('f') → evict d → [b, e, f].
const peeked = cache.peek('b');
t('peek returns entry', peeked?.intent === 'b');
cache.set('f', { intent: 'f', html: '<f/>' });
t('peek does not touch recency', cache.has('b') && cache.has('e') && cache.has('f') && !cache.has('d'));

// Update existing key bumps recency.
// Order before: [b, e, f]. set('b', ...) → delete-then-insert → [e, f, b].
cache.set('b', { intent: 'b-updated', html: '<b2/>' });
// Check the order through list() BEFORE any get(): get() itself touches recency and
// would hide a set() that updated the entry in place without moving it.
// NOTE(review): assumes list() reports keys oldest-first in recency order — confirm against state-cache.js.
t('update existing key bumps recency', JSON.stringify(cache.list()) === JSON.stringify(['e', 'f', 'b']));
t('update existing key replaces entry', cache.get('b')?.intent === 'b-updated');
t('update preserves cache size', cache.size() === 3);

// Evict
t('evict returns true on hit', cache.evict('e') === true);
t('evict returns false on miss', cache.evict('e') === false);
t('evict drops entry', !cache.has('e') && cache.size() === 2);

// Clear
cache.clear();
t('clear empties cache', cache.size() === 0 && cache.list().length === 0);
|
|
63
|
+
|
|
64
|
+
console.log('\n=== State-id minting ===');

// Shape under test: <first-word>-<4 hex chars>-v<version>-<digits>.
const explicitVersionId = mintStateId('admin dashboard with KPIs', 1);
t('id format: prefix-rand4-vN-unixmin', /^admin-[a-f0-9]{4}-v1-\d+$/.test(explicitVersionId));

const defaultVersionId = mintStateId('build me a pricing page');
t('id uses first word as prefix', defaultVersionId.startsWith('build-'));

// Intents with no usable word are expected to mint under the "state" prefix.
const fallbackV1 = /^state-[a-f0-9]{4}-v1-\d+$/;
t('id with empty intent uses fallback', fallbackV1.test(mintStateId('')));
t('id with whitespace-only intent uses fallback', fallbackV1.test(mintStateId(' ')));
t('id strips non-alphanumeric chars', /^foo-/.test(mintStateId('foo!@#$ bar')));

// Different mints → different ids in the vast majority of cases.
const mintedIds = new Set();
let remaining = 50;
while (remaining > 0) {
  mintedIds.add(mintStateId('dash', 1));
  remaining -= 1;
}
t('50 mints produce mostly distinct rand4 values', mintedIds.size >= 45);

// Next-id chain: the child keeps the parent's prefix and random stem, only the version moves.
const parentId = mintStateId('dash', 1);
const childId = mintNextStateId(parentId, 2);
const parentParts = parentId.split('-');
const childParts = childId.split('-');
t('next-id preserves prefix', parentParts[0] === childParts[0]);
t('next-id preserves rand4', parentParts[1] === childParts[1]);
t('next-id bumps version', childParts[2] === 'v2');

const fromBadParent = mintNextStateId('not-a-valid-id', 2);
t('next-id falls back on bad parent', /^[a-z]+-[a-f0-9]{4}-v2-\d+$/.test(fromBadParent));

const fromMissingParent = mintNextStateId(undefined, 3);
t('next-id falls back on undefined parent', /^state-[a-f0-9]{4}-v3-\d+$/.test(fromMissingParent));
|
|
95
|
+
|
|
96
|
+
console.log('\n=== Singleton ===');

// Start from a known singleton so the identity checks do not depend on earlier sections.
resetStateCache({ maxSize: 5 });
const firstHandle = getStateCache();
const secondHandle = getStateCache();
t('singleton returns same instance', firstHandle === secondHandle);
firstHandle.set('x', { intent: 'x' });
t('singleton state shared', secondHandle.has('x'));

const afterReset = resetStateCache();
t('reset replaces singleton', afterReset !== firstHandle && afterReset.size() === 0);

console.log('\n=== Env-driven sizing ===');

// Numeric env value is honoured.
process.env.A2UI_STATE_CACHE_SIZE = '7';
const fromEnv = resetStateCache();
t('env-driven maxSize', fromEnv.maxSize === 7);

// No env value → built-in default.
delete process.env.A2UI_STATE_CACHE_SIZE;
const fromDefault = resetStateCache();
t('default maxSize 64', fromDefault.maxSize === 64);

// Unparseable env value → built-in default.
process.env.A2UI_STATE_CACHE_SIZE = 'not-a-number';
const fromGarbage = resetStateCache();
t('garbage env value falls back to default', fromGarbage.maxSize === 64);
delete process.env.A2UI_STATE_CACHE_SIZE;

// Constructor opts override env
process.env.A2UI_STATE_CACHE_SIZE = '7';
const fromOpts = resetStateCache({ maxSize: 12 });
t('constructor maxSize wins over env', fromOpts.maxSize === 12);
delete process.env.A2UI_STATE_CACHE_SIZE;

// Summary line, then a non-zero exit code when any check failed.
console.log(`\n${pass} passed, ${fail} failed`);
const exitCode = fail ? 1 : 0;
process.exit(exitCode);
|
package/scripts/test-a2ui.mjs
CHANGED
|
@@ -236,28 +236,135 @@ if (!THINKING) {
|
|
|
236
236
|
}
|
|
237
237
|
}
|
|
238
238
|
|
|
239
|
-
// ── Test 6: Training
|
|
239
|
+
// ── Test 6: Training corpus surfaces ────────────────────────────────
|
|
240
|
+
// (The legacy exemplar extract → ingest path was retired 2026-04-28 in
|
|
241
|
+
// mcp 0.0.5. The chunk corpus is the training surface now.)
|
|
240
242
|
|
|
241
|
-
console.log('\n6. Training
|
|
243
|
+
console.log('\n6. Training corpus surfaces');
|
|
242
244
|
|
|
245
|
+
// 6a. Hand-authored pattern library: the check passes once at least 100 entries are listed.
const patterns = listPatterns();
if (patterns.length < 100) {
  bad('Pattern library', `only ${patterns.length} (expected ≥ 100)`);
} else {
  ok('Pattern library', `${patterns.length} hand-authored patterns`);
}
|
|
252
|
+
|
|
253
|
+
// 6b. Gen-UI chunk corpus — should be ≥ 500 unique chunks across
|
|
254
|
+
// page / panel / block kinds.
|
|
243
255
|
try {
|
|
244
|
-
const {
|
|
245
|
-
const
|
|
246
|
-
if (
|
|
247
|
-
ok('
|
|
256
|
+
const { getChunkIndex } = await import('../../corpus/scripts/chunk-library.js');
|
|
257
|
+
const idx = getChunkIndex();
|
|
258
|
+
if (idx && idx.unique_names >= 500 && idx.by_kind.block && idx.by_kind.page) {
|
|
259
|
+
ok('Chunk corpus', `${idx.unique_names} chunks (${idx.total_instances} instances; block=${idx.by_kind.block}, panel=${idx.by_kind.panel || 0}, page=${idx.by_kind.page})`);
|
|
248
260
|
} else {
|
|
249
|
-
bad('
|
|
261
|
+
bad('Chunk corpus', `unexpected index: ${JSON.stringify(idx)}`);
|
|
250
262
|
}
|
|
251
263
|
} catch (e) {
|
|
252
|
-
bad('
|
|
264
|
+
bad('Chunk corpus', e.message);
|
|
253
265
|
}
|
|
254
266
|
|
|
255
|
-
//
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
267
|
+
// ── Test 7: Multi-turn refinement modules (Phase A) ─────────────────
// Spec: docs/specs/genui-multiturn-architecture.md.
//
// Exercises the state cache, op validation, op application, intent-driven
// refinement (with a stubbed LLM), A2UI wire conversion, and the issue
// accumulator. Any thrown error is reported as a single 'Multi-turn modules' failure.

console.log('\n7. Multi-turn refinement (Phase A)');

try {
  const { StateCache, mintStateId, mintNextStateId } = await import('../../compose/engines/zettel/state-cache.js');
  const { createIssueAccumulator } = await import('../../compose/engines/zettel/issue-reporter.js');
  const { refineFromIntent, applyOps, opsToA2UI, validateOps } = await import('../../compose/engines/zettel/chunk-refiner.js');
  const { listChunksByKind } = await import('../../corpus/scripts/chunk-library.js');

  // 7a. State cache + state-id chain
  const cache = new StateCache({ maxSize: 8 });
  const root = mintStateId('admin dashboard', 1);
  cache.set(root, { state_id: root, intent: 'admin dashboard', plan: { page: 'p', slot_bindings: {} }, ops_history: [], version: 1 });
  const child = mintNextStateId(root, 2);
  if (child.startsWith(root.split('-').slice(0, 2).join('-')) && child.includes('v2')) {
    ok('State-id chain preserves stem + bumps version');
  } else {
    bad('State-id chain', `child=${child} root=${root}`);
  }

  // 7b. validateOps catches unknown chunks
  // Slots may sit on the chunk itself or on its first instance.
  const slotsOf = (chunk) => chunk.slots || chunk.instances?.[0]?.slots || [];
  const pages = listChunksByKind('page');
  const blocks = listChunksByKind('block');
  // Require a page that actually exposes a slot. Falling back to a slot-less
  // page would bind a slot literally named "undefined" and make every later
  // check fail for a reason unrelated to the code under test.
  const samplePage = pages.find((p) => slotsOf(p).length >= 1);
  if (!samplePage || blocks.length < 3) {
    bad('Refiner test setup', 'corpus missing chunks');
  } else {
    const targetedSlot = slotsOf(samplePage)[0].name;
    const priorState = {
      state_id: root,
      intent: 'admin dashboard',
      plan: { page: samplePage.name, slot_bindings: { [targetedSlot]: [blocks[0].name] } },
      html: '<placeholder/>',
      version: 1,
    };

    const badOps = validateOps(
      [{ type: 'rebindSlot', slot: targetedSlot, chunks: ['no-such-chunk-zzz'] }],
      priorState,
    );
    if (!badOps.ok && badOps.errors.some((e) => /not found/.test(e))) {
      ok('validateOps rejects unknown chunk');
    } else {
      bad('validateOps unknown chunk', JSON.stringify(badOps));
    }

    // 7c. applyOps produces materialized HTML
    const applied = await applyOps({
      priorState,
      ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [blocks[1].name] }],
    });
    if (applied.ops_applied.length === 1 && applied.newState.html) {
      ok('applyOps materializes new HTML');
    } else {
      bad('applyOps materialize', `applied=${applied.ops_applied.length} html=${!!applied.newState.html}`);
    }

    // 7d. Two-pass refineFromIntent with stub LLM
    // First reply answers the locator pass, second answers the modifier pass.
    const stubResponses = [
      { targeted: true, target_slots: [targetedSlot] },
      { ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [blocks[1].name] }], delta_summary: 'test' },
    ];
    let qIdx = 0;
    const stubLLM = {
      complete: async () => {
        // Replay the final reply on any extra call (e.g. a retry) instead of
        // indexing past the list and returning `content: undefined`.
        const reply = stubResponses[Math.min(qIdx, stubResponses.length - 1)];
        qIdx += 1;
        return { content: JSON.stringify(reply) };
      },
    };
    const refined = await refineFromIntent({
      priorState,
      intent: `change ${targetedSlot}`,
      llmAdapter: stubLLM,
      catalog: [{ name: blocks[1].name, kind: 'block', primary: 'div', slots: [] }],
    });
    if (refined.ops.length === 1 && refined.synthesis.targeted) {
      ok('refineFromIntent two-pass synthesis');
    } else {
      bad('refineFromIntent', `ops=${refined.ops.length} targeted=${refined.synthesis.targeted}`);
    }

    // 7e. opsToA2UI emits updateComponents wire-format
    const wire = opsToA2UI(refined.ops, applied.newState);
    if (wire.length === 1 && wire[0].type === 'updateComponents' && wire[0].surfaceId === 'main') {
      ok('opsToA2UI wraps as updateComponents');
    } else {
      bad('opsToA2UI', JSON.stringify(wire[0]));
    }

    // 7f. IssueAccumulator coalesces via reasons()
    const acc = createIssueAccumulator();
    acc.add('locator-empty-targets', { intent: 't' });
    acc.add('validator-exhausted', { tool: 't' });
    if (acc.size() === 2 && acc.reasons().length === 2) {
      ok('IssueAccumulator collects multiple auto-fires');
    } else {
      bad('IssueAccumulator', `size=${acc.size()}`);
    }
  }
} catch (e) {
  bad('Multi-turn modules', e.message);
}
|
|
262
369
|
|
|
263
370
|
// ── Summary ─────────────────────────────────────────────────────────
|
package/server.js
CHANGED
|
@@ -668,6 +668,37 @@ import { composeFromIntent as composeFromChunksImpl } from '../compose/engines/z
|
|
|
668
668
|
import { composeFromPlan, validatePlan } from '../compose/engines/zettel/chunk-composer.js';
|
|
669
669
|
import { createAdapter as createLLMAdapter } from '../compose/llm/llm-bridge.js';
|
|
670
670
|
|
|
671
|
+
// ── Multi-turn architecture (Phase A) ────────────────────────────────
|
|
672
|
+
// Spec: docs/specs/genui-multiturn-architecture.md (Draft v0.1.0).
|
|
673
|
+
// Plan: docs/plans/genui-multiturn-rollout-2026-04-28.md (Phase A scoped).
|
|
674
|
+
|
|
675
|
+
import {
|
|
676
|
+
getStateCache,
|
|
677
|
+
mintStateId,
|
|
678
|
+
mintNextStateId,
|
|
679
|
+
} from '../compose/engines/zettel/state-cache.js';
|
|
680
|
+
import {
|
|
681
|
+
reportIssue as reportIssueImpl,
|
|
682
|
+
autoReport,
|
|
683
|
+
createIssueAccumulator,
|
|
684
|
+
} from '../compose/engines/zettel/issue-reporter.js';
|
|
685
|
+
import {
|
|
686
|
+
refineFromIntent,
|
|
687
|
+
applyOps,
|
|
688
|
+
opsToA2UI,
|
|
689
|
+
validateOps,
|
|
690
|
+
} from '../compose/engines/zettel/chunk-refiner.js';
|
|
691
|
+
|
|
692
|
+
const stateCache = getStateCache();
|
|
693
|
+
|
|
694
|
+
// Version/provenance record passed as `versionInfo` to the issue-reporting
// helpers. The model name comes from ANTHROPIC_MODEL when that variable is
// set to a non-empty value; otherwise the built-in default is used.
const ENGINE_VERSION_INFO = {
  mcp: '0.1.0',
  corpus: '0.0.6',
  engine: 'zettel',
  llm_adapter: 'anthropic',
  model: process.env.ANTHROPIC_MODEL ? process.env.ANTHROPIC_MODEL : 'claude-opus-4-7',
};
|
|
701
|
+
|
|
671
702
|
server.tool(
|
|
672
703
|
'compose_from_chunks',
|
|
673
704
|
`Compose a UI page from training chunks — retrieval-first, synthesis-fallback.
|
|
@@ -704,8 +735,20 @@ plan to skip the LLM call and just materialize HTML.`,
|
|
|
704
735
|
};
|
|
705
736
|
}
|
|
706
737
|
const result = composeFromPlan(plan);
|
|
738
|
+
const state_id = mintStateId(intent || plan.page || 'plan', 1);
|
|
739
|
+
stateCache.set(state_id, {
|
|
740
|
+
state_id,
|
|
741
|
+
intent: intent || `(plan) ${plan.page}`,
|
|
742
|
+
plan: result.plan,
|
|
743
|
+
html: result.html,
|
|
744
|
+
source: 'plan',
|
|
745
|
+
ops_history: [],
|
|
746
|
+
parent_state_id: null,
|
|
747
|
+
created_at: new Date().toISOString(),
|
|
748
|
+
});
|
|
707
749
|
return {
|
|
708
750
|
content: [{ type: 'text', text: JSON.stringify({
|
|
751
|
+
state_id,
|
|
709
752
|
html: result.html,
|
|
710
753
|
plan: result.plan,
|
|
711
754
|
warnings: result.warnings,
|
|
@@ -728,8 +771,23 @@ plan to skip the LLM call and just materialize HTML.`,
|
|
|
728
771
|
llmAdapter,
|
|
729
772
|
maxAttempts: max_attempts,
|
|
730
773
|
});
|
|
774
|
+
const state_id = mintStateId(intent, 1);
|
|
775
|
+
stateCache.set(state_id, {
|
|
776
|
+
state_id,
|
|
777
|
+
intent,
|
|
778
|
+
plan: result.plan,
|
|
779
|
+
html: result.html,
|
|
780
|
+
source: result.source,
|
|
781
|
+
score: result.score,
|
|
782
|
+
ops_history: [],
|
|
783
|
+
parent_state_id: null,
|
|
784
|
+
warnings: result.warnings,
|
|
785
|
+
synthesis: result.synthesis,
|
|
786
|
+
created_at: new Date().toISOString(),
|
|
787
|
+
});
|
|
731
788
|
return {
|
|
732
789
|
content: [{ type: 'text', text: JSON.stringify({
|
|
790
|
+
state_id,
|
|
733
791
|
html: result.html,
|
|
734
792
|
plan: result.plan,
|
|
735
793
|
source: result.source,
|
|
@@ -747,21 +805,266 @@ plan to skip the LLM call and just materialize HTML.`,
|
|
|
747
805
|
},
|
|
748
806
|
);
|
|
749
807
|
|
|
808
|
+
// ── Multi-turn refinement tools (Phase A) ─────────────────────────────
|
|
809
|
+
// Spec: docs/specs/genui-multiturn-architecture.md §3.
|
|
810
|
+
|
|
811
|
+
// Tool: refine_composition
// Looks up a cached composition by state_id, obtains chunk-plan ops (either
// supplied by the caller or synthesized from a natural-language intent),
// applies them, caches the result under a new versioned state_id, and returns
// the applied A2UI messages plus the post-op HTML. Every failure is returned
// as an `isError` tool result rather than thrown.
server.tool(
  'refine_composition',
  `Refine an existing chunk-composed UI based on a natural-language intent or an explicit op-list.

Use when the user wants to modify an *existing* UI. Triggers on "change", "update", "modify", "add to", "remove from", "this", "it", "the X". Requires \`state_id\` from a prior \`compose_from_chunks\` call.

Two modes:
- **Intent-driven** — pass \`intent\`. Engine runs two-pass synthesis (locator pass identifies which slots to modify; modifier pass emits chunk-plan ops). Validator-driven retry on op-validation failure.
- **Explicit ops** — pass \`ops\` directly. Skips the LLM entirely; engine applies + materializes.

Returns a new \`state_id\` (versioned chain from the parent), the A2UI op-list applied, the post-op HTML, and a delta summary. Failed ops are reported in \`ops_failed\` with reasons.

For *fresh creation* use \`compose_from_chunks\`, not this tool.`,
  {
    state_id: z.string().describe('State id from a prior compose_from_chunks or refine_composition call'),
    intent: z.string().optional().describe('Natural-language description of what to change (e.g. "add a country list to page-content")'),
    ops: z.array(z.any()).optional().describe('Pre-computed chunk-plan ops to apply directly (skips the LLM)'),
    max_attempts: z.number().int().min(1).max(5).default(2).describe('Validator retry budget for synthesis'),
  },
  async ({ state_id, intent, ops, max_attempts }) => {
    const priorState = stateCache.get(state_id);
    if (!priorState) {
      await autoReport(
        'cache-miss-on-known-state',
        { state_id, tool: 'refine_composition' },
        { cache: stateCache, versionInfo: ENGINE_VERSION_INFO }
      );
      return {
        isError: true,
        content: [{ type: 'text', text: JSON.stringify({
          error: 'state_id not found in cache',
          hint: 'state cache is in-memory and bounded; re-run compose_from_chunks to mint a fresh state_id',
          state_id,
        }, null, 2) }],
      };
    }

    // An empty ops array carries no instructions, so treat it as "not provided":
    // it must neither satisfy the intent-or-ops requirement nor shadow a real intent.
    const hasExplicitOps = Array.isArray(ops) && ops.length > 0;

    if (!intent && !hasExplicitOps) {
      return {
        isError: true,
        content: [{ type: 'text', text: JSON.stringify({ error: 'must provide either intent or ops' }, null, 2) }],
      };
    }

    const issueAccumulator = createIssueAccumulator();
    const issueCtx = { cache: stateCache, versionInfo: ENGINE_VERSION_INFO };
    const startedAt = Date.now();
    const newVersion = (priorState.version || 1) + 1;

    try {
      let resolvedOps;
      let delta_summary = '';
      let synthesis = null;
      let warnings = [];

      if (hasExplicitOps) {
        // Explicit ops path — validate then apply
        const validation = validateOps(ops, priorState);
        if (!validation.ok) {
          await issueAccumulator.flush(issueCtx);
          return {
            isError: true,
            content: [{ type: 'text', text: JSON.stringify({
              error: 'ops failed validation',
              errors: validation.errors,
            }, null, 2) }],
          };
        }
        resolvedOps = ops;
        delta_summary = `applied ${ops.length} explicit op(s)`;
      } else {
        // Intent path — two-pass synthesis with stub-friendly LLM bridge
        const llmAdapter = await createLLMAdapter();
        const refined = await refineFromIntent({
          priorState,
          intent,
          llmAdapter,
          maxAttempts: max_attempts,
          issueAccumulator,
        });
        // Default to empty arrays so a result without ops/warnings cannot
        // break the length check or the spread below.
        resolvedOps = refined.ops || [];
        delta_summary = refined.delta_summary || '';
        synthesis = refined.synthesis;
        warnings = refined.warnings || [];

        if (resolvedOps.length === 0) {
          // Synthesizer gave up. Auto-fires already accumulated.
          await issueAccumulator.flush(issueCtx);
          const childId = mintNextStateId(state_id, newVersion);
          // Cache the unchanged child so the state_id handed back to the
          // caller resolves on the next refine_composition / get_state call.
          stateCache.set(childId, {
            state_id: childId,
            intent,
            plan: priorState.plan,
            html: priorState.html,
            source: 'refinement',
            version: newVersion,
            ops_history: [...(priorState.ops_history || [])],
            parent_state_id: state_id,
            warnings,
            delta_summary: '',
            synthesis,
            created_at: new Date().toISOString(),
            duration_ms: Date.now() - startedAt,
          });
          return {
            content: [{ type: 'text', text: JSON.stringify({
              state_id: childId,
              ops_applied: [],
              ops_failed: [],
              delta_summary: '',
              warnings,
              synthesis: synthesis ? { attempts: synthesis.attempts, targeted: synthesis.targeted } : null,
              html: priorState.html,
            }, null, 2) }],
          };
        }
      }

      const applied = await applyOps({ priorState, ops: resolvedOps });

      if (applied.ops_failed.length > 0) {
        issueAccumulator.add('ops-failed-after-apply', {
          state_id,
          tool: 'refine_composition',
          intent,
        });
      }

      const a2uiMessages = opsToA2UI(applied.ops_applied, applied.newState);
      const newStateId = mintNextStateId(state_id, newVersion);

      stateCache.set(newStateId, {
        state_id: newStateId,
        intent: intent || `(ops) ${priorState.intent}`,
        plan: applied.newState.plan,
        html: applied.newState.html,
        source: 'refinement',
        version: newVersion,
        ops_history: [...(priorState.ops_history || []), ...a2uiMessages],
        parent_state_id: state_id,
        warnings: applied.newState.warnings,
        delta_summary,
        synthesis,
        created_at: new Date().toISOString(),
        duration_ms: Date.now() - startedAt,
      });

      await issueAccumulator.flush(issueCtx);

      return {
        content: [{ type: 'text', text: JSON.stringify({
          state_id: newStateId,
          ops_applied: a2uiMessages,
          ops_failed: applied.ops_failed,
          delta_summary,
          warnings: [...warnings, ...(applied.newState.warnings || [])],
          synthesis: synthesis ? { attempts: synthesis.attempts, targeted: synthesis.targeted, locatedTargets: synthesis.locatedTargets } : null,
          html: applied.newState.html,
        }, null, 2) }],
      };
    } catch (e) {
      // Flushing is best-effort here: a failure to persist issues must not
      // replace the error the caller actually needs to see.
      try {
        await issueAccumulator.flush(issueCtx);
      } catch (flushErr) {
        console.error(`refine_composition: issue flush failed — ${flushErr.message}`);
      }
      return {
        isError: true,
        content: [{ type: 'text', text: JSON.stringify({ error: e.message }, null, 2) }],
      };
    }
  },
);
|
|
967
|
+
|
|
968
|
+
// Tool: get_state
// Read-only lookup of a cached composition. Uses peek() rather than get() to
// read the entry, and reports a cache miss through autoReport before
// returning an error result.
server.tool(
  'get_state',
  `Inspect a cached composition state by state_id.

Returns the full cache entry including the materialized HTML, the chunk binding plan, the chronological ops history (every refinement applied to this state's lineage), and the parent state_id (chain-back to the originating compose_from_chunks call).

Useful for debugging refinement sequences, replaying a state's history, or verifying that a state_id is still cached before issuing a refine_composition call.

Auto-fires a low-severity \`cache-miss-on-known-state\` issue when the state_id is not in the cache (the cache is bounded LRU; long-paused conversations may evict their state).`,
  {
    state_id: z.string().describe('State id from a prior compose_from_chunks or refine_composition call'),
  },
  async ({ state_id }) => {
    // Wrap a payload as the single pretty-printed text item of a tool result.
    const asTextContent = (payload) => [{ type: 'text', text: JSON.stringify(payload, null, 2) }];

    const cached = stateCache.peek(state_id);

    if (cached) {
      // Optional fields fall back to stable defaults so the response shape is
      // the same for root states and refined states.
      const snapshot = {
        state_id: cached.state_id,
        intent: cached.intent,
        plan: cached.plan,
        html: cached.html,
        source: cached.source,
        version: cached.version || 1,
        parent_state_id: cached.parent_state_id || null,
        ops_history: cached.ops_history || [],
        warnings: cached.warnings || [],
        created_at: cached.created_at,
      };
      return { content: asTextContent(snapshot) };
    }

    const reportCtx = { cache: stateCache, versionInfo: ENGINE_VERSION_INFO };
    await autoReport('cache-miss-on-known-state', { state_id, tool: 'get_state' }, reportCtx);

    return {
      isError: true,
      content: asTextContent({
        error: 'state_id not found in cache',
        state_id,
      }),
    };
  },
);
|
|
1012
|
+
|
|
1013
|
+
// Tool: report_issue
// Forwards a structured issue ticket to the issue reporter with the shared
// state cache and engine version info attached, tagging the reporter as 'llm'.
// Reporter errors are converted into an `isError` tool result.
server.tool(
  'report_issue',
  `File a structured issue ticket when something is wrong with the gen-UI output, the tool surface, or the training data.

Use when:
(a) the user explicitly says the output is broken / wrong / missing,
(b) you cannot satisfy the user's intent after retrying,
(c) you detect a mismatch between requested and produced output that you cannot fix.

Include \`state_id\` for full trace attachment (input + output + LLM prompts/responses + validator results, when available in the cache).

Do NOT call this for ordinary clarification or for output the user has not yet seen.

Issue files land at \`.brain/audit-history/issues/<issue_id>.json\` (immutable; resolution lands in a sidecar file). Severity taxonomy matches the project's coherence-audit vocabulary: blocker = contract violation; drift = quality erosion; nit = cosmetic.`,
  {
    type: z.enum(['bug', 'training-gap', 'protocol-gap', 'ux-feedback']).describe('Issue category'),
    severity: z.enum(['blocker', 'drift', 'nit']).describe('Severity tier'),
    title: z.string().max(80).describe('One-line title (≤ 80 chars)'),
    body: z.string().describe('Markdown body — observed vs expected, repro steps'),
    state_id: z.string().optional().describe('State id from a prior tool call; auto-attaches the trace'),
    trace: z.enum(['full', 'summary', 'none']).optional().describe('Trace depth (default: summary if state_id provided, else none)'),
    suggested_owner: z.enum(['synthesis', 'retrieval', 'validator', 'chunk-corpus', 'mcp-protocol', 'unknown']).optional().describe('Best-guess owner for triage'),
    tags: z.array(z.string()).optional().describe('Free-form tags for filtering'),
  },
  async ({ type, severity, title, body, state_id, trace, suggested_owner, tags }) => {
    // Wrap a payload as the single pretty-printed text item of a tool result.
    const asTextContent = (payload) => [{ type: 'text', text: JSON.stringify(payload, null, 2) }];

    const ticket = { type, severity, title, body, state_id, trace, suggested_owner, tags };
    const reporterCtx = {
      cache: stateCache,
      versionInfo: ENGINE_VERSION_INFO,
      reporter: 'llm',
    };

    let filed;
    try {
      filed = await reportIssueImpl(ticket, reporterCtx);
    } catch (e) {
      return {
        isError: true,
        content: asTextContent({ error: e.message }),
      };
    }
    return { content: asTextContent(filed) };
  },
);
|
|
1058
|
+
|
|
750
1059
|
// ── Start ──
|
|
751
1060
|
|
|
752
1061
|
async function main() {
|
|
753
1062
|
const transport = new StdioServerTransport();
|
|
754
1063
|
|
|
755
|
-
// Auto-ingest
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
if (result.registered > 0 || result.replaced > 0) {
|
|
760
|
-
console.error(`Training: ingested ${result.registered} new + ${result.replaced} replaced patterns (${result.pages} pages, ${result.chunks} chunks)`);
|
|
761
|
-
}
|
|
762
|
-
} catch (e) {
|
|
763
|
-
console.error(`Training: ingest skipped — ${e.message}`);
|
|
764
|
-
}
|
|
1064
|
+
// (Auto-ingest of exemplar-derived patterns retired 2026-04-28 in
|
|
1065
|
+
// mcp 0.0.5. The chunk corpus + chunk-aware synthesizer are the
|
|
1066
|
+
// training surface now; the legacy extract → ingest path that pulled
|
|
1067
|
+
// 70 patterns from prose exemplars on every server boot is gone.)
|
|
765
1068
|
|
|
766
1069
|
await server.connect(transport);
|
|
767
1070
|
const catalog = await getCatalog();
|