@adia-ai/a2ui-mcp 0.0.5 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +166 -0
- package/package.json +2 -2
- package/scripts/eval-diff.mjs +62 -6
- package/scripts/eval-refine-synthesis.mjs +270 -0
- package/scripts/semantic-stats.mjs +113 -0
- package/scripts/smoke-issues.mjs +266 -0
- package/scripts/smoke-refine.mjs +374 -0
- package/scripts/smoke-state-cache.mjs +130 -0
- package/scripts/test-a2ui.mjs +103 -0
- package/server.js +309 -0
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Smoke test: chunk-refiner (validator + applier + 2-pass synthesis with stub LLM).
|
|
3
|
+
// Spec: docs/specs/genui-multiturn-architecture.md §4.
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
refineFromIntent,
|
|
7
|
+
applyOps,
|
|
8
|
+
validateOps,
|
|
9
|
+
opsToA2UI,
|
|
10
|
+
} from '../../compose/engines/zettel/chunk-refiner.js';
|
|
11
|
+
import {
|
|
12
|
+
StateCache,
|
|
13
|
+
mintStateId,
|
|
14
|
+
mintNextStateId,
|
|
15
|
+
} from '../../compose/engines/zettel/state-cache.js';
|
|
16
|
+
import { createIssueAccumulator } from '../../compose/engines/zettel/issue-reporter.js';
|
|
17
|
+
import {
|
|
18
|
+
getChunk,
|
|
19
|
+
listChunksByKind,
|
|
20
|
+
} from '../../../a2ui/corpus/scripts/chunk-library.js';
|
|
21
|
+
|
|
22
|
+
// Running tallies of checks that passed / failed; printed in the final summary
// line and used to choose the process exit code.
let pass = 0;
let fail = 0;

/**
 * Record the outcome of one check and print a single result line for it.
 *
 * @param {string} label - Human-readable name of the check.
 * @param {*} ok - Truthy when the check passed, falsy otherwise.
 * @param {string} [detail=''] - Extra text appended to the line on failure.
 */
const t = (label, ok, detail = '') => {
  if (!ok) {
    console.log(` ✗ ${label} ${detail}`);
    fail += 1;
    return;
  }
  console.log(` ✓ ${label}`);
  pass += 1;
};
|
|
27
|
+
|
|
28
|
+
// --- Discover real corpus chunks for happy-path tests ---
const pageChunks = listChunksByKind('page');
const panelChunks = listChunksByKind('panel');
const blockChunks = listChunksByKind('block');

// The checks below index sampleBlocks[0] through sampleBlocks[4] (the names of
// the first five block chunks). With fewer than five blocks those lookups yield
// `undefined` chunk names and the checks fail for the wrong reason, so abort up
// front with a clear message instead.
const MIN_BLOCK_CHUNKS = 5;
if (pageChunks.length === 0 || blockChunks.length < MIN_BLOCK_CHUNKS) {
  console.error(`Corpus missing required chunks (pages=${pageChunks.length}, blocks=${blockChunks.length}). Aborting.`);
  process.exit(2);
}
|
|
37
|
+
|
|
38
|
+
// Pick a page chunk that declares at least one slot (not all do); failing that, a panel chunk with slots; otherwise fall back to the first page chunk.
|
|
39
|
+
const samplePage = pageChunks.find((p) => (p.slots || p.instances?.[0]?.slots || []).length >= 1)
|
|
40
|
+
|| panelChunks.find((p) => (p.slots || p.instances?.[0]?.slots || []).length >= 1)
|
|
41
|
+
|| pageChunks[0];
|
|
42
|
+
const samplePageSlots = (samplePage.slots || samplePage.instances?.[0]?.slots || []).map((s) => s.name);
|
|
43
|
+
|
|
44
|
+
if (samplePageSlots.length === 0) {
|
|
45
|
+
console.error('No page chunk with declared slots; skipping LLM-driven tests.');
|
|
46
|
+
process.exit(2);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const sampleBlocks = blockChunks.slice(0, 5).map((b) => b.name);
|
|
50
|
+
const targetedSlot = samplePageSlots[0];
|
|
51
|
+
|
|
52
|
+
console.log(`Using page: ${samplePage.name} (slots: ${samplePageSlots.join(', ')})`);
|
|
53
|
+
console.log(`Sample blocks: ${sampleBlocks.slice(0, 3).join(', ')}\n`);
|
|
54
|
+
|
|
55
|
+
const priorState = {
|
|
56
|
+
state_id: mintStateId('test-prior', 1),
|
|
57
|
+
intent: 'test prior composition',
|
|
58
|
+
plan: {
|
|
59
|
+
page: samplePage.name,
|
|
60
|
+
slot_bindings: {
|
|
61
|
+
[targetedSlot]: [sampleBlocks[0]],
|
|
62
|
+
...(samplePageSlots[1] ? { [samplePageSlots[1]]: [sampleBlocks[1], sampleBlocks[2]] } : {}),
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
html: '<placeholder/>',
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
console.log('=== validateOps ===');
|
|
69
|
+
|
|
70
|
+
t('rejects non-array ops',
|
|
71
|
+
validateOps('not-array', priorState).ok === false);
|
|
72
|
+
t('rejects priorState without plan',
|
|
73
|
+
validateOps([], { state_id: 'x' }).ok === false);
|
|
74
|
+
t('accepts empty ops list', validateOps([], priorState).ok === true);
|
|
75
|
+
|
|
76
|
+
const okRebind = validateOps(
|
|
77
|
+
[{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }],
|
|
78
|
+
priorState,
|
|
79
|
+
);
|
|
80
|
+
t('accepts rebindSlot with valid chunk', okRebind.ok === true);
|
|
81
|
+
|
|
82
|
+
const okAppend = validateOps(
|
|
83
|
+
[{ type: 'appendToSlot', slot: targetedSlot, chunks: [sampleBlocks[3], sampleBlocks[4]] }],
|
|
84
|
+
priorState,
|
|
85
|
+
);
|
|
86
|
+
t('accepts appendToSlot with valid chunks', okAppend.ok === true);
|
|
87
|
+
|
|
88
|
+
const okRemove = validateOps(
|
|
89
|
+
[{ type: 'removeFromSlot', slot: targetedSlot, indices: [0] }],
|
|
90
|
+
priorState,
|
|
91
|
+
);
|
|
92
|
+
t('accepts removeFromSlot with valid index', okRemove.ok === true);
|
|
93
|
+
|
|
94
|
+
// Negative cases
|
|
95
|
+
const badType = validateOps([{ type: 'frobnicate', slot: targetedSlot }], priorState);
|
|
96
|
+
t('rejects unknown op type',
|
|
97
|
+
badType.ok === false && /unknown op type/.test(badType.errors[0]));
|
|
98
|
+
|
|
99
|
+
const badSlot = validateOps(
|
|
100
|
+
[{ type: 'rebindSlot', slot: 'not-a-real-slot', chunks: [sampleBlocks[0]] }],
|
|
101
|
+
priorState,
|
|
102
|
+
);
|
|
103
|
+
t('rejects unknown slot',
|
|
104
|
+
badSlot.ok === false && badSlot.errors.some((e) => /not in prior plan/.test(e)));
|
|
105
|
+
|
|
106
|
+
const badChunk = validateOps(
|
|
107
|
+
[{ type: 'rebindSlot', slot: targetedSlot, chunks: ['not-a-real-chunk-xyz'] }],
|
|
108
|
+
priorState,
|
|
109
|
+
);
|
|
110
|
+
t('rejects unknown chunk reference',
|
|
111
|
+
badChunk.ok === false && badChunk.errors.some((e) => /not found/.test(e)));
|
|
112
|
+
|
|
113
|
+
const badIndex = validateOps(
|
|
114
|
+
[{ type: 'removeFromSlot', slot: targetedSlot, indices: [99] }],
|
|
115
|
+
priorState,
|
|
116
|
+
);
|
|
117
|
+
t('rejects out-of-range index',
|
|
118
|
+
badIndex.ok === false && badIndex.errors.some((e) => /out of range/.test(e)));
|
|
119
|
+
|
|
120
|
+
const emptyChunks = validateOps(
|
|
121
|
+
[{ type: 'rebindSlot', slot: targetedSlot, chunks: [] }],
|
|
122
|
+
priorState,
|
|
123
|
+
);
|
|
124
|
+
t('rejects empty chunks array',
|
|
125
|
+
emptyChunks.ok === false && emptyChunks.errors.some((e) => /non-empty/.test(e)));
|
|
126
|
+
|
|
127
|
+
const replacePageOk = validateOps(
|
|
128
|
+
[{ type: 'replacePage', page: samplePage.name, slot_bindings: {} }],
|
|
129
|
+
priorState,
|
|
130
|
+
);
|
|
131
|
+
t('accepts replacePage with valid page', replacePageOk.ok === true);
|
|
132
|
+
|
|
133
|
+
const replacePageBad = validateOps(
|
|
134
|
+
[{ type: 'replacePage', page: 'no-such-page' }],
|
|
135
|
+
priorState,
|
|
136
|
+
);
|
|
137
|
+
t('rejects replacePage with unknown page',
|
|
138
|
+
replacePageBad.ok === false && replacePageBad.errors.some((e) => /not found/.test(e)));
|
|
139
|
+
|
|
140
|
+
console.log('\n=== applyOps ===');
|
|
141
|
+
|
|
142
|
+
const applyRebind = await applyOps({
|
|
143
|
+
priorState,
|
|
144
|
+
ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }],
|
|
145
|
+
});
|
|
146
|
+
t('rebindSlot replaces slot bindings',
|
|
147
|
+
applyRebind.newState.plan.slot_bindings[targetedSlot].length === 1 &&
|
|
148
|
+
applyRebind.newState.plan.slot_bindings[targetedSlot][0] === sampleBlocks[3]);
|
|
149
|
+
t('rebindSlot returns op in ops_applied', applyRebind.ops_applied.length === 1);
|
|
150
|
+
t('rebindSlot returns empty ops_failed', applyRebind.ops_failed.length === 0);
|
|
151
|
+
|
|
152
|
+
const applyAppend = await applyOps({
|
|
153
|
+
priorState,
|
|
154
|
+
ops: [{ type: 'appendToSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }],
|
|
155
|
+
});
|
|
156
|
+
t('appendToSlot adds to existing bindings',
|
|
157
|
+
applyAppend.newState.plan.slot_bindings[targetedSlot].length === 2 &&
|
|
158
|
+
applyAppend.newState.plan.slot_bindings[targetedSlot].at(-1) === sampleBlocks[3]);
|
|
159
|
+
|
|
160
|
+
const applyRemove = await applyOps({
|
|
161
|
+
priorState,
|
|
162
|
+
ops: [{ type: 'removeFromSlot', slot: targetedSlot, indices: [0] }],
|
|
163
|
+
});
|
|
164
|
+
t('removeFromSlot drops indexed entry',
|
|
165
|
+
applyRemove.newState.plan.slot_bindings[targetedSlot].length === 0);
|
|
166
|
+
|
|
167
|
+
const applyReplace = await applyOps({
|
|
168
|
+
priorState,
|
|
169
|
+
ops: [{
|
|
170
|
+
type: 'replacePage',
|
|
171
|
+
page: samplePage.name,
|
|
172
|
+
slot_bindings: { [targetedSlot]: [sampleBlocks[4]] },
|
|
173
|
+
}],
|
|
174
|
+
});
|
|
175
|
+
t('replacePage swaps page reference', applyReplace.newState.plan.page === samplePage.name);
|
|
176
|
+
t('replacePage replaces slot_bindings entirely',
|
|
177
|
+
applyReplace.newState.plan.slot_bindings[targetedSlot].length === 1);
|
|
178
|
+
|
|
179
|
+
const applyMixed = await applyOps({
|
|
180
|
+
priorState,
|
|
181
|
+
ops: [
|
|
182
|
+
{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] },
|
|
183
|
+
{ type: 'appendToSlot', slot: targetedSlot, chunks: [sampleBlocks[4]] },
|
|
184
|
+
],
|
|
185
|
+
});
|
|
186
|
+
t('multiple ops apply in sequence',
|
|
187
|
+
applyMixed.newState.plan.slot_bindings[targetedSlot].length === 2 &&
|
|
188
|
+
applyMixed.newState.plan.slot_bindings[targetedSlot][0] === sampleBlocks[3] &&
|
|
189
|
+
applyMixed.newState.plan.slot_bindings[targetedSlot][1] === sampleBlocks[4]);
|
|
190
|
+
|
|
191
|
+
const applyOriginalUntouched = priorState.plan.slot_bindings[targetedSlot];
|
|
192
|
+
t('priorState plan not mutated by applyOps',
|
|
193
|
+
Array.isArray(applyOriginalUntouched) && applyOriginalUntouched[0] === sampleBlocks[0]);
|
|
194
|
+
|
|
195
|
+
const applyMixedFail = await applyOps({
|
|
196
|
+
priorState,
|
|
197
|
+
ops: [
|
|
198
|
+
{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] },
|
|
199
|
+
{ type: 'frobnicate' },
|
|
200
|
+
],
|
|
201
|
+
});
|
|
202
|
+
t('apply continues past failed ops',
|
|
203
|
+
applyMixedFail.ops_applied.length === 1 && applyMixedFail.ops_failed.length === 1);
|
|
204
|
+
|
|
205
|
+
console.log('\n=== opsToA2UI ===');
|
|
206
|
+
|
|
207
|
+
const wireMessages = opsToA2UI(
|
|
208
|
+
[{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }],
|
|
209
|
+
applyRebind.newState,
|
|
210
|
+
);
|
|
211
|
+
t('opsToA2UI emits one updateComponents per op',
|
|
212
|
+
wireMessages.length === 1 && wireMessages[0].type === 'updateComponents');
|
|
213
|
+
t('opsToA2UI components target slot id', wireMessages[0].components[0].id === `slot-${targetedSlot}`);
|
|
214
|
+
t('opsToA2UI carries chunk_op echo', wireMessages[0].components[0].chunk_op?.type === 'rebindSlot');
|
|
215
|
+
t('opsToA2UI sets surfaceId', wireMessages[0].surfaceId === 'main');
|
|
216
|
+
|
|
217
|
+
const wireReplace = opsToA2UI(
|
|
218
|
+
[{ type: 'replacePage', page: samplePage.name, slot_bindings: {} }],
|
|
219
|
+
applyReplace.newState,
|
|
220
|
+
);
|
|
221
|
+
t('opsToA2UI replacePage targets surface id', wireReplace[0].components[0].id === 'main');
|
|
222
|
+
|
|
223
|
+
console.log('\n=== refineFromIntent (stub LLM) ===');
|
|
224
|
+
|
|
225
|
+
/**
 * Build a scripted stand-in for an LLM adapter.
 *
 * Each call to `complete()` consumes the next queued reply, in order. String
 * replies are returned verbatim; anything else is serialized with
 * `JSON.stringify`. The caller's array is copied, so it is never mutated.
 *
 * @param {Iterable<*>} queue - Replies to hand out, first to last.
 * @returns {{ complete: () => Promise<{ content: string }>, remaining: () => number }}
 *   `complete` rejects with `Error('stub LLM: queue empty')` once every reply
 *   has been consumed; `remaining` reports how many replies are still queued.
 */
function makeStubLLM(queue) {
  const pending = [...queue];

  async function complete() {
    if (pending.length === 0) {
      throw new Error('stub LLM: queue empty');
    }
    const next = pending.shift();
    const content = typeof next === 'string' ? next : JSON.stringify(next);
    return { content };
  }

  function remaining() {
    return pending.length;
  }

  return { complete, remaining };
}
|
|
236
|
+
|
|
237
|
+
// Happy path — targeted refinement
|
|
238
|
+
const stubHappy = makeStubLLM([
|
|
239
|
+
{ targeted: true, target_slots: [targetedSlot] },
|
|
240
|
+
{
|
|
241
|
+
ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }],
|
|
242
|
+
delta_summary: `replaced ${targetedSlot} binding`,
|
|
243
|
+
},
|
|
244
|
+
]);
|
|
245
|
+
const refineHappy = await refineFromIntent({
|
|
246
|
+
priorState,
|
|
247
|
+
intent: `change ${targetedSlot} to use ${sampleBlocks[3]}`,
|
|
248
|
+
llmAdapter: stubHappy,
|
|
249
|
+
catalog: [{ name: sampleBlocks[3], kind: 'block', primary: 'div', slots: [] }],
|
|
250
|
+
});
|
|
251
|
+
t('happy-path: returns ops', refineHappy.ops.length === 1);
|
|
252
|
+
t('happy-path: synthesis.targeted=true', refineHappy.synthesis.targeted === true);
|
|
253
|
+
t('happy-path: synthesis.locatedTargets matches', refineHappy.synthesis.locatedTargets[0] === targetedSlot);
|
|
254
|
+
t('happy-path: synthesis.attempts=1', refineHappy.synthesis.attempts === 1);
|
|
255
|
+
t('happy-path: delta_summary set', !!refineHappy.delta_summary);
|
|
256
|
+
t('happy-path: empty warnings', refineHappy.warnings.length === 0);
|
|
257
|
+
t('happy-path: stub LLM exhausted', stubHappy.remaining() === 0);
|
|
258
|
+
|
|
259
|
+
// Validator-driven retry — first response invalid, second valid
|
|
260
|
+
const stubRetry = makeStubLLM([
|
|
261
|
+
{ targeted: true, target_slots: [targetedSlot] },
|
|
262
|
+
{ ops: [{ type: 'rebindSlot', slot: 'not-real', chunks: [sampleBlocks[3]] }], delta_summary: 'bad' },
|
|
263
|
+
{ ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }], delta_summary: 'fixed' },
|
|
264
|
+
]);
|
|
265
|
+
const refineRetry = await refineFromIntent({
|
|
266
|
+
priorState,
|
|
267
|
+
intent: `change ${targetedSlot}`,
|
|
268
|
+
llmAdapter: stubRetry,
|
|
269
|
+
maxAttempts: 2,
|
|
270
|
+
catalog: [{ name: sampleBlocks[3], kind: 'block', primary: 'div', slots: [] }],
|
|
271
|
+
});
|
|
272
|
+
t('retry: succeeds on second attempt', refineRetry.ops.length === 1);
|
|
273
|
+
t('retry: synthesis.attempts=2', refineRetry.synthesis.attempts === 2);
|
|
274
|
+
t('retry: attemptsLog has both attempts', refineRetry.synthesis.attemptsLog.length === 2);
|
|
275
|
+
|
|
276
|
+
// Validator-exhausted — both attempts invalid.
// The first queued reply is the targeting response; the two that follow are
// synthesis replies whose ops name slots ('not-real-1', 'not-real-2') that the
// prior plan does not bind, so each attempt is expected to be rejected.
const accExhausted = createIssueAccumulator();
const stubExhaust = makeStubLLM([
  { targeted: true, target_slots: [targetedSlot] },
  { ops: [{ type: 'rebindSlot', slot: 'not-real-1', chunks: [sampleBlocks[3]] }], delta_summary: 'bad1' },
  { ops: [{ type: 'rebindSlot', slot: 'not-real-2', chunks: [sampleBlocks[3]] }], delta_summary: 'bad2' },
]);
const refineExhaust = await refineFromIntent({
  priorState,
  intent: `change ${targetedSlot}`,
  llmAdapter: stubExhaust,
  maxAttempts: 2,
  issueAccumulator: accExhausted,
  catalog: [{ name: sampleBlocks[3], kind: 'block', primary: 'div', slots: [] }],
});
t('exhausted: returns empty ops', refineExhaust.ops.length === 0);
// Optional chaining: if no warning was produced, record a failed check rather
// than throwing a TypeError that would abort the script before the summary.
t('exhausted: warnings include "failed after 2 attempts"',
  refineExhaust.warnings?.[0]?.includes('failed after 2 attempts') === true);
t('exhausted: auto-fires "validator-exhausted"',
  accExhausted.reasons().includes('validator-exhausted'));
|
|
296
|
+
|
|
297
|
+
// locator-empty-targets: targeted=true but no slots returned
|
|
298
|
+
const accEmpty = createIssueAccumulator();
|
|
299
|
+
const stubEmpty = makeStubLLM([
|
|
300
|
+
{ targeted: true, target_slots: [] },
|
|
301
|
+
{ ops: [], delta_summary: 'no-op' },
|
|
302
|
+
]);
|
|
303
|
+
await refineFromIntent({
|
|
304
|
+
priorState,
|
|
305
|
+
intent: 'change something specific',
|
|
306
|
+
llmAdapter: stubEmpty,
|
|
307
|
+
issueAccumulator: accEmpty,
|
|
308
|
+
catalog: [],
|
|
309
|
+
});
|
|
310
|
+
t('locator-empty: auto-fires "locator-empty-targets"',
|
|
311
|
+
accEmpty.reasons().includes('locator-empty-targets'));
|
|
312
|
+
|
|
313
|
+
// Untargeted intent — no auto-fire on locator-empty
|
|
314
|
+
const accUntargeted = createIssueAccumulator();
|
|
315
|
+
const stubUntargeted = makeStubLLM([
|
|
316
|
+
{ targeted: false, target_slots: [] },
|
|
317
|
+
{ ops: [], delta_summary: 'no-op' },
|
|
318
|
+
]);
|
|
319
|
+
await refineFromIntent({
|
|
320
|
+
priorState,
|
|
321
|
+
intent: 'preserve everything',
|
|
322
|
+
llmAdapter: stubUntargeted,
|
|
323
|
+
issueAccumulator: accUntargeted,
|
|
324
|
+
catalog: [],
|
|
325
|
+
});
|
|
326
|
+
t('untargeted: no auto-fire on locator-empty',
|
|
327
|
+
!accUntargeted.reasons().includes('locator-empty-targets'));
|
|
328
|
+
|
|
329
|
+
// Missing llmAdapter — graceful fail: the call is expected to resolve with no
// ops and a warning rather than throw.
const refineNoLLM = await refineFromIntent({
  priorState,
  intent: 'anything',
  llmAdapter: null,
});
t('no llmAdapter: returns empty ops', refineNoLLM.ops.length === 0);
// Optional chaining: a missing warning is recorded as a failed check instead of
// throwing a TypeError that would abort the script before the summary.
t('no llmAdapter: warnings include "no llmAdapter"',
  refineNoLLM.warnings?.[0]?.includes('no llmAdapter') === true);

// Missing priorState plan — graceful fail: same expectation when the prior
// state carries only a state_id and no plan.
const refineNoPlan = await refineFromIntent({
  priorState: { state_id: 'x' },
  intent: 'anything',
  llmAdapter: makeStubLLM([{ targeted: false, target_slots: [] }]),
});
t('no priorState.plan: returns empty ops', refineNoPlan.ops.length === 0);
t('no priorState.plan: warnings flag the issue',
  refineNoPlan.warnings?.[0]?.includes('no plan') === true);
|
|
348
|
+
|
|
349
|
+
console.log('\n=== integration: refine → apply → wire ===');
|
|
350
|
+
|
|
351
|
+
const stubE2E = makeStubLLM([
|
|
352
|
+
{ targeted: true, target_slots: [targetedSlot] },
|
|
353
|
+
{
|
|
354
|
+
ops: [{ type: 'appendToSlot', slot: targetedSlot, chunks: [sampleBlocks[3]] }],
|
|
355
|
+
delta_summary: `appended ${sampleBlocks[3]} to ${targetedSlot}`,
|
|
356
|
+
},
|
|
357
|
+
]);
|
|
358
|
+
const refineE2E = await refineFromIntent({
|
|
359
|
+
priorState,
|
|
360
|
+
intent: `add ${sampleBlocks[3]} to ${targetedSlot}`,
|
|
361
|
+
llmAdapter: stubE2E,
|
|
362
|
+
catalog: [{ name: sampleBlocks[3], kind: 'block', primary: 'div', slots: [] }],
|
|
363
|
+
});
|
|
364
|
+
const applied = await applyOps({ priorState, ops: refineE2E.ops });
|
|
365
|
+
const a2uiMsgs = opsToA2UI(refineE2E.ops, applied.newState);
|
|
366
|
+
|
|
367
|
+
t('e2e: refine produces 1 op', refineE2E.ops.length === 1);
|
|
368
|
+
t('e2e: apply succeeds', applied.ops_applied.length === 1 && applied.ops_failed.length === 0);
|
|
369
|
+
t('e2e: a2ui message has updateComponents type', a2uiMsgs[0].type === 'updateComponents');
|
|
370
|
+
t('e2e: applied newState has materialized HTML',
|
|
371
|
+
applied.newState.html !== null && applied.newState.html.length > 0);
|
|
372
|
+
|
|
373
|
+
console.log(`\n${pass} passed, ${fail} failed`);
|
|
374
|
+
process.exit(fail ? 1 : 0);
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Smoke test: state-cache (LRU + state-id minting).
|
|
3
|
+
// Spec: docs/specs/genui-multiturn-architecture.md §2.2 + §4.4.
|
|
4
|
+
import {
|
|
5
|
+
StateCache,
|
|
6
|
+
mintStateId,
|
|
7
|
+
mintNextStateId,
|
|
8
|
+
getStateCache,
|
|
9
|
+
resetStateCache,
|
|
10
|
+
} from '../../compose/engines/zettel/state-cache.js';
|
|
11
|
+
|
|
12
|
+
// Counters for passed / failed checks; both feed the closing summary line and
// the process exit code.
let pass = 0;
let fail = 0;

/**
 * Print one result line for a check and bump the matching counter.
 *
 * @param {string} label - Name of the check shown in the output.
 * @param {*} ok - Truthy for a passing check, falsy for a failing one.
 * @param {string} [detail=''] - Additional text printed only when the check fails.
 */
const t = (label, ok, detail = '') => {
  if (!ok) {
    console.log(` ✗ ${label} ${detail}`);
    fail += 1;
    return;
  }
  console.log(` ✓ ${label}`);
  pass += 1;
};
|
|
17
|
+
|
|
18
|
+
console.log('=== State-cache LRU ===');
|
|
19
|
+
|
|
20
|
+
const cache = new StateCache({ maxSize: 3 });
|
|
21
|
+
t('empty cache size 0', cache.size() === 0);
|
|
22
|
+
t('miss returns null', cache.get('nope') === null);
|
|
23
|
+
t('has returns false on miss', cache.has('nope') === false);
|
|
24
|
+
|
|
25
|
+
cache.set('a', { intent: 'a', html: '<a/>' });
|
|
26
|
+
cache.set('b', { intent: 'b', html: '<b/>' });
|
|
27
|
+
cache.set('c', { intent: 'c', html: '<c/>' });
|
|
28
|
+
t('size at capacity', cache.size() === 3);
|
|
29
|
+
t('list returns insertion order', JSON.stringify(cache.list()) === JSON.stringify(['a', 'b', 'c']));
|
|
30
|
+
t('has returns true on hit', cache.has('a') && cache.has('b') && cache.has('c'));
|
|
31
|
+
|
|
32
|
+
// Eviction: 4th set drops oldest
|
|
33
|
+
cache.set('d', { intent: 'd', html: '<d/>' });
|
|
34
|
+
t('LRU evicts oldest on overflow', cache.size() === 3 && !cache.has('a') && cache.has('d'));
|
|
35
|
+
|
|
36
|
+
// Recency touch via get: bump 'b' to most-recent, then set 'e'.
|
|
37
|
+
// Order before: [b, c, d]. After get('b'): [c, d, b]. After set('e'): evict c → [d, b, e].
|
|
38
|
+
cache.get('b');
|
|
39
|
+
cache.set('e', { intent: 'e', html: '<e/>' });
|
|
40
|
+
t('get touches recency', cache.has('b') && cache.has('e') && !cache.has('c'));
|
|
41
|
+
|
|
42
|
+
// peek does NOT touch recency.
|
|
43
|
+
// Order: [d, b, e]. peek('b') → unchanged [d, b, e]. set('f') → evict d → [b, e, f].
|
|
44
|
+
const peeked = cache.peek('b');
|
|
45
|
+
t('peek returns entry', peeked?.intent === 'b');
|
|
46
|
+
cache.set('f', { intent: 'f', html: '<f/>' });
|
|
47
|
+
t('peek does not touch recency', cache.has('b') && cache.has('e') && cache.has('f') && !cache.has('d'));
|
|
48
|
+
|
|
49
|
+
// Update existing key bumps recency.
|
|
50
|
+
// Order before: [b, e, f]. set('b', ...) → delete-then-insert → [e, f, b].
|
|
51
|
+
cache.set('b', { intent: 'b-updated', html: '<b2/>' });
|
|
52
|
+
t('update existing key bumps recency', cache.get('b').intent === 'b-updated');
|
|
53
|
+
t('update preserves cache size', cache.size() === 3);
|
|
54
|
+
|
|
55
|
+
// Evict
|
|
56
|
+
t('evict returns true on hit', cache.evict('e') === true);
|
|
57
|
+
t('evict returns false on miss', cache.evict('e') === false);
|
|
58
|
+
t('evict drops entry', !cache.has('e') && cache.size() === 2);
|
|
59
|
+
|
|
60
|
+
// Clear
|
|
61
|
+
cache.clear();
|
|
62
|
+
t('clear empties cache', cache.size() === 0 && cache.list().length === 0);
|
|
63
|
+
|
|
64
|
+
console.log('\n=== State-id minting ===');
|
|
65
|
+
|
|
66
|
+
const id1 = mintStateId('admin dashboard with KPIs', 1);
|
|
67
|
+
t('id format: prefix-rand4-vN-unixmin', /^admin-[a-f0-9]{4}-v1-\d+$/.test(id1));
|
|
68
|
+
|
|
69
|
+
const id2 = mintStateId('build me a pricing page');
|
|
70
|
+
t('id uses first word as prefix', id2.startsWith('build-'));
|
|
71
|
+
|
|
72
|
+
t('id with empty intent uses fallback', /^state-[a-f0-9]{4}-v1-\d+$/.test(mintStateId('')));
|
|
73
|
+
t('id with whitespace-only intent uses fallback', /^state-[a-f0-9]{4}-v1-\d+$/.test(mintStateId(' ')));
|
|
74
|
+
t('id strips non-alphanumeric chars', /^foo-/.test(mintStateId('foo!@#$ bar')));
|
|
75
|
+
|
|
76
|
+
// Different mints → different ids in the vast majority of cases.
|
|
77
|
+
const ids = new Set();
|
|
78
|
+
for (let i = 0; i < 50; i++) ids.add(mintStateId('dash', 1));
|
|
79
|
+
t('50 mints produce mostly distinct rand4 values', ids.size >= 45);
|
|
80
|
+
|
|
81
|
+
// Next-id chain
|
|
82
|
+
const root = mintStateId('dash', 1);
|
|
83
|
+
const child = mintNextStateId(root, 2);
|
|
84
|
+
const [pref5, rand5] = root.split('-');
|
|
85
|
+
const [pref6, rand6, v6] = child.split('-');
|
|
86
|
+
t('next-id preserves prefix', pref5 === pref6);
|
|
87
|
+
t('next-id preserves rand4', rand5 === rand6);
|
|
88
|
+
t('next-id bumps version', v6 === 'v2');
|
|
89
|
+
|
|
90
|
+
const fallback = mintNextStateId('not-a-valid-id', 2);
|
|
91
|
+
t('next-id falls back on bad parent', /^[a-z]+-[a-f0-9]{4}-v2-\d+$/.test(fallback));
|
|
92
|
+
|
|
93
|
+
const fallbackEmpty = mintNextStateId(undefined, 3);
|
|
94
|
+
t('next-id falls back on undefined parent', /^state-[a-f0-9]{4}-v3-\d+$/.test(fallbackEmpty));
|
|
95
|
+
|
|
96
|
+
console.log('\n=== Singleton ===');
|
|
97
|
+
|
|
98
|
+
resetStateCache({ maxSize: 5 });
|
|
99
|
+
const c1 = getStateCache();
|
|
100
|
+
const c2 = getStateCache();
|
|
101
|
+
t('singleton returns same instance', c1 === c2);
|
|
102
|
+
c1.set('x', { intent: 'x' });
|
|
103
|
+
t('singleton state shared', c2.has('x'));
|
|
104
|
+
|
|
105
|
+
const c3 = resetStateCache();
|
|
106
|
+
t('reset replaces singleton', c3 !== c1 && c3.size() === 0);
|
|
107
|
+
|
|
108
|
+
console.log('\n=== Env-driven sizing ===');
|
|
109
|
+
|
|
110
|
+
process.env.A2UI_STATE_CACHE_SIZE = '7';
|
|
111
|
+
const sized = resetStateCache();
|
|
112
|
+
t('env-driven maxSize', sized.maxSize === 7);
|
|
113
|
+
|
|
114
|
+
delete process.env.A2UI_STATE_CACHE_SIZE;
|
|
115
|
+
const defaulted = resetStateCache();
|
|
116
|
+
t('default maxSize 64', defaulted.maxSize === 64);
|
|
117
|
+
|
|
118
|
+
process.env.A2UI_STATE_CACHE_SIZE = 'not-a-number';
|
|
119
|
+
const garbage = resetStateCache();
|
|
120
|
+
t('garbage env value falls back to default', garbage.maxSize === 64);
|
|
121
|
+
delete process.env.A2UI_STATE_CACHE_SIZE;
|
|
122
|
+
|
|
123
|
+
// Constructor opts override env
|
|
124
|
+
process.env.A2UI_STATE_CACHE_SIZE = '7';
|
|
125
|
+
const explicit = resetStateCache({ maxSize: 12 });
|
|
126
|
+
t('constructor maxSize wins over env', explicit.maxSize === 12);
|
|
127
|
+
delete process.env.A2UI_STATE_CACHE_SIZE;
|
|
128
|
+
|
|
129
|
+
console.log(`\n${pass} passed, ${fail} failed`);
|
|
130
|
+
process.exit(fail ? 1 : 0);
|
package/scripts/test-a2ui.mjs
CHANGED
|
@@ -264,6 +264,109 @@ try {
|
|
|
264
264
|
bad('Chunk corpus', e.message);
|
|
265
265
|
}
|
|
266
266
|
|
|
267
|
+
// ── Test 7: Multi-turn refinement modules (Phase A) ─────────────────
|
|
268
|
+
// Spec: docs/specs/genui-multiturn-architecture.md.
|
|
269
|
+
|
|
270
|
+
console.log('\n7. Multi-turn refinement (Phase A)');
|
|
271
|
+
|
|
272
|
+
try {
|
|
273
|
+
const { StateCache, mintStateId, mintNextStateId } = await import('../../compose/engines/zettel/state-cache.js');
|
|
274
|
+
const { createIssueAccumulator } = await import('../../compose/engines/zettel/issue-reporter.js');
|
|
275
|
+
const { refineFromIntent, applyOps, opsToA2UI, validateOps } = await import('../../compose/engines/zettel/chunk-refiner.js');
|
|
276
|
+
const { listChunksByKind } = await import('../../corpus/scripts/chunk-library.js');
|
|
277
|
+
|
|
278
|
+
// 7a. State cache + state-id chain
|
|
279
|
+
const cache = new StateCache({ maxSize: 8 });
|
|
280
|
+
const root = mintStateId('admin dashboard', 1);
|
|
281
|
+
cache.set(root, { state_id: root, intent: 'admin dashboard', plan: { page: 'p', slot_bindings: {} }, ops_history: [], version: 1 });
|
|
282
|
+
const child = mintNextStateId(root, 2);
|
|
283
|
+
if (child.startsWith(root.split('-').slice(0, 2).join('-')) && child.includes('v2')) {
|
|
284
|
+
ok('State-id chain preserves stem + bumps version');
|
|
285
|
+
} else {
|
|
286
|
+
bad('State-id chain', `child=${child} root=${root}`);
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
// 7b. validateOps catches unknown chunks
|
|
290
|
+
const pages = listChunksByKind('page');
|
|
291
|
+
const blocks = listChunksByKind('block');
|
|
292
|
+
if (pages.length === 0 || blocks.length < 3) {
|
|
293
|
+
bad('Refiner test setup', 'corpus missing chunks');
|
|
294
|
+
} else {
|
|
295
|
+
const samplePage = pages.find((p) => (p.slots || p.instances?.[0]?.slots || []).length >= 1) || pages[0];
|
|
296
|
+
const samplePageSlots = (samplePage.slots || samplePage.instances?.[0]?.slots || []).map((s) => s.name);
|
|
297
|
+
const targetedSlot = samplePageSlots[0];
|
|
298
|
+
const priorState = {
|
|
299
|
+
state_id: root,
|
|
300
|
+
intent: 'admin dashboard',
|
|
301
|
+
plan: { page: samplePage.name, slot_bindings: { [targetedSlot]: [blocks[0].name] } },
|
|
302
|
+
html: '<placeholder/>',
|
|
303
|
+
version: 1,
|
|
304
|
+
};
|
|
305
|
+
|
|
306
|
+
const badOps = validateOps(
|
|
307
|
+
[{ type: 'rebindSlot', slot: targetedSlot, chunks: ['no-such-chunk-zzz'] }],
|
|
308
|
+
priorState,
|
|
309
|
+
);
|
|
310
|
+
if (!badOps.ok && badOps.errors.some((e) => /not found/.test(e))) {
|
|
311
|
+
ok('validateOps rejects unknown chunk');
|
|
312
|
+
} else {
|
|
313
|
+
bad('validateOps unknown chunk', JSON.stringify(badOps));
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
// 7c. applyOps produces materialized HTML
|
|
317
|
+
const applied = await applyOps({
|
|
318
|
+
priorState,
|
|
319
|
+
ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [blocks[1].name] }],
|
|
320
|
+
});
|
|
321
|
+
if (applied.ops_applied.length === 1 && applied.newState.html) {
|
|
322
|
+
ok('applyOps materializes new HTML');
|
|
323
|
+
} else {
|
|
324
|
+
bad('applyOps materialize', `applied=${applied.ops_applied.length} html=${!!applied.newState.html}`);
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
// 7d. Two-pass refineFromIntent with stub LLM
|
|
328
|
+
const stubResponses = [
|
|
329
|
+
{ targeted: true, target_slots: [targetedSlot] },
|
|
330
|
+
{ ops: [{ type: 'rebindSlot', slot: targetedSlot, chunks: [blocks[1].name] }], delta_summary: 'test' },
|
|
331
|
+
];
|
|
332
|
+
let qIdx = 0;
|
|
333
|
+
const stubLLM = {
|
|
334
|
+
complete: async () => ({ content: JSON.stringify(stubResponses[qIdx++]) }),
|
|
335
|
+
};
|
|
336
|
+
const refined = await refineFromIntent({
|
|
337
|
+
priorState,
|
|
338
|
+
intent: `change ${targetedSlot}`,
|
|
339
|
+
llmAdapter: stubLLM,
|
|
340
|
+
catalog: [{ name: blocks[1].name, kind: 'block', primary: 'div', slots: [] }],
|
|
341
|
+
});
|
|
342
|
+
if (refined.ops.length === 1 && refined.synthesis.targeted) {
|
|
343
|
+
ok('refineFromIntent two-pass synthesis');
|
|
344
|
+
} else {
|
|
345
|
+
bad('refineFromIntent', `ops=${refined.ops.length} targeted=${refined.synthesis.targeted}`);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// 7e. opsToA2UI emits updateComponents wire-format
|
|
349
|
+
const wire = opsToA2UI(refined.ops, applied.newState);
|
|
350
|
+
if (wire.length === 1 && wire[0].type === 'updateComponents' && wire[0].surfaceId === 'main') {
|
|
351
|
+
ok('opsToA2UI wraps as updateComponents');
|
|
352
|
+
} else {
|
|
353
|
+
bad('opsToA2UI', JSON.stringify(wire[0]));
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// 7f. IssueAccumulator coalesces via reasons()
|
|
357
|
+
const acc = createIssueAccumulator();
|
|
358
|
+
acc.add('locator-empty-targets', { intent: 't' });
|
|
359
|
+
acc.add('validator-exhausted', { tool: 't' });
|
|
360
|
+
if (acc.size() === 2 && acc.reasons().length === 2) {
|
|
361
|
+
ok('IssueAccumulator collects multiple auto-fires');
|
|
362
|
+
} else {
|
|
363
|
+
bad('IssueAccumulator', `size=${acc.size()}`);
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
} catch (e) {
|
|
367
|
+
bad('Multi-turn modules', e.message);
|
|
368
|
+
}
|
|
369
|
+
|
|
267
370
|
// ── Summary ─────────────────────────────────────────────────────────
|
|
268
371
|
|
|
269
372
|
console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|