@adia-ai/a2ui-compose 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +86 -0
- package/README.md +181 -0
- package/engine/artifacts.js +262 -0
- package/engine/constitution.md +78 -0
- package/engine/context-store.js +218 -0
- package/engine/generator.js +500 -0
- package/engine/pattern-export.js +149 -0
- package/engine/pipeline/engine.js +289 -0
- package/engine/pipeline/types.js +91 -0
- package/engine/reference.js +115 -0
- package/engine/state.js +15 -0
- package/engines/monolithic/_shared.js +1320 -0
- package/engines/monolithic/generate-instant.js +229 -0
- package/engines/monolithic/generate-pro.js +367 -0
- package/engines/monolithic/generate-thinking.js +211 -0
- package/engines/registry.js +195 -0
- package/engines/zettel/_smoke.js +37 -0
- package/engines/zettel/composer.js +146 -0
- package/engines/zettel/fragment-library.js +209 -0
- package/engines/zettel/generate.js +15 -0
- package/engines/zettel/generator-adapter.js +202 -0
- package/engines/zettel/session-store.js +121 -0
- package/engines/zettel/synthesizer.js +343 -0
- package/evals/harness.mjs +193 -0
- package/index.js +16 -0
- package/llm/adapters/anthropic.js +106 -0
- package/llm/adapters/gemini.js +99 -0
- package/llm/adapters/index.js +138 -0
- package/llm/adapters/openai.js +85 -0
- package/llm/adapters/sse.js +50 -0
- package/llm/llm-bridge.js +214 -0
- package/llm/llm-stub.js +69 -0
- package/package.json +41 -0
- package/transpiler/transpiler-maps.js +277 -0
- package/transpiler/transpiler.js +820 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-powered composition synthesis for zettel.
|
|
3
|
+
*
|
|
4
|
+
* When pure retrieval fails (no composition match or weak top score), ask an
|
|
5
|
+
* LLM to assemble a composition from the fragment catalog. The LLM gets:
|
|
6
|
+
* - the intent
|
|
7
|
+
* - the fragment catalog (name, semantic_role, shape, slots, description)
|
|
8
|
+
* - 2–3 existing compositions as in-context examples
|
|
9
|
+
*
|
|
10
|
+
* It must return JSON matching the composition schema — a template array where
|
|
11
|
+
* nodes either declare a component inline OR reference a fragment via $fragment
|
|
12
|
+
* with bindings. We validate the output against slots (required bindings present,
|
|
13
|
+
* fragment names exist) before feeding it to the composer.
|
|
14
|
+
*
|
|
15
|
+
* This is the reasoning layer on top of pure retrieval. It turns fragments into
|
|
16
|
+
* the LLM's typed vocabulary — smaller, more structured than a 97-pattern corpus.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { getAllFragments, getAllCompositions } from './fragment-library.js';
|
|
20
|
+
import { resolveComposition, templateToMessages } from './composer.js';
|
|
21
|
+
|
|
22
|
+
/**
 * Summarize every fragment for the LLM prompt: identity, semantic role,
 * shape, slot contract, description, AND the internal template so the LLM
 * can inline a fragment when it needs to edit interior nodes (technique B
 * in the prompt).
 *
 * @returns {Array<object>} one catalog entry per fragment
 */
function buildFragmentCatalog() {
  // Slots are reduced to their public contract — name, target attribute,
  // whether they are mandatory, and any default.
  const summarizeSlot = (slot) => ({
    name: slot.name,
    attribute: slot.attribute,
    required: Boolean(slot.required),
    defaultValue: slot.defaultValue,
    description: slot.description,
  });

  return getAllFragments().map((fragment) => ({
    name: fragment.name,
    semantic_role: fragment.semantic_role,
    shape: fragment.shape,
    description: fragment.description,
    keywords: fragment.keywords || [],
    slots: (fragment.slots || []).map(summarizeSlot),
    template: fragment.template,
  }));
}
|
|
45
|
+
|
|
46
|
+
/**
 * Select up to `count` in-context example compositions, keeping only the
 * first composition seen per domain so the LLM sees the composition shape
 * across a variety of intents.
 *
 * @param {number} [count=3] maximum number of examples to return
 * @returns {Array<{name, domain, description, template}>}
 */
function buildExamples(count = 3) {
  const firstPerDomain = new Map();
  for (const composition of getAllCompositions()) {
    if (!firstPerDomain.has(composition.domain)) {
      firstPerDomain.set(composition.domain, composition);
    }
    if (firstPerDomain.size >= count) break;
  }
  return Array.from(firstPerDomain.values(), (composition) => ({
    name: composition.name,
    domain: composition.domain,
    description: composition.description,
    template: composition.template,
  }));
}
|
|
64
|
+
|
|
65
|
+
const SYSTEM_PROMPT = `You are a UI composer. Given a user intent, you assemble a UI composition from a catalog of reusable fragments.
|
|
66
|
+
|
|
67
|
+
⚠️ ABSOLUTE OUTPUT CONTRACT ⚠️
|
|
68
|
+
Your ENTIRE response must be a single JSON object. No prose before or after. No clarifying
|
|
69
|
+
questions. No "I'll add..." narration. No markdown fences. Just JSON.
|
|
70
|
+
If a user request is ambiguous, make a best-guess decision and emit the JSON. Never ask.
|
|
71
|
+
|
|
72
|
+
A composition is a flat-adjacency array of A2UI nodes. Each node either:
|
|
73
|
+
(a) declares a component inline:
|
|
74
|
+
{ "id": "foo", "component": "Column", "children": ["a","b"], "gap": "3" }
|
|
75
|
+
(b) references a fragment by name with slot bindings:
|
|
76
|
+
{ "id": "foo", "$fragment": "labeled-input",
|
|
77
|
+
"bindings": { "label": "Email", "name": "email", "type": "email" } }
|
|
78
|
+
|
|
79
|
+
Rules:
|
|
80
|
+
1. Exactly one root node with id "root". Other nodes reference it via children arrays.
|
|
81
|
+
2. Every $fragment ref must name a fragment that exists in the catalog. Never invent fragment names.
|
|
82
|
+
3. Every required slot on a referenced fragment MUST appear in bindings.
|
|
83
|
+
4. Prefer fragments over inline nodes when an atom fits — they carry semantics and reuse.
|
|
84
|
+
5. Use inline nodes (Column/Row/Card/Section/Text/Button/etc.) for layout, glue, and gaps between fragments.
|
|
85
|
+
6. Keep compositions compact — 6 to 15 nodes is typical. Don't over-engineer.
|
|
86
|
+
|
|
87
|
+
EXTENDING FRAGMENTS — four techniques when a fragment almost fits but needs more.
|
|
88
|
+
STRONGLY PREFER technique (A) for "add X above/below the heading" requests — those
|
|
89
|
+
are structurally OUTSIDE the header slot grid, not inside it.
|
|
90
|
+
|
|
91
|
+
A) Place content ABOVE/BELOW a fragment (outside it — MOST ROBUST).
|
|
92
|
+
When the user says "add a logo above the heading" in a form, place a new node
|
|
93
|
+
BEFORE the header fragment at the composition root:
|
|
94
|
+
{ "id": "root", "component": "Card", "children": ["brand", "hdr", "sec", "ftr"] }
|
|
95
|
+
{ "id": "brand", "component": "Row", "children": ["logo"], "justify": "center" }
|
|
96
|
+
{ "id": "logo", "component": "Image", "src": "/logo.svg", "alt": "Brand" }
|
|
97
|
+
{ "id": "hdr", "$fragment": "card-header-with-description", "bindings": { … } }
|
|
98
|
+
⚠️ BUT: Card direct children are restricted to <header>, <section>, <footer>.
|
|
99
|
+
For logos above the header, prefer technique B (inline the header) OR slot
|
|
100
|
+
the logo into the header with slot="icon" via technique C.
|
|
101
|
+
|
|
102
|
+
B) Inject extra children INTO a fragment's root WITH a slot declaration.
|
|
103
|
+
A $fragment node can declare prependChildren or appendChildren (arrays of node
|
|
104
|
+
ids). Injected children should declare slot="…" when the host uses slot-grid
|
|
105
|
+
layout. Example — add a logo to a card-header:
|
|
106
|
+
{ "id": "hdr", "$fragment": "card-header-with-description",
|
|
107
|
+
"bindings": { "heading": "Sign in", "description": "..." },
|
|
108
|
+
"prependChildren": ["logo"] }
|
|
109
|
+
{ "id": "logo", "component": "Image", "src": "/logo.svg", "alt": "Brand",
|
|
110
|
+
"slot": "icon" }
|
|
111
|
+
Common host slots:
|
|
112
|
+
- Card <header>: icon | heading | description | action
|
|
113
|
+
- Card <section>: (default)
|
|
114
|
+
- Card <footer>: (default)
|
|
115
|
+
Without a slot attribute, children flow to the default slot and may not lay
|
|
116
|
+
out as expected on slot-grid hosts.
|
|
117
|
+
|
|
118
|
+
C) Inline the fragment (when you need to edit INTERIOR nodes or restructure the
|
|
119
|
+
layout). Copy the fragment's own template nodes into your composition with
|
|
120
|
+
fresh ids. Bindings and slot indirection go away. Each fragment in the catalog
|
|
121
|
+
below shows its internal template so you know what to copy. Inlined nodes lose
|
|
122
|
+
reuse; use only when (A) and (B) can't solve the problem.
|
|
123
|
+
|
|
124
|
+
D) Compose fragments side by side (two fragments together make the shape).
|
|
125
|
+
Place multiple $fragment refs as siblings inside a layout node.
|
|
126
|
+
|
|
127
|
+
When HISTORY is provided, the user is building on PRIOR turns. You MUST:
|
|
128
|
+
- Treat the latest turn's template as the starting point.
|
|
129
|
+
- Apply the user's new instruction as a MODIFICATION to that template (add, remove, replace, or edit bindings on existing nodes).
|
|
130
|
+
- Preserve node ids from the prior template wherever possible so the canvas edits look incremental, not regenerated from scratch.
|
|
131
|
+
- For additive requests inside a fragment ("add X"), prefer technique (B) WITH a slot declaration matching the host's slot vocabulary. If the host has no matching slot, fall back to (C) inlining.
|
|
132
|
+
- For structural/interior edits, use technique (C) inlining.
|
|
133
|
+
- Only generate a fully fresh template if the new intent is clearly a topic change.
|
|
134
|
+
|
|
135
|
+
Return ONLY a JSON object: { "template": [...nodes] }. No prose, no markdown fences.`;
|
|
136
|
+
|
|
137
|
+
/**
 * Assemble the user-turn prompt: the intent, an optional HISTORY block, the
 * fragment catalog (one entry per fragment with its slot contract and a
 * compact template skeleton), and the in-context example compositions.
 *
 * @param {string} intent user intent
 * @param {Array<object>} fragmentCatalog output of buildFragmentCatalog()
 * @param {Array<object>} examples output of buildExamples()
 * @param {string|null} [historySummary] prior-turn context, if any
 * @returns {string} the full user prompt
 */
function buildUserPrompt(intent, fragmentCatalog, examples, historySummary = null) {
  const catalogSection = fragmentCatalog
    .map((fragment) => {
      // Required slots are starred so the model can see the binding contract.
      const slotNames = fragment.slots.map((slot) => `${slot.name}${slot.required ? '*' : ''}`);
      const slotStr = slotNames.length > 0 ? slotNames.join(', ') : '(no slots)';
      // Compact template skeleton for technique (B) — ids + component types only
      const skeleton = (fragment.template || []).map((node) => {
        const entry = { id: node.id, component: node.component };
        if (node.children) entry.children = node.children;
        return entry;
      });
      return ` - ${fragment.name} [${fragment.semantic_role}] — ${fragment.description}
slots: ${slotStr}
template: ${JSON.stringify(skeleton)}`;
    })
    .join('\n');

  const exampleSection = examples
    .map((example) => `--- example: ${example.name} (${example.domain}) ---\n${example.description}\n${JSON.stringify({ template: example.template }, null, 2)}`)
    .join('\n\n');

  // A history block steers the model toward incremental edits of the prior turn.
  const historyBlock = historySummary
    ? `\nHISTORY (most recent turn last — build on this):\n${historySummary}\n\n`
    : '\n';

  return `INTENT: ${intent}
${historyBlock}FRAGMENT CATALOG:
${catalogSection}

EXAMPLES:
${exampleSection}

Compose a UI for the intent. If HISTORY is present, build on the latest turn's template. Return { "template": [...] } only.`;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Pull the FIRST top-level JSON object out of a possibly-messy LLM reply.
 * Tolerates markdown code fences and prose before/after the object.
 *
 * @param {string} text raw LLM output
 * @returns {object|null} the parsed object, or null when none is found
 */
function extractJSON(text) {
  if (!text) return null;

  // Unwrap a ```json … ``` (or bare ```) fence if one is present.
  let body = text.trim();
  const fenced = body.match(/^```(?:json)?\s*([\s\S]*?)```$/m);
  if (fenced) body = fenced[1].trim();

  const start = body.indexOf('{');
  if (start === -1) return null;

  // Walk the braces, skipping string contents and escape sequences, until the
  // opening brace closes; then hand the exact slice to JSON.parse.
  let depth = 0;
  let insideString = false;
  let escaped = false;
  let i = start;
  while (i < body.length) {
    const ch = body[i];
    if (escaped) {
      escaped = false;
    } else if (ch === '\\') {
      escaped = true;
    } else if (ch === '"') {
      insideString = !insideString;
    } else if (!insideString) {
      if (ch === '{') {
        depth += 1;
      } else if (ch === '}') {
        depth -= 1;
        if (depth === 0) {
          try {
            return JSON.parse(body.slice(start, i + 1));
          } catch {
            return null;
          }
        }
      }
    }
    i += 1;
  }
  return null;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Sanity-check a synthesized template before trying to resolve it: root node
 * presence, node ids, fragment existence, required slot bindings, and
 * child-reference integrity.
 *
 * @param {Array} template flat-adjacency node list produced by the LLM
 * @param {Map<string, object>} fragmentsByName fragment lookup by name
 * @returns {{ ok: boolean, errors: string[] }}
 */
function validateSynthesis(template, fragmentsByName) {
  if (!Array.isArray(template) || template.length === 0) {
    return { ok: false, errors: ['template is empty or not an array'] };
  }

  const errors = [];
  if (!template.some((node) => node.id === 'root')) {
    errors.push('no node with id "root"');
  }

  // All declared (truthy) ids — used to resolve child references below.
  const knownIds = new Set();
  for (const node of template) {
    if (node.id) knownIds.add(node.id);
  }

  for (const node of template) {
    if (!node.id) errors.push('node is missing id');

    if (node.$fragment) {
      const fragment = fragmentsByName.get(node.$fragment);
      if (!fragment) {
        errors.push(`unknown $fragment: ${node.$fragment} (node ${node.id})`);
        continue; // slot/injection checks are meaningless without the fragment
      }
      const bindings = node.bindings || {};
      for (const slot of fragment.slots || []) {
        // A required slot may be satisfied by a binding OR a declared default.
        if (slot.required && !(slot.name in bindings) && slot.defaultValue === undefined) {
          errors.push(`missing required binding "${slot.name}" on fragment ${node.$fragment} (node ${node.id})`);
        }
      }
    } else if (node.component) {
      // Inline node: every string child must name a node in this template.
      if (Array.isArray(node.children)) {
        for (const childId of node.children) {
          if (typeof childId === 'string' && !knownIds.has(childId)) {
            errors.push(`child "${childId}" on node "${node.id}" does not resolve`);
          }
        }
      }
    } else {
      errors.push(`node "${node.id}" has neither $fragment nor component`);
    }

    // Injected children (technique B) must resolve as well.
    for (const key of ['prependChildren', 'appendChildren']) {
      const injected = node[key];
      if (!Array.isArray(injected)) continue;
      for (const childId of injected) {
        if (typeof childId === 'string' && !knownIds.has(childId)) {
          errors.push(`${key} ref "${childId}" on node "${node.id}" does not resolve`);
        }
      }
    }
  }

  return { ok: errors.length === 0, errors };
}
|
|
262
|
+
|
|
263
|
+
/**
 * Main entry: synthesize a composition from an intent using an LLM.
 *
 * Retries up to `maxAttempts` times, feeding the previous failure back to the
 * model as a corrective nudge on each retry. Successful output is validated
 * against the fragment catalog and resolved through the composer before being
 * returned.
 *
 * @param {object} opts
 * @param {string} opts.intent user intent to compose a UI for
 * @param {object} opts.llmAdapter must expose `complete({ messages, systemPrompt })`
 * @param {string} [opts.historySummary] optional prior-turn context (see session-store.js)
 * @param {number} [opts.maxAttempts=3] attempts before giving up
 * @returns {Promise<{ template: Array, messages: Array, synthesis: object }>}
 *   `synthesis` carries the raw attempt log and validation state for debugging.
 * @throws {Error} when the adapter lacks `.complete()` or every attempt fails
 */
export async function synthesizeComposition({ intent, llmAdapter, historySummary = null, maxAttempts = 3 }) {
  if (!llmAdapter?.complete) {
    throw new Error('synthesizeComposition requires an llmAdapter with .complete()');
  }

  const fragmentCatalog = buildFragmentCatalog();
  const fragmentsByName = new Map(getAllFragments().map((f) => [f.name, f]));
  const userPrompt = buildUserPrompt(intent, fragmentCatalog, buildExamples(3), historySummary);

  const attemptLog = [];
  let lastError = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // After a failure, append an explicit corrective instruction to the prompt.
    const nudge = lastError
      ? `\n\nPREVIOUS ATTEMPT FAILED: ${lastError}. Return ONLY a JSON object shaped as { "template": [...] }. No prose, no questions, no clarifications — just JSON.`
      : '';
    const response = await llmAdapter.complete({
      messages: [{ role: 'user', content: userPrompt + nudge }],
      systemPrompt: SYSTEM_PROMPT,
    });
    // Adapters differ: some return { content }, some { text }, some a string.
    const raw = response?.content || response?.text || (typeof response === 'string' ? response : '');
    attemptLog.push({ attempt, raw });

    const parsed = extractJSON(raw);
    if (!parsed || !Array.isArray(parsed.template)) {
      // A conversational opener means the model ignored the output contract;
      // name that failure specifically so the retry nudge can address it.
      const looksConversational = /^(I['’]ll |Sure|Certainly|To do this|Here's)/i.test(raw.trim());
      lastError = looksConversational
        ? 'Response was conversational prose, not JSON. Do not ask questions. Emit the composition directly with your best-guess defaults.'
        : 'Response was not valid JSON with a template array.';
      continue;
    }

    const validation = validateSynthesis(parsed.template, fragmentsByName);
    if (!validation.ok) {
      lastError = validation.errors.slice(0, 3).join('; ');
      continue;
    }

    // Resolve via the composer; a resolution failure is retried like any other.
    try {
      const resolved = resolveComposition({ template: parsed.template });
      // Strip composer-internal keys before emitting A2UI component messages.
      const components = resolved.map((node) => {
        const { id, component, children, $fragment, bindings, prependChildren, appendChildren, ...rest } = node;
        return { id, component, children: children || [], ...rest };
      });
      return {
        template: parsed.template,
        messages: [{ type: 'updateComponents', components }],
        synthesis: {
          attempts: attempt,
          attemptsLog: attemptLog,
          validation: { ok: true, errors: [] },
          usedHistory: !!historySummary,
        },
      };
    } catch (e) {
      lastError = `Composer failed to resolve: ${e.message}`;
    }
  }

  throw new Error(`Synthesis failed after ${maxAttempts} attempts: ${lastError}`);
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generator-agnostic eval harness.
|
|
3
|
+
*
|
|
4
|
+
* Contract (generator return):
|
|
5
|
+
* {
|
|
6
|
+
* messages?: Array, // A2UI messages (may be empty)
|
|
7
|
+
* validation?: { score: number }, // 0-100 (or absent)
|
|
8
|
+
* strategy?: string, // architecture-specific label
|
|
9
|
+
* retrieval?: { // optional retrieval metadata
|
|
10
|
+
* hit: boolean, // top result considered relevant
|
|
11
|
+
* rank: number|null, // 1-indexed rank of first relevant candidate
|
|
12
|
+
* candidate: string|null, // name of chosen pattern/composition
|
|
13
|
+
* },
|
|
14
|
+
* fragments_used?: string[], // zettel-only
|
|
15
|
+
* ...extra
|
|
16
|
+
* }
|
|
17
|
+
*
|
|
18
|
+
* Two runners:
|
|
19
|
+
* - runHarness() legacy shape — preserved byte-for-byte for back-compat
|
|
20
|
+
* - runHarnessV2() augmented shape — coverage / avgScoreWhenEmitted /
|
|
21
|
+
* retrievalMRR / per-intent strategy & fragment data
|
|
22
|
+
*/
|
|
23
|
+
import { readFile } from 'node:fs/promises';
|
|
24
|
+
import { join, dirname } from 'node:path';
|
|
25
|
+
import { fileURLToPath } from 'node:url';
|
|
26
|
+
|
|
27
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
28
|
+
|
|
29
|
+
/**
 * Load the held-out eval set from the sibling @adia-ai/a2ui-corpus package.
 *
 * Callers can still pass `intents` explicitly to the runners; this fallback is
 * the default. Path from here: packages/a2ui/compose/evals/ → ../.. →
 * packages/a2ui/ → corpus/evals/held-out.jsonl.
 *
 * @returns {Promise<Array<object>>} one parsed object per non-blank JSONL line
 * @throws {SyntaxError} if a non-blank line is not valid JSON
 */
export async function loadHeldOut() {
  const evalsPath = join(__dirname, '..', '..', 'corpus', 'evals', 'held-out.jsonl');
  const raw = await readFile(evalsPath, 'utf8');
  // Trim and drop blank lines before parsing: the previous
  // `raw.trim().split('\n')` threw on an empty file (JSON.parse('')) and on
  // blank interior lines; per-line trimming also tolerates CRLF endings.
  return raw
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .map((line) => JSON.parse(line));
}
|
|
38
|
+
|
|
39
|
+
/**
 * Component-level precision/recall/F1 between the emitted messages and the
 * eval item's `expected_components`. Accepts both message shapes:
 *   mcp:    [{ components: [{ component: 'Card', ... }] }]
 *   zettel: [{ messageType: 'beginComponent', componentType: 'Card', ... }]
 *
 * @param {object} result generator result (reads `result.messages`)
 * @param {object} evalItem eval row (reads `evalItem.expected_components`)
 * @returns {{ precision: number, recall: number, f1: number }} each in [0, 1]
 */
function computeF1(result, evalItem) {
  // Collect the distinct component types the generator actually emitted.
  const emittedTypes = new Set();
  for (const message of result.messages || []) {
    if (Array.isArray(message.components)) {
      for (const entry of message.components) {
        if (entry.component) emittedTypes.add(entry.component);
      }
    }
    if (message.componentType) emittedTypes.add(message.componentType);
  }

  const expected = new Set(evalItem.expected_components || []);
  let truePositives = 0;
  for (const type of expected) {
    if (emittedTypes.has(type)) truePositives += 1;
  }

  // Guard every denominator — empty emissions/expectations score 0, not NaN.
  const precision = emittedTypes.size ? truePositives / emittedTypes.size : 0;
  const recall = expected.size ? truePositives / expected.size : 0;
  const denominator = precision + recall;
  const f1 = denominator > 0 ? (2 * precision * recall) / denominator : 0;
  return { precision, recall, f1 };
}
|
|
58
|
+
|
|
59
|
+
// ── Legacy runner (byte-compat with pre-2026-04-17 inline code) ──

/**
 * Legacy eval runner. Runs `generate` over each held-out intent and reports
 * pass/fail against `expected_validation_min` (default 70) plus component F1.
 *
 * NOTE(review): intentionally left code-identical — the banner above says this
 * runner is preserved byte-for-byte for back-compat. Prefer runHarnessV2 for
 * coverage-aware metrics.
 *
 * @param {object} opts
 * @param {Function} opts.generate async ({ intent, mode }) => generator result
 * @param {string} [opts.domain] keep only held-out items in this domain
 * @param {number} [opts.limit] cap the number of items run
 * @param {string} [opts.mode='instant'] passed through to `generate`
 * @param {Array} [opts.intents] explicit eval items; defaults to loadHeldOut()
 * @returns {Promise<{ total, pass, avgScore, passRate, results }>}
 */
export async function runHarness({ generate, domain, limit, mode = 'instant', intents }) {
  let items = intents || (await loadHeldOut());
  if (domain) items = items.filter((i) => i.domain === domain);
  if (limit) items = items.slice(0, limit);

  const results = [];
  for (const evalItem of items) {
    try {
      const result = await generate({ intent: evalItem.intent, mode });
      const score = result.validation?.score || 0;
      const { f1 } = computeF1(result, evalItem);
      results.push({
        id: evalItem.id,
        intent: evalItem.intent,
        score,
        f1: Math.round(f1 * 100),
        pass: score >= (evalItem.expected_validation_min || 70),
      });
    } catch (e) {
      // A generator crash counts as a zero-scored failure on that item; the
      // run itself continues.
      results.push({
        id: evalItem.id,
        intent: evalItem.intent,
        score: 0,
        f1: 0,
        pass: false,
        error: e.message,
      });
    }
  }

  const passCount = results.filter((r) => r.pass).length;
  // `|| 1` guards the zero-item run (e.g. a domain filter matching nothing).
  const avgScore = Math.round(
    results.reduce((s, r) => s + r.score, 0) / (results.length || 1),
  );
  return {
    total: results.length,
    pass: passCount,
    avgScore,
    passRate: Math.round((passCount / (results.length || 1)) * 100),
    results,
  };
}
|
|
103
|
+
|
|
104
|
+
// ── V2 runner (coverage-aware, architecture-fair) ──

/**
 * Coverage-aware eval runner. Unlike the legacy runHarness, it separates
 * "did the generator emit anything" (coverage) from "how good was it when it
 * did" (avgScoreWhenEmitted / avgF1WhenEmitted), and also aggregates retrieval
 * MRR and per-strategy counts so different architectures compare fairly.
 *
 * @param {object} opts
 * @param {Function} opts.generate async ({ intent, mode }) => generator result
 * @param {string} [opts.domain] keep only held-out items in this domain
 * @param {number} [opts.limit] cap the number of items run
 * @param {string} [opts.mode='instant'] passed through to `generate`
 * @param {Array} [opts.intents] explicit eval items; defaults to loadHeldOut()
 * @param {string} [opts.label='generator'] label echoed in the summary
 * @returns {Promise<object>} summary with per-intent `results` rows
 */
export async function runHarnessV2({ generate, domain, limit, mode = 'instant', intents, label = 'generator' }) {
  let items = intents || (await loadHeldOut());
  if (domain) items = items.filter((item) => item.domain === domain);
  if (limit) items = items.slice(0, limit);

  const results = [];
  for (const evalItem of items) {
    // One fully-populated row per intent; nulls mean "not applicable / no data".
    const row = {
      id: evalItem.id,
      intent: evalItem.intent,
      domain: evalItem.domain,
      strategy: null,
      messagesEmitted: false,
      validationScore: null,
      precision: null,
      recall: null,
      componentF1: null,
      retrievalHit: null,
      retrievalRank: null,
      candidate: null,
      fragmentsUsed: null,
      pass: false,
      error: null,
    };
    try {
      const result = await generate({ intent: evalItem.intent, mode });
      const didEmit = Array.isArray(result.messages) && result.messages.length > 0;
      row.messagesEmitted = didEmit;
      row.strategy = result.strategy || (didEmit ? 'emitted' : 'miss');
      if (didEmit) {
        // Quality metrics only make sense when something was emitted.
        row.validationScore = result.validation?.score ?? null;
        const { precision, recall, f1 } = computeF1(result, evalItem);
        row.precision = Math.round(precision * 100);
        row.recall = Math.round(recall * 100);
        row.componentF1 = Math.round(f1 * 100);
        row.pass = (row.validationScore ?? 0) >= (evalItem.expected_validation_min || 70);
      }
      if (result.retrieval) {
        row.retrievalHit = !!result.retrieval.hit;
        row.retrievalRank = result.retrieval.rank ?? null;
        row.candidate = result.retrieval.candidate ?? null;
      }
      if (Array.isArray(result.fragments_used)) {
        row.fragmentsUsed = result.fragments_used;
      }
    } catch (e) {
      // A crash leaves the row in its non-emitting default state.
      row.error = e.message;
    }
    results.push(row);
  }

  // ── Aggregates ──
  const mean = (rows, pick) =>
    rows.length ? rows.reduce((sum, r) => sum + pick(r), 0) / rows.length : 0;

  const emittedRows = results.filter((r) => r.messagesEmitted);
  const coverage = results.length ? emittedRows.length / results.length : 0;
  const avgScoreWhenEmitted = mean(emittedRows, (r) => r.validationScore || 0);
  const avgF1WhenEmitted = mean(emittedRows, (r) => r.componentF1 || 0);
  const passCount = results.filter((r) => r.pass).length;

  // Mean reciprocal rank over intents that reported retrieval metadata.
  const rankedRows = results.filter((r) => r.retrievalRank != null);
  const retrievalMRR = rankedRows.length ? mean(rankedRows, (r) => 1 / r.retrievalRank) : null;

  const strategyBreakdown = {};
  for (const r of results) {
    const key = r.strategy || 'unknown';
    strategyBreakdown[key] = (strategyBreakdown[key] || 0) + 1;
  }

  return {
    label,
    total: results.length,
    coverage: Math.round(coverage * 100), // % intents that emitted
    emitted: emittedRows.length,
    pass: passCount,
    passRate: Math.round((passCount / (results.length || 1)) * 100),
    avgScoreWhenEmitted: Math.round(avgScoreWhenEmitted),
    avgF1WhenEmitted: Math.round(avgF1WhenEmitted),
    retrievalMRR: retrievalMRR == null ? null : Math.round(retrievalMRR * 1000) / 1000,
    strategyBreakdown,
    results,
  };
}
|
package/index.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @adia-ai/a2ui-compose — main entry.
|
|
3
|
+
*
|
|
4
|
+
* Re-exports the most common consumer surface. For deeper access, reach
|
|
5
|
+
* via subpaths:
|
|
6
|
+
*
|
|
7
|
+
* import { generateUI } from '@adia-ai/a2ui-compose';
|
|
8
|
+
* import { pick, registerEngine } from '@adia-ai/a2ui-compose/engines/registry';
|
|
9
|
+
* import { generateZettel } from '@adia-ai/a2ui-compose/engines/zettel';
|
|
10
|
+
* import { llmBridge } from '@adia-ai/a2ui-compose/llm';
|
|
11
|
+
*
|
|
12
|
+
* See README for the full public surface.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
export { generateUI, generateUIStream } from './engine/generator.js';
|
|
16
|
+
export { pick, listEngines, registerEngine, unregisterEngine, ENGINES } from './engines/registry.js';
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Messages API adapter.
|
|
3
|
+
* Endpoint: https://api.anthropic.com/v1/messages
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readSSE } from './sse.js';
|
|
7
|
+
|
|
8
|
+
const API_URL = 'https://api.anthropic.com/v1/messages';
const API_VERSION = '2023-06-01'; // pinned anthropic-version header value
const DEFAULT_MAX_TOKENS = 4096; // used when the caller sets no maxTokens

/**
 * Adapter object for the Anthropic Messages API. Exposes the three-method
 * shape the adapter layer consumes:
 *   buildRequest(opts)    → { url, headers, body } request descriptor
 *   parseResponse(data)   → { text, usage, stopReason } (non-streaming)
 *   parseStream(response) → async generator of normalized stream chunks
 */
export const anthropic = {
  name: 'anthropic',

  /**
   * Translate adapter options into a Messages API request descriptor.
   *
   * @param {object} opts model, messages, maxTokens, stream, system, cache,
   *   temperature, thinking, thinkingBudget, apiKey, proxyUrl
   * @returns {{ url: string, headers: object, body: object }} `body` is a
   *   plain object; serialization/transport is handled by the caller
   */
  buildRequest(opts) {
    const body = {
      model: opts.model,
      max_tokens: opts.maxTokens || DEFAULT_MAX_TOKENS,
      messages: opts.messages,
      stream: !!opts.stream,
    };
    if (opts.system) {
      // Prompt caching: the AdiaUI system prompt is ~23KB and constant across
      // a session. Emitting it as a cached block marks it as a cache breakpoint
      // (ephemeral, ~5 min TTL). First call = cache write (+25% cost), every
      // subsequent call in the window = cache read (−90% cost). No-op below
      // the model's minimum cacheable size (1024 tok Sonnet/Opus, 2048 Haiku).
      body.system = opts.cache
        ? [{ type: 'text', text: opts.system, cache_control: { type: 'ephemeral' } }]
        : opts.system;
    }
    // `!= null` on purpose: temperature 0 is a valid, meaningful setting.
    if (opts.temperature != null) body.temperature = opts.temperature;
    if (opts.thinking) {
      body.thinking = { type: 'enabled', budget_tokens: opts.thinkingBudget || 10000 };
    }

    return {
      url: opts.proxyUrl || API_URL,
      headers: {
        'content-type': 'application/json',
        'x-api-key': opts.apiKey,
        'anthropic-version': API_VERSION,
      },
      body,
    };
  },

  /**
   * Normalize a complete (non-streaming) Messages API response. Reads only
   * the first `text` content block; other block types are not surfaced here.
   *
   * @param {object} data decoded JSON response body
   * @returns {{ text: string, usage: object, stopReason: string }}
   */
  parseResponse(data) {
    const text = data.content?.find(b => b.type === 'text')?.text ?? '';
    return {
      text,
      usage: {
        input: data.usage?.input_tokens ?? 0,
        output: data.usage?.output_tokens ?? 0,
        // Cache telemetry: non-zero cacheRead on turn 2+ is the signal that
        // caching is actually kicking in. Recorded per-turn for hit-rate analysis.
        cacheCreation: data.usage?.cache_creation_input_tokens ?? 0,
        cacheRead: data.usage?.cache_read_input_tokens ?? 0,
      },
      stopReason: data.stop_reason ?? 'end',
    };
  },

  /**
   * Consume a streaming Messages API response (SSE) and yield normalized
   * chunks:
   *   { type: 'text', text, snapshot }          text delta + accumulated text
   *   { type: 'thinking', text }                thinking delta (not accumulated)
   *   { type: 'done', text, usage, stopReason } emitted on message_stop
   *   { type: 'error', error }                  emitted on an `error` event
   * Iteration ends when the SSE reader signals done.
   *
   * @param {object} response object whose `body` is an SSE stream readable by
   *   readSSE (e.g. a fetch Response — confirm against llm-bridge callers)
   */
  async *parseStream(response) {
    let snapshot = '';
    let usage = { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
    let stopReason = 'end';

    for await (const event of readSSE(response.body)) {
      if (event.done) break;
      let data;
      // Skip non-JSON payloads (e.g. keep-alives) rather than failing the stream.
      try { data = JSON.parse(event.data); } catch { continue; }
      const eventType = event.event ?? data.type;

      switch (eventType) {
        case 'message_start':
          // Input-side usage (incl. cache counters) arrives once, up front.
          if (data.message?.usage) {
            usage.input = data.message.usage.input_tokens ?? 0;
            usage.cacheCreation = data.message.usage.cache_creation_input_tokens ?? 0;
            usage.cacheRead = data.message.usage.cache_read_input_tokens ?? 0;
          }
          break;
        case 'content_block_delta': {
          const delta = data.delta;
          if (delta?.type === 'text_delta') {
            snapshot += delta.text;
            yield { type: 'text', text: delta.text, snapshot };
          } else if (delta?.type === 'thinking_delta') {
            yield { type: 'thinking', text: delta.thinking };
          }
          break;
        }
        case 'message_delta':
          // Output token count and the final stop_reason arrive late.
          if (data.delta?.stop_reason) stopReason = data.delta.stop_reason;
          if (data.usage) usage.output = data.usage.output_tokens ?? 0;
          break;
        case 'message_stop':
          yield { type: 'done', text: snapshot, usage, stopReason };
          break;
        case 'error':
          yield { type: 'error', error: new Error(data.error?.message ?? 'Stream error') };
          break;
      }
    }
  },
};
|