@adia-ai/a2ui-compose 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/core/generator.js +31 -0
- package/package.json +1 -1
- package/strategies/registry.js +29 -1
- package/strategies/zettel/chunk-synthesizer.js +99 -33
- package/strategies/zettel/chunk-synthesizer.test.js +97 -0
- package/strategies/zettel/composer.js +18 -1
- package/strategies/zettel/generator-adapter.js +31 -1
- package/strategies/zettel/issue-reporter.js +10 -0
package/CHANGELOG.md
CHANGED
|
@@ -12,6 +12,78 @@ generator graph.
|
|
|
12
12
|
|
|
13
13
|
_No pending changes._
|
|
14
14
|
|
|
15
|
+
## [0.3.3] - 2026-05-07
|
|
16
|
+
|
|
17
|
+
**Lockstep cut.** All 9 published `@adia-ai/*` packages now share version `0.3.3`, governed by [`docs/specs/package-architecture.md` § 15](../../../docs/specs/package-architecture.md#15-versioning-policy). Internal `@adia-ai/*` ranges stay at `^0.3.0` (patch-cut asymmetry — caret floats `0.3.x`).
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- **`iteration-synthesis-failure` auto-fire policy reason** in
|
|
22
|
+
`strategies/zettel/issue-reporter.js`. Generator-adapter's
|
|
23
|
+
iteration synthesis failures now route through
|
|
24
|
+
`autoReport('iteration-synthesis-failure', ...)` instead of bare
|
|
25
|
+
`console.error`. Suppressed in eval mode; emits to
|
|
26
|
+
`.brain/audit-history/issues/` in production. (closes backlog #49)
|
|
27
|
+
|
|
28
|
+
- **`ID_PREFIX_SEPARATOR` exported constant** in
|
|
29
|
+
`strategies/zettel/composer.js` — the magic string `'--'` used
|
|
30
|
+
when prefixing fragment internal node ids with composition node
|
|
31
|
+
id. Documented choice rationale (double-dash chosen because single
|
|
32
|
+
`-` collides with kebab-case ids in primitives). (closes backlog #50)
|
|
33
|
+
|
|
34
|
+
- **`computeScopeDrift` + `SCOPE_DRIFT_RATIO` + `SCOPE_DRIFT_MIN_ACTUAL`
|
|
35
|
+
exports** in `strategies/zettel/chunk-synthesizer.js`. Internal
|
|
36
|
+
function + constants are now exported for testability. New
|
|
37
|
+
`chunk-synthesizer.test.js` (8/8 pass) covers no-drift,
|
|
38
|
+
ratio-below-gate, drift-fires, floor-prevents-false-positive,
|
|
39
|
+
malformed-corpus zero-expected guard, multi-chunk aggregation,
|
|
40
|
+
instances[]-shape support, exports. (closes backlog #52)
|
|
41
|
+
|
|
42
|
+
- **`A2UI_COMPOSE_TRACE` env-var support** in `core/generator.js`.
|
|
43
|
+
When set to a directory path, writes a per-request JSON trace
|
|
44
|
+
(full `_debug` payload + result) per `generateUI()` call, named
|
|
45
|
+
`<ISO-timestamp>-<engine>-<intent-slug>.json`. `dialog-recorder.isRecording()`
|
|
46
|
+
honors the trace var so `_debug` payload is populated. New
|
|
47
|
+
`npm run compose:trace` script. (closes backlog #89)
|
|
48
|
+
|
|
49
|
+
### Changed
|
|
50
|
+
|
|
51
|
+
- **Cache-invalidation contract** documented inline in
|
|
52
|
+
`strategies/zettel/generator-adapter.js`. `loadAll()` itself is
|
|
53
|
+
idempotent (`fragments.clear()` + `compositions.clear()` + re-walk
|
|
54
|
+
on every call). The CACHING happens at the call-site:
|
|
55
|
+
`ensureBooted()` sets a process-singleton flag and never reloads
|
|
56
|
+
for the lifetime of the process. Trade-offs: good for long-running
|
|
57
|
+
MCP, bad for tests/hot-reload. To force a reload, call `loadAll()`
|
|
58
|
+
directly. (closes backlog #100)
|
|
59
|
+
|
|
60
|
+
## [0.3.2] - 2026-05-06
|
|
61
|
+
|
|
62
|
+
**9-package lockstep patch cut to v0.3.2.** All lockstep members share
|
|
63
|
+
one version per [`docs/specs/package-architecture.md` § 15](../../../docs/specs/package-architecture.md#15-versioning-policy).
|
|
64
|
+
Internal `@adia-ai/*` dep ranges unchanged at `^0.3.0`.
|
|
65
|
+
|
|
66
|
+
### Added
|
|
67
|
+
|
|
68
|
+
- **`chunk-zettel` engine** — registered as 5th built-in engine in
|
|
69
|
+
`strategies/registry.js`. Chunk-aware composition from training-chunk
|
|
70
|
+
corpus (page shells + block chunks). Fast path: sync keyword search
|
|
71
|
+
over async embeddings to prevent ranking drift.
|
|
72
|
+
|
|
73
|
+
### Fixed
|
|
74
|
+
|
|
75
|
+
- **Keyword-first fast path** — `composeFromIntent` prefers sync
|
|
76
|
+
`searchChunks` over `searchChunksAsync` (embeddings) for retrieval.
|
|
77
|
+
- **Whole-word keyword matching** — `keywordScore()` splits chunk names
|
|
78
|
+
on `[-_]` and uses `nameWords.includes(tok)`; prevents `"pane"` from
|
|
79
|
+
matching `"panel"`.
|
|
80
|
+
- **Coverage scoring** — PascalCase → kebab-case regex fix for
|
|
81
|
+
multi-word components (`AgentTrace` → `agent-trace-ui`).
|
|
82
|
+
|
|
83
|
+
### Changed
|
|
84
|
+
|
|
85
|
+
- `version`: `0.3.1` → `0.3.2`.
|
|
86
|
+
|
|
15
87
|
## [0.3.1] - 2026-05-06
|
|
16
88
|
|
|
17
89
|
**9-package lockstep patch cut.** All 9 published `@adia-ai/*` packages bump 0.3.0 → 0.3.1 per [`docs/specs/package-architecture.md` § 15](../../../docs/specs/package-architecture.md#15-versioning-policy). Internal `@adia-ai/*` dep ranges remain at `^0.3.0` (covers `0.3.1` under semver — patch-cut asymmetry).
|
package/core/generator.js
CHANGED
|
@@ -185,6 +185,37 @@ export async function generateUI({ intent, engine: engineName = 'monolithic', mo
|
|
|
185
185
|
// Strip the _debug payload before returning — it's an internal collaboration
|
|
186
186
|
// channel between engines and the recorder, not part of the public API.
|
|
187
187
|
// Without this strip the proxy would echo a 12KB+ system prompt to clients.
|
|
188
|
+
//
|
|
189
|
+
// Compose-trace flag: A2UI_COMPOSE_TRACE=<dir> writes the full debug payload
|
|
190
|
+
// (system prompt, raw LLM response, retrieval log, strategy decisions) to
|
|
191
|
+
// a per-request JSON file BEFORE the strip. Useful for debugging
|
|
192
|
+
// strategy-label decisions or reproducing eval failures. Off by default;
|
|
193
|
+
// the trace write is awaited before the result is returned and adds ~1-5ms per request.
|
|
194
|
+
const traceDir = process.env.A2UI_COMPOSE_TRACE;
|
|
195
|
+
if (traceDir && result._debug) {
|
|
196
|
+
try {
|
|
197
|
+
const { writeFile, mkdir } = await import('node:fs/promises');
|
|
198
|
+
const { join: pathJoin } = await import('node:path');
|
|
199
|
+
await mkdir(traceDir, { recursive: true });
|
|
200
|
+
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
201
|
+
const safeIntent = String(intent || 'unknown').slice(0, 40).replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
202
|
+
const tracePath = pathJoin(traceDir, `${ts}-${engineName}-${safeIntent}.json`);
|
|
203
|
+
await writeFile(tracePath, JSON.stringify({
|
|
204
|
+
timestamp: ts,
|
|
205
|
+
intent,
|
|
206
|
+
engine: engineName,
|
|
207
|
+
mode: effectiveMode,
|
|
208
|
+
executionId: result.executionId,
|
|
209
|
+
strategy: result.strategy || null,
|
|
210
|
+
validation: result.validation || null,
|
|
211
|
+
messageCount: result.messages?.length || 0,
|
|
212
|
+
debug: result._debug,
|
|
213
|
+
}, null, 2));
|
|
214
|
+
} catch (err) {
|
|
215
|
+
// Tracing is diagnostic — never let it break the request path.
|
|
216
|
+
console.error('[compose:trace] failed to write trace:', err.message);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
188
219
|
if (result._debug) delete result._debug;
|
|
189
220
|
return result;
|
|
190
221
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adia-ai/a2ui-compose",
|
|
3
|
-
"version": "0.3.1",
|
|
3
|
+
"version": "0.3.3",
|
|
4
4
|
"description": "AdiaUI A2UI compose engine \u2014 framework-agnostic. Takes natural-language intents + a catalog and produces A2UI protocol messages. Pairs with `@adia-ai/a2ui-retrieval` (intent classification, catalog lookup) and `@adia-ai/a2ui-validator` (schema + semantic checks).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
package/strategies/registry.js
CHANGED
|
@@ -118,11 +118,39 @@ async function generateZettelAdapter(ctx) {
|
|
|
118
118
|
};
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
+
async function generateChunkZettelAdapter(ctx) {
|
|
122
|
+
const { composeFromIntent } = await import('./zettel/chunk-synthesizer.js');
|
|
123
|
+
const result = await composeFromIntent({
|
|
124
|
+
intent: ctx.intent,
|
|
125
|
+
llmAdapter: ctx.llmAdapter || null,
|
|
126
|
+
maxAttempts: 2,
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
// Convert chunk-composition result to A2UI message shape.
|
|
130
|
+
const messages = result.html
|
|
131
|
+
? [{ type: 'updateComponents', components: [{ id: 'chunk-root', component: 'article', html: result.html }] }]
|
|
132
|
+
: [];
|
|
133
|
+
|
|
134
|
+
return {
|
|
135
|
+
executionId: ctx.executionId,
|
|
136
|
+
messages,
|
|
137
|
+
validation: { score: result.html ? 70 : 0 },
|
|
138
|
+
strategy: result.source === 'retrieval' ? 'chunk-retrieval' : 'chunk-synthesis',
|
|
139
|
+
engine: 'chunk-zettel',
|
|
140
|
+
_debug: {
|
|
141
|
+
plan: result.plan || null,
|
|
142
|
+
warnings: result.warnings || [],
|
|
143
|
+
scopeDrift: result.scopeDrift || null,
|
|
144
|
+
},
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
|
|
121
148
|
export const ENGINES = {
|
|
122
149
|
'monolithic-instant': generateInstantAdapter,
|
|
123
150
|
'monolithic-pro': generateProAdapter,
|
|
124
151
|
'monolithic-thinking': generateThinkingAdapter,
|
|
125
152
|
'zettel': generateZettelAdapter,
|
|
153
|
+
'chunk-zettel': generateChunkZettelAdapter,
|
|
126
154
|
};
|
|
127
155
|
|
|
128
156
|
/**
|
|
@@ -147,7 +175,7 @@ export const ENGINES = {
|
|
|
147
175
|
* });
|
|
148
176
|
* generateUI({ engine: 'my-hybrid', intent: '...' });
|
|
149
177
|
*/
|
|
150
|
-
const RESERVED = new Set(['monolithic', 'monolithic-instant', 'monolithic-pro', 'monolithic-thinking', 'zettel']);
|
|
178
|
+
const RESERVED = new Set(['monolithic', 'monolithic-instant', 'monolithic-pro', 'monolithic-thinking', 'zettel', 'chunk-zettel']);
|
|
151
179
|
|
|
152
180
|
export function registerEngine(name, generateFn) {
|
|
153
181
|
if (typeof name !== 'string' || !name.length) {
|
|
@@ -35,36 +35,61 @@ const DEFAULT_MAX_ATTEMPTS = 2;
|
|
|
35
35
|
// chunks' component counts. A multiplier > SCOPE_DRIFT_RATIO trips a warning
|
|
36
36
|
// + auto-fires a `scope-drift` issue. Floor prevents false positives on
|
|
37
37
|
// small UIs where slot-wrapper noise dominates.
|
|
38
|
-
const SCOPE_DRIFT_RATIO = 1.5;
|
|
39
|
-
const SCOPE_DRIFT_MIN_ACTUAL = 20;
|
|
38
|
+
export const SCOPE_DRIFT_RATIO = 1.5;
|
|
39
|
+
export const SCOPE_DRIFT_MIN_ACTUAL = 20;
|
|
40
40
|
|
|
41
41
|
const SYSTEM_PROMPT = `You compose web-app pages by binding training chunks into named slots.
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
43
|
+
## YOUR TASK
|
|
44
|
+
|
|
45
|
+
Given a user intent and a catalog of chunks, return a JSON object that:
|
|
46
|
+
1. SELECTS the most appropriate page-kind shell
|
|
47
|
+
2. BINDS block/panel chunks into its slots
|
|
48
|
+
|
|
49
|
+
## STEP 1 — SELECT A PAGE SHELL
|
|
50
|
+
|
|
51
|
+
Page shells (kind=page or kind=panel) define the layout topology. Match the
|
|
52
|
+
intent domain to the right shell — NEVER default to dashboard-admin-page for
|
|
53
|
+
non-dashboard intents:
|
|
54
|
+
|
|
55
|
+
| Shell | Domain | Slots |
|
|
56
|
+
|-------|--------|-------|
|
|
57
|
+
| dashboard-admin-page | Admin/analytics dashboards | page-header, page-content |
|
|
58
|
+
| settings-page-shell | Settings, preferences, configuration | page-header, page-tabs, page-content |
|
|
59
|
+
| form-page-shell | Auth forms, sign-in, sign-up, profile | page-header, form-content, page-footer |
|
|
60
|
+
| marketing-page-shell | Landing pages, heroes, features, CTAs | hero, features, cta |
|
|
61
|
+
| error-page-shell | Error states (404, 500, permission denied) | error-content, navigation |
|
|
62
|
+
| editor-page-shell | Split-pane editors (code + preview) | editor-pane, preview-pane |
|
|
63
|
+
| onb-step-shell | Onboarding wizards, multi-step flows | page-story, page-header, page-content, page-footer |
|
|
64
|
+
| reg-step-shell | Registration flows, multi-step sign-up | page-story, page-header, page-content, page-footer |
|
|
65
|
+
|
|
66
|
+
Selection rule: the shell whose DOMAIN matches the intent keywords wins.
|
|
67
|
+
|
|
68
|
+
## STEP 2 — SELECT BLOCKS TO FILL SLOTS
|
|
69
|
+
|
|
70
|
+
- Search the block catalog for chunks whose name contains intent keywords.
|
|
71
|
+
- Prefer blocks whose primary tag matches the desired component (e.g.
|
|
72
|
+
"stat-ui" for stat cards, "chart-ui" for charts).
|
|
73
|
+
- If a slot is optional and no matching block exists, OMIT it rather
|
|
74
|
+
than forcing a random block.
|
|
75
|
+
- Never invent chunk names. Every bound name must appear in the catalog.
|
|
76
|
+
|
|
77
|
+
## STEP 3 — RETURN JSON
|
|
52
78
|
|
|
53
79
|
Return ONLY a JSON object shaped exactly like:
|
|
54
80
|
{
|
|
55
|
-
"page": "<name of
|
|
81
|
+
"page": "<name of page-kind chunk>",
|
|
56
82
|
"slot_bindings": {
|
|
57
|
-
"<slot-name>": "<bound chunk name>"
|
|
83
|
+
"<slot-name>": "<bound chunk name>"
|
|
58
84
|
OR
|
|
59
|
-
"<slot-name>": ["<chunk1>", "<chunk2>"]
|
|
85
|
+
"<slot-name>": ["<chunk1>", "<chunk2>"]
|
|
60
86
|
}
|
|
61
87
|
}
|
|
62
88
|
|
|
63
89
|
Rules:
|
|
64
90
|
- The "page" must be a chunk with kind="page" or kind="panel".
|
|
65
|
-
- Every slot
|
|
66
|
-
-
|
|
67
|
-
pages.
|
|
91
|
+
- Every slot key must be declared by the chosen page shell.
|
|
92
|
+
- Every bound value must be a real chunk name from the catalog.
|
|
68
93
|
- Return valid JSON. No prose, no comments, no markdown fences. Begin with "{".
|
|
69
94
|
`;
|
|
70
95
|
|
|
@@ -78,8 +103,8 @@ function buildCatalogSummary(chunks) {
|
|
|
78
103
|
}
|
|
79
104
|
|
|
80
105
|
function buildExamples() {
|
|
81
|
-
//
|
|
82
|
-
// the LLM
|
|
106
|
+
// Four canonical examples spanning different domains. Diverse examples
|
|
107
|
+
// prevent the LLM from defaulting to dashboard-shaped output.
|
|
83
108
|
return [
|
|
84
109
|
{
|
|
85
110
|
intent: 'admin dashboard with KPIs and a conversion funnel',
|
|
@@ -91,6 +116,38 @@ function buildExamples() {
|
|
|
91
116
|
},
|
|
92
117
|
},
|
|
93
118
|
},
|
|
119
|
+
{
|
|
120
|
+
intent: 'sign-in form with email and password',
|
|
121
|
+
output: {
|
|
122
|
+
page: 'form-page-shell',
|
|
123
|
+
slot_bindings: {
|
|
124
|
+
'page-header': 'auth-signin-card-password',
|
|
125
|
+
'form-content': 'auth-email-entry',
|
|
126
|
+
},
|
|
127
|
+
},
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
intent: 'settings page with tabs for general and billing',
|
|
131
|
+
output: {
|
|
132
|
+
page: 'settings-page-shell',
|
|
133
|
+
slot_bindings: {
|
|
134
|
+
'page-header': 'settings-general-form',
|
|
135
|
+
'page-tabs': 'check-combinations-settings-group',
|
|
136
|
+
'page-content': 'settings-general-form',
|
|
137
|
+
},
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
intent: 'marketing landing hero with feature cards',
|
|
142
|
+
output: {
|
|
143
|
+
page: 'marketing-page-shell',
|
|
144
|
+
slot_bindings: {
|
|
145
|
+
hero: 'hero-cta-simple',
|
|
146
|
+
features: 'empty-state-action',
|
|
147
|
+
cta: 'button-primary',
|
|
148
|
+
},
|
|
149
|
+
},
|
|
150
|
+
},
|
|
94
151
|
];
|
|
95
152
|
}
|
|
96
153
|
|
|
@@ -150,23 +207,31 @@ export async function composeFromIntent({ intent, llmAdapter, maxAttempts = DEFA
|
|
|
150
207
|
//
|
|
151
208
|
// Restricted to kind=block: page/panel chunks are SKELETONS that need
|
|
152
209
|
// slot-binding composition (Tier 2 handles them). Returning a skeleton
|
|
153
|
-
//
|
|
154
|
-
//
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
210
|
+
// Fast path — deterministic keyword-first; embeddings are a fallback only.
|
|
211
|
+
// Embeddings can drift (e.g. "pane" matching "panel" boosting unrelated
|
|
212
|
+
// chunks). Sync keyword search is stable and keyword-discoverability is
|
|
213
|
+
// the baseline guarantee. We run both and prefer the sync result when
|
|
214
|
+
// it meets the threshold; only use async when sync is weak but async
|
|
215
|
+
// is strong (e.g. semantic match on content not captured by keywords).
|
|
216
|
+
const syncHits = searchChunks(intent, { kind: 'block', limit: 5 });
|
|
217
|
+
const asyncHits = await searchChunksAsync(intent, { kind: 'block', limit: 5 });
|
|
218
|
+
|
|
219
|
+
const syncTop = syncHits[0];
|
|
220
|
+
const asyncTop = asyncHits[0];
|
|
221
|
+
const useSync = syncTop && syncTop.score >= STRONG_RETRIEVAL_SCORE;
|
|
222
|
+
const useAsync = !useSync && asyncTop && asyncTop.score >= STRONG_RETRIEVAL_SCORE;
|
|
223
|
+
|
|
224
|
+
if (useSync || useAsync) {
|
|
225
|
+
const hit = useSync ? syncTop : asyncTop;
|
|
226
|
+
const top = getChunk(hit.name);
|
|
158
227
|
const html = top.html || top.instances?.[0]?.html || '';
|
|
159
|
-
// Tier-1 fast path: sole bound chunk is the retrieved block. The gate
|
|
160
|
-
// is mostly a no-op here (ratio ≈ 1) but stays for symmetry — and to
|
|
161
|
-
// catch a corner case where retrieval returns a block that, post-render,
|
|
162
|
-
// expands far beyond its source (shouldn't happen, but worth detecting).
|
|
163
228
|
const scopeDrift = computeScopeDrift(html, [top]);
|
|
164
229
|
return {
|
|
165
230
|
html,
|
|
166
231
|
plan: null,
|
|
167
232
|
source: 'retrieval',
|
|
168
|
-
score:
|
|
169
|
-
cosineScore:
|
|
233
|
+
score: hit.score,
|
|
234
|
+
cosineScore: hit.cosineScore,
|
|
170
235
|
warnings: scopeDrift.drift
|
|
171
236
|
? [`scope drift: ${scopeDrift.actual} components in HTML vs ${scopeDrift.expected} in bound chunk (ratio ${scopeDrift.ratio.toFixed(2)}×)`]
|
|
172
237
|
: [],
|
|
@@ -211,15 +276,16 @@ export async function composeFromIntent({ intent, llmAdapter, maxAttempts = DEFA
|
|
|
211
276
|
// Trace: snapshot of the retrieval log for the issue-reporter to surface
|
|
212
277
|
// verbatim on bug tickets. Recorded once before the retry loop so it
|
|
213
278
|
// describes what the LLM actually saw.
|
|
279
|
+
const tier1HitList = useSync ? syncHits : (useAsync ? asyncHits : []);
|
|
214
280
|
const retrievalTrace = {
|
|
215
|
-
tier1Hits:
|
|
281
|
+
tier1Hits: tier1HitList.slice(0, 5).map((h) => ({
|
|
216
282
|
name: h.name,
|
|
217
283
|
score: Number(h.score.toFixed(3)),
|
|
218
284
|
kind: h.kind,
|
|
219
285
|
cosineScore: h.cosineScore != null ? Number(h.cosineScore.toFixed(3)) : null,
|
|
220
286
|
})),
|
|
221
287
|
tier1Threshold: STRONG_RETRIEVAL_SCORE,
|
|
222
|
-
tier1Pass:
|
|
288
|
+
tier1Pass: tier1HitList.length > 0 && tier1HitList[0].score >= STRONG_RETRIEVAL_SCORE,
|
|
223
289
|
catalogSize: filtered.length,
|
|
224
290
|
catalogPageNames: pageChunks.map((c) => c.name),
|
|
225
291
|
catalogPanelNames: panelChunks.map((c) => c.name),
|
|
@@ -331,7 +397,7 @@ function countComponents(html) {
|
|
|
331
397
|
* Returns { actual, expected, ratio, drift } where `drift` is true when
|
|
332
398
|
* actual exceeds SCOPE_DRIFT_RATIO × expected AND actual ≥ SCOPE_DRIFT_MIN_ACTUAL.
|
|
333
399
|
*/
|
|
334
|
-
function computeScopeDrift(html, boundChunks) {
|
|
400
|
+
export function computeScopeDrift(html, boundChunks) {
|
|
335
401
|
const actual = countComponents(html);
|
|
336
402
|
let expected = 0;
|
|
337
403
|
for (const c of boundChunks) {
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
computeScopeDrift,
|
|
4
|
+
SCOPE_DRIFT_RATIO,
|
|
5
|
+
SCOPE_DRIFT_MIN_ACTUAL,
|
|
6
|
+
} from './chunk-synthesizer.js';
|
|
7
|
+
|
|
8
|
+
// countComponents() in chunk-synthesizer counts elements that look like A2UI
|
|
9
|
+
// component instances. The exact regex matters for these tests; we synthesize
|
|
10
|
+
// HTML that's representative of real composed output.
|
|
11
|
+
function gridOfCards(n) {
|
|
12
|
+
const cards = Array.from({ length: n }, (_, i) =>
|
|
13
|
+
`<card-ui id="c${i}"><header-ui slot="heading">Card ${i}</header-ui><text-ui>body</text-ui></card-ui>`
|
|
14
|
+
).join('');
|
|
15
|
+
return `<grid-ui columns="3">${cards}</grid-ui>`;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
describe('SCOPE_DRIFT_RATIO + computeScopeDrift', () => {
|
|
19
|
+
it('exports the constants', () => {
|
|
20
|
+
expect(SCOPE_DRIFT_RATIO).toBe(1.5);
|
|
21
|
+
expect(SCOPE_DRIFT_MIN_ACTUAL).toBe(20);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it('reports no drift when actual ≤ expected', () => {
|
|
25
|
+
// 25 cards composed from a chunk with 25 cards
|
|
26
|
+
const html = gridOfCards(25);
|
|
27
|
+
const chunk = { html: gridOfCards(25) };
|
|
28
|
+
const drift = computeScopeDrift(html, [chunk]);
|
|
29
|
+
expect(drift.actual).toBeGreaterThanOrEqual(SCOPE_DRIFT_MIN_ACTUAL);
|
|
30
|
+
expect(drift.expected).toBeGreaterThanOrEqual(SCOPE_DRIFT_MIN_ACTUAL);
|
|
31
|
+
expect(drift.ratio).toBeLessThanOrEqual(1);
|
|
32
|
+
expect(drift.drift).toBe(false);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it('reports no drift when ratio is below the gate (1.0 < ratio ≤ 1.5)', () => {
|
|
36
|
+
// 30 cards composed from 25 — ratio 1.2× below the 1.5× gate
|
|
37
|
+
const html = gridOfCards(30);
|
|
38
|
+
const chunk = { html: gridOfCards(25) };
|
|
39
|
+
const drift = computeScopeDrift(html, [chunk]);
|
|
40
|
+
expect(drift.ratio).toBeGreaterThan(1.0);
|
|
41
|
+
expect(drift.ratio).toBeLessThanOrEqual(SCOPE_DRIFT_RATIO);
|
|
42
|
+
expect(drift.drift).toBe(false);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it('reports drift when ratio exceeds the gate (ratio > 1.5)', () => {
|
|
46
|
+
// 50 cards composed from 25 — ratio 2.0× exceeds the 1.5× gate
|
|
47
|
+
const html = gridOfCards(50);
|
|
48
|
+
const chunk = { html: gridOfCards(25) };
|
|
49
|
+
const drift = computeScopeDrift(html, [chunk]);
|
|
50
|
+
expect(drift.actual).toBeGreaterThanOrEqual(SCOPE_DRIFT_MIN_ACTUAL);
|
|
51
|
+
expect(drift.ratio).toBeGreaterThan(SCOPE_DRIFT_RATIO);
|
|
52
|
+
expect(drift.drift).toBe(true);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it('does not trip the drift floor on small UIs (actual < SCOPE_DRIFT_MIN_ACTUAL)', () => {
|
|
56
|
+
// Tiny html (5 tags) vs even-tinier chunk (1 tag) — ratio 5× far above
|
|
57
|
+
// gate, but actual=5 < 20 floor
|
|
58
|
+
const html = '<div><span></span><span></span><span></span><span></span></div>';
|
|
59
|
+
const chunk = { html: '<div></div>' };
|
|
60
|
+
const drift = computeScopeDrift(html, [chunk]);
|
|
61
|
+
expect(drift.actual).toBeLessThan(SCOPE_DRIFT_MIN_ACTUAL);
|
|
62
|
+
expect(drift.ratio).toBeGreaterThan(SCOPE_DRIFT_RATIO);
|
|
63
|
+
expect(drift.drift).toBe(false); // floor prevents false positive on tiny UIs
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it('handles the malformed-corpus case (zero-expected, non-zero actual)', () => {
|
|
67
|
+
// Bound chunk has no html field at all
|
|
68
|
+
const html = gridOfCards(50);
|
|
69
|
+
const chunk = { /* no html */ };
|
|
70
|
+
const drift = computeScopeDrift(html, [chunk]);
|
|
71
|
+
expect(drift.expected).toBe(0);
|
|
72
|
+
expect(drift.ratio).toBe(null); // guard returns null instead of Infinity
|
|
73
|
+
expect(drift.drift).toBe(false); // gate is skipped, not failed
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it('aggregates expected across multiple bound chunks', () => {
|
|
77
|
+
// 30 actual; bound chunks 10 + 12 = 22 expected; ratio 30/22 ≈ 1.36 < 1.5
|
|
78
|
+
const html = gridOfCards(30);
|
|
79
|
+
const chunks = [
|
|
80
|
+
{ html: gridOfCards(10) },
|
|
81
|
+
{ html: gridOfCards(12) },
|
|
82
|
+
];
|
|
83
|
+
const drift = computeScopeDrift(html, chunks);
|
|
84
|
+
expect(drift.expected).toBeGreaterThanOrEqual(22);
|
|
85
|
+
expect(drift.ratio).toBeLessThanOrEqual(SCOPE_DRIFT_RATIO);
|
|
86
|
+
expect(drift.drift).toBe(false);
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
it('reads chunk html from instances[0] when top-level html is absent', () => {
|
|
90
|
+
// Multi-instance chunk format: { name, instances: [{ html, ... }, ...] }
|
|
91
|
+
const html = gridOfCards(25);
|
|
92
|
+
const chunk = { name: 'multi', instances: [{ html: gridOfCards(25) }] };
|
|
93
|
+
const drift = computeScopeDrift(html, [chunk]);
|
|
94
|
+
expect(drift.expected).toBeGreaterThanOrEqual(SCOPE_DRIFT_MIN_ACTUAL);
|
|
95
|
+
expect(drift.drift).toBe(false);
|
|
96
|
+
});
|
|
97
|
+
});
|
|
@@ -18,13 +18,30 @@
|
|
|
18
18
|
|
|
19
19
|
import { getFragment } from './fragment-library.js';
|
|
20
20
|
|
|
21
|
+
/**
|
|
22
|
+
* Separator used when prefixing a fragment's internal node ids with the
|
|
23
|
+
* composition node id. Format: `{compNode}{ID_PREFIX_SEPARATOR}{fragNode}`.
|
|
24
|
+
*
|
|
25
|
+
* Why double-dash: single `-` collides with kebab-case ids that primitives
|
|
26
|
+
* commonly emit (`auth-card-header`, `card-header-heading`); double-dash
|
|
27
|
+
* has no observed natural occurrence in either composition node ids or
|
|
28
|
+
* fragment node ids, so the parse is unambiguous if anyone ever needs to
|
|
29
|
+
* reverse the prefixing.
|
|
30
|
+
*
|
|
31
|
+
* EXTERNAL CONTRACT: the renderer treats node ids as opaque strings, so
|
|
32
|
+
* changing this separator does not break A2UI consumers. But anything
|
|
33
|
+
* upstream that splits on `--` (issue-reporter ticket rendering, eval
|
|
34
|
+
* trace inspection, debug logs) will need to be updated in lockstep.
|
|
35
|
+
*/
|
|
36
|
+
export const ID_PREFIX_SEPARATOR = '--';
|
|
37
|
+
|
|
21
38
|
function cloneFragmentWithPrefix(fragment, prefix) {
|
|
22
39
|
const idMap = new Map();
|
|
23
40
|
const cloned = fragment.template.map((n) => ({ ...n }));
|
|
24
41
|
|
|
25
42
|
// Generate new ids
|
|
26
43
|
for (const node of cloned) {
|
|
27
|
-
const newId = `${prefix}
|
|
44
|
+
const newId = `${prefix}${ID_PREFIX_SEPARATOR}${node.id}`;
|
|
28
45
|
idMap.set(node.id, newId);
|
|
29
46
|
}
|
|
30
47
|
// Rewrite ids and children refs
|
|
@@ -30,6 +30,7 @@ import {
|
|
|
30
30
|
getTurns,
|
|
31
31
|
buildHistorySummary,
|
|
32
32
|
} from './session-store.js';
|
|
33
|
+
import { autoReport } from './issue-reporter.js';
|
|
33
34
|
import { validateSchema } from '../../../validator/validator.js';
|
|
34
35
|
|
|
35
36
|
let booted = false;
|
|
@@ -40,6 +41,18 @@ function ensureBooted() {
|
|
|
40
41
|
}
|
|
41
42
|
}
|
|
42
43
|
|
|
44
|
+
// NOTE on cache invalidation: `loadAll()` itself reloads fragments + compositions
|
|
45
|
+
// from disk every time it's called — `fragments.clear()` + `compositions.clear()`
|
|
46
|
+
// at the top, then re-walk. The CACHING happens here at the call-site: we set
|
|
47
|
+
// `booted = true` after the first load and never reload for the lifetime of
|
|
48
|
+
// this process. Trade-offs:
|
|
49
|
+
// - GOOD for long-running MCP: zero corpus-load cost on subsequent requests.
|
|
50
|
+
// - BAD for tests / hot-reload: a fresh fragment file isn't visible until
|
|
51
|
+
// the process restarts.
|
|
52
|
+
// To force a reload (e.g. in a test that just wrote a new fragment file),
|
|
53
|
+
// call `loadAll()` directly — it's idempotent. The `booted` flag here is a
|
|
54
|
+
// process-singleton optimization, not a correctness invariant.
|
|
55
|
+
|
|
43
56
|
// Retrieval score threshold — above this we trust the match and emit verbatim;
|
|
44
57
|
// below, fall through to LLM synthesis (creative composition from fragments).
|
|
45
58
|
// Calibrated on the 100-intent held-out set:
|
|
@@ -105,8 +118,25 @@ export async function generateZettel({ intent, mode = 'instant', llmAdapter = nu
|
|
|
105
118
|
};
|
|
106
119
|
} catch (err) {
|
|
107
120
|
// If iteration synthesis fails, fall through to the normal path. Record
|
|
108
|
-
// the failure so the next turn can see we tried
|
|
121
|
+
// the failure so the next turn can see we tried, and auto-fire an issue
|
|
122
|
+
// ticket so synthesis-owners can investigate the failure post-hoc.
|
|
109
123
|
console.error('[zettel] iteration synthesis failed:', err.message);
|
|
124
|
+
try {
|
|
125
|
+
await autoReport(
|
|
126
|
+
'iteration-synthesis-failure',
|
|
127
|
+
{
|
|
128
|
+
intent,
|
|
129
|
+
turn: priorTurns.length + 1,
|
|
130
|
+
state_id: sessionId,
|
|
131
|
+
body: `Auto-fired by generator-adapter. Iteration synthesis threw on turn ${priorTurns.length + 1}.\n\nError: \`${err.message}\``,
|
|
132
|
+
tags: ['generator-adapter', `turn-${priorTurns.length + 1}`],
|
|
133
|
+
},
|
|
134
|
+
{ evalMode: mode === 'eval' }
|
|
135
|
+
);
|
|
136
|
+
} catch (reportErr) {
|
|
137
|
+
// Never let issue-reporting crash the request path.
|
|
138
|
+
console.error('[zettel] autoReport failed:', reportErr.message);
|
|
139
|
+
}
|
|
110
140
|
}
|
|
111
141
|
}
|
|
112
142
|
|
|
@@ -85,6 +85,16 @@ export const AUTO_FIRE_POLICY = {
|
|
|
85
85
|
return `Scope drift${ratio ? ' ' + ratio : ''}: composed HTML exceeds bound-chunk envelope${intent}`;
|
|
86
86
|
},
|
|
87
87
|
},
|
|
88
|
+
'iteration-synthesis-failure': {
|
|
89
|
+
type: 'bug',
|
|
90
|
+
severity: 'drift',
|
|
91
|
+
suggested_owner: 'synthesis',
|
|
92
|
+
titleFor: (ctx) => {
|
|
93
|
+
const turn = ctx?.turn != null ? ` on turn ${ctx.turn}` : '';
|
|
94
|
+
const intent = ctx?.intent ? ` for "${truncate(ctx.intent, 40)}"` : '';
|
|
95
|
+
return `Iteration synthesis failed${turn}${intent}`;
|
|
96
|
+
},
|
|
97
|
+
},
|
|
88
98
|
};
|
|
89
99
|
|
|
90
100
|
function truncate(s, n = 60) {
|