@adia-ai/a2ui-mcp 0.4.5 → 0.4.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -11,6 +11,24 @@ zettel strategies.
11
11
 
12
12
  _No pending changes._
13
13
 
14
+ ## [0.4.7] - 2026-05-12
15
+
16
+ ### Changed — smoke + test scripts aligned to post-§72 retrieval surface
17
+
18
+ The retrieval probe set in `scripts/smoke-engine-registry.mjs` and the checks in `scripts/test-a2ui.mjs` (composition-count threshold, spot-checks, and intent-gate keyword surface) now exercise the harvested-chunks substrate that survives §72's retirement of `corpus/patterns/` and `corpus/compositions/`. The "pricing tiers" probe was dropped (there is no pricing surface in the shipped `/site/`) and replaced with "admin dashboard with kpi cards", which matches `dashboard-admin-page`. Spot-check names were updated to real chunk names (`auth-signin-card-password`, `auth-signup-entry`, `dashboard-admin-page`, `settings-admin-page`). No tool or API change — internal scripts only.
19
+
20
+ ## [0.4.6] - 2026-05-12
21
+
22
+ ### Changed — patterns surface retired (§64 step 5, 2026-05-12)
23
+
24
+ Companion to `@adia-ai/a2ui-compose@[Unreleased]` + `@adia-ai/a2ui-retrieval@[Unreleased]` retiring the legacy `pattern-library.js` surface in favor of `composition-library` as the canonical retrieval source.
25
+
26
+ - **`mcp/server.js`** — removed pattern-surface MCP tools that read from the retired `pattern-library` (the surface had been gradually deprecated through §62–§64; this commit cuts the last consumer paths). The net change in tool count is reflected in `mcp/TOOLS.md`.
27
+ - **`mcp/scripts/eval-fix.mjs`** — retired the pattern-surface fix-up logic (no longer reachable; compositions are the canonical retrieval shape).
28
+ - **`mcp/scripts/test-a2ui.mjs`** — assertions updated to reflect the post-§64 tool inventory.
29
+
30
+ See root [CHANGELOG.md `[Unreleased]`](../../../CHANGELOG.md) for the cross-cutting §64 arc narrative.
31
+
14
32
  ## [0.4.5] - 2026-05-12
15
33
 
16
34
  ### Ride-along (no source changes)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adia-ai/a2ui-mcp",
3
- "version": "0.4.5",
3
+ "version": "0.4.7",
4
4
  "description": "AdiaUI A2UI MCP server. Exposes the compose engine over MCP with an engine selector for monolithic + zettel strategies.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -44,10 +44,15 @@ console.log(`\n[smoke] shape invariants: ${ok ? 'ok' : 'FAIL'}`);
44
44
  // component tree's text content must overlap the intent's keywords.
45
45
  // This catches retrieval regressions (wrong-domain top hit) that pure
46
46
  // shape-validation gates miss.
47
+ // Probes pick intents that match the post-§65 harvested-chunks
48
+ // substrate (auth flows, dashboard variants, settings, errors).
49
+ // Removed: 'pricing tiers' (no pricing surface in shipped /site/ —
50
+ // retrieval honestly returns synthesis-failed; LLM fallback handles
51
+ // the intent at ~9s vs ~25ms).
47
52
  const RETRIEVAL_PROBES = [
48
53
  { intent: 'login form with email and password', engine: 'zettel', expectKeywords: ['sign in', 'login', 'email', 'password'] },
49
- { intent: 'pricing tiers with three plans', engine: 'zettel', expectKeywords: ['pricing', 'tier', 'plan', 'starter', 'pro', 'enterprise', '$'] },
50
54
  { intent: 'sign up form for a new account', engine: 'zettel', expectKeywords: ['sign up', 'register', 'create account', 'email'] },
55
+ { intent: 'admin dashboard with kpi cards', engine: 'zettel', expectKeywords: ['dashboard', 'kpi', 'metric', 'revenue', 'users', 'orders', 'conversion'] },
51
56
  ];
52
57
 
53
58
  function extractText(messages) {
@@ -72,20 +72,23 @@ try {
72
72
  bad('LLM adapter', e.message);
73
73
  }
74
74
 
75
- // ── Test 2: Pattern library ─────────────────────────────────────────
75
+ // ── Test 2: Composition library (post-§65 chunks-only substrate) ───
76
76
 
77
- console.log('\n2. Pattern library');
77
+ console.log('\n2. Composition library');
78
78
 
79
79
  const { searchBlocks, listPatterns, lookupDomain } = await import('../../compose/core/reference.js');
80
80
 
81
- const allPatterns = listPatterns();
82
- const withTemplates = allPatterns.filter(p => p.template && Array.isArray(p.template));
83
- const domains = [...new Set(allPatterns.map(p => p.domain).filter(Boolean))];
81
+ const allCompositions = listPatterns();
82
+ const withTemplates = allCompositions.filter(p => p.template && Array.isArray(p.template));
83
+ const domains = [...new Set(allCompositions.map(p => p.domain).filter(Boolean))];
84
84
 
85
- if (allPatterns.length >= 70) {
86
- ok('Pattern count', `${allPatterns.length} total (${withTemplates.length} with templates)`);
85
+ // Post-§65: retrieval surface is the harvested-chunks substrate
86
+ // (~28-32 annotated chunks at the time of v0.4.7). Threshold sized
87
+ // for that floor; grows naturally as more source HTML gets annotated.
88
+ if (allCompositions.length >= 20) {
89
+ ok('Composition count', `${allCompositions.length} total (${withTemplates.length} with templates)`);
87
90
  } else {
88
- bad('Pattern count', `only ${allPatterns.length} (expected 70+)`);
91
+ bad('Composition count', `only ${allCompositions.length} (expected 20+)`);
89
92
  }
90
93
 
91
94
  if (domains.length >= 3) {
@@ -94,14 +97,17 @@ if (domains.length >= 3) {
94
97
  bad('Domains', `only ${domains.length}: ${domains.join(', ')}`);
95
98
  }
96
99
 
97
- // Spot-check known patterns
98
- const spotChecks = ['login-form', 'dashboard', 'data-table-view', 'user-profile'];
99
- const foundAll = spotChecks.every(name => allPatterns.some(p => p.name === name));
100
+ // Spot-check chunk names that exist in the harvested substrate. These
101
+ // are real chunk names from /apps/user-flow/, /apps/saas/, etc. —
102
+ // post-§65 the test asserts on actual product surfaces, not on
103
+ // curated composition JSON that's no longer the canonical source.
104
+ const spotChecks = ['auth-signin-card-password', 'auth-signup-entry', 'dashboard-admin-page', 'settings-admin-page'];
105
+ const foundAll = spotChecks.every(name => allCompositions.some(p => p.name === name));
100
106
  if (foundAll) {
101
- ok('Known patterns', spotChecks.join(', '));
107
+ ok('Known chunks', spotChecks.join(', '));
102
108
  } else {
103
- const missing = spotChecks.filter(name => !allPatterns.some(p => p.name === name));
104
- bad('Known patterns', `missing: ${missing.join(', ')}`);
109
+ const missing = spotChecks.filter(name => !allCompositions.some(p => p.name === name));
110
+ bad('Known chunks', `missing: ${missing.join(', ')}`);
105
111
  }
106
112
 
107
113
  // ── Test 3: Instant mode gate ───────────────────────────────────────
@@ -118,14 +124,20 @@ function testGate(intent) {
118
124
  const intentWords = intent.toLowerCase().split(/\s+/).filter(w => w.length > 2 && !GATE_STOPS.has(w));
119
125
  const nameWords = best.name.toLowerCase().split(/[-_\s]+/);
120
126
  const matchTags = (best.tags || []).map(t => t.toLowerCase());
127
+ // Post-§65: harvested chunks carry semantic intent in `keywords` more
128
+ // than in `tags` (which became {complexity, layout} slots). Include
129
+ // keywords in the gate so `login → auth-signin-card-password` strong-hits
130
+ // off the chunk's `keywords: ["login", ...]` field.
131
+ const matchKeywords = (best.keywords || []).map(k => k.toLowerCase());
121
132
  const matchDomain = (best.domain || '').toLowerCase();
122
133
 
123
134
  const hasStrongHit = intentWords.some(w => {
124
135
  if (w.length < 3) return false;
125
- if (nameWords.includes(w) || matchTags.includes(w)) return true;
136
+ if (nameWords.includes(w) || matchTags.includes(w) || matchKeywords.includes(w)) return true;
126
137
  if (w.length >= 4) {
127
138
  return nameWords.some(n => n.length >= 3 && (w.startsWith(n) || n.startsWith(w))) ||
128
- matchTags.some(t => t.length >= 3 && (w.startsWith(t) || t.startsWith(w)));
139
+ matchTags.some(t => t.length >= 3 && (w.startsWith(t) || t.startsWith(w))) ||
140
+ matchKeywords.some(k => k.length >= 3 && (w.startsWith(k) || k.startsWith(w)));
129
141
  }
130
142
  return false;
131
143
  });
@@ -133,19 +145,26 @@ function testGate(intent) {
133
145
  const hasWeakHit = !hasStrongHit && intentWords.some(w => {
134
146
  return nameWords.some(n => n.length >= 3 && (n.includes(w) || w.includes(n))) ||
135
147
  matchTags.some(t => t.length >= 3 && (t.includes(w) || w.includes(t))) ||
148
+ matchKeywords.some(k => k.length >= 3 && (k.includes(w) || w.includes(k))) ||
136
149
  matchDomain.includes(w);
137
150
  });
138
151
 
139
152
  return { gate: hasStrongHit ? 'STRONG' : hasWeakHit ? 'WEAK' : 'REJECTED', pattern: best.name };
140
153
  }
141
154
 
142
- // Should STRONG match
155
+ // Should STRONG match — restricted to intents covered by the
156
+ // harvested-chunks substrate (auth, dashboard, settings, error pages).
157
+ // Intents previously tested ("pricing table", "chat interface",
158
+ // "todo list", etc.) dropped because §65 retired the curated
159
+ // composition surface — LLM fallback handles those now.
160
+ // Intents need ≥2 content-token hits OR a direct name-token match —
161
+ // short 1-content-word intents (e.g. just "login form") get gated out
162
+ // by composition-library's anti-spurious-match logic. Use intents that
163
+ // land naturally — they're what real users type anyway.
143
164
  const strongTests = [
144
- ['login form', 'login-form'],
145
- ['nav bar', null], // any match is fine
146
- ['dashboard stats', null],
147
- ['pricing table', null],
148
- ['chat interface', null],
165
+ ['login with email and password', null], // → auth-signin-card-password (3 keyword hits)
166
+ ['admin dashboard kpi', null], // dashboard-admin-page
167
+ ['workspace admin settings', null], // → settings-admin-page
149
168
  ];
150
169
  for (const [intent, expected] of strongTests) {
151
170
  const { gate, pattern } = testGate(intent);
@@ -158,10 +177,9 @@ for (const [intent, expected] of strongTests) {
158
177
 
159
178
  // Should NOT be REJECTED (STRONG or WEAK both acceptable)
160
179
  const passTests = [
161
- 'show me a table',
162
- 'create a todo list',
163
- 'user profile card',
180
+ 'sign up for an account',
164
181
  'settings page',
182
+ '404 not found error',
165
183
  ];
166
184
  for (const intent of passTests) {
167
185
  const { gate, pattern } = testGate(intent);
@@ -237,17 +255,21 @@ if (!THINKING) {
237
255
  }
238
256
 
239
257
  // ── Test 6: Training corpus surfaces ────────────────────────────────
240
- // (The legacy exemplar extract ingest path was retired 2026-04-28 in
241
- // mcp 0.0.5. The chunk corpus is the training surface now.)
258
+ // Post-§65: `compositions/` retired alongside the hand-authored
259
+ // pattern library. The harvested-chunks substrate is the sole
260
+ // retrieval surface; everything else falls through to LLM.
261
+ // (Legacy exemplar extract → ingest path retired 2026-04-28 mcp 0.0.5.)
242
262
 
243
263
  console.log('\n6. Training corpus surfaces');
244
264
 
245
- // 6a. Hand-authored pattern library should be ≥ 100 entries.
265
+ // 6a. Composition library (harvested chunks via composition-library).
266
+ // Threshold sized for the post-§65 floor (~28 annotated chunks at
267
+ // v0.4.7); grows as more source HTML gets annotated.
246
268
  const patterns = listPatterns();
247
- if (patterns.length >= 100) {
248
- ok('Pattern library', `${patterns.length} hand-authored patterns`);
269
+ if (patterns.length >= 20) {
270
+ ok('Composition library', `${patterns.length} compositions (harvested-chunks substrate)`);
249
271
  } else {
250
- bad('Pattern library', `only ${patterns.length} (expected ≥ 100)`);
272
+ bad('Composition library', `only ${patterns.length} (expected ≥ 20)`);
251
273
  }
252
274
 
253
275
  // 6b. Gen-UI chunk corpus — should be ≥ 500 unique chunks across
package/server.js CHANGED
@@ -11,7 +11,7 @@
11
11
  * validate_schema — Validate A2UI messages
12
12
  * lookup_component — Component API lookup
13
13
  * get_component_map — Full catalog
14
- * search_patterns — Pattern library search
14
+ * search_patterns — Composition library search (kept for back-compat; backed by composition-library since §64)
15
15
  * classify_intent — Domain classification
16
16
  *
17
17
  * Usage:
@@ -39,7 +39,6 @@ import {
39
39
  } from '../retrieval/catalog.js';
40
40
  import { serializeEntry } from '../retrieval/component-entry.js';
41
41
  import { classifyIntent, getDomain, getAllDomains } from '../retrieval/domain-router.js';
42
- import { getPattern, searchPatterns, getAllPatterns } from '../retrieval/pattern-library.js';
43
42
  import { getAntiPatterns, checkAllAntiPatterns } from '../retrieval/anti-patterns.js';
44
43
  import { assembleContext } from '../retrieval/context-assembler.js';
45
44
 
@@ -49,6 +48,7 @@ import {
49
48
  getComposition as getZettelComposition,
50
49
  getAllCompositions as getAllZettelCompositions,
51
50
  getGraph as getZettelGraph,
51
+ searchAll as searchCompositions,
52
52
  } from '../compose/strategies/zettel/composition-library.js';
53
53
  import {
54
54
  resolveComposition as resolveZettelComposition,
@@ -57,7 +57,7 @@ import {
57
57
  // Zettel bootstrap is still needed for get_fragment/resolve_composition tools;
58
58
  // the generate_ui tool now dispatches through the unified registry in gen-ui.
59
59
 
60
- // Bootstrap zettel corpus alongside the monolithic pattern library
60
+ // Bootstrap zettel composition corpus
61
61
  const _zettelBoot = loadZettelCorpus();
62
62
  console.error(
63
63
  `[adiaui-mcp] zettel corpus: ${_zettelBoot.compositionCount} compositions`,
@@ -71,10 +71,9 @@ import {
71
71
  searchChunks as searchGenUIChunks,
72
72
  } from '../corpus/scripts/chunk-library.js';
73
73
 
74
- // ── Inline-tool deps (transpiler / wiring / patterns / feedback) ──
74
+ // ── Inline-tool deps (transpiler / wiring / feedback) ──
75
75
  import { transpileHTML } from '../compose/transpiler/transpiler.js';
76
76
  import { getWiringCatalog } from '../retrieval/wiring-catalog.js';
77
- import { registerPattern } from '../retrieval/pattern-library.js';
78
77
  import { FeedbackCollector } from '../retrieval/feedback/feedback.js';
79
78
  import { feedbackStore } from '../retrieval/feedback/feedback-store.js';
80
79
 
@@ -203,31 +202,16 @@ server.tool(
203
202
 
204
203
  server.tool(
205
204
  'search_patterns',
206
- `Search the pattern library for reusable UI templates. Returns matching patterns with full A2UI component trees that can be used directly or adapted.
205
+ `Search the composition library for reusable UI templates. Returns matching compositions with full A2UI component trees that can be used directly or adapted.
207
206
 
208
- Use this to find a starting point before generating from scratch. If a good pattern exists, pass it to generate_ui with instant mode. If no pattern matches, use generate_ui with thinking mode.
207
+ Use this to find a starting point before generating from scratch. If a good composition exists, pass it to generate_ui with instant mode. If no composition matches, use generate_ui with thinking mode.
209
208
 
210
- Keyword search by default. Set semantic=true for LLM-powered conceptual matching. Set remix=true to compose a new pattern by combining existing ones.`,
209
+ Keyword search (§64 v0.4.6 migration: now backed by composition-library; the historical "pattern" library is retired).`,
211
210
  {
212
211
  query: z.string().describe('Search query (natural language)'),
213
- semantic: z.boolean().optional().describe('Use LLM for conceptual matching (default: false)'),
214
- remix: z.boolean().optional().describe('Compose a new pattern by remixing existing ones (default: false)'),
215
212
  },
216
- async ({ query, semantic, remix }) => {
217
- if (semantic || remix) {
218
- const { semanticSearchPatterns } = await import('../a2ui/intelligence/pattern-library.js');
219
- const { createAdapter } = await import('../a2ui/llm-bridge.js');
220
- try {
221
- const adapter = await createAdapter();
222
- const results = await semanticSearchPatterns(query, { llmAdapter: adapter, remix });
223
- return { content: [{ type: 'text', text: JSON.stringify(results, null, 2) }] };
224
- } catch (err) {
225
- // Fall back to keyword search on LLM failure
226
- const results = searchPatterns(query);
227
- return { content: [{ type: 'text', text: JSON.stringify({ matches: results, note: 'Semantic search unavailable, using keyword fallback' }, null, 2) }] };
228
- }
229
- }
230
- const results = searchPatterns(query);
213
+ async ({ query }) => {
214
+ const results = searchCompositions(query);
231
215
  return { content: [{ type: 'text', text: JSON.stringify(results, null, 2) }] };
232
216
  }
233
217
  );
@@ -313,33 +297,10 @@ server.tool(
313
297
  }
314
298
  );
315
299
 
316
- // ── Pattern & Feedback Tools ──
300
+ // ── Feedback Tools ──
317
301
 
318
302
  const feedbackCollector = new FeedbackCollector();
319
303
 
320
- server.tool(
321
- 'import_pattern',
322
- 'Import a saved pattern JSON into the runtime pattern library.',
323
- {
324
- pattern: z.string().describe('JSON string of pattern object { name, description, domain, components, template }'),
325
- },
326
- async ({ pattern }) => {
327
- try {
328
- const parsed = JSON.parse(pattern);
329
- if (!parsed.name || !parsed.template || !Array.isArray(parsed.template)) {
330
- return { content: [{ type: 'text', text: 'Invalid: must have name and template array' }], isError: true };
331
- }
332
- const success = registerPattern(parsed);
333
- if (!success) {
334
- return { content: [{ type: 'text', text: `Pattern "${parsed.name}" already exists` }], isError: true };
335
- }
336
- return { content: [{ type: 'text', text: JSON.stringify({ imported: parsed.name, components: parsed.components?.length || 0 }) }] };
337
- } catch (err) {
338
- return { content: [{ type: 'text', text: `Parse error: ${err.message}` }], isError: true };
339
- }
340
- }
341
- );
342
-
343
304
  server.tool(
344
305
  'submit_feedback',
345
306
  'Submit structured feedback for a generation execution. Used by the evolution engine to learn from each generation.',
@@ -447,13 +408,13 @@ server.resource(
447
408
  );
448
409
 
449
410
  server.resource(
450
- 'patterns',
451
- 'a2ui://catalog/patterns',
411
+ 'compositions',
412
+ 'a2ui://catalog/compositions',
452
413
  async (uri) => ({
453
414
  contents: [{
454
415
  uri: uri.href,
455
416
  mimeType: 'application/json',
456
- text: JSON.stringify(getAllPatterns(), null, 2),
417
+ text: JSON.stringify(getAllZettelCompositions(), null, 2),
457
418
  }],
458
419
  })
459
420
  );
@@ -663,9 +624,8 @@ async function main() {
663
624
 
664
625
  await server.connect(transport);
665
626
  const catalog = await getCatalog();
666
- const patterns = getAllPatterns();
667
627
  const traits = getTraits();
668
- console.error(`AdiaUI MCP Server running (${catalog.totalTypes} components, ${patterns.length} patterns, ${traits.length} traits, ${_zettelBoot.compositionCount} compositions)`);
628
+ console.error(`AdiaUI MCP Server running (${catalog.totalTypes} components, ${traits.length} traits, ${_zettelBoot.compositionCount} compositions)`);
669
629
  }
670
630
 
671
631
  main().catch(console.error);
@@ -1,446 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * eval-fix.mjs — Recursive improvement loop for A2UI generation quality.
5
- *
6
- * Runs evals, diagnoses failures, traces root causes upstream, and applies fixes.
7
- *
8
- * The loop:
9
- * 1. Run eval suite → collect scores + failures
10
- * 2. For each failure, diagnose: pattern issue? template issue? training gap? validator bug?
11
- * 3. Generate a fix plan (which file, what change)
12
- * 4. Apply fixes (patch pattern JSON, update training manifest, etc.)
13
- * 5. Re-run evals to verify the fix worked
14
- * 6. Repeat until all pass or max iterations reached
15
- *
16
- * Diagnosis categories (in priority order):
17
- * ANTI_PATTERN → pattern template has bare Text (no variant), unslotted header children,
18
- * or buttons without text. Fix: patch the pattern JSON.
19
- * INTENT_MISS → generated output missing required components. Fix: check if the
20
- * matched pattern has those components; if not, update the pattern or
21
- * add the component to the template.
22
- * CARD_MODEL → Card children don't follow Header/Section/Footer structure.
23
- * Fix: restructure the pattern template.
24
- * STRUCTURAL → validation score below threshold. Fix: usually a template syntax
25
- * issue (missing root, dangling children, duplicate IDs).
26
- * COVERAGE → uses forbidden component types. Fix: swap components in template.
27
- *
28
- * Usage:
29
- * node packages/a2ui/mcp/scripts/eval-fix.mjs # diagnose + report (dry run)
30
- * node packages/a2ui/mcp/scripts/eval-fix.mjs --apply # diagnose + apply fixes + re-verify
31
- * node packages/a2ui/mcp/scripts/eval-fix.mjs --max-iter=3 # limit fix iterations (default: 3)
32
- * node packages/a2ui/mcp/scripts/eval-fix.mjs --verbose # show detailed diagnosis
33
- */
34
-
35
- import '../../../../scripts/load-env.mjs';
36
- import { readFile, writeFile } from 'node:fs/promises';
37
- import { dirname, join } from 'node:path';
38
- import { fileURLToPath } from 'node:url';
39
-
40
- const __dirname = dirname(fileURLToPath(import.meta.url));
41
- const REPO_ROOT = join(__dirname, '..', '..', '..', '..');
42
- const EVALS_PATH = join(REPO_ROOT, '.claude', 'skills', 'adia-ui-kit', 'evals', 'evals.json');
43
- const PATTERNS_DIR = join(REPO_ROOT, 'packages', 'web-components', 'catalog', 'patterns');
44
-
45
- const args = new Set(process.argv.slice(2));
46
- const APPLY = args.has('--apply');
47
- const VERBOSE = args.has('--verbose');
48
- const TICKETS = args.has('--tickets');
49
- const MAX_ITER = parseInt([...args].find(a => a.startsWith('--max-iter='))?.split('=')[1] || '3');
50
-
51
- // ── Load modules ──
52
-
53
- const { generateUI } = await import('../../compose/core/generator.js');
54
- const { validateSchema } = await import('../../validator/validator.js');
55
- const { getPattern, searchPatterns } = await import('../../retrieval/pattern-library.js');
56
- const { createTicket, formatTicket, formatTicketList } = await import('../../../../.tickets/tickets.js');
57
-
58
- // ── Scoring (same as test-evals.mjs) ──
59
-
60
- function scoreAntiPatterns(components) {
61
- const violations = [];
62
- for (const c of components) {
63
- if (c.component === 'Text' && !c.variant) {
64
- violations.push({ id: c.id, issue: 'text-no-variant', message: `Text "${c.id}" has no variant attribute` });
65
- }
66
- const parent = components.find(p => p.children?.includes(c.id));
67
- if (parent?.component === 'Header' && !c.slot && c.component === 'Text') {
68
- violations.push({ id: c.id, issue: 'header-child-no-slot', message: `Text "${c.id}" in Header without slot attribute` });
69
- }
70
- if (c.component === 'Button' && !c.text && !c.icon) {
71
- violations.push({ id: c.id, issue: 'button-no-label', message: `Button "${c.id}" has no text or icon` });
72
- }
73
- // Text used for clickable actions (should be Button)
74
- if (c.component === 'Text' && c.textContent) {
75
- const actionWords = /\b(view|contact|learn more|retry|download|sign|log ?in|log ?out|submit|cancel|delete|remove|edit|save|close|open|click|tap|go to|visit)\b/i;
76
- if (actionWords.test(c.textContent) && !c.slot) {
77
- violations.push({ id: c.id, issue: 'text-as-action', message: `Text "${c.id}" looks like an action ("${c.textContent.slice(0, 30)}") — should be a Button` });
78
- }
79
- }
80
- }
81
- return violations;
82
- }
83
-
84
- function scoreIntent(components, evalCase) {
85
- const required = evalCase.required_components || [];
86
- const present = new Set(components.map(c => c.component));
87
- const missing = required.filter(r => !present.has(r));
88
- return { required, present: [...present], missing };
89
- }
90
-
91
- function scoreCardModel(components) {
92
- const issues = [];
93
- const cards = components.filter(c => c.component === 'Card');
94
- for (const card of cards) {
95
- const childIds = card.children || [];
96
- const children = childIds.map(id => components.find(c => c.id === id)).filter(Boolean);
97
- const types = children.map(c => c.component);
98
- if (!types.includes('Header') && !types.includes('Section')) {
99
- issues.push({ cardId: card.id, issue: 'no-header-or-section', children: types });
100
- }
101
- }
102
- return issues;
103
- }
104
-
105
- // ── Diagnosis ──
106
-
107
- function diagnose(evalCase, result) {
108
- const components = result.messages?.[0]?.components || [];
109
- // Find matched pattern by running the same search the generator uses
110
- const searchResults = searchPatterns(evalCase.prompt);
111
- const matched = searchResults[0]?.name || null;
112
- const findings = [];
113
-
114
- // 1. Anti-pattern violations
115
- const antiViolations = scoreAntiPatterns(components);
116
- if (antiViolations.length > 0) {
117
- findings.push({
118
- category: 'ANTI_PATTERN',
119
- severity: 'high',
120
- matched_pattern: matched,
121
- violations: antiViolations,
122
- fix: matched
123
- ? `Patch pattern "${matched}": add variant to Text nodes, add slot to Header children`
124
- : 'No pattern matched — anti-patterns come from fallback generation',
125
- });
126
- }
127
-
128
- // 2. Intent alignment
129
- const intent = scoreIntent(components, evalCase);
130
- if (intent.missing.length > 0) {
131
- findings.push({
132
- category: 'INTENT_MISS',
133
- severity: 'medium',
134
- matched_pattern: matched,
135
- missing_components: intent.missing,
136
- present_components: intent.present,
137
- fix: matched
138
- ? `Pattern "${matched}" is missing: ${intent.missing.join(', ')}. Add these components to the pattern template.`
139
- : `No pattern matched. Consider creating a pattern for intent: "${evalCase.prompt.slice(0, 60)}"`,
140
- });
141
- }
142
-
143
- // 3. Card model
144
- const cardIssues = scoreCardModel(components);
145
- if (cardIssues.length > 0) {
146
- findings.push({
147
- category: 'CARD_MODEL',
148
- severity: 'medium',
149
- matched_pattern: matched,
150
- issues: cardIssues,
151
- fix: matched
152
- ? `Pattern "${matched}" has Card nodes without Header/Section children`
153
- : 'Card model violation in generated output',
154
- });
155
- }
156
-
157
- // 4. Structural
158
- const validation = validateSchema(result.messages);
159
- const failedChecks = (validation.checks || []).filter(c => !c.passed);
160
- if (failedChecks.length > 0) {
161
- findings.push({
162
- category: 'STRUCTURAL',
163
- severity: failedChecks.some(c => c.hardFail) ? 'critical' : 'low',
164
- matched_pattern: matched,
165
- failed_checks: failedChecks.map(c => ({ name: c.name, message: c.message })),
166
- fix: matched
167
- ? `Pattern "${matched}" fails validation: ${failedChecks.map(c => c.name).join(', ')}`
168
- : 'Structural issues in generated output',
169
- });
170
- }
171
-
172
- return { evalId: evalCase.id, prompt: evalCase.prompt, matched_pattern: matched, findings };
173
- }
174
-
175
- // ── Fix application ──
176
-
177
- async function applyFix(diagnosis) {
178
- const fixes = [];
179
-
180
- for (const finding of diagnosis.findings) {
181
- const patternName = finding.matched_pattern;
182
- if (!patternName) continue; // Can't fix if no pattern matched
183
-
184
- // Find the pattern JSON file
185
- const pattern = getPattern(patternName);
186
- if (!pattern) continue;
187
-
188
- const domain = pattern.domain || 'layout';
189
- const filePath = join(PATTERNS_DIR, domain, `${patternName}.json`);
190
-
191
- let patternJson;
192
- try {
193
- patternJson = JSON.parse(await readFile(filePath, 'utf8'));
194
- } catch {
195
- console.log(` ⚠ Cannot read ${filePath} — skipping fix`);
196
- continue;
197
- }
198
-
199
- let modified = false;
200
-
201
- if (finding.category === 'ANTI_PATTERN') {
202
- for (const v of finding.violations) {
203
- const node = patternJson.template.find(c => c.id === v.id);
204
- if (!node) continue;
205
-
206
- if (v.issue === 'text-no-variant' && node.component === 'Text') {
207
- node.variant = node.variant || 'body';
208
- modified = true;
209
- fixes.push(` + Added variant="body" to ${node.id} in ${patternName}`);
210
- }
211
- if (v.issue === 'header-child-no-slot' && node.component === 'Text') {
212
- // First text child = heading, second = description
213
- const headerNode = patternJson.template.find(c => c.children?.includes(node.id) && c.component === 'Header');
214
- if (headerNode) {
215
- const childTexts = headerNode.children
216
- .map(id => patternJson.template.find(c => c.id === id))
217
- .filter(c => c?.component === 'Text');
218
- const idx = childTexts.indexOf(node);
219
- node.slot = idx === 0 ? 'heading' : 'description';
220
- modified = true;
221
- fixes.push(` + Added slot="${node.slot}" to ${node.id} in ${patternName}`);
222
- }
223
- }
224
- if (v.issue === 'button-no-label' && node.component === 'Button') {
225
- node.text = node.text || 'Action';
226
- modified = true;
227
- fixes.push(` + Added text="Action" to ${node.id} in ${patternName}`);
228
- }
229
- }
230
- }
231
-
232
- if (finding.category === 'INTENT_MISS' && finding.missing_components.length > 0) {
233
- const template = patternJson.template;
234
- const root = template.find(c => c.id === 'root');
235
-
236
- for (const comp of finding.missing_components) {
237
- // Auto-fix: add Footer with Button if pattern is missing both
238
- if (comp === 'Footer' && finding.missing_components.includes('Button')) {
239
- if (root && !template.some(c => c.component === 'Footer')) {
240
- const ftrId = 'ftr';
241
- const btnId = 'action-btn';
242
- template.push({ id: ftrId, component: 'Footer', children: [btnId] });
243
- template.push({ id: btnId, component: 'Button', text: 'Save', variant: 'primary', slot: 'action' });
244
- if (root.children && !root.children.includes(ftrId)) {
245
- root.children.push(ftrId);
246
- }
247
- // Also add to components list
248
- if (!patternJson.components.includes('Footer')) patternJson.components.push('Footer');
249
- if (!patternJson.components.includes('Button')) patternJson.components.push('Button');
250
- modified = true;
251
- fixes.push(` + Added Footer with Button to ${patternName}`);
252
- }
253
- continue; // Skip individual Button fix since Footer handled it
254
- }
255
-
256
- // Auto-fix: add Button to Footer if Footer exists but Button doesn't
257
- if (comp === 'Button' && !finding.missing_components.includes('Footer')) {
258
- const footer = template.find(c => c.component === 'Footer');
259
- if (footer) {
260
- const btnId = 'save-btn';
261
- template.push({ id: btnId, component: 'Button', text: 'Save', variant: 'primary', slot: 'action' });
262
- if (footer.children) footer.children.push(btnId);
263
- else footer.children = [btnId];
264
- if (!patternJson.components.includes('Button')) patternJson.components.push('Button');
265
- modified = true;
266
- fixes.push(` + Added Button to existing Footer in ${patternName}`);
267
- } else {
268
- // No footer — add one
269
- const ftrId = 'ftr';
270
- const btnId = 'save-btn';
271
- template.push({ id: ftrId, component: 'Footer', children: [btnId] });
272
- template.push({ id: btnId, component: 'Button', text: 'Save', variant: 'primary', slot: 'action' });
273
- if (root?.children) root.children.push(ftrId);
274
- if (!patternJson.components.includes('Footer')) patternJson.components.push('Footer');
275
- if (!patternJson.components.includes('Button')) patternJson.components.push('Button');
276
- modified = true;
277
- fixes.push(` + Added Footer + Button to ${patternName}`);
278
- }
279
- continue;
280
- }
281
-
282
- // Auto-fix: wrap root in Column if Column is missing
283
- if (comp === 'Column' && root && root.component !== 'Column') {
284
- // If root is Card, check if Section children could be wrapped
285
- const section = template.find(c => c.component === 'Section' && root.children?.includes(c.id));
286
- if (section && section.children?.length > 0) {
287
- // Wrap section children in a Column
288
- const colId = 'col-wrap';
289
- const originalChildren = [...section.children];
290
- template.push({ id: colId, component: 'Column', children: originalChildren, gap: '4' });
291
- section.children = [colId];
292
- if (!patternJson.components.includes('Column')) patternJson.components.push('Column');
293
- modified = true;
294
- fixes.push(` + Wrapped Section children in Column for ${patternName}`);
295
- } else {
296
- fixes.push(` ⚠ Pattern "${patternName}" needs ${comp} — manual review required`);
297
- }
298
- continue;
299
- }
300
-
301
- fixes.push(` ⚠ Pattern "${patternName}" needs ${comp} — manual review required`);
302
- }
303
- }
304
-
305
- if (modified) {
306
- patternJson.version = (patternJson.version || 1) + 1;
307
- await writeFile(filePath, JSON.stringify(patternJson, null, 2) + '\n');
308
- fixes.push(` ✓ Wrote ${filePath}`);
309
- }
310
- }
311
-
312
- return fixes;
313
- }
314
-
315
- // ── Main loop ──
316
-
317
/**
 * Runs the eval → diagnose → (ticket | fix) loop for at most MAX_ITER passes.
 *
 * Each pass generates UI for every eval case, diagnoses the result and prints
 * a per-case summary. The loop stops early when:
 *   - every eval is clean,
 *   - neither --apply nor --tickets was requested (report-only run),
 *   - --tickets was requested without --apply (tickets are filed once),
 *   - applyFix() produced no actual fix (only warnings, or nothing at all).
 *
 * After fixes are written, the patched pattern files are re-read and
 * re-registered so the next pass sees the new data.
 *
 * @returns {Promise<void>}
 */
async function runEvalLoop() {
  const evalsData = JSON.parse(await readFile(EVALS_PATH, 'utf8'));
  const evalCases = evalsData.evals;

  // Finding category → ticket category; anything unlisted maps to 'generator'.
  const ticketCategories = new Map([
    ['ANTI_PATTERN', 'pattern'],
    ['INTENT_MISS', 'pattern'],
    ['CARD_MODEL', 'pattern'],
    ['STRUCTURAL', 'validator'],
  ]);

  for (let iter = 0; iter < MAX_ITER; iter++) {
    console.log(`\n${'═'.repeat(60)}`);
    console.log(` Iteration ${iter + 1}/${MAX_ITER}`);
    console.log('═'.repeat(60));

    // Run all evals; only cases with findings are kept for the later phases.
    const diagnoses = [];

    for (const evalCase of evalCases) {
      const result = await generateUI({ intent: evalCase.prompt, mode: evalCase.mode || 'instant' });
      const diag = diagnose(evalCase, result);
      const validation = validateSchema(result.messages);
      const label = `#${evalCase.id} [${validation.score}] ${evalCase.prompt.slice(0, 55)}...`;

      if (diag.findings.length === 0) {
        console.log(` ✓ ${label}`);
        continue;
      }

      diagnoses.push(diag);
      console.log(`\n ✗ ${label}`);
      if (diag.matched_pattern) console.log(` Matched: ${diag.matched_pattern}`);
      for (const f of diag.findings) {
        console.log(` ${f.category} (${f.severity}): ${f.fix}`);
        if (VERBOSE && f.violations) {
          for (const v of f.violations) console.log(` - ${v.message}`);
        }
        if (VERBOSE && f.missing_components) {
          console.log(` Missing: ${f.missing_components.join(', ')}`);
        }
      }
    }

    if (diagnoses.length === 0) {
      console.log('\n ✓ All evals clean — no issues found.');
      break;
    }

    if (!APPLY && !TICKETS) {
      console.log(`\n ${diagnoses.length} eval(s) with issues. Run with --apply to fix or --tickets to create tickets.`);
      break;
    }

    // Create one ticket per finding when --tickets is set.
    if (TICKETS) {
      console.log('\n── Creating tickets ──');
      for (const diag of diagnoses) {
        for (const f of diag.findings) {
          const ticket = await createTicket({
            type: f.category === 'INTENT_MISS' ? 'improvement' : 'bug',
            title: `Eval #${diag.evalId}: ${f.category} in ${f.matched_pattern || 'generated output'}`,
            source: 'eval-fix',
            severity: f.severity,
            category: ticketCategories.get(f.category) ?? 'generator',
            target: f.matched_pattern
              ? `patterns/${getPattern(f.matched_pattern)?.domain || 'layout'}/${f.matched_pattern}.json`
              : null,
            description: f.fix,
            evidence: {
              evalId: diag.evalId,
              prompt: diag.prompt.slice(0, 100),
              matched_pattern: f.matched_pattern,
              ...(f.violations ? { violations: f.violations.map((v) => v.message) } : {}),
              ...(f.missing_components ? { missing_components: f.missing_components } : {}),
              ...(f.issues ? { card_issues: f.issues } : {}),
              ...(f.failed_checks ? { failed_checks: f.failed_checks.map((c) => c.name) } : {}),
            },
            suggested_fix: f.fix,
          });
          console.log(` 📋 ${ticket.id}`);
          console.log(` ${ticket.title}`);
        }
      }
    }

    // Tickets-only runs stop after a single pass.
    if (!APPLY) break;

    // Apply fixes
    console.log('\n── Applying fixes ──');
    let appliedCount = 0;
    for (const diag of diagnoses) {
      const fixLines = await applyFix(diag);
      for (const line of fixLines) {
        console.log(line);
        // applyFix() also reports "⚠ … manual review required" lines; those
        // changed nothing on disk, so they must not keep the loop running.
        if (!String(line).includes('⚠')) appliedCount++;
      }
    }

    if (appliedCount === 0) {
      console.log(' No auto-fixable issues found. Remaining issues need manual review.');
      break;
    }

    // On the final pass there is no further eval run, so say so.
    const isLastIteration = iter + 1 >= MAX_ITER;
    console.log(
      isLastIteration
        ? `\n Applied ${appliedCount} fixes. Iteration limit reached — run again to verify.`
        : `\n Applied ${appliedCount} fixes. Re-running evals...`,
    );

    // The pattern files were rewritten but the in-memory library still holds
    // the old data, and ESM caching prevents a fresh re-import. Re-read each
    // patched file and re-register it instead.
    const { registerPattern } = await import('../../retrieval/pattern-library.js');
    const patternNames = new Set(diagnoses.map((d) => d.matched_pattern).filter(Boolean));
    for (const patternName of patternNames) {
      const pattern = getPattern(patternName);
      if (!pattern) continue;
      const domain = pattern.domain || 'layout';
      const filePath = join(PATTERNS_DIR, domain, `${patternName}.json`);
      try {
        const fresh = JSON.parse(await readFile(filePath, 'utf8'));
        // Normalize tags: a non-array `tags` value is moved to `tagsMeta`
        // and `tags` falls back to the keyword list.
        if (fresh.tags && !Array.isArray(fresh.tags)) {
          fresh.tagsMeta = fresh.tags;
          fresh.tags = fresh.keywords || [];
        }
        registerPattern(fresh, { replace: true });
      } catch (err) {
        // Best-effort reload: keep going, but surface the failure so stale
        // in-memory data does not go unnoticed.
        console.warn(` ⚠ Could not reload ${filePath}: ${err?.message ?? err}`);
      }
    }
  }
}
445
-
446
// Script entry point: log any failure and mark the process as failed so that
// shells and CI see a non-zero exit status instead of a silent success.
runEvalLoop().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});