npm - @adia-ai/a2ui-mcp - Versions diffs - 0.0.1 - Mend

@adia-ai/a2ui-mcp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/CHANGELOG.md +65 -0
package/README.md +154 -0
package/package.json +35 -0
package/scripts/dogfood-test.mjs +107 -0
package/scripts/eval-diff.mjs +282 -0
package/scripts/eval-fix.mjs +446 -0
package/scripts/generate.mjs +189 -0
package/scripts/multi-turn-test.mjs +247 -0
package/scripts/smoke-engine-registry.mjs +43 -0
package/scripts/smoke-merged.mjs +50 -0
package/scripts/smoke-register-engine.mjs +51 -0
package/scripts/smoke-searchable-select.mjs +39 -0
package/scripts/smoke-synthesis.mjs +59 -0
package/scripts/smoke-zettel.mjs +37 -0
package/scripts/test-a2ui.mjs +269 -0
package/scripts/test-evals.mjs +238 -0
package/scripts/visual-validate.mjs +158 -0
package/server.js +573 -0

package/scripts/test-a2ui.mjs ADDED Viewed

@@ -0,0 +1,269 @@
+#!/usr/bin/env node
+/**
+ * test-a2ui.mjs — Smoke test for the A2UI generation pipeline.
+ *
+ * Tests:
+ *   1. Env loading & LLM adapter detection
+ *   2. Pattern library health (search, count, domains)
+ *   3. Instant mode gate (strong/weak/rejected matching)
+ *   4. Instant mode generation (pattern-matched)
+ *   5. Thinking mode generation (LLM-powered, optional)
+ *   6. Training data ingestion
+ *
+ * Usage:
+ *   node packages/a2ui/mcp/scripts/test-a2ui.mjs              # run all except thinking mode (skipped unless --thinking is passed)
+ *   node packages/a2ui/mcp/scripts/test-a2ui.mjs --thinking   # include thinking mode (calls LLM API)
+ *   node packages/a2ui/mcp/scripts/test-a2ui.mjs --verbose    # show component details
+ */
+import '../../../../scripts/load-env.mjs';
+const args = new Set(process.argv.slice(2));
+const THINKING = args.has('--thinking');
+const VERBOSE = args.has('--verbose');
+let pass = 0;
+let fail = 0;
+let skip = 0;
+function ok(name, detail = '') {
+  pass++;
+  console.log(`  ✓ ${name}${detail ? ` — ${detail}` : ''}`);
+}
+function bad(name, detail = '') {
+  fail++;
+  console.log(`  ✗ ${name}${detail ? ` — ${detail}` : ''}`);
+}
+function skipped(name, reason = '') {
+  skip++;
+  console.log(`  ○ ${name}${reason ? ` — ${reason}` : ''}`);
+}
+// ── Test 1: Env & LLM adapter ──────────────────────────────────────
+console.log('\n1. Environment & LLM adapter');
+const hasAnthropicKey = !!(process.env.ANTHROPIC_API_KEY || process.env.VITE_ANTHROPIC_API_KEY);
+const hasOpenAIKey = !!(process.env.OPENAI_API_KEY || process.env.VITE_OPENAI_API_KEY);
+const hasGeminiKey = !!(process.env.GEMINI_API_KEY || process.env.VITE_GEMINI_API_KEY);
+if (hasAnthropicKey || hasOpenAIKey || hasGeminiKey) {
+  ok('API keys loaded', [
+    hasAnthropicKey && 'anthropic',
+    hasOpenAIKey && 'openai',
+    hasGeminiKey && 'gemini',
+  ].filter(Boolean).join(', '));
+} else {
+  bad('No API keys found', 'check .env file');
+}
+let adapterType = 'unknown';
+try {
+  const { createAdapter } = await import('../../compose/llm/llm-bridge.js');
+  const adapter = await createAdapter();
+  adapterType = adapter.constructor.name;
+  if (adapterType === 'AdiaUILLMBridge') {
+    ok('LLM adapter', `${adapterType} (provider: ${adapter.provider})`);
+  } else {
+    bad('LLM adapter', `got ${adapterType} (expected AdiaUILLMBridge)`);
+  }
+} catch (e) {
+  bad('LLM adapter', e.message);
+}
+// ── Test 2: Pattern library ─────────────────────────────────────────
+console.log('\n2. Pattern library');
+const { searchBlocks, listPatterns, lookupDomain } = await import('../../compose/engine/reference.js');
+const allPatterns = listPatterns();
+const withTemplates = allPatterns.filter(p => p.template && Array.isArray(p.template));
+const domains = [...new Set(allPatterns.map(p => p.domain).filter(Boolean))];
+if (allPatterns.length >= 70) {
+  ok('Pattern count', `${allPatterns.length} total (${withTemplates.length} with templates)`);
+} else {
+  bad('Pattern count', `only ${allPatterns.length} (expected 70+)`);
+}
+if (domains.length >= 3) {
+  ok('Domains', domains.join(', '));
+} else {
+  bad('Domains', `only ${domains.length}: ${domains.join(', ')}`);
+}
+// Spot-check known patterns
+const spotChecks = ['login-form', 'dashboard', 'data-table-view', 'user-profile'];
+const foundAll = spotChecks.every(name => allPatterns.some(p => p.name === name));
+if (foundAll) {
+  ok('Known patterns', spotChecks.join(', '));
+} else {
+  const missing = spotChecks.filter(name => !allPatterns.some(p => p.name === name));
+  bad('Known patterns', `missing: ${missing.join(', ')}`);
+}
+// ── Test 3: Instant mode gate ───────────────────────────────────────
+console.log('\n3. Instant mode gate');
+const GATE_STOPS = new Set(['the','and','with','for','from','that','this','its','are','all','can','has','each','show','using','based','into','like','make','your','type','just','only','also','more','most','some','very','much','many','will','about','been','when','they','them','what','would','could','should','different','simple','basic','custom','display','controls','group','selection','content','state']);
+function testGate(intent) {
+  const patterns = searchBlocks(intent);
+  const best = patterns[0] || null;
+  if (!best) return { gate: 'NO_RESULTS', pattern: null };
+  const intentWords = intent.toLowerCase().split(/\s+/).filter(w => w.length > 2 && !GATE_STOPS.has(w));
+  const nameWords = best.name.toLowerCase().split(/[-_\s]+/);
+  const matchTags = (best.tags || []).map(t => t.toLowerCase());
+  const matchDomain = (best.domain || '').toLowerCase();
+  const hasStrongHit = intentWords.some(w => {
+    if (w.length < 3) return false;
+    if (nameWords.includes(w) || matchTags.includes(w)) return true;
+    if (w.length >= 4) {
+      return nameWords.some(n => n.length >= 3 && (w.startsWith(n) || n.startsWith(w))) ||
+             matchTags.some(t => t.length >= 3 && (w.startsWith(t) || t.startsWith(w)));
+    }
+    return false;
+  });
+  const hasWeakHit = !hasStrongHit && intentWords.some(w => {
+    return nameWords.some(n => n.length >= 3 && (n.includes(w) || w.includes(n))) ||
+           matchTags.some(t => t.length >= 3 && (t.includes(w) || w.includes(t))) ||
+           matchDomain.includes(w);
+  });
+  return { gate: hasStrongHit ? 'STRONG' : hasWeakHit ? 'WEAK' : 'REJECTED', pattern: best.name };
+}
+// Should STRONG match
+const strongTests = [
+  ['login form', 'login-form'],
+  ['nav bar', null],         // any match is fine
+  ['dashboard stats', null],
+  ['pricing table', null],
+  ['chat interface', null],
+];
+for (const [intent, expected] of strongTests) {
+  const { gate, pattern } = testGate(intent);
+  if (gate === 'STRONG') {
+    ok(`"${intent}"`, `STRONG → ${pattern}`);
+  } else {
+    bad(`"${intent}"`, `expected STRONG, got ${gate} → ${pattern}`);
+  }
+}
+// Should NOT be REJECTED (STRONG or WEAK both acceptable)
+const passTests = [
+  'show me a table',
+  'create a todo list',
+  'user profile card',
+  'settings page',
+];
+for (const intent of passTests) {
+  const { gate, pattern } = testGate(intent);
+  if (gate !== 'REJECTED' && gate !== 'NO_RESULTS') {
+    ok(`"${intent}"`, `${gate} → ${pattern}`);
+  } else {
+    bad(`"${intent}"`, `expected pass, got ${gate}`);
+  }
+}
+// ── Test 4: Instant mode generation ─────────────────────────────────
+console.log('\n4. Instant mode generation');
+const { generateUI } = await import('../../compose/engine/generator.js');
+const instantTests = [
+  { intent: 'login form', minComponents: 3 },
+  { intent: 'dashboard with stats', minComponents: 3 },
+  { intent: 'user settings page', minComponents: 3 },
+];
+for (const { intent, minComponents } of instantTests) {
+  try {
+    const result = await generateUI({ intent, mode: 'instant' });
+    const comps = result.messages?.[0]?.components || [];
+    if (comps.length >= minComponents) {
+      ok(`"${intent}"`, `${comps.length} components`);
+      if (VERBOSE) {
+        console.log(`    types: ${comps.slice(0, 6).map(c => c.component).join(', ')}${comps.length > 6 ? '...' : ''}`);
+      }
+    } else {
+      bad(`"${intent}"`, `only ${comps.length} components (expected ${minComponents}+)`);
+    }
+  } catch (e) {
+    bad(`"${intent}"`, e.message);
+  }
+}
+// ── Test 5: Thinking mode generation ────────────────────────────────
+console.log('\n5. Thinking mode generation');
+if (!THINKING) {
+  skipped('Thinking mode', 'pass --thinking to test (calls LLM API, ~10s per intent)');
+} else if (adapterType !== 'AdiaUILLMBridge') {
+  skipped('Thinking mode', 'no real LLM adapter available');
+} else {
+  const thinkingTests = [
+    { intent: 'a user settings page with profile and notifications tabs', minComponents: 10 },
+    { intent: 'an e-commerce product detail page with reviews', minComponents: 8 },
+  ];
+  for (const { intent, minComponents } of thinkingTests) {
+    try {
+      const start = Date.now();
+      const result = await generateUI({ intent, mode: 'thinking' });
+      const elapsed = ((Date.now() - start) / 1000).toFixed(1);
+      const comps = result.messages?.[0]?.components || [];
+      if (comps.length >= minComponents) {
+        ok(`"${intent.slice(0, 50)}..."`, `${comps.length} components in ${elapsed}s`);
+        if (VERBOSE) {
+          console.log(`    types: ${comps.slice(0, 8).map(c => c.component).join(', ')}${comps.length > 8 ? '...' : ''}`);
+          console.log(`    suggestions: ${(result.suggestions || []).join('; ')}`);
+        }
+      } else {
+        bad(`"${intent.slice(0, 50)}..."`, `only ${comps.length} components in ${elapsed}s`);
+      }
+    } catch (e) {
+      bad(`"${intent.slice(0, 50)}..."`, e.message);
+    }
+  }
+}
+// ── Test 6: Training data ingestion ─────────────────────────────────
+console.log('\n6. Training data ingestion');
+try {
+  const { ingestAll } = await import('../../corpus/scripts/ingest.js');
+  const result = await ingestAll();
+  if (result.registered >= 0 && result.pages > 0) {
+    ok('Ingestion', `${result.pages} pages → ${result.registered} new, ${result.replaced} replaced, ${result.skipped} skipped`);
+  } else {
+    bad('Ingestion', `unexpected result: ${JSON.stringify(result)}`);
+  }
+} catch (e) {
+  bad('Ingestion', e.message);
+}
+// Final check: pattern count after ingestion
+const afterPatterns = listPatterns();
+if (afterPatterns.length >= 200) {
+  ok('Post-ingest count', `${afterPatterns.length} patterns`);
+} else {
+  bad('Post-ingest count', `only ${afterPatterns.length} (expected 200+)`);
+}
+// ── Summary ─────────────────────────────────────────────────────────
+console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+console.log(`  ${pass} passed  ${fail} failed  ${skip} skipped`);
+console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+process.exit(fail > 0 ? 1 : 0);

package/scripts/test-evals.mjs ADDED Viewed

@@ -0,0 +1,238 @@
+#!/usr/bin/env node
+/**
+ * test-evals.mjs — Quality evaluation suite for A2UI generation.
+ *
+ * Scores generated output on 5 dimensions:
+ *   1. structural_validity  — schema validation score (0-100)
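+ *   2. intent_alignment     — required components present (F1 form; precision and recall are computed identically, so it equals the share of required components found)
+ *   3. component_coverage   — share of the eval's forbidden_patterns that do not appear as component types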
+ *   4. card_model_compliance — header/section/footer structure
+ *   5. anti_pattern_count   — inverse of anti-pattern violations
+ *
+ * Usage:
+ *   node packages/a2ui/mcp/scripts/test-evals.mjs                  # run all evals (instant mode)
+ *   node packages/a2ui/mcp/scripts/test-evals.mjs --mode=thinking  # thinking mode (calls LLM)
+ *   node packages/a2ui/mcp/scripts/test-evals.mjs --save-baseline  # save current scores as baseline
+ *   node packages/a2ui/mcp/scripts/test-evals.mjs --json           # machine-readable output
+ *   node packages/a2ui/mcp/scripts/test-evals.mjs --only=2         # run single eval by ID
+ */
+import '../../../../scripts/load-env.mjs';
+import { readFile, writeFile } from 'node:fs/promises';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const REPO_ROOT = join(__dirname, '..', '..', '..', '..');
+const EVALS_PATH = join(REPO_ROOT, '.claude', 'skills', 'adia-ui-kit', 'evals', 'evals.json');
+const BASELINE_PATH = join(REPO_ROOT, 'scripts', 'eval-baseline.json');
+const args = new Set(process.argv.slice(2));
+const MODE = [...args].find(a => a.startsWith('--mode='))?.split('=')[1] || 'instant';
+const ONLY = [...args].find(a => a.startsWith('--only='))?.split('=')[1];
+const SAVE_BASELINE = args.has('--save-baseline');
+const JSON_OUT = args.has('--json');
+// ── Load evals ──
+const evalsData = JSON.parse(await readFile(EVALS_PATH, 'utf8'));
+let evalCases = evalsData.evals;
+if (ONLY) evalCases = evalCases.filter(e => String(e.id) === ONLY);
+// ── Load generator ──
+const { generateUI } = await import('../../compose/engine/generator.js');
+const { validateSchema } = await import('../../validator/validator.js');
+// ── Scoring functions ──
+function scoreStructural(messages) {
+  try {
+    const result = validateSchema(messages);
+    return result.score ?? 0;
+  } catch { return 0; }
+}
+function scoreIntentAlignment(components, evalCase) {
+  const required = evalCase.required_components || [];
+  if (!required.length) return 100; // No requirements specified
+  const present = new Set(components.map(c => c.component));
+  const tp = required.filter(r => present.has(r)).length;
+  const precision = required.length ? tp / required.length : 1;
+  const recall = required.length ? tp / required.length : 1;
+  const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
+  return Math.round(f1 * 100);
+}
+function scoreComponentCoverage(components, evalCase) {
+  const forbidden = evalCase.forbidden_patterns || [];
+  if (!forbidden.length) return 100;
+  const types = components.map(c => c.component);
+  const violations = forbidden.filter(f => types.includes(f)).length;
+  return Math.max(0, Math.round((1 - violations / forbidden.length) * 100));
+}
+function scoreCardModel(components) {
+  const cards = components.filter(c => c.component === 'Card');
+  if (!cards.length) return 100; // No cards to check
+  let compliant = 0;
+  for (const card of cards) {
+    const childIds = card.children || [];
+    const children = childIds.map(id => components.find(c => c.id === id)).filter(Boolean);
+    const types = children.map(c => c.component);
+    // Card should have Header and/or Section
+    const hasStructure = types.includes('Header') || types.includes('Section');
+    if (hasStructure) compliant++;
+  }
+  return Math.round((compliant / cards.length) * 100);
+}
+function scoreAntiPatterns(components) {
+  let violations = 0;
+  for (const c of components) {
+    // Text without variant
+    if (c.component === 'Text' && !c.variant) violations++;
+    // Header children without slot
+    const parent = components.find(p => p.children?.includes(c.id));
+    if (parent?.component === 'Header' && !c.slot && c.component === 'Text') violations++;
+    // Button without text
+    if (c.component === 'Button' && !c.text) violations++;
+  }
+  const maxViolations = Math.max(components.length * 0.3, 5);
+  return Math.max(0, Math.round((1 - violations / maxViolations) * 100));
+}
+// ── Run evals ──
+const WEIGHTS = { structural: 0.30, intent: 0.25, coverage: 0.20, card_model: 0.15, anti_pattern: 0.10 };
+const results = [];
+for (const evalCase of evalCases) {
+  const start = Date.now();
+  let scores = { structural: 0, intent: 0, coverage: 0, card_model: 0, anti_pattern: 0 };
+  let error = null;
+  try {
+    const result = await generateUI({ intent: evalCase.prompt, mode: MODE });
+    const components = result.messages?.[0]?.components || [];
+    scores.structural = scoreStructural(result.messages);
+    scores.intent = scoreIntentAlignment(components, evalCase);
+    scores.coverage = scoreComponentCoverage(components, evalCase);
+    scores.card_model = scoreCardModel(components);
+    scores.anti_pattern = scoreAntiPatterns(components);
+  } catch (e) {
+    error = e.message;
+  }
+  const aggregate = Math.round(
+    scores.structural * WEIGHTS.structural +
+    scores.intent * WEIGHTS.intent +
+    scores.coverage * WEIGHTS.coverage +
+    scores.card_model * WEIGHTS.card_model +
+    scores.anti_pattern * WEIGHTS.anti_pattern
+  );
+  const thresholds = evalCase.thresholds || { aggregate: 50 };
+  const failures = [];
+  if (aggregate < (thresholds.aggregate || 50)) failures.push(`aggregate ${aggregate} < ${thresholds.aggregate || 50}`);
+  for (const [dim, threshold] of Object.entries(thresholds)) {
+    if (dim === 'aggregate') continue;
+    if (scores[dim] != null && scores[dim] < threshold) {
+      failures.push(`${dim} ${scores[dim]} < ${threshold}`);
+    }
+  }
+  results.push({
+    id: evalCase.id,
+    prompt: evalCase.prompt.slice(0, 60) + (evalCase.prompt.length > 60 ? '...' : ''),
+    scores,
+    aggregate,
+    pass: failures.length === 0 && !error,
+    failures,
+    error,
+    elapsed: Date.now() - start,
+  });
+}
+// ── Regression detection ──
+let regressions = [];
+try {
+  const baseline = JSON.parse(await readFile(BASELINE_PATH, 'utf8'));
+  for (const result of results) {
+    const base = baseline.scores?.[result.id];
+    if (!base) continue;
+    if (result.aggregate < base.aggregate - 3) {
+      regressions.push(`#${result.id}: aggregate ${result.aggregate} vs baseline ${base.aggregate}`);
+    }
+    for (const dim of Object.keys(WEIGHTS)) {
+      if (result.scores[dim] < (base[dim] || 0) - 5) {
+        regressions.push(`#${result.id}: ${dim} ${result.scores[dim]} vs baseline ${base[dim]}`);
+      }
+    }
+  }
+} catch { /* no baseline file */ }
+// ── Save baseline ──
+if (SAVE_BASELINE) {
+  const baselineData = {
+    generated: new Date().toISOString(),
+    mode: MODE,
+    scores: {},
+  };
+  for (const r of results) {
+    baselineData.scores[r.id] = { aggregate: r.aggregate, ...r.scores };
+  }
+  await writeFile(BASELINE_PATH, JSON.stringify(baselineData, null, 2) + '\n');
+  if (!JSON_OUT) console.log(`Baseline saved to ${BASELINE_PATH}`);
+}
+// ── Output ──
+const passed = results.filter(r => r.pass).length;
+const avgAggregate = Math.round(results.reduce((s, r) => s + r.aggregate, 0) / (results.length || 1));
+if (JSON_OUT) {
+  console.log(JSON.stringify({
+    timestamp: new Date().toISOString(),
+    mode: MODE,
+    results,
+    summary: { total: results.length, passed, avg_aggregate: avgAggregate, regressions },
+  }, null, 2));
+} else {
+  console.log(`\nA2UI Eval Suite (mode: ${MODE})`);
+  console.log('━'.repeat(70));
+  for (const r of results) {
+    const status = r.error ? '✗' : r.pass ? '✓' : '~';
+    const dims = Object.entries(r.scores).map(([k, v]) => `${k.slice(0, 4)}:${v}`).join(' ');
+    console.log(`  ${status} #${r.id} [${r.aggregate}] ${dims}  ${r.elapsed}ms`);
+    console.log(`    ${r.prompt}`);
+    if (r.failures.length) console.log(`    FAIL: ${r.failures.join(', ')}`);
+    if (r.error) console.log(`    ERROR: ${r.error}`);
+  }
+  if (regressions.length) {
+    console.log('\n⚠ REGRESSIONS:');
+    for (const r of regressions) console.log(`  ${r}`);
+  }
+  console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+  console.log(`  ${passed}/${results.length} passed  avg: ${avgAggregate}  regressions: ${regressions.length}`);
+  console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+}
+// ── Exit code ──
+if (regressions.length) process.exit(2);
+if (passed < results.length) process.exit(1);
+process.exit(0);

package/scripts/visual-validate.mjs ADDED Viewed

@@ -0,0 +1,158 @@
+#!/usr/bin/env node
+/**
+ * visual-validate.mjs — Generate UI from test intents and produce an HTML
+ * preview page for visual inspection. Works in both instant and pro modes.
+ *
+ * Usage:
+ *   node packages/a2ui/mcp/scripts/visual-validate.mjs              # instant mode
+ *   node packages/a2ui/mcp/scripts/visual-validate.mjs --pro        # pro mode (calls LLM)
+ *   node packages/a2ui/mcp/scripts/visual-validate.mjs --open       # open in browser after
+ */
+import '../../../../scripts/load-env.mjs';
+import { writeFile } from 'node:fs/promises';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { execSync } from 'node:child_process';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const args = new Set(process.argv.slice(2));
+const PRO = args.has('--pro');
+const OPEN = args.has('--open');
+const OUTPUT = join(__dirname, '..', 'visual-validation.html');
+const { generateUI } = await import('../../compose/engine/generator.js');
+const { validateSchema } = await import('../../validator/validator.js');
+const TEST_INTENTS = [
+  'login form with email and password',
+  'dashboard with 4 KPI stat cards',
+  'user profile card with avatar and bio',
+  'pricing table with 3 tiers',
+  'chat interface with message history',
+  'data table with sorting and pagination',
+  'file upload form with drag and drop',
+  'notification toast stack',
+  'settings page with toggle switches',
+  'kanban board for project tasks',
+  'team activity feed',
+  'monitor server health dashboard',
+];
+console.log(`\nVisual Validation (mode: ${PRO ? 'pro' : 'instant'})`);
+console.log('━'.repeat(60));
+const results = [];
+for (const intent of TEST_INTENTS) {
+  const start = Date.now();
+  try {
+    const result = await generateUI({
+      intent,
+      mode: PRO ? 'pro' : undefined,
+    });
+    const elapsed = Date.now() - start;
+    const components = result.messages?.flatMap(m => m.components || []) || [];
+    const validation = validateSchema(result.messages || []);
+    results.push({
+      intent,
+      components,
+      messages: result.messages || [],
+      score: validation.score,
+      valid: validation.valid,
+      elapsed,
+      error: null,
+    });
+    const icon = validation.score >= 80 ? '✓' : validation.score >= 50 ? '△' : '✗';
+    console.log(`  ${icon} [${validation.score}] ${components.length} comps  ${elapsed}ms  ${intent}`);
+  } catch (e) {
+    const elapsed = Date.now() - start;
+    results.push({ intent, components: [], messages: [], score: 0, valid: false, elapsed, error: e.message });
+    console.log(`  ✗ ERROR  ${elapsed}ms  ${intent}: ${e.message}`);
+  }
+}
+// Generate preview HTML
+const avgScore = Math.round(results.reduce((s, r) => s + r.score, 0) / results.length);
+const totalComps = results.reduce((s, r) => s + r.components.length, 0);
+const cards = results.map((r, i) => {
+  const componentsJson = JSON.stringify(r.messages, null, 2)
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;');
+  const tree = r.components.map(c => {
+    const indent = '  '.repeat((c.children ? 0 : 1));
+    const props = Object.entries(c)
+      .filter(([k]) => !['id', 'component', 'children'].includes(k))
+      .map(([k, v]) => `${k}="${v}"`)
+      .join(' ');
+    return `<div style="margin-left: ${12}px; font-family: monospace; font-size: 12px; color: #ccc;">
+      <span style="color: #7dd3fc;">${c.component || '?'}</span>
+      <span style="color: #666;">#${c.id}</span>
+      ${props ? `<span style="color: #a78bfa; font-size: 11px;"> ${props.slice(0, 80)}${props.length > 80 ? '…' : ''}</span>` : ''}
+    </div>`;
+  }).join('');
+  const bg = r.score >= 80 ? '#1a2e1a' : r.score >= 50 ? '#2e2a1a' : '#2e1a1a';
+  const border = r.score >= 80 ? '#2d5a2d' : r.score >= 50 ? '#5a4a2d' : '#5a2d2d';
+  return `
+  <div style="background: ${bg}; border: 1px solid ${border}; border-radius: 8px; padding: 16px; margin-bottom: 12px;">
+    <div style="display: flex; justify-content: space-between; margin-bottom: 8px;">
+      <strong style="color: #e2e8f0; font-size: 14px;">${r.intent}</strong>
+      <span style="color: ${r.score >= 80 ? '#4ade80' : r.score >= 50 ? '#fbbf24' : '#f87171'}; font-weight: bold;">${r.score}/100</span>
+    </div>
+    <div style="display: flex; gap: 16px; font-size: 12px; color: #94a3b8; margin-bottom: 8px;">
+      <span>${r.components.length} components</span>
+      <span>${r.elapsed}ms</span>
+      <span>${[...new Set(r.components.map(c => c.component))].length} types</span>
+    </div>
+    <details>
+      <summary style="color: #7dd3fc; cursor: pointer; font-size: 12px;">Component tree</summary>
+      <div style="background: #0f172a; border-radius: 4px; padding: 8px; margin-top: 4px; max-height: 300px; overflow-y: auto;">
+        ${tree || '<em style="color:#666">No components</em>'}
+      </div>
+    </details>
+    <details style="margin-top: 4px;">
+      <summary style="color: #a78bfa; cursor: pointer; font-size: 12px;">Raw JSON</summary>
+      <pre style="background: #0f172a; border-radius: 4px; padding: 8px; margin-top: 4px; max-height: 300px; overflow-y: auto; font-size: 11px; color: #94a3b8;">${componentsJson}</pre>
+    </details>
+  </div>`;
+}).join('');
+const html = `<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>A2UI Visual Validation — ${PRO ? 'Pro' : 'Instant'} Mode</title>
+  <style>
+    body { background: #0f172a; color: #e2e8f0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; margin: 0; padding: 24px; }
+    h1 { font-size: 20px; margin-bottom: 4px; }
+    .stats { display: flex; gap: 24px; color: #94a3b8; font-size: 14px; margin-bottom: 24px; }
+    .stats span { background: #1e293b; padding: 4px 12px; border-radius: 4px; }
+  </style>
+</head>
+<body>
+  <h1>A2UI Visual Validation — ${PRO ? 'Pro' : 'Instant'} Mode</h1>
+  <div class="stats">
+    <span>${results.length} intents</span>
+    <span>avg score: ${avgScore}/100</span>
+    <span>${totalComps} total components</span>
+    <span>${results.filter(r => r.score >= 80).length}/${results.length} passing</span>
+  </div>
+  ${cards}
+</body>
+</html>`;
+await writeFile(OUTPUT, html);
+console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+console.log(`  ${results.length} intents  avg: ${avgScore}  components: ${totalComps}`);
+console.log(`  Report: ${OUTPUT}`);
+if (OPEN) {
+  try { execSync(`open "${OUTPUT}"`); } catch {}
+}