@auto-engineer/component-implementor-react 1.95.0 → 1.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/.turbo/turbo-test.log +6 -6
  3. package/.turbo/turbo-type-check.log +1 -1
  4. package/CHANGELOG.md +90 -0
  5. package/dist/src/commands/implement-component.d.ts.map +1 -1
  6. package/dist/src/commands/implement-component.js +13 -16
  7. package/dist/src/commands/implement-component.js.map +1 -1
  8. package/dist/src/commands/implement-component.test.js +14 -5
  9. package/dist/src/commands/implement-component.test.js.map +1 -1
  10. package/dist/src/extract-code-block.d.ts +1 -0
  11. package/dist/src/extract-code-block.d.ts.map +1 -1
  12. package/dist/src/extract-code-block.js +12 -0
  13. package/dist/src/extract-code-block.js.map +1 -1
  14. package/dist/src/extract-code-block.test.js +28 -1
  15. package/dist/src/extract-code-block.test.js.map +1 -1
  16. package/dist/src/generate-component.d.ts +2 -13
  17. package/dist/src/generate-component.d.ts.map +1 -1
  18. package/dist/src/generate-component.js +4 -29
  19. package/dist/src/generate-component.js.map +1 -1
  20. package/dist/src/generate-component.test.js +18 -22
  21. package/dist/src/generate-component.test.js.map +1 -1
  22. package/dist/src/generate-story.d.ts +2 -12
  23. package/dist/src/generate-story.d.ts.map +1 -1
  24. package/dist/src/generate-story.js +4 -25
  25. package/dist/src/generate-story.js.map +1 -1
  26. package/dist/src/generate-story.test.js +17 -21
  27. package/dist/src/generate-story.test.js.map +1 -1
  28. package/dist/src/generate-test.d.ts +2 -12
  29. package/dist/src/generate-test.d.ts.map +1 -1
  30. package/dist/src/generate-test.js +4 -28
  31. package/dist/src/generate-test.js.map +1 -1
  32. package/dist/src/generate-test.test.js +17 -6
  33. package/dist/src/generate-test.test.js.map +1 -1
  34. package/dist/src/prompt.d.ts +64 -0
  35. package/dist/src/prompt.d.ts.map +1 -0
  36. package/dist/src/prompt.js +481 -0
  37. package/dist/src/prompt.js.map +1 -0
  38. package/dist/src/prompt.test.d.ts +2 -0
  39. package/dist/src/prompt.test.d.ts.map +1 -0
  40. package/dist/src/prompt.test.js +136 -0
  41. package/dist/src/prompt.test.js.map +1 -0
  42. package/dist/src/reconcile.d.ts +8 -0
  43. package/dist/src/reconcile.d.ts.map +1 -0
  44. package/dist/src/reconcile.js +18 -0
  45. package/dist/src/reconcile.js.map +1 -0
  46. package/dist/src/reconcile.test.d.ts +2 -0
  47. package/dist/src/reconcile.test.d.ts.map +1 -0
  48. package/dist/src/reconcile.test.js +108 -0
  49. package/dist/src/reconcile.test.js.map +1 -0
  50. package/dist/src/run.d.ts +2 -0
  51. package/dist/src/run.d.ts.map +1 -0
  52. package/dist/src/run.js +86 -0
  53. package/dist/src/run.js.map +1 -0
  54. package/dist/src/spec-contract.d.ts +9 -0
  55. package/dist/src/spec-contract.d.ts.map +1 -0
  56. package/dist/src/spec-contract.js +16 -0
  57. package/dist/src/spec-contract.js.map +1 -0
  58. package/dist/tsconfig.tsbuildinfo +1 -1
  59. package/improvement-prompt.md +208 -0
  60. package/inputs/action-button/spec.json +50 -0
  61. package/inputs/command-palette/spec.json +62 -0
  62. package/inputs/data-card/spec.json +59 -0
  63. package/inputs/editable-data-table/spec.json +70 -0
  64. package/inputs/multi-step-form/spec.json +66 -0
  65. package/inputs/notification-center/spec.json +67 -0
  66. package/inputs/search-input/spec.json +62 -0
  67. package/inputs/status-badge/spec.json +46 -0
  68. package/package.json +4 -3
  69. package/scripts/improve.ts +592 -0
  70. package/src/commands/implement-component.test.ts +14 -5
  71. package/src/commands/implement-component.ts +13 -17
  72. package/src/extract-code-block.test.ts +33 -1
  73. package/src/extract-code-block.ts +13 -0
  74. package/src/generate-component.test.ts +22 -26
  75. package/src/generate-component.ts +5 -46
  76. package/src/generate-story.test.ts +17 -21
  77. package/src/generate-story.ts +5 -40
  78. package/src/generate-test.test.ts +22 -7
  79. package/src/generate-test.ts +5 -44
  80. package/src/prompt.test.ts +163 -0
  81. package/src/prompt.ts +581 -0
  82. package/src/reconcile.test.ts +127 -0
  83. package/src/reconcile.ts +27 -0
  84. package/src/run.ts +106 -0
  85. package/src/spec-contract.ts +22 -0
@@ -0,0 +1,592 @@
1
+ /**
2
+ * Improvement Loop for the Component Implementor React Agent
3
+ *
4
+ * Runs the 4-agent pipeline (component + test + story in parallel, then reconcile)
5
+ * against MULTIPLE spec scenarios, evaluates each with Claude CLI, aggregates
6
+ * scores, and iteratively improves the agents' system prompts until average
7
+ * quality reaches the target.
8
+ *
9
+ * Using multiple diverse scenarios prevents overfitting the prompts to
10
+ * a single component type. The improvement prompt enforces that all suggested
11
+ * changes are component-agnostic structural patterns.
12
+ *
13
+ * Usage:
14
+ * npx tsx scripts/improve.ts
15
+ *
16
+ * Input structure (minimum 3 scenarios recommended):
17
+ * inputs/
18
+ * action-button/
19
+ * spec.json
20
+ * status-badge/
21
+ * spec.json
22
+ * data-card/
23
+ * spec.json
24
+ *
25
+ * Environment:
26
+ * MAX_ITERATIONS — max improvement cycles (default: 5)
27
+ * TARGET_SCORE — minimum average score (default: 90)
28
+ * CLAUDE_MODEL — Claude CLI model flag (default: opus)
29
+ */
30
+
31
+ import 'dotenv/config';
32
+ import { type ExecSyncOptions, execSync } from 'child_process';
33
+ import {
34
+ copyFileSync,
35
+ existsSync,
36
+ mkdirSync,
37
+ readdirSync,
38
+ readFileSync,
39
+ rmSync,
40
+ statSync,
41
+ unlinkSync,
42
+ writeFileSync,
43
+ } from 'fs';
44
+ import { dirname, resolve } from 'path';
45
+ import { fileURLToPath } from 'url';
46
+
47
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// The script lives in <package>/scripts, so the package root is one level up.
const PKG_ROOT = resolve(__dirname, '..');

// ─── Config ──────────────────────────────────────────────────────────────────

/**
 * Reads an integer setting from the environment.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset or does not start with an integer.
 * @returns The parsed base-10 integer, or `fallback`.
 *
 * A set-but-unparsable value (for example an empty string) would otherwise
 * become NaN, which makes `i <= MAX_ITERATIONS` and `avg >= TARGET_SCORE`
 * permanently false without any diagnostic.
 */
function readIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    console.warn(` WARNING: ${name}=${JSON.stringify(raw)} is not an integer; using ${fallback}.`);
    return fallback;
  }
  return parsed;
}

const MAX_ITERATIONS = readIntEnv('MAX_ITERATIONS', 5);
const TARGET_SCORE = readIntEnv('TARGET_SCORE', 90);
const CLAUDE_MODEL = process.env.CLAUDE_MODEL ?? 'opus';

// ─── Paths ───────────────────────────────────────────────────────────────────
const INPUTS_DIR = resolve(PKG_ROOT, 'inputs');
const OUTPUTS_DIR = resolve(PKG_ROOT, 'outputs');
const PROMPT_FILE = resolve(PKG_ROOT, 'src', 'prompt.ts');
const IMPROVEMENT_PROMPT_FILE = resolve(PKG_ROOT, 'improvement-prompt.md');
// Temporary runner script; written and deleted once per scenario run.
const RUNNER_FILE = resolve(PKG_ROOT, '.improve-runner.ts');

// Shared options for every child process: run from the package root with the
// current environment and a 50 MiB output buffer.
const EXEC_OPTS: ExecSyncOptions = {
  cwd: PKG_ROOT,
  env: { ...process.env },
  maxBuffer: 50 * 1024 * 1024,
};
68
+
69
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
70
/** Prints one message to stdout, prefixed with a single space of indentation. */
function log(msg: string) {
  const line = ` ${msg}`;
  console.log(line);
}
73
+
74
/**
 * Prints a section heading: a 60-character rule, the message, and a second
 * rule, with a blank line before and after.
 */
function heading(msg: string) {
  const rule = '─'.repeat(60);
  const lines = [`\n${rule}`, ` ${msg}`, `${rule}\n`];
  for (const line of lines) {
    console.log(line);
  }
}
79
+
80
+ // ─── Clean scenario outputs ─────────────────────────────────────────────────
81
/**
 * Removes the per-scenario output directory (outputs/<scenario>) for every
 * given scenario, skipping any that do not exist, then logs a confirmation.
 */
function cleanScenarioOutputs(scenarios: string[]): void {
  scenarios.forEach((name) => {
    const target = resolve(OUTPUTS_DIR, name);
    if (!existsSync(target)) {
      return;
    }
    rmSync(target, { recursive: true, force: true });
  });
  log('Cleared previous scenario outputs.');
}
90
+
91
+ // ─── Discover scenarios ──────────────────────────────────────────────────────
92
/**
 * Lists scenario names: every directory directly under inputs/ that contains
 * a spec.json file, sorted with the default string ordering.
 *
 * @returns Sorted scenario names, or an empty array when inputs/ is missing.
 */
function discoverScenarios(): string[] {
  if (!existsSync(INPUTS_DIR)) return [];

  const isScenario = (name: string): boolean => {
    const dir = resolve(INPUTS_DIR, name);
    return statSync(dir).isDirectory() && existsSync(resolve(dir, 'spec.json'));
  };

  return readdirSync(INPUTS_DIR)
    .filter((name) => isScenario(name))
    .sort();
}
111
+
112
+ // ─── Validate ────────────────────────────────────────────────────────────────
113
/**
 * Checks the preconditions for the improvement loop and returns the
 * discovered scenarios.
 *
 * Exits the process with status 1 when no scenarios exist or when either the
 * prompt file or the improvement prompt is missing. Fewer than three
 * scenarios only produces a warning.
 */
function validate(): string[] {
  const scenarios = discoverScenarios();
  const found = scenarios.length;

  if (found === 0) {
    const help = [
      '\n ERROR: No scenarios found in inputs/',
      ' Create subdirectories with spec.json files:',
      ' inputs/action-button/spec.json',
      ' inputs/data-card/spec.json',
      ' inputs/search-input/spec.json\n',
    ];
    help.forEach((line) => console.error(line));
    process.exit(1);
  }

  if (found < 3) {
    console.warn(`\n WARNING: Only ${found} scenario(s) found.`);
    console.warn(' Minimum 3 diverse scenarios recommended to prevent overfitting.\n');
  }

  // Checked in this order so the prompt file is reported first when both are missing.
  const requiredFiles: [string, string][] = [
    ['Prompt file', PROMPT_FILE],
    ['Improvement prompt', IMPROVEMENT_PROMPT_FILE],
  ];
  for (const [label, filePath] of requiredFiles) {
    if (existsSync(filePath)) continue;
    console.error(`\n ERROR: ${label} not found at ${filePath}\n`);
    process.exit(1);
  }

  log(`Found ${found} scenario(s): ${scenarios.join(', ')}`);
  return scenarios;
}
142
+
143
+ // ─── Run agent for one scenario ──────────────────────────────────────────────
144
/**
 * Runs the generation pipeline for one scenario in a child process.
 *
 * Writes a temporary runner script to RUNNER_FILE, executes it with
 * `npx tsx`, and always attempts to delete the script afterwards. The runner
 * reads inputs/<scenario>/spec.json, runs the component, test and story
 * generators in parallel, reconciles the results, and writes component.tsx,
 * test.tsx and story.tsx into outputs/<scenario>/.
 *
 * @param scenario - Scenario directory name under inputs/.
 * @throws Whatever execSync throws when the runner exits with a non-zero status.
 */
function runAgentForScenario(scenario: string): void {
  log(` Running agent on ${scenario}...`);

  const outputDir = resolve(OUTPUTS_DIR, scenario);
  mkdirSync(outputDir, { recursive: true });

  // The scenario name comes from a directory name and ends up inside generated
  // source code. JSON.stringify yields a correctly escaped string literal, so
  // names containing quotes or backslashes cannot break (or inject into) the runner.
  const specPathLiteral = JSON.stringify(`inputs/${scenario}/spec.json`);
  const outputLiteral = (file: string): string => JSON.stringify(`outputs/${scenario}/${file}`);

  // The runner imports the generators directly, runs the 3-parallel + reconcile
  // pipeline, and writes the outputs.
  const runnerCode = `
import 'dotenv/config';
import { readFileSync, writeFileSync } from 'fs';
import { generateComponentFile } from './src/generate-component.js';
import { generateTestFile } from './src/generate-test.js';
import { generateStoryFile } from './src/generate-story.js';
import { reconcile } from './src/reconcile.js';

const spec = JSON.parse(readFileSync(${specPathLiteral}, 'utf-8'));
const { componentName, specDeltas } = spec;

console.log(' Phase 1: Running 3 agents in parallel...');
const [componentCode, testCode, storyCode] = await Promise.all([
generateComponentFile({ componentName, specDeltas }),
generateTestFile({ componentName, specDeltas }),
generateStoryFile({ componentName, specDeltas }),
]);

console.log(' Phase 2: Reconciling...');
const reconciled = await reconcile({
componentName,
specDeltas,
componentCode,
testCode,
storyCode,
});

writeFileSync(${outputLiteral('component.tsx')}, reconciled.componentCode);
writeFileSync(${outputLiteral('test.tsx')}, testCode);
writeFileSync(${outputLiteral('story.tsx')}, reconciled.storyCode);

console.log(' -> Wrote component.tsx, test.tsx, story.tsx');
`;

  writeFileSync(RUNNER_FILE, runnerCode);

  try {
    execSync('npx tsx .improve-runner.ts', { ...EXEC_OPTS, stdio: 'inherit' });
  } finally {
    try {
      unlinkSync(RUNNER_FILE);
    } catch {
      // Best-effort cleanup: a leftover runner file must not mask the real outcome.
    }
  }
}
196
+
197
+ // ─── Evaluate one scenario with Claude CLI ───────────────────────────────────
198
/**
 * Scores one scenario's generated files by piping a rubric prompt to the
 * Claude CLI and parsing the JSON scorecard it prints.
 *
 * @param scenario - Scenario directory name; its spec and generated outputs are read from disk.
 * @returns The numeric total score plus the full parsed scorecard, or null
 *   when an input file cannot be read, the CLI fails, no JSON object is found
 *   in the reply, the JSON is invalid, or `totalScore` is not a finite number.
 */
function evaluateScenario(scenario: string): { totalScore: number; raw: any } | null {
  log(` Evaluating ${scenario}...`);

  let improvementPrompt: string;
  let specInput: string;
  let componentOutput: string;
  let testOutput: string;
  let storyOutput: string;
  let agentPrompts: string;
  try {
    improvementPrompt = readFileSync(IMPROVEMENT_PROMPT_FILE, 'utf-8');
    specInput = readFileSync(resolve(INPUTS_DIR, scenario, 'spec.json'), 'utf-8');
    componentOutput = readFileSync(resolve(OUTPUTS_DIR, scenario, 'component.tsx'), 'utf-8');
    testOutput = readFileSync(resolve(OUTPUTS_DIR, scenario, 'test.tsx'), 'utf-8');
    storyOutput = readFileSync(resolve(OUTPUTS_DIR, scenario, 'story.tsx'), 'utf-8');
    agentPrompts = readFileSync(PROMPT_FILE, 'utf-8');
  } catch (err: any) {
    // A missing or unreadable file is a per-scenario failure, consistent with
    // the other failure paths below, rather than a crash of the whole run.
    console.error(` Failed to read evaluation inputs for ${scenario}:`, err.message);
    return null;
  }

  const fullPrompt = [
    improvementPrompt,
    '',
    '---',
    '',
    `## Input: Spec Deltas (scenario: ${scenario})`,
    '',
    '```json',
    specInput,
    '```',
    '',
    '## Output: Generated Component',
    '',
    '```tsx',
    componentOutput,
    '```',
    '',
    '## Output: Generated Test',
    '',
    '```tsx',
    testOutput,
    '```',
    '',
    '## Output: Generated Story',
    '',
    '```tsx',
    storyOutput,
    '```',
    '',
    '## Agent System Prompts (src/prompt.ts)',
    '',
    '```typescript',
    agentPrompts,
    '```',
    '',
    'Now evaluate the output against the rubric. Respond with ONLY the JSON scorecard — no markdown fences, no explanation, just the raw JSON object.',
  ].join('\n');

  let result: string;
  try {
    result = execSync(`claude -p --model ${CLAUDE_MODEL}`, {
      ...EXEC_OPTS,
      input: fullPrompt,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err: any) {
    console.error(` Claude CLI evaluation failed for ${scenario}:`, err.message);
    return null;
  }

  // Greedy match from the first "{" to the last "}" so that any text the model
  // prints around the scorecard is ignored.
  const jsonMatch = result.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    console.error(` Failed to parse evaluation JSON for ${scenario}`);
    return null;
  }

  let parsed: any;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (err: any) {
    console.error(` JSON parse error for ${scenario}:`, err.message);
    return null;
  }

  // The score is model output. A string such as "85" would turn the caller's
  // numeric sum into string concatenation, so normalise it to a finite number here.
  const rawScore = parsed.totalScore ?? 0;
  const totalScore = typeof rawScore === 'string' ? Number.parseFloat(rawScore) : rawScore;
  if (typeof totalScore !== 'number' || !Number.isFinite(totalScore)) {
    console.error(` Evaluation for ${scenario} has a non-numeric totalScore:`, rawScore);
    return null;
  }

  return { totalScore, raw: parsed };
}
273
+
274
+ // ─── Aggregate improvements across scenarios ─────────────────────────────────
275
/**
 * Merges the `promptImprovements` lists of all scenario evaluations into at
 * most ten entries.
 *
 * Improvements are grouped by category, priority and the first 80 characters
 * of `desiredBehavior`. Groups are ordered by how often they occurred (most
 * frequent first), then by priority. Each returned entry is the first
 * improvement seen for its group, extended with `_appearsInScenarios` and
 * `_frequency`.
 *
 * The evaluations are model output, so a missing or non-array
 * `promptImprovements` and entries that are not objects are skipped instead
 * of throwing.
 *
 * @param evaluations - One parsed scorecard per scenario.
 * @returns Up to ten improvements, most widely observed first.
 */
function aggregateImprovements(evaluations: { scenario: string; eval: any }[]): any[] {
  const groups = new Map<string, { improvement: any; count: number; scenarios: string[] }>();

  for (const { scenario, eval: evaluation } of evaluations) {
    const suggestions = evaluation?.promptImprovements;
    if (!Array.isArray(suggestions)) continue;

    for (const improvement of suggestions) {
      if (improvement === null || typeof improvement !== 'object') continue;

      const desired = improvement.desiredBehavior;
      // Only strings can be sliced; anything else is stringified as-is.
      const desiredKey = typeof desired === 'string' ? desired.slice(0, 80) : String(desired);
      const key = `${improvement.category}::${improvement.priority}::${desiredKey}`;

      const group = groups.get(key);
      if (group) {
        group.count++;
        group.scenarios.push(scenario);
      } else {
        groups.set(key, { improvement, count: 1, scenarios: [scenario] });
      }
    }
  }

  // Sort: improvements that appear across MORE scenarios come first (more generalizable)
  const sorted = [...groups.values()].sort(
    (a, b) => b.count - a.count || priorityOrder(a.improvement.priority) - priorityOrder(b.improvement.priority),
  );

  // Take top 10
  return sorted.slice(0, 10).map((group) => ({
    ...group.improvement,
    _appearsInScenarios: group.scenarios,
    _frequency: group.count,
  }));
}
309
+
310
/**
 * Maps a priority label to its sort rank: critical=0, high=1, medium=2,
 * low=3, and 4 for anything else.
 */
function priorityOrder(p: string): number {
  const ranked = ['critical', 'high', 'medium', 'low'];
  const rank = ranked.indexOf(p);
  return rank === -1 ? ranked.length : rank;
}
324
+
325
+ // ─── Apply improvements ─────────────────────────────────────────────────────
326
/**
 * Asks the Claude CLI to rewrite src/prompt.ts so that it incorporates the
 * given improvements, and overwrites the prompt file with the reply.
 *
 * A timestamped copy of the current prompt file is saved under outputs/
 * before the CLI is called.
 *
 * @param improvements - Aggregated improvements; each is read for `priority`,
 *   `category`, `currentBehavior`, `desiredBehavior`, `suggestedPromptChange`,
 *   `_frequency` and `_appearsInScenarios`.
 * @returns true when the prompt file was overwritten; false when there was
 *   nothing to apply, the CLI call failed, or the reply was shorter than 500
 *   characters.
 */
function applyImprovements(improvements: any[]): boolean {
  if (improvements.length === 0) {
    log('No improvements to apply.');
    return false;
  }

  log(`Applying ${improvements.length} aggregated improvements...`);

  // ':' and '.' are replaced so the ISO timestamp can be embedded in a file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const backupPath = resolve(OUTPUTS_DIR, `prompt.backup.${timestamp}.ts`);
  mkdirSync(OUTPUTS_DIR, { recursive: true });
  copyFileSync(PROMPT_FILE, backupPath);
  log(`Backed up prompt to outputs/prompt.backup.${timestamp}.ts`);

  const currentPrompt = readFileSync(PROMPT_FILE, 'utf-8');

  // Render each improvement as a markdown section, annotated with the
  // scenario(s) it was observed in.
  const improvementList = improvements
    .map((imp: any, i: number) => {
      const freq =
        imp._frequency > 1
          ? ` (appeared in ${imp._frequency} scenarios: ${imp._appearsInScenarios?.join(', ')})`
          : ` (from scenario: ${imp._appearsInScenarios?.[0]})`;
      return (
        `### Improvement ${i + 1} [${imp.priority}] — ${imp.category}${freq}\n` +
        `**Current behavior:** ${imp.currentBehavior}\n` +
        `**Desired behavior:** ${imp.desiredBehavior}\n` +
        `**Suggested change:** ${imp.suggestedPromptChange}\n`
      );
    })
    .join('\n');

  const applyPrompt = [
    'You are modifying an LLM system prompt file to improve the quality of its output.',
    'Your job is to apply specific, targeted improvements. Be surgical.',
    '',
    'CRITICAL CONSTRAINT: This prompt must remain COMPONENT-AGNOSTIC. It will be used',
    'to generate ANY React component (buttons, tables, modals, forms, dashboards, etc).',
    'Do NOT introduce any component-specific vocabulary, prop names, or examples tied',
    'to a particular component type. Every change must be expressed as a structural',
    'pattern that works universally.',
    '',
    'The file contains FOUR system prompts — one per agent (Frontend Agent, Tester Agent,',
    'Story Agent, Reconciler). Each improvement specifies which agent it targets. Apply',
    "changes only to the targeted agent's prompt sections.",
    '',
    '## Current prompt file (src/prompt.ts)',
    '',
    '```typescript',
    currentPrompt,
    '```',
    '',
    '## Improvements to apply (ordered by cross-scenario frequency and priority)',
    '',
    improvementList,
    '',
    '## Instructions',
    '',
    "1. Apply each improvement to the correct agent's prompt section.",
    '2. Be surgical — make minimum changes to address each improvement.',
    '3. Do NOT restructure, reformat, or rewrite sections that are not targeted.',
    '4. Preserve all existing rules, formatting, and structure.',
    '5. Keep the file as valid TypeScript — do not break string literals, template literals, or exports.',
    '6. NEVER introduce component-specific vocabulary into the prompt.',
    '7. Express every new rule as a structural pattern, not a component instance.',
    "8. Preserve the persona paragraphs at the start of each agent's preamble.",
    '',
    'Output ONLY the complete modified file content.',
    'No markdown code fences. No explanations. No commentary.',
    'Start directly with the first line of the TypeScript file.',
  ].join('\n');

  let result: string;
  try {
    result = execSync(`claude -p --model ${CLAUDE_MODEL}`, {
      ...EXEC_OPTS,
      input: applyPrompt,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err: any) {
    console.error(' Claude CLI prompt-improvement failed:', err.message);
    return false;
  }

  // The model is told not to fence its reply; strip one outer code fence if it did anyway.
  let cleaned = result.trim();
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```\w*\n?/, '').replace(/\n?```\s*$/, '');
  }

  // Length is the only sanity check before overwriting; the reply is not
  // verified to be valid TypeScript. The backup written above is the way back.
  if (cleaned.length < 500) {
    console.error(' Modified prompt is suspiciously short — skipping write.');
    return false;
  }

  writeFileSync(PROMPT_FILE, cleaned);
  log('Prompt file updated.');
  return true;
}
424
+
425
+ // ─── Print scorecard ─────────────────────────────────────────────────────────
426
/**
 * Prints a boxed scorecard to stdout: the average score, one row per
 * scenario, and one row per rubric category with that category's mean score
 * and the max score reported by the first evaluation that listed it.
 *
 * @param results - Per-scenario scores; `eval.categories` maps a category
 *   name to an object read for `score` and `maxScore`.
 * @param avgScore - Average of the scenario scores, printed with one decimal.
 */
function printAggregateScorecard(
  results: { scenario: string; totalScore: number; eval: any }[],
  avgScore: number,
): void {
  const topBorder = ' ┌──────────────────────────────────────────────┐';
  const divider = ' ├──────────────────────────────────────────────┤';
  const bottomBorder = ' └──────────────────────────────────────────────┘';

  console.log('');
  console.log(topBorder);
  console.log(` │ AVERAGE SCORE: ${avgScore.toFixed(1).padStart(5)} / 100 │`);
  console.log(divider);

  results.forEach((row) => {
    const label = row.scenario.padEnd(30);
    const score = String(row.totalScore).padStart(3);
    console.log(` │ ${label} ${score} / 100 │`);
  });

  console.log(divider);

  // Show per-category averages
  const tallies = new Map<string, { total: number; max: number; count: number }>();
  results.forEach((row) => {
    Object.entries(row.eval.categories ?? {}).forEach(([category, value]) => {
      const entry = value as any;
      let tally = tallies.get(category);
      if (tally === undefined) {
        tally = { total: 0, max: entry.maxScore, count: 0 };
        tallies.set(category, tally);
      }
      tally.total += entry.score;
      tally.count += 1;
    });
  });

  tallies.forEach((tally, category) => {
    const mean = (tally.total / tally.count).toFixed(1);
    const label = category.padEnd(30);
    console.log(` │ ${label} ${mean.padStart(4)} / ${String(tally.max).padEnd(4)}│`);
  });

  console.log(bottomBorder);
  console.log('');
}
462
+
463
+ // ─── Save iteration artifacts ────────────────────────────────────────────────
464
/**
 * Snapshots one iteration under outputs/iteration-<n>/.
 *
 * For each result it copies whichever of component.tsx, test.tsx and
 * story.tsx exist in the scenario's output directory and writes the
 * evaluation as evaluation.json. It then copies the current prompt file as
 * prompt.ts and writes summary.json with the average and per-scenario scores.
 */
function saveIteration(
  iteration: number,
  results: { scenario: string; totalScore: number; eval: any }[],
  avgScore: number,
): void {
  const iterDir = resolve(OUTPUTS_DIR, `iteration-${iteration}`);
  mkdirSync(iterDir, { recursive: true });

  const generatedFiles = ['component.tsx', 'test.tsx', 'story.tsx'];

  results.forEach((result) => {
    const scenarioDir = resolve(iterDir, result.scenario);
    mkdirSync(scenarioDir, { recursive: true });

    // Copy generated files
    generatedFiles.forEach((file) => {
      const from = resolve(OUTPUTS_DIR, result.scenario, file);
      if (!existsSync(from)) {
        return;
      }
      copyFileSync(from, resolve(scenarioDir, file));
    });

    const evaluationJson = JSON.stringify(result.eval, null, 2);
    writeFileSync(resolve(scenarioDir, 'evaluation.json'), evaluationJson);
  });

  copyFileSync(PROMPT_FILE, resolve(iterDir, 'prompt.ts'));

  const summary = {
    avgScore,
    scores: results.map(({ scenario, totalScore }) => ({ scenario, score: totalScore })),
  };
  writeFileSync(resolve(iterDir, 'summary.json'), JSON.stringify(summary, null, 2));
}
499
+
500
+ // ─── Main loop ───────────────────────────────────────────────────────────────
501
/**
 * Entry point of the improvement loop.
 *
 * Prints a banner, validates the inputs, then runs up to MAX_ITERATIONS
 * cycles. Each cycle clears the scenario outputs, runs the agent on every
 * scenario, evaluates the scenarios that produced a component.tsx, prints and
 * saves the scores, and either stops because the average reached
 * TARGET_SCORE or (except on the last cycle) applies the aggregated
 * improvements to the prompt file.
 */
async function main() {
  console.log('');
  // Shown in the banner only; 'unknown' when the variables are not set.
  const agentModel = process.env.CUSTOM_PROVIDER_DEFAULT_MODEL ?? 'unknown';
  const providerName = process.env.CUSTOM_PROVIDER_NAME ?? 'unknown';

  console.log('╔══════════════════════════════════════════════════╗');
  console.log('║ Component Implementor — Improvement Loop ║');
  console.log(`║ Target: ${TARGET_SCORE}/100 avg Max iterations: ${MAX_ITERATIONS} ║`);
  console.log('╠══════════════════════════════════════════════════╣');
  console.log(`║ Agent model: ${agentModel.padEnd(32)}║`);
  console.log(`║ Provider: ${providerName.padEnd(32)}║`);
  console.log(`║ Eval model: claude (${CLAUDE_MODEL})${''.padEnd(Math.max(0, 22 - CLAUDE_MODEL.length))}║`);
  console.log('╚══════════════════════════════════════════════════╝');

  const scenarios = validate();

  for (let i = 1; i <= MAX_ITERATIONS; i++) {
    heading(`Iteration ${i} / ${MAX_ITERATIONS}`);

    // 1. Clean previous outputs — each iteration starts fresh
    cleanScenarioOutputs(scenarios);

    // 2. Run agent on all scenarios
    log('Running agent on all scenarios...');
    for (const scenario of scenarios) {
      // A failure in one scenario is reported and does not stop the others.
      try {
        runAgentForScenario(scenario);
      } catch (err: any) {
        console.error(` Agent failed on ${scenario}: ${err.message}`);
      }
    }

    // 3. Evaluate all scenarios
    log('\nEvaluating all scenarios...');
    const results: { scenario: string; totalScore: number; eval: any }[] = [];

    for (const scenario of scenarios) {
      // Only component.tsx is checked here as the marker that generation succeeded.
      const outputPath = resolve(OUTPUTS_DIR, scenario, 'component.tsx');
      if (!existsSync(outputPath)) {
        log(` Skipping ${scenario} — no output generated.`);
        continue;
      }

      const evalResult = evaluateScenario(scenario);
      if (evalResult) {
        results.push({ scenario, totalScore: evalResult.totalScore, eval: evalResult.raw });
      }
    }

    // With nothing evaluated, the iteration is consumed and the prompt is left unchanged.
    if (results.length === 0) {
      console.error(' No scenarios evaluated successfully.');
      continue;
    }

    // 4. Aggregate scores
    const avgScore = results.reduce((sum, r) => sum + r.totalScore, 0) / results.length;
    const minScore = Math.min(...results.map((r) => r.totalScore));

    printAggregateScorecard(results, avgScore);
    saveIteration(i, results, avgScore);

    // 5. Check target
    if (avgScore >= TARGET_SCORE) {
      log(`Target average score reached! (${avgScore.toFixed(1)} >= ${TARGET_SCORE})`);
      log(`Min score: ${minScore} across ${results.length} scenarios.`);
      log('Final outputs in outputs/<scenario>/');
      break;
    }

    log(`Average ${avgScore.toFixed(1)} < ${TARGET_SCORE} target (min: ${minScore}).`);

    // 6. Aggregate improvements from all scenarios and apply
    // Skipped on the final iteration: no later run would use the changed prompt.
    if (i < MAX_ITERATIONS) {
      const improvements = aggregateImprovements(results);
      log(`\n ${improvements.length} aggregated improvements (cross-scenario deduped)`);

      const improved = applyImprovements(improvements);
      if (improved) {
        log('Prompt updated — next iteration will use the improved prompt.');
      } else {
        log('Could not apply improvements — retrying with current prompt.');
      }
    } else {
      log('Max iterations reached. Best outputs saved.');
    }
  }
}
588
+
589
// Start the loop; any rejection from main() is reported and turned into exit status 1.
const reportFatal = (fatal: unknown): never => {
  console.error('Fatal error:', fatal);
  process.exit(1);
};

main().catch(reportFatal);
@@ -55,12 +55,17 @@ describe('implement-component', () => {
55
55
  });
56
56
 
57
57
  describe('handleImplementComponent', () => {
58
- it('generates 3 files and returns ComponentImplemented on success', async () => {
58
+ it('generates 3 files via parallel agents + reconciliation and returns ComponentImplemented', async () => {
59
59
  const mockGenerateText = vi.mocked(generateText);
60
+ // Phase 1: 3 parallel calls (component, test, story)
60
61
  mockGenerateText
61
- .mockResolvedValueOnce({ text: 'test file code' } as Awaited<ReturnType<typeof generateText>>)
62
62
  .mockResolvedValueOnce({ text: 'component file code' } as Awaited<ReturnType<typeof generateText>>)
63
- .mockResolvedValueOnce({ text: 'story file code' } as Awaited<ReturnType<typeof generateText>>);
63
+ .mockResolvedValueOnce({ text: 'test file code' } as Awaited<ReturnType<typeof generateText>>)
64
+ .mockResolvedValueOnce({ text: 'story file code' } as Awaited<ReturnType<typeof generateText>>)
65
+ // Phase 2: reconciliation
66
+ .mockResolvedValueOnce({
67
+ text: '```tsx\nreconciled component\n```\n\n```tsx\nreconciled story\n```',
68
+ } as Awaited<ReturnType<typeof generateText>>);
64
69
 
65
70
  vi.mocked(existsSync).mockReturnValue(false);
66
71
  vi.mocked(writeFile).mockResolvedValue(undefined);
@@ -85,15 +90,19 @@ describe('implement-component', () => {
85
90
  correlationId: 'cor-1',
86
91
  });
87
92
 
93
+ expect(mockGenerateText).toHaveBeenCalledTimes(4);
88
94
  expect(writeFile).toHaveBeenCalledTimes(3);
89
95
  });
90
96
 
91
97
  it('reads existing component when modifying', async () => {
92
98
  const mockGenerateText = vi.mocked(generateText);
93
99
  mockGenerateText
94
- .mockResolvedValueOnce({ text: 'test code' } as Awaited<ReturnType<typeof generateText>>)
95
100
  .mockResolvedValueOnce({ text: 'component code' } as Awaited<ReturnType<typeof generateText>>)
96
- .mockResolvedValueOnce({ text: 'story code' } as Awaited<ReturnType<typeof generateText>>);
101
+ .mockResolvedValueOnce({ text: 'test code' } as Awaited<ReturnType<typeof generateText>>)
102
+ .mockResolvedValueOnce({ text: 'story code' } as Awaited<ReturnType<typeof generateText>>)
103
+ .mockResolvedValueOnce({
104
+ text: '```tsx\nrc\n```\n```tsx\nrs\n```',
105
+ } as Awaited<ReturnType<typeof generateText>>);
97
106
 
98
107
  vi.mocked(existsSync).mockReturnValue(true);
99
108
  vi.mocked(readFile).mockResolvedValue('existing component code' as never);
@@ -6,6 +6,7 @@ import createDebug from 'debug';
6
6
  import { generateComponentFile } from '../generate-component';
7
7
  import { generateStoryFile } from '../generate-story';
8
8
  import { generateTestFile } from '../generate-test';
9
+ import { reconcile } from '../reconcile';
9
10
 
10
11
  const debug = createDebug('auto:component-implementor-react:command');
11
12
 
@@ -100,33 +101,28 @@ export async function handleImplementComponent(
100
101
  styling: payload.styling,
101
102
  };
102
103
 
103
- debug('Generating test file...');
104
- const testCode = await generateTestFile({
105
- componentName,
106
- specDeltas,
107
- existingComponent,
108
- });
104
+ debug('Running Frontend, Tester, and Story agents in parallel...');
105
+ const [componentCode, testCode, storyCode] = await Promise.all([
106
+ generateComponentFile({ componentName, specDeltas, existingComponent }),
107
+ generateTestFile({ componentName, specDeltas, existingComponent }),
108
+ generateStoryFile({ componentName, specDeltas }),
109
+ ]);
109
110
 
110
- debug('Generating component file...');
111
- const componentCode = await generateComponentFile({
111
+ debug('Reconciling component and story against tests...');
112
+ const reconciled = await reconcile({
112
113
  componentName,
113
114
  specDeltas,
115
+ componentCode,
114
116
  testCode,
117
+ storyCode,
115
118
  existingComponent,
116
119
  });
117
120
 
118
- debug('Generating story file...');
119
- const storyCode = await generateStoryFile({
120
- componentName,
121
- specDeltas,
122
- componentCode,
123
- });
124
-
125
121
  await mkdir(path.dirname(testPath), { recursive: true });
126
122
  await Promise.all([
127
123
  writeFile(testPath, testCode, 'utf-8'),
128
- writeFile(componentPath, componentCode, 'utf-8'),
129
- writeFile(storyPath, storyCode, 'utf-8'),
124
+ writeFile(componentPath, reconciled.componentCode, 'utf-8'),
125
+ writeFile(storyPath, reconciled.storyCode, 'utf-8'),
130
126
  ]);
131
127
 
132
128
  debug('Wrote 3 files for %s', componentName);