@auto-engineer/component-implementor-react 1.95.0 → 1.97.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/.turbo/turbo-test.log +6 -6
- package/.turbo/turbo-type-check.log +1 -1
- package/CHANGELOG.md +90 -0
- package/dist/src/commands/implement-component.d.ts.map +1 -1
- package/dist/src/commands/implement-component.js +13 -16
- package/dist/src/commands/implement-component.js.map +1 -1
- package/dist/src/commands/implement-component.test.js +14 -5
- package/dist/src/commands/implement-component.test.js.map +1 -1
- package/dist/src/extract-code-block.d.ts +1 -0
- package/dist/src/extract-code-block.d.ts.map +1 -1
- package/dist/src/extract-code-block.js +12 -0
- package/dist/src/extract-code-block.js.map +1 -1
- package/dist/src/extract-code-block.test.js +28 -1
- package/dist/src/extract-code-block.test.js.map +1 -1
- package/dist/src/generate-component.d.ts +2 -13
- package/dist/src/generate-component.d.ts.map +1 -1
- package/dist/src/generate-component.js +4 -29
- package/dist/src/generate-component.js.map +1 -1
- package/dist/src/generate-component.test.js +18 -22
- package/dist/src/generate-component.test.js.map +1 -1
- package/dist/src/generate-story.d.ts +2 -12
- package/dist/src/generate-story.d.ts.map +1 -1
- package/dist/src/generate-story.js +4 -25
- package/dist/src/generate-story.js.map +1 -1
- package/dist/src/generate-story.test.js +17 -21
- package/dist/src/generate-story.test.js.map +1 -1
- package/dist/src/generate-test.d.ts +2 -12
- package/dist/src/generate-test.d.ts.map +1 -1
- package/dist/src/generate-test.js +4 -28
- package/dist/src/generate-test.js.map +1 -1
- package/dist/src/generate-test.test.js +17 -6
- package/dist/src/generate-test.test.js.map +1 -1
- package/dist/src/prompt.d.ts +64 -0
- package/dist/src/prompt.d.ts.map +1 -0
- package/dist/src/prompt.js +481 -0
- package/dist/src/prompt.js.map +1 -0
- package/dist/src/prompt.test.d.ts +2 -0
- package/dist/src/prompt.test.d.ts.map +1 -0
- package/dist/src/prompt.test.js +136 -0
- package/dist/src/prompt.test.js.map +1 -0
- package/dist/src/reconcile.d.ts +8 -0
- package/dist/src/reconcile.d.ts.map +1 -0
- package/dist/src/reconcile.js +18 -0
- package/dist/src/reconcile.js.map +1 -0
- package/dist/src/reconcile.test.d.ts +2 -0
- package/dist/src/reconcile.test.d.ts.map +1 -0
- package/dist/src/reconcile.test.js +108 -0
- package/dist/src/reconcile.test.js.map +1 -0
- package/dist/src/run.d.ts +2 -0
- package/dist/src/run.d.ts.map +1 -0
- package/dist/src/run.js +86 -0
- package/dist/src/run.js.map +1 -0
- package/dist/src/spec-contract.d.ts +9 -0
- package/dist/src/spec-contract.d.ts.map +1 -0
- package/dist/src/spec-contract.js +16 -0
- package/dist/src/spec-contract.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/improvement-prompt.md +208 -0
- package/inputs/action-button/spec.json +50 -0
- package/inputs/command-palette/spec.json +62 -0
- package/inputs/data-card/spec.json +59 -0
- package/inputs/editable-data-table/spec.json +70 -0
- package/inputs/multi-step-form/spec.json +66 -0
- package/inputs/notification-center/spec.json +67 -0
- package/inputs/search-input/spec.json +62 -0
- package/inputs/status-badge/spec.json +46 -0
- package/package.json +4 -3
- package/scripts/improve.ts +592 -0
- package/src/commands/implement-component.test.ts +14 -5
- package/src/commands/implement-component.ts +13 -17
- package/src/extract-code-block.test.ts +33 -1
- package/src/extract-code-block.ts +13 -0
- package/src/generate-component.test.ts +22 -26
- package/src/generate-component.ts +5 -46
- package/src/generate-story.test.ts +17 -21
- package/src/generate-story.ts +5 -40
- package/src/generate-test.test.ts +22 -7
- package/src/generate-test.ts +5 -44
- package/src/prompt.test.ts +163 -0
- package/src/prompt.ts +581 -0
- package/src/reconcile.test.ts +127 -0
- package/src/reconcile.ts +27 -0
- package/src/run.ts +106 -0
- package/src/spec-contract.ts +22 -0
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Improvement Loop for the Component Implementor React Agent
|
|
3
|
+
*
|
|
4
|
+
* Runs the 4-agent pipeline (component + test + story in parallel, then reconcile)
|
|
5
|
+
* against MULTIPLE spec scenarios, evaluates each with Claude CLI, aggregates
|
|
6
|
+
* scores, and iteratively improves the agents' system prompts until average
|
|
7
|
+
* quality reaches the target.
|
|
8
|
+
*
|
|
9
|
+
* Using multiple diverse scenarios prevents overfitting the prompts to
|
|
10
|
+
* a single component type. The improvement prompt enforces that all suggested
|
|
11
|
+
* changes are component-agnostic structural patterns.
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* npx tsx scripts/improve.ts
|
|
15
|
+
*
|
|
16
|
+
* Input structure (minimum 3 scenarios recommended):
|
|
17
|
+
* inputs/
|
|
18
|
+
* action-button/
|
|
19
|
+
* spec.json
|
|
20
|
+
* status-badge/
|
|
21
|
+
* spec.json
|
|
22
|
+
* data-card/
|
|
23
|
+
* spec.json
|
|
24
|
+
*
|
|
25
|
+
* Environment:
|
|
26
|
+
* MAX_ITERATIONS — max improvement cycles (default: 5)
|
|
27
|
+
* TARGET_SCORE — minimum average score (default: 90)
|
|
28
|
+
* CLAUDE_MODEL — Claude CLI model flag (default: opus)
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import 'dotenv/config';
|
|
32
|
+
import { type ExecSyncOptions, execSync } from 'child_process';
|
|
33
|
+
import {
|
|
34
|
+
copyFileSync,
|
|
35
|
+
existsSync,
|
|
36
|
+
mkdirSync,
|
|
37
|
+
readdirSync,
|
|
38
|
+
readFileSync,
|
|
39
|
+
rmSync,
|
|
40
|
+
statSync,
|
|
41
|
+
unlinkSync,
|
|
42
|
+
writeFileSync,
|
|
43
|
+
} from 'fs';
|
|
44
|
+
import { dirname, resolve } from 'path';
|
|
45
|
+
import { fileURLToPath } from 'url';
|
|
46
|
+
|
|
47
|
+
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// The package root is the directory one level above this script's directory.
const PKG_ROOT = resolve(__dirname, '..');

// ─── Config ──────────────────────────────────────────────────────────────────
/**
 * Read an integer setting from the environment.
 *
 * A value that does not parse as a base-10 integer (including the empty
 * string) would otherwise become NaN, which makes the main loop run zero
 * iterations or makes the target comparison always false. In that case a
 * warning is printed and the default is used instead.
 *
 * @param name - environment variable name
 * @param fallback - value used when the variable is unset or not an integer
 * @returns the parsed integer, or `fallback`
 */
function readIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    console.warn(` WARNING: ${name}=${JSON.stringify(raw)} is not an integer — using default ${fallback}.`);
    return fallback;
  }
  return parsed;
}

// Maximum number of improvement cycles.
const MAX_ITERATIONS = readIntEnv('MAX_ITERATIONS', 5);
// Average score at which the loop stops.
const TARGET_SCORE = readIntEnv('TARGET_SCORE', 90);
// Value passed to the Claude CLI `--model` flag.
const CLAUDE_MODEL = process.env.CLAUDE_MODEL ?? 'opus';

// ─── Paths ───────────────────────────────────────────────────────────────────
const INPUTS_DIR = resolve(PKG_ROOT, 'inputs');
const OUTPUTS_DIR = resolve(PKG_ROOT, 'outputs');
const PROMPT_FILE = resolve(PKG_ROOT, 'src', 'prompt.ts');
const IMPROVEMENT_PROMPT_FILE = resolve(PKG_ROOT, 'improvement-prompt.md');
// Temporary runner script written before, and removed after, each agent run.
const RUNNER_FILE = resolve(PKG_ROOT, '.improve-runner.ts');

// Shared options for every child process: run from the package root with the
// current environment and a 50 MiB output buffer.
const EXEC_OPTS: ExecSyncOptions = {
  cwd: PKG_ROOT,
  env: { ...process.env },
  maxBuffer: 50 * 1024 * 1024,
};
|
|
68
|
+
|
|
69
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
70
|
+
/**
 * Write one status line to stdout, prefixed with the script's left margin.
 *
 * @param msg - text to print
 */
function log(msg: string) {
  const line = ' '.concat(msg);
  console.log(line);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Print a section heading: a blank line and a 60-character rule, the
 * message, then the rule again followed by a blank line.
 *
 * @param msg - heading text
 */
function heading(msg: string) {
  const rule = '─'.repeat(60);
  const rows = [`\n${rule}`, ` ${msg}`, `${rule}\n`];
  for (const row of rows) {
    console.log(row);
  }
}
|
|
79
|
+
|
|
80
|
+
// ─── Clean scenario outputs ─────────────────────────────────────────────────
|
|
81
|
+
/**
 * Delete the output directory of every given scenario so the next run
 * starts from an empty state. Directories that do not exist are skipped.
 *
 * @param scenarios - scenario names (sub-directory names under outputs/)
 */
function cleanScenarioOutputs(scenarios: string[]): void {
  scenarios.forEach((name) => {
    const target = resolve(OUTPUTS_DIR, name);
    if (!existsSync(target)) {
      return;
    }
    rmSync(target, { recursive: true, force: true });
  });
  log('Cleared previous scenario outputs.');
}
|
|
90
|
+
|
|
91
|
+
// ─── Discover scenarios ──────────────────────────────────────────────────────
|
|
92
|
+
/**
 * List the scenarios available under the inputs directory.
 *
 * A scenario is any direct sub-directory that contains a `spec.json` file.
 *
 * @returns scenario names in sorted order; empty when the inputs directory
 *          does not exist
 */
function discoverScenarios(): string[] {
  if (!existsSync(INPUTS_DIR)) {
    return [];
  }

  // True when `name` is a directory holding a spec.json.
  const isScenario = (name: string): boolean => {
    const dir = resolve(INPUTS_DIR, name);
    return statSync(dir).isDirectory() && existsSync(resolve(dir, 'spec.json'));
  };

  return readdirSync(INPUTS_DIR)
    .filter((name) => isScenario(name))
    .sort();
}
|
|
111
|
+
|
|
112
|
+
// ─── Validate ────────────────────────────────────────────────────────────────
|
|
113
|
+
/**
 * Check that everything the loop needs is present before starting.
 *
 * Exits the process with status 1 when no scenarios are found, or when the
 * prompt file or the improvement prompt is missing. Prints a warning (but
 * continues) when fewer than three scenarios exist.
 *
 * @returns the discovered scenario names
 */
function validate(): string[] {
  // Print each line to stderr, then terminate with a failure status.
  const abort = (...lines: string[]): never => {
    lines.forEach((line) => console.error(line));
    process.exit(1);
  };

  const found = discoverScenarios();
  const total = found.length;

  if (total === 0) {
    abort(
      '\n ERROR: No scenarios found in inputs/',
      ' Create subdirectories with spec.json files:',
      ' inputs/action-button/spec.json',
      ' inputs/data-card/spec.json',
      ' inputs/search-input/spec.json\n',
    );
  }

  if (total < 3) {
    console.warn(`\n WARNING: Only ${total} scenario(s) found.`);
    console.warn(' Minimum 3 diverse scenarios recommended to prevent overfitting.\n');
  }

  if (!existsSync(PROMPT_FILE)) {
    abort(`\n ERROR: Prompt file not found at ${PROMPT_FILE}\n`);
  }
  if (!existsSync(IMPROVEMENT_PROMPT_FILE)) {
    abort(`\n ERROR: Improvement prompt not found at ${IMPROVEMENT_PROMPT_FILE}\n`);
  }

  log(`Found ${total} scenario(s): ${found.join(', ')}`);
  return found;
}
|
|
142
|
+
|
|
143
|
+
// ─── Run agent for one scenario ──────────────────────────────────────────────
|
|
144
|
+
/**
 * Run the generation pipeline for one scenario in a child process.
 *
 * Writes a temporary runner script to the package root, executes it with
 * `npx tsx`, and always removes it afterwards. The runner reads
 * `inputs/<scenario>/spec.json`, runs the component, test and story
 * generators in parallel, reconciles the results, and writes
 * `component.tsx`, `test.tsx` and `story.tsx` under `outputs/<scenario>/`.
 *
 * @param scenario - scenario (directory) name
 * @throws whatever `execSync` throws when the runner exits with an error
 */
function runAgentForScenario(scenario: string): void {
  log(` Running agent on ${scenario}...`);

  const outputDir = resolve(OUTPUTS_DIR, scenario);
  mkdirSync(outputDir, { recursive: true });

  // The scenario name comes from a directory name and is embedded in generated
  // source code. JSON-encoding yields a valid, fully escaped string literal, so
  // quotes or backslashes in the name cannot break or alter the runner.
  const specPath = JSON.stringify(`inputs/${scenario}/spec.json`);
  const outPath = (file: string): string => JSON.stringify(`outputs/${scenario}/${file}`);

  // The runner imports the generators directly, runs the 3-parallel + reconcile
  // pipeline, and writes the outputs. The test file is written as generated;
  // only the component and story come from the reconcile step.
  const runnerCode = `
import 'dotenv/config';
import { readFileSync, writeFileSync } from 'fs';
import { generateComponentFile } from './src/generate-component.js';
import { generateTestFile } from './src/generate-test.js';
import { generateStoryFile } from './src/generate-story.js';
import { reconcile } from './src/reconcile.js';

const spec = JSON.parse(readFileSync(${specPath}, 'utf-8'));
const { componentName, specDeltas } = spec;

console.log(' Phase 1: Running 3 agents in parallel...');
const [componentCode, testCode, storyCode] = await Promise.all([
generateComponentFile({ componentName, specDeltas }),
generateTestFile({ componentName, specDeltas }),
generateStoryFile({ componentName, specDeltas }),
]);

console.log(' Phase 2: Reconciling...');
const reconciled = await reconcile({
componentName,
specDeltas,
componentCode,
testCode,
storyCode,
});

writeFileSync(${outPath('component.tsx')}, reconciled.componentCode);
writeFileSync(${outPath('test.tsx')}, testCode);
writeFileSync(${outPath('story.tsx')}, reconciled.storyCode);

console.log(' -> Wrote component.tsx, test.tsx, story.tsx');
`;

  writeFileSync(RUNNER_FILE, runnerCode);

  try {
    execSync('npx tsx .improve-runner.ts', { ...EXEC_OPTS, stdio: 'inherit' });
  } finally {
    try {
      unlinkSync(RUNNER_FILE);
    } catch {
      // Best-effort cleanup: a leftover runner file is overwritten on the next
      // run, so a failed delete must not mask the result of the agent run.
    }
  }
}
|
|
196
|
+
|
|
197
|
+
// ─── Evaluate one scenario with Claude CLI ───────────────────────────────────
|
|
198
|
+
/**
 * Score one scenario's generated files with the Claude CLI.
 *
 * Builds a prompt from the improvement rubric, the scenario spec, the three
 * generated files and the current agent prompts, pipes it to `claude -p`,
 * and parses the first-`{`-to-last-`}` span of the reply as JSON.
 *
 * @param scenario - scenario (directory) name
 * @returns the numeric total score (0 when the scorecard has none) together
 *          with the parsed scorecard, or `null` when the inputs cannot be
 *          read, the CLI call fails, or the reply is not a usable scorecard
 */
function evaluateScenario(scenario: string): { totalScore: number; raw: any } | null {
  log(` Evaluating ${scenario}...`);

  let improvementPrompt: string;
  let specInput: string;
  let componentOutput: string;
  let testOutput: string;
  let storyOutput: string;
  let agentPrompts: string;
  try {
    improvementPrompt = readFileSync(IMPROVEMENT_PROMPT_FILE, 'utf-8');
    specInput = readFileSync(resolve(INPUTS_DIR, scenario, 'spec.json'), 'utf-8');
    componentOutput = readFileSync(resolve(OUTPUTS_DIR, scenario, 'component.tsx'), 'utf-8');
    testOutput = readFileSync(resolve(OUTPUTS_DIR, scenario, 'test.tsx'), 'utf-8');
    storyOutput = readFileSync(resolve(OUTPUTS_DIR, scenario, 'story.tsx'), 'utf-8');
    agentPrompts = readFileSync(PROMPT_FILE, 'utf-8');
  } catch (err: any) {
    // A partially written scenario (e.g. only component.tsx) is reported and
    // skipped instead of aborting the whole improvement loop.
    console.error(` Could not read evaluation inputs for ${scenario}:`, err.message);
    return null;
  }

  const fullPrompt = [
    improvementPrompt,
    '',
    '---',
    '',
    `## Input: Spec Deltas (scenario: ${scenario})`,
    '',
    '```json',
    specInput,
    '```',
    '',
    '## Output: Generated Component',
    '',
    '```tsx',
    componentOutput,
    '```',
    '',
    '## Output: Generated Test',
    '',
    '```tsx',
    testOutput,
    '```',
    '',
    '## Output: Generated Story',
    '',
    '```tsx',
    storyOutput,
    '```',
    '',
    '## Agent System Prompts (src/prompt.ts)',
    '',
    '```typescript',
    agentPrompts,
    '```',
    '',
    'Now evaluate the output against the rubric. Respond with ONLY the JSON scorecard — no markdown fences, no explanation, just the raw JSON object.',
  ].join('\n');

  let result: string;
  try {
    result = execSync(`claude -p --model ${CLAUDE_MODEL}`, {
      ...EXEC_OPTS,
      input: fullPrompt,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err: any) {
    console.error(` Claude CLI evaluation failed for ${scenario}:`, err.message);
    return null;
  }

  // Tolerate stray text around the scorecard by taking the outermost braces.
  const jsonMatch = result.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    console.error(` Failed to parse evaluation JSON for ${scenario}`);
    return null;
  }

  let parsed: any;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (err: any) {
    console.error(` JSON parse error for ${scenario}:`, err.message);
    return null;
  }

  // The score is summed and averaged by the caller; a string such as "87"
  // would turn that sum into string concatenation, so coerce and verify here.
  const totalScore = Number(parsed.totalScore ?? 0);
  if (!Number.isFinite(totalScore)) {
    console.error(` Non-numeric totalScore for ${scenario}:`, parsed.totalScore);
    return null;
  }

  return { totalScore, raw: parsed };
}
|
|
273
|
+
|
|
274
|
+
// ─── Aggregate improvements across scenarios ─────────────────────────────────
|
|
275
|
+
/**
 * Merge the prompt improvements proposed by every scenario evaluation.
 *
 * Improvements are deduplicated by category, priority and the first 80
 * characters of `desiredBehavior`. The result is ordered by how many
 * scenarios proposed the improvement (most first), then by priority, and is
 * capped at ten entries.
 *
 * The evaluations are parsed model output, so malformed data is skipped
 * rather than trusted: a `promptImprovements` value that is not an array is
 * ignored, as are entries that are not objects.
 *
 * @param evaluations - one parsed scorecard per evaluated scenario
 * @returns up to ten improvements, each extended with `_appearsInScenarios`
 *          (scenario names) and `_frequency` (number of occurrences)
 */
function aggregateImprovements(evaluations: { scenario: string; eval: any }[]): any[] {
  const seen = new Map<string, { improvement: any; count: number; scenarios: string[] }>();

  for (const { scenario, eval: evaluation } of evaluations) {
    const proposed = evaluation?.promptImprovements;
    if (!Array.isArray(proposed)) {
      continue;
    }

    for (const improvement of proposed) {
      if (improvement === null || typeof improvement !== 'object') {
        continue;
      }

      // Only a string can be sliced; other values are stringified first so a
      // non-string field cannot throw.
      const desired = improvement.desiredBehavior;
      const desiredHead =
        typeof desired === 'string' ? desired.slice(0, 80) : desired == null ? undefined : String(desired).slice(0, 80);
      const key = `${improvement.category}::${improvement.priority}::${desiredHead}`;

      const existing = seen.get(key);
      if (existing) {
        existing.count++;
        existing.scenarios.push(scenario);
      } else {
        seen.set(key, { improvement, count: 1, scenarios: [scenario] });
      }
    }
  }

  // Improvements that appear across MORE scenarios come first (more generalizable).
  const sorted = [...seen.values()].sort(
    (a, b) => b.count - a.count || priorityOrder(a.improvement.priority) - priorityOrder(b.improvement.priority),
  );

  return sorted.slice(0, 10).map((s) => ({
    ...s.improvement,
    _appearsInScenarios: s.scenarios,
    _frequency: s.count,
  }));
}

// Sort rank per priority label; lower sorts first.
const PRIORITY_RANKS = new Map<string, number>([
  ['critical', 0],
  ['high', 1],
  ['medium', 2],
  ['low', 3],
]);

/**
 * Map a priority label to its sort rank.
 *
 * @param p - priority label
 * @returns 0 for `critical` through 3 for `low`; 4 for anything else
 */
function priorityOrder(p: string): number {
  return PRIORITY_RANKS.get(p) ?? 4;
}
|
|
324
|
+
|
|
325
|
+
// ─── Apply improvements ─────────────────────────────────────────────────────
|
|
326
|
+
/**
 * Ask the Claude CLI to rewrite the agent prompt file with the given
 * improvements applied, and write the result back to disk.
 *
 * The current prompt file is first copied to a timestamped backup in the
 * outputs directory. The reply is trimmed, an enclosing markdown code fence
 * is stripped if present, and a reply shorter than 500 characters is
 * rejected without touching the prompt file.
 *
 * @param improvements - aggregated improvements to apply
 * @returns `true` when the prompt file was rewritten; `false` when there was
 *          nothing to apply, the CLI call failed, or the reply was too short
 */
function applyImprovements(improvements: any[]): boolean {
  if (improvements.length === 0) {
    log('No improvements to apply.');
    return false;
  }

  log(`Applying ${improvements.length} aggregated improvements...`);

  // Keep a copy of the prompt as it was before this rewrite.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  const backupName = `prompt.backup.${stamp}.ts`;
  mkdirSync(OUTPUTS_DIR, { recursive: true });
  copyFileSync(PROMPT_FILE, resolve(OUTPUTS_DIR, backupName));
  log(`Backed up prompt to outputs/${backupName}`);

  const currentPrompt = readFileSync(PROMPT_FILE, 'utf-8');

  // Render one improvement as a markdown section.
  const renderImprovement = (imp: any, index: number): string => {
    let origin: string;
    if (imp._frequency > 1) {
      origin = ` (appeared in ${imp._frequency} scenarios: ${imp._appearsInScenarios?.join(', ')})`;
    } else {
      origin = ` (from scenario: ${imp._appearsInScenarios?.[0]})`;
    }
    return [
      `### Improvement ${index + 1} [${imp.priority}] — ${imp.category}${origin}`,
      `**Current behavior:** ${imp.currentBehavior}`,
      `**Desired behavior:** ${imp.desiredBehavior}`,
      `**Suggested change:** ${imp.suggestedPromptChange}`,
      '',
    ].join('\n');
  };
  const improvementList = improvements.map((imp: any, i: number) => renderImprovement(imp, i)).join('\n');

  const preamble = [
    'You are modifying an LLM system prompt file to improve the quality of its output.',
    'Your job is to apply specific, targeted improvements. Be surgical.',
    '',
    'CRITICAL CONSTRAINT: This prompt must remain COMPONENT-AGNOSTIC. It will be used',
    'to generate ANY React component (buttons, tables, modals, forms, dashboards, etc).',
    'Do NOT introduce any component-specific vocabulary, prop names, or examples tied',
    'to a particular component type. Every change must be expressed as a structural',
    'pattern that works universally.',
    '',
    'The file contains FOUR system prompts — one per agent (Frontend Agent, Tester Agent,',
    'Story Agent, Reconciler). Each improvement specifies which agent it targets. Apply',
    "changes only to the targeted agent's prompt sections.",
    '',
  ];
  const promptSection = ['## Current prompt file (src/prompt.ts)', '', '```typescript', currentPrompt, '```', ''];
  const improvementSection = [
    '## Improvements to apply (ordered by cross-scenario frequency and priority)',
    '',
    improvementList,
    '',
  ];
  const instructions = [
    '## Instructions',
    '',
    "1. Apply each improvement to the correct agent's prompt section.",
    '2. Be surgical — make minimum changes to address each improvement.',
    '3. Do NOT restructure, reformat, or rewrite sections that are not targeted.',
    '4. Preserve all existing rules, formatting, and structure.',
    '5. Keep the file as valid TypeScript — do not break string literals, template literals, or exports.',
    '6. NEVER introduce component-specific vocabulary into the prompt.',
    '7. Express every new rule as a structural pattern, not a component instance.',
    "8. Preserve the persona paragraphs at the start of each agent's preamble.",
    '',
    'Output ONLY the complete modified file content.',
    'No markdown code fences. No explanations. No commentary.',
    'Start directly with the first line of the TypeScript file.',
  ];
  const applyPrompt = [...preamble, ...promptSection, ...improvementSection, ...instructions].join('\n');

  let reply: string;
  try {
    reply = execSync(`claude -p --model ${CLAUDE_MODEL}`, {
      ...EXEC_OPTS,
      input: applyPrompt,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err: any) {
    console.error(' Claude CLI prompt-improvement failed:', err.message);
    return false;
  }

  // Drop an enclosing code fence in case the model added one anyway.
  const trimmed = reply.trim();
  const cleaned = trimmed.startsWith('```')
    ? trimmed.replace(/^```\w*\n?/, '').replace(/\n?```\s*$/, '')
    : trimmed;

  if (cleaned.length < 500) {
    console.error(' Modified prompt is suspiciously short — skipping write.');
    return false;
  }

  writeFileSync(PROMPT_FILE, cleaned);
  log('Prompt file updated.');
  return true;
}
|
|
424
|
+
|
|
425
|
+
// ─── Print scorecard ─────────────────────────────────────────────────────────
|
|
426
|
+
/**
 * Print a boxed scorecard to stdout: the average score, one row per
 * scenario, and one row per rubric category showing its average score
 * across the scenarios that reported it.
 *
 * @param results - per-scenario total score and parsed scorecard
 * @param avgScore - average of the scenario totals
 */
function printAggregateScorecard(
  results: { scenario: string; totalScore: number; eval: any }[],
  avgScore: number,
): void {
  const divider = ' ├──────────────────────────────────────────────┤';
  const emit = (row: string): void => console.log(row);

  emit('');
  emit(' ┌──────────────────────────────────────────────┐');
  emit(` │ AVERAGE SCORE: ${avgScore.toFixed(1).padStart(5)} / 100 │`);
  emit(divider);

  results.forEach(({ scenario, totalScore }) => {
    const label = scenario.padEnd(30);
    const score = String(totalScore).padStart(3);
    emit(` │ ${label} ${score} / 100 │`);
  });

  emit(divider);

  // Accumulate per-category totals; the maximum is taken from the first
  // scorecard that mentions the category.
  const tallies = new Map<string, { total: number; max: number; count: number }>();
  for (const { eval: evaluation } of results) {
    Object.entries(evaluation.categories ?? {}).forEach(([label, raw]) => {
      const entry = raw as any;
      if (!tallies.has(label)) {
        tallies.set(label, { total: 0, max: entry.maxScore, count: 0 });
      }
      const tally = tallies.get(label)!;
      tally.total += entry.score;
      tally.count += 1;
    });
  }

  tallies.forEach((tally, label) => {
    const mean = (tally.total / tally.count).toFixed(1);
    emit(` │ ${label.padEnd(30)} ${mean.padStart(4)} / ${String(tally.max).padEnd(4)}│`);
  });

  emit(' └──────────────────────────────────────────────┘');
  emit('');
}
|
|
462
|
+
|
|
463
|
+
// ─── Save iteration artifacts ────────────────────────────────────────────────
|
|
464
|
+
/**
 * Archive one iteration under `outputs/iteration-<n>/`.
 *
 * For every evaluated scenario the generated files that exist are copied
 * and the scorecard is written as `evaluation.json`. The prompt file used
 * for the iteration and a `summary.json` with the scores are stored
 * alongside.
 *
 * @param iteration - 1-based iteration number
 * @param results - per-scenario total score and parsed scorecard
 * @param avgScore - average of the scenario totals
 */
function saveIteration(
  iteration: number,
  results: { scenario: string; totalScore: number; eval: any }[],
  avgScore: number,
): void {
  const iterDir = resolve(OUTPUTS_DIR, `iteration-${iteration}`);
  mkdirSync(iterDir, { recursive: true });

  const generatedFiles = ['component.tsx', 'test.tsx', 'story.tsx'];

  results.forEach(({ scenario, eval: evaluation }) => {
    const archiveDir = resolve(iterDir, scenario);
    mkdirSync(archiveDir, { recursive: true });

    // Copy whichever generated files this scenario produced.
    for (const file of generatedFiles) {
      const from = resolve(OUTPUTS_DIR, scenario, file);
      if (!existsSync(from)) {
        continue;
      }
      copyFileSync(from, resolve(archiveDir, file));
    }

    const scorecard = JSON.stringify(evaluation, null, 2);
    writeFileSync(resolve(archiveDir, 'evaluation.json'), scorecard);
  });

  copyFileSync(PROMPT_FILE, resolve(iterDir, 'prompt.ts'));

  const summary = {
    avgScore,
    scores: results.map(({ scenario, totalScore }) => ({ scenario, score: totalScore })),
  };
  writeFileSync(resolve(iterDir, 'summary.json'), JSON.stringify(summary, null, 2));
}
|
|
499
|
+
|
|
500
|
+
// ─── Main loop ───────────────────────────────────────────────────────────────
|
|
501
|
+
/**
 * Entry point of the improvement loop.
 *
 * Prints a banner, validates the inputs, then repeats up to MAX_ITERATIONS
 * times: clear outputs, run the agents on every scenario, evaluate each
 * scenario that produced a component, print and archive the scores, and —
 * unless the average reached TARGET_SCORE or this was the last iteration —
 * apply the aggregated prompt improvements.
 */
async function main() {
  console.log('');
  const agentModel = process.env.CUSTOM_PROVIDER_DEFAULT_MODEL ?? 'unknown';
  const providerName = process.env.CUSTOM_PROVIDER_NAME ?? 'unknown';
  const evalPadding = ''.padEnd(Math.max(0, 22 - CLAUDE_MODEL.length));

  const banner = [
    '╔══════════════════════════════════════════════════╗',
    '║ Component Implementor — Improvement Loop ║',
    `║ Target: ${TARGET_SCORE}/100 avg Max iterations: ${MAX_ITERATIONS} ║`,
    '╠══════════════════════════════════════════════════╣',
    `║ Agent model: ${agentModel.padEnd(32)}║`,
    `║ Provider: ${providerName.padEnd(32)}║`,
    `║ Eval model: claude (${CLAUDE_MODEL})${evalPadding}║`,
    '╚══════════════════════════════════════════════════╝',
  ];
  for (const row of banner) {
    console.log(row);
  }

  const scenarios = validate();

  let iteration = 0;
  while (iteration < MAX_ITERATIONS) {
    iteration += 1;
    heading(`Iteration ${iteration} / ${MAX_ITERATIONS}`);

    // 1. Every iteration starts from empty scenario output directories.
    cleanScenarioOutputs(scenarios);

    // 2. Generate files for each scenario; one failure does not stop the rest.
    log('Running agent on all scenarios...');
    scenarios.forEach((scenario) => {
      try {
        runAgentForScenario(scenario);
      } catch (err: any) {
        console.error(` Agent failed on ${scenario}: ${err.message}`);
      }
    });

    // 3. Evaluate every scenario that produced a component file.
    log('\nEvaluating all scenarios...');
    const results: { scenario: string; totalScore: number; eval: any }[] = [];

    for (const scenario of scenarios) {
      const hasOutput = existsSync(resolve(OUTPUTS_DIR, scenario, 'component.tsx'));
      if (!hasOutput) {
        log(` Skipping ${scenario} — no output generated.`);
        continue;
      }

      const evaluation = evaluateScenario(scenario);
      if (!evaluation) {
        continue;
      }
      results.push({ scenario, totalScore: evaluation.totalScore, eval: evaluation.raw });
    }

    if (results.length === 0) {
      console.error(' No scenarios evaluated successfully.');
      continue;
    }

    // 4. Aggregate the scores.
    let scoreSum = 0;
    for (const r of results) {
      scoreSum = scoreSum + r.totalScore;
    }
    const avgScore = scoreSum / results.length;
    const minScore = Math.min(...results.map((r) => r.totalScore));

    printAggregateScorecard(results, avgScore);
    saveIteration(iteration, results, avgScore);

    // 5. Stop as soon as the average meets the target.
    if (avgScore >= TARGET_SCORE) {
      log(`Target average score reached! (${avgScore.toFixed(1)} >= ${TARGET_SCORE})`);
      log(`Min score: ${minScore} across ${results.length} scenarios.`);
      log('Final outputs in outputs/<scenario>/');
      break;
    }

    log(`Average ${avgScore.toFixed(1)} < ${TARGET_SCORE} target (min: ${minScore}).`);

    // 6. On the last iteration there is no later run to benefit from a new prompt.
    if (iteration >= MAX_ITERATIONS) {
      log('Max iterations reached. Best outputs saved.');
      continue;
    }

    const improvements = aggregateImprovements(results);
    log(`\n ${improvements.length} aggregated improvements (cross-scenario deduped)`);

    const outcome = applyImprovements(improvements)
      ? 'Prompt updated — next iteration will use the improved prompt.'
      : 'Could not apply improvements — retrying with current prompt.';
    log(outcome);
  }
}

// Report any unhandled failure and exit with a non-zero status.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
|
|
@@ -55,12 +55,17 @@ describe('implement-component', () => {
|
|
|
55
55
|
});
|
|
56
56
|
|
|
57
57
|
describe('handleImplementComponent', () => {
|
|
58
|
-
it('generates 3 files and returns ComponentImplemented
|
|
58
|
+
it('generates 3 files via parallel agents + reconciliation and returns ComponentImplemented', async () => {
|
|
59
59
|
const mockGenerateText = vi.mocked(generateText);
|
|
60
|
+
// Phase 1: 3 parallel calls (component, test, story)
|
|
60
61
|
mockGenerateText
|
|
61
|
-
.mockResolvedValueOnce({ text: 'test file code' } as Awaited<ReturnType<typeof generateText>>)
|
|
62
62
|
.mockResolvedValueOnce({ text: 'component file code' } as Awaited<ReturnType<typeof generateText>>)
|
|
63
|
-
.mockResolvedValueOnce({ text: '
|
|
63
|
+
.mockResolvedValueOnce({ text: 'test file code' } as Awaited<ReturnType<typeof generateText>>)
|
|
64
|
+
.mockResolvedValueOnce({ text: 'story file code' } as Awaited<ReturnType<typeof generateText>>)
|
|
65
|
+
// Phase 2: reconciliation
|
|
66
|
+
.mockResolvedValueOnce({
|
|
67
|
+
text: '```tsx\nreconciled component\n```\n\n```tsx\nreconciled story\n```',
|
|
68
|
+
} as Awaited<ReturnType<typeof generateText>>);
|
|
64
69
|
|
|
65
70
|
vi.mocked(existsSync).mockReturnValue(false);
|
|
66
71
|
vi.mocked(writeFile).mockResolvedValue(undefined);
|
|
@@ -85,15 +90,19 @@ describe('implement-component', () => {
|
|
|
85
90
|
correlationId: 'cor-1',
|
|
86
91
|
});
|
|
87
92
|
|
|
93
|
+
expect(mockGenerateText).toHaveBeenCalledTimes(4);
|
|
88
94
|
expect(writeFile).toHaveBeenCalledTimes(3);
|
|
89
95
|
});
|
|
90
96
|
|
|
91
97
|
it('reads existing component when modifying', async () => {
|
|
92
98
|
const mockGenerateText = vi.mocked(generateText);
|
|
93
99
|
mockGenerateText
|
|
94
|
-
.mockResolvedValueOnce({ text: 'test code' } as Awaited<ReturnType<typeof generateText>>)
|
|
95
100
|
.mockResolvedValueOnce({ text: 'component code' } as Awaited<ReturnType<typeof generateText>>)
|
|
96
|
-
.mockResolvedValueOnce({ text: '
|
|
101
|
+
.mockResolvedValueOnce({ text: 'test code' } as Awaited<ReturnType<typeof generateText>>)
|
|
102
|
+
.mockResolvedValueOnce({ text: 'story code' } as Awaited<ReturnType<typeof generateText>>)
|
|
103
|
+
.mockResolvedValueOnce({
|
|
104
|
+
text: '```tsx\nrc\n```\n```tsx\nrs\n```',
|
|
105
|
+
} as Awaited<ReturnType<typeof generateText>>);
|
|
97
106
|
|
|
98
107
|
vi.mocked(existsSync).mockReturnValue(true);
|
|
99
108
|
vi.mocked(readFile).mockResolvedValue('existing component code' as never);
|
|
@@ -6,6 +6,7 @@ import createDebug from 'debug';
|
|
|
6
6
|
import { generateComponentFile } from '../generate-component';
|
|
7
7
|
import { generateStoryFile } from '../generate-story';
|
|
8
8
|
import { generateTestFile } from '../generate-test';
|
|
9
|
+
import { reconcile } from '../reconcile';
|
|
9
10
|
|
|
10
11
|
const debug = createDebug('auto:component-implementor-react:command');
|
|
11
12
|
|
|
@@ -100,33 +101,28 @@ export async function handleImplementComponent(
|
|
|
100
101
|
styling: payload.styling,
|
|
101
102
|
};
|
|
102
103
|
|
|
103
|
-
debug('
|
|
104
|
-
const testCode = await
|
|
105
|
-
componentName,
|
|
106
|
-
specDeltas,
|
|
107
|
-
|
|
108
|
-
|
|
104
|
+
debug('Running Frontend, Tester, and Story agents in parallel...');
|
|
105
|
+
const [componentCode, testCode, storyCode] = await Promise.all([
|
|
106
|
+
generateComponentFile({ componentName, specDeltas, existingComponent }),
|
|
107
|
+
generateTestFile({ componentName, specDeltas, existingComponent }),
|
|
108
|
+
generateStoryFile({ componentName, specDeltas }),
|
|
109
|
+
]);
|
|
109
110
|
|
|
110
|
-
debug('
|
|
111
|
-
const
|
|
111
|
+
debug('Reconciling component and story against tests...');
|
|
112
|
+
const reconciled = await reconcile({
|
|
112
113
|
componentName,
|
|
113
114
|
specDeltas,
|
|
115
|
+
componentCode,
|
|
114
116
|
testCode,
|
|
117
|
+
storyCode,
|
|
115
118
|
existingComponent,
|
|
116
119
|
});
|
|
117
120
|
|
|
118
|
-
debug('Generating story file...');
|
|
119
|
-
const storyCode = await generateStoryFile({
|
|
120
|
-
componentName,
|
|
121
|
-
specDeltas,
|
|
122
|
-
componentCode,
|
|
123
|
-
});
|
|
124
|
-
|
|
125
121
|
await mkdir(path.dirname(testPath), { recursive: true });
|
|
126
122
|
await Promise.all([
|
|
127
123
|
writeFile(testPath, testCode, 'utf-8'),
|
|
128
|
-
writeFile(componentPath, componentCode, 'utf-8'),
|
|
129
|
-
writeFile(storyPath, storyCode, 'utf-8'),
|
|
124
|
+
writeFile(componentPath, reconciled.componentCode, 'utf-8'),
|
|
125
|
+
writeFile(storyPath, reconciled.storyCode, 'utf-8'),
|
|
130
126
|
]);
|
|
131
127
|
|
|
132
128
|
debug('Wrote 3 files for %s', componentName);
|