executant 1.19.0 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -8
- package/dist/index.js +22 -388
- package/package.json +1 -1
- package/dist/prompts/retrospective-analysis.txt +0 -304
package/README.md
CHANGED
|
@@ -119,7 +119,6 @@ steps:
|
|
|
119
119
|
|
|
120
120
|
- **`llm_as_judge: true`** — after a step completes, Claude evaluates the output; retries with feedback on FAIL, up to 5×
|
|
121
121
|
- **`self_healing: true`** — on script failure, Claude diagnoses and repairs the command, then re-runs it, up to 5×
|
|
122
|
-
- **`self_improve: true`** — after the workflow finishes, Claude analyzes execution highlights and saves an improved YAML to `tasks/backlog/`
|
|
123
122
|
|
|
124
123
|
## Interjection
|
|
125
124
|
|
|
@@ -148,19 +147,22 @@ press i → ▷ don't delete that file, use git revert▌ esc to cancel
|
|
|
148
147
|
| `logging-demo.yaml` | Log steps, self-healing, judge |
|
|
149
148
|
| `git-status-summary.yaml` | Real-world git workflow |
|
|
150
149
|
| `repeat-demo.yaml` | Running a step N times with `repeat` |
|
|
150
|
+
| `file-demo.yaml` | File operations |
|
|
151
|
+
| `from-step-test.yaml` | Using `--from-step` to resume mid-workflow |
|
|
151
152
|
|
|
152
153
|
See the [`examples/`](examples/) directory.
|
|
153
154
|
|
|
154
155
|
## CLI
|
|
155
156
|
|
|
156
157
|
```bash
|
|
157
|
-
executant plan "description"
|
|
158
|
-
executant plan -q "description"
|
|
159
|
-
executant workflow.yaml
|
|
160
|
-
executant
|
|
161
|
-
executant --
|
|
162
|
-
executant --
|
|
163
|
-
executant
|
|
158
|
+
executant plan "description" # generate a workflow YAML (auto-detects fast path)
|
|
159
|
+
executant plan -q "description" # skip research pass (fast path)
|
|
160
|
+
executant refine workflow.yaml "instructions" # refine an existing workflow YAML
|
|
161
|
+
executant workflow.yaml # run a workflow
|
|
162
|
+
executant --ci workflow.yaml # headless, NDJSON to stdout
|
|
163
|
+
executant --step <name|n> wf.yaml # run one step by name or index
|
|
164
|
+
executant --from-step <n> wf.yaml # resume from step n
|
|
165
|
+
executant update # upgrade to latest version
|
|
164
166
|
```
|
|
165
167
|
|
|
166
168
|
## Development
|
package/dist/index.js
CHANGED
|
@@ -52,8 +52,8 @@ var init_update = __esm({
|
|
|
52
52
|
// src/index.ts
|
|
53
53
|
import React3 from "react";
|
|
54
54
|
import { render } from "ink";
|
|
55
|
-
import { readFileSync as
|
|
56
|
-
import { dirname as
|
|
55
|
+
import { readFileSync as readFileSync6 } from "node:fs";
|
|
56
|
+
import { dirname as dirname4, join as join4 } from "node:path";
|
|
57
57
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
58
58
|
|
|
59
59
|
// src/load-workflow.ts
|
|
@@ -160,8 +160,7 @@ var RawStepSchema = z.lazy(
|
|
|
160
160
|
var RawWorkflowSchema = z.object({
|
|
161
161
|
goal: z.string(),
|
|
162
162
|
steps: z.array(RawStepSchema),
|
|
163
|
-
vars: z.record(z.string(), z.string()).optional()
|
|
164
|
-
self_improve: z.boolean().optional()
|
|
163
|
+
vars: z.record(z.string(), z.string()).optional()
|
|
165
164
|
});
|
|
166
165
|
function loadWorkflow(filePath2) {
|
|
167
166
|
let raw;
|
|
@@ -193,7 +192,6 @@ ${detail}`);
|
|
|
193
192
|
return {
|
|
194
193
|
goal: doc.goal,
|
|
195
194
|
vars,
|
|
196
|
-
selfImprove: doc.self_improve,
|
|
197
195
|
tasks: doc.steps.map((step) => convertStep(step, vars))
|
|
198
196
|
};
|
|
199
197
|
}
|
|
@@ -334,8 +332,8 @@ var AsyncQueue = class {
|
|
|
334
332
|
}
|
|
335
333
|
next() {
|
|
336
334
|
if (this.buf.length > 0) return Promise.resolve(this.buf.shift());
|
|
337
|
-
return new Promise((
|
|
338
|
-
this.waiter =
|
|
335
|
+
return new Promise((resolve3) => {
|
|
336
|
+
this.waiter = resolve3;
|
|
339
337
|
});
|
|
340
338
|
}
|
|
341
339
|
async *[Symbol.asyncIterator]() {
|
|
@@ -371,8 +369,8 @@ async function* mergeStreamsToLines(...streams) {
|
|
|
371
369
|
yield* q;
|
|
372
370
|
}
|
|
373
371
|
function waitForExit(proc) {
|
|
374
|
-
return new Promise((
|
|
375
|
-
proc.on("close", (code) =>
|
|
372
|
+
return new Promise((resolve3, reject) => {
|
|
373
|
+
proc.on("close", (code) => resolve3(code ?? 0));
|
|
376
374
|
proc.on("error", reject);
|
|
377
375
|
});
|
|
378
376
|
}
|
|
@@ -1620,8 +1618,7 @@ var TOTAL_PLAN_STAGES = 3;
|
|
|
1620
1618
|
var WorkflowSchema = z3.object({
|
|
1621
1619
|
goal: z3.string(),
|
|
1622
1620
|
steps: z3.array(RawStepSchema).min(1),
|
|
1623
|
-
vars: z3.record(z3.string()).optional()
|
|
1624
|
-
self_improve: z3.boolean().optional()
|
|
1621
|
+
vars: z3.record(z3.string()).optional()
|
|
1625
1622
|
});
|
|
1626
1623
|
var PlanJudgeOutputSchema = z3.object({
|
|
1627
1624
|
pass: z3.boolean(),
|
|
@@ -2328,13 +2325,7 @@ function PlanApp({ description, events: events2 }) {
|
|
|
2328
2325
|
}
|
|
2329
2326
|
|
|
2330
2327
|
// src/logger.ts
|
|
2331
|
-
import {
|
|
2332
|
-
appendFileSync,
|
|
2333
|
-
existsSync as existsSync3,
|
|
2334
|
-
mkdirSync as mkdirSync3,
|
|
2335
|
-
readdirSync,
|
|
2336
|
-
writeFileSync as writeFileSync3
|
|
2337
|
-
} from "node:fs";
|
|
2328
|
+
import { appendFileSync, existsSync as existsSync3, mkdirSync as mkdirSync3, writeFileSync as writeFileSync3 } from "node:fs";
|
|
2338
2329
|
import { dirname as dirname3, join as join3, resolve as resolve2 } from "node:path";
|
|
2339
2330
|
function findExecutantLocalDir(startDir) {
|
|
2340
2331
|
let dir = resolve2(startDir);
|
|
@@ -2355,22 +2346,13 @@ var INIT_STATE = {
|
|
|
2355
2346
|
logFile: "",
|
|
2356
2347
|
stepIndex: -1,
|
|
2357
2348
|
stepName: "",
|
|
2358
|
-
stepStartMs: 0
|
|
2359
|
-
toolCount: 0,
|
|
2360
|
-
complexSequenceFile: "",
|
|
2361
|
-
selfHealingFile: "",
|
|
2362
|
-
judgeAttempt: 0,
|
|
2363
|
-
recentOutput: []
|
|
2349
|
+
stepStartMs: 0
|
|
2364
2350
|
};
|
|
2365
2351
|
function appendLog(logFile, text) {
|
|
2366
2352
|
if (logFile) appendFileSync(logFile, text + "\n");
|
|
2367
2353
|
}
|
|
2368
|
-
function highlightPath(ctx, stepIndex, suffix) {
|
|
2369
|
-
return join3(ctx.highlightsDir, `${ctx.ts}_step${stepIndex + 1}_${suffix}.md`);
|
|
2370
|
-
}
|
|
2371
2354
|
function onWorkflowStart(ctx, s) {
|
|
2372
2355
|
mkdirSync3(ctx.logDir, { recursive: true });
|
|
2373
|
-
mkdirSync3(ctx.highlightsDir, { recursive: true });
|
|
2374
2356
|
const logFile = join3(ctx.logDir, `${ctx.ts}_${ctx.slug}.log`);
|
|
2375
2357
|
writeFileSync3(
|
|
2376
2358
|
logFile,
|
|
@@ -2402,20 +2384,6 @@ ${"\u2501".repeat(51)}
|
|
|
2402
2384
|
);
|
|
2403
2385
|
return next;
|
|
2404
2386
|
}
|
|
2405
|
-
function finalizeComplexSequence(s) {
|
|
2406
|
-
if (s.toolCount >= 3 && s.complexSequenceFile) {
|
|
2407
|
-
appendFileSync(
|
|
2408
|
-
s.complexSequenceFile,
|
|
2409
|
-
`
|
|
2410
|
-
---
|
|
2411
|
-
|
|
2412
|
-
*Total tools used: ${s.toolCount}*
|
|
2413
|
-
|
|
2414
|
-
*Captured by Executant Logger*
|
|
2415
|
-
`
|
|
2416
|
-
);
|
|
2417
|
-
}
|
|
2418
|
-
}
|
|
2419
2387
|
function onStepComplete(s) {
|
|
2420
2388
|
appendLog(
|
|
2421
2389
|
s.logFile,
|
|
@@ -2423,131 +2391,21 @@ function onStepComplete(s) {
|
|
|
2423
2391
|
Step completed in ${((Date.now() - s.stepStartMs) / 1e3).toFixed(1)}s
|
|
2424
2392
|
`
|
|
2425
2393
|
);
|
|
2426
|
-
finalizeComplexSequence(s);
|
|
2427
2394
|
return s;
|
|
2428
2395
|
}
|
|
2429
2396
|
function onStepError(s, error) {
|
|
2430
2397
|
appendLog(s.logFile, `
|
|
2431
2398
|
Step failed: ${error.message}
|
|
2432
2399
|
`);
|
|
2433
|
-
finalizeComplexSequence(s);
|
|
2434
2400
|
return s;
|
|
2435
2401
|
}
|
|
2436
|
-
function
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
"",
|
|
2440
|
-
`**Task:** ${ctx.slug}`,
|
|
2441
|
-
`**Step:** ${s.stepName}`,
|
|
2442
|
-
...extra,
|
|
2443
|
-
`**Timestamp:** ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
2444
|
-
"",
|
|
2445
|
-
"---",
|
|
2446
|
-
""
|
|
2447
|
-
].join("\n") + "\n";
|
|
2448
|
-
}
|
|
2449
|
-
function complexSequenceHeader(ctx, s) {
|
|
2450
|
-
return buildHighlightHeader(ctx, s, "Complex Tool Sequence") + "## Claude's Tool Orchestration\n\nClaude used multiple tools to complete this step:\n\n";
|
|
2451
|
-
}
|
|
2452
|
-
function createComplexSequenceFile(ctx, s) {
|
|
2453
|
-
const path = highlightPath(ctx, s.stepIndex, "complex_sequence");
|
|
2454
|
-
writeFileSync3(path, complexSequenceHeader(ctx, s));
|
|
2455
|
-
return path;
|
|
2456
|
-
}
|
|
2457
|
-
function onTool(ctx, s, tool, input) {
|
|
2458
|
-
const desc = getToolArg(tool, input);
|
|
2459
|
-
appendLog(s.logFile, ` [${tool}] ${desc}`);
|
|
2460
|
-
const toolCount = s.toolCount + 1;
|
|
2461
|
-
const complexSequenceFile = toolCount === 3 ? createComplexSequenceFile(ctx, s) : s.complexSequenceFile;
|
|
2462
|
-
if (toolCount >= 3 && complexSequenceFile) {
|
|
2463
|
-
appendFileSync(
|
|
2464
|
-
complexSequenceFile,
|
|
2465
|
-
`${toolCount}. **${tool}** - ${desc}
|
|
2466
|
-
`
|
|
2467
|
-
);
|
|
2468
|
-
}
|
|
2469
|
-
return { ...s, toolCount, complexSequenceFile };
|
|
2470
|
-
}
|
|
2471
|
-
function saveJudgeHighlight(ctx, s, verdict, text) {
|
|
2472
|
-
writeFileSync3(
|
|
2473
|
-
highlightPath(ctx, s.stepIndex, `judge_${verdict}`),
|
|
2474
|
-
buildHighlightHeader(ctx, s, `Judge Verdict: ${verdict}`, [
|
|
2475
|
-
`**Attempt:** ${s.judgeAttempt}`
|
|
2476
|
-
]) + [text, "", "---", "", "*Auto-captured*", ""].join("\n")
|
|
2477
|
-
);
|
|
2402
|
+
function onTool(s, tool, input) {
|
|
2403
|
+
appendLog(s.logFile, ` [${tool}] ${getToolArg(tool, input)}`);
|
|
2404
|
+
return s;
|
|
2478
2405
|
}
|
|
2479
|
-
|
|
2480
|
-
{
|
|
2481
|
-
pattern: /\[judge\]\s+(PASS|FAIL)/i,
|
|
2482
|
-
apply: (ctx, s, text, match) => {
|
|
2483
|
-
const verdict = match[1].toUpperCase();
|
|
2484
|
-
const judgeAttempt = s.judgeAttempt + 1;
|
|
2485
|
-
saveJudgeHighlight(ctx, { ...s, judgeAttempt }, verdict, text);
|
|
2486
|
-
return { ...s, judgeAttempt };
|
|
2487
|
-
}
|
|
2488
|
-
},
|
|
2489
|
-
{
|
|
2490
|
-
pattern: /\[self-healing\].*failed.*exit\s+(\d+)/i,
|
|
2491
|
-
apply: (ctx, s, _text, match) => {
|
|
2492
|
-
const selfHealingFile = highlightPath(ctx, s.stepIndex, "self_healing");
|
|
2493
|
-
writeFileSync3(
|
|
2494
|
-
selfHealingFile,
|
|
2495
|
-
buildHighlightHeader(ctx, s, "Self-Healing Activation") + [
|
|
2496
|
-
"## \u274C Failure Detected",
|
|
2497
|
-
"",
|
|
2498
|
-
`**Exit Code:** ${match[1]}`,
|
|
2499
|
-
"",
|
|
2500
|
-
"**Recent Output:**",
|
|
2501
|
-
"```",
|
|
2502
|
-
s.recentOutput.join("\n"),
|
|
2503
|
-
"```",
|
|
2504
|
-
"",
|
|
2505
|
-
"---",
|
|
2506
|
-
"",
|
|
2507
|
-
"## \u{1F527} Claude's Healing Process",
|
|
2508
|
-
""
|
|
2509
|
-
].join("\n")
|
|
2510
|
-
);
|
|
2511
|
-
return { ...s, selfHealingFile, recentOutput: [] };
|
|
2512
|
-
}
|
|
2513
|
-
},
|
|
2514
|
-
{
|
|
2515
|
-
pattern: /\[self-healing\].*Re-running/i,
|
|
2516
|
-
apply: (_ctx, s) => {
|
|
2517
|
-
if (!s.selfHealingFile) return s;
|
|
2518
|
-
appendFileSync(
|
|
2519
|
-
s.selfHealingFile,
|
|
2520
|
-
[
|
|
2521
|
-
"",
|
|
2522
|
-
"*(See full log for Claude's diagnostic process)*",
|
|
2523
|
-
"",
|
|
2524
|
-
"---",
|
|
2525
|
-
"",
|
|
2526
|
-
"## \u2705 Resolution Applied",
|
|
2527
|
-
"",
|
|
2528
|
-
"The self-healing process completed. Check the full execution log to see Claude's analysis and fix.",
|
|
2529
|
-
"",
|
|
2530
|
-
"---",
|
|
2531
|
-
"",
|
|
2532
|
-
"*Auto-captured*",
|
|
2533
|
-
""
|
|
2534
|
-
].join("\n")
|
|
2535
|
-
);
|
|
2536
|
-
return { ...s, selfHealingFile: "" };
|
|
2537
|
-
}
|
|
2538
|
-
}
|
|
2539
|
-
];
|
|
2540
|
-
function onLogMessage(ctx, s, level, text) {
|
|
2406
|
+
function onLogMessage(s, level, text) {
|
|
2541
2407
|
appendLog(s.logFile, `[${level}] ${text}`);
|
|
2542
|
-
|
|
2543
|
-
for (const { pattern, apply } of LOG_MATCHERS) {
|
|
2544
|
-
const m = pattern.exec(text);
|
|
2545
|
-
if (m) {
|
|
2546
|
-
state = apply(ctx, state, text, m);
|
|
2547
|
-
break;
|
|
2548
|
-
}
|
|
2549
|
-
}
|
|
2550
|
-
return state;
|
|
2408
|
+
return s;
|
|
2551
2409
|
}
|
|
2552
2410
|
function onWorkflowComplete(ctx, s) {
|
|
2553
2411
|
appendLog(
|
|
@@ -2559,37 +2417,8 @@ Finished: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
|
2559
2417
|
${"\u2501".repeat(51)}
|
|
2560
2418
|
`
|
|
2561
2419
|
);
|
|
2562
|
-
const indexFile = join3(ctx.highlightsDir, "README.md");
|
|
2563
|
-
if (!existsSync3(indexFile)) {
|
|
2564
|
-
writeFileSync3(
|
|
2565
|
-
indexFile,
|
|
2566
|
-
[
|
|
2567
|
-
"# Execution Highlights",
|
|
2568
|
-
"",
|
|
2569
|
-
"This directory contains automatically extracted highlight moments from task executions.",
|
|
2570
|
-
"",
|
|
2571
|
-
"## Latest Highlights",
|
|
2572
|
-
""
|
|
2573
|
-
].join("\n")
|
|
2574
|
-
);
|
|
2575
|
-
}
|
|
2576
|
-
const highlights = readdirSync(ctx.highlightsDir).filter((f) => f.startsWith(ctx.ts) && f.endsWith(".md")).sort();
|
|
2577
|
-
if (highlights.length > 0) {
|
|
2578
|
-
const entries = highlights.map((f) => `- [${f.replace(/\.md$/, "")}](./${f})`).join("\n");
|
|
2579
|
-
appendFileSync(
|
|
2580
|
-
indexFile,
|
|
2581
|
-
`
|
|
2582
|
-
### ${ctx.slug} (${(/* @__PURE__ */ new Date()).toISOString()})
|
|
2583
|
-
${entries}
|
|
2584
|
-
`
|
|
2585
|
-
);
|
|
2586
|
-
}
|
|
2587
2420
|
return s;
|
|
2588
2421
|
}
|
|
2589
|
-
function onOutputText(s, text) {
|
|
2590
|
-
appendLog(s.logFile, text);
|
|
2591
|
-
return { ...s, recentOutput: [...s.recentOutput, text] };
|
|
2592
|
-
}
|
|
2593
2422
|
function reduce(ctx, s, event) {
|
|
2594
2423
|
switch (event.type) {
|
|
2595
2424
|
case "workflow:start":
|
|
@@ -2614,11 +2443,12 @@ function reduce(ctx, s, event) {
|
|
|
2614
2443
|
);
|
|
2615
2444
|
return s;
|
|
2616
2445
|
case "output:text":
|
|
2617
|
-
|
|
2446
|
+
appendLog(s.logFile, event.text);
|
|
2447
|
+
return s;
|
|
2618
2448
|
case "output:tool":
|
|
2619
|
-
return onTool(
|
|
2449
|
+
return onTool(s, event.tool, event.input);
|
|
2620
2450
|
case "log":
|
|
2621
|
-
return onLogMessage(
|
|
2451
|
+
return onLogMessage(s, event.level, event.text);
|
|
2622
2452
|
case "workflow:complete":
|
|
2623
2453
|
return onWorkflowComplete(ctx, s);
|
|
2624
2454
|
default:
|
|
@@ -2628,15 +2458,12 @@ function reduce(ctx, s, event) {
|
|
|
2628
2458
|
function createLogger(logDir, taskName) {
|
|
2629
2459
|
const ctx = {
|
|
2630
2460
|
logDir,
|
|
2631
|
-
highlightsDir: join3(logDir, "highlights"),
|
|
2632
2461
|
ts: formatTimestamp(/* @__PURE__ */ new Date()),
|
|
2633
2462
|
slug: slugify(taskName, 40) || "task"
|
|
2634
2463
|
};
|
|
2635
2464
|
const enabled = process.env["EXECUTANT_LOG"] !== "0";
|
|
2636
2465
|
let state = INIT_STATE;
|
|
2637
2466
|
return {
|
|
2638
|
-
getHighlightsDir: () => ctx.highlightsDir,
|
|
2639
|
-
getTimestamp: () => ctx.ts,
|
|
2640
2467
|
observe(event) {
|
|
2641
2468
|
if (!enabled) return;
|
|
2642
2469
|
try {
|
|
@@ -2654,176 +2481,6 @@ async function* withLogger(gen, logger2) {
|
|
|
2654
2481
|
}
|
|
2655
2482
|
}
|
|
2656
2483
|
|
|
2657
|
-
// src/retrospective.ts
|
|
2658
|
-
import {
|
|
2659
|
-
existsSync as existsSync4,
|
|
2660
|
-
mkdirSync as mkdirSync4,
|
|
2661
|
-
readdirSync as readdirSync2,
|
|
2662
|
-
readFileSync as readFileSync6,
|
|
2663
|
-
writeFileSync as writeFileSync4
|
|
2664
|
-
} from "node:fs";
|
|
2665
|
-
import { basename as basename2, dirname as dirname4, join as join4, resolve as resolve3 } from "node:path";
|
|
2666
|
-
import { spawnSync } from "node:child_process";
|
|
2667
|
-
import { load as parseYaml2 } from "js-yaml";
|
|
2668
|
-
import { z as z4 } from "zod";
|
|
2669
|
-
var RetrospectiveOutputSchema = z4.object({
|
|
2670
|
-
improved_yaml: z4.string(),
|
|
2671
|
-
changelog: z4.string()
|
|
2672
|
-
});
|
|
2673
|
-
var RETROSPECTIVE_PROMPT = loadPrompt("retrospective-analysis");
|
|
2674
|
-
async function runRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp) {
|
|
2675
|
-
try {
|
|
2676
|
-
await doRetrospective(
|
|
2677
|
-
workflowFilePath,
|
|
2678
|
-
workflow2,
|
|
2679
|
-
highlightsDir,
|
|
2680
|
-
runTimestamp
|
|
2681
|
-
);
|
|
2682
|
-
} catch (err) {
|
|
2683
|
-
console.warn(
|
|
2684
|
-
`
|
|
2685
|
-
Self-improvement: retrospective failed: ${getErrorMessage(err)}`
|
|
2686
|
-
);
|
|
2687
|
-
}
|
|
2688
|
-
}
|
|
2689
|
-
async function doRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp) {
|
|
2690
|
-
if (!existsSync4(highlightsDir)) {
|
|
2691
|
-
console.log("\nSelf-improvement: no highlights directory found, skipping.");
|
|
2692
|
-
return;
|
|
2693
|
-
}
|
|
2694
|
-
const allFiles = readdirSync2(highlightsDir);
|
|
2695
|
-
const runHighlights = allFiles.filter((f) => f.startsWith(runTimestamp) && f.endsWith(".md")).sort();
|
|
2696
|
-
if (runHighlights.length === 0) {
|
|
2697
|
-
console.log(
|
|
2698
|
-
"\nSelf-improvement: no highlights for this run \u2014 task completed without issues, skipping."
|
|
2699
|
-
);
|
|
2700
|
-
return;
|
|
2701
|
-
}
|
|
2702
|
-
const divider = "\u2501".repeat(51);
|
|
2703
|
-
console.log(`
|
|
2704
|
-
${divider}`);
|
|
2705
|
-
console.log(
|
|
2706
|
-
"Self-Improvement: Analyzing execution and generating improvements..."
|
|
2707
|
-
);
|
|
2708
|
-
console.log(`${divider}
|
|
2709
|
-
`);
|
|
2710
|
-
console.log(`Found ${runHighlights.length} highlight(s) to analyze`);
|
|
2711
|
-
const countByPattern = (pat) => runHighlights.filter((f) => f.includes(pat)).length;
|
|
2712
|
-
const judgeFailures = countByPattern("_judge_FAIL");
|
|
2713
|
-
const selfHealingCount = countByPattern("_self_healing");
|
|
2714
|
-
const complexSequences = countByPattern("_complex_sequence");
|
|
2715
|
-
const metrics = [
|
|
2716
|
-
`- Judge Failures: ${judgeFailures}`,
|
|
2717
|
-
`- Self-Healing Activations: ${selfHealingCount}`,
|
|
2718
|
-
`- Complex Tool Sequences: ${complexSequences}`,
|
|
2719
|
-
`- Total Highlights: ${runHighlights.length}`
|
|
2720
|
-
].join("\n");
|
|
2721
|
-
console.log(`
|
|
2722
|
-
Execution Metrics:
|
|
2723
|
-
${metrics}
|
|
2724
|
-
`);
|
|
2725
|
-
console.log("Analyzing execution and generating improvements...\n");
|
|
2726
|
-
const highlightContents = runHighlights.map((f) => {
|
|
2727
|
-
const content = readFileSync6(join4(highlightsDir, f), "utf8");
|
|
2728
|
-
return `### ${f}
|
|
2729
|
-
|
|
2730
|
-
${content}`;
|
|
2731
|
-
}).join("\n\n---\n\n");
|
|
2732
|
-
const originalYaml = readFileSync6(workflowFilePath, "utf8");
|
|
2733
|
-
const taskName = basename2(workflowFilePath, ".yaml");
|
|
2734
|
-
const prompt = fillTemplate(RETROSPECTIVE_PROMPT, {
|
|
2735
|
-
TASK_NAME: taskName,
|
|
2736
|
-
ORIGINAL_GOAL: workflow2.goal,
|
|
2737
|
-
ORIGINAL_YAML: originalYaml,
|
|
2738
|
-
HIGHLIGHTS: highlightContents,
|
|
2739
|
-
METRICS: metrics
|
|
2740
|
-
});
|
|
2741
|
-
const result = spawnSync(
|
|
2742
|
-
"claude",
|
|
2743
|
-
[
|
|
2744
|
-
"-p",
|
|
2745
|
-
prompt,
|
|
2746
|
-
"--allowedTools",
|
|
2747
|
-
"Read",
|
|
2748
|
-
"--permission-mode",
|
|
2749
|
-
"bypassPermissions",
|
|
2750
|
-
"--output-format",
|
|
2751
|
-
"text"
|
|
2752
|
-
],
|
|
2753
|
-
{
|
|
2754
|
-
encoding: "utf8",
|
|
2755
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
2756
|
-
stdio: ["ignore", "pipe", "pipe"]
|
|
2757
|
-
}
|
|
2758
|
-
);
|
|
2759
|
-
if (result.error) {
|
|
2760
|
-
console.warn(
|
|
2761
|
-
`Self-improvement: failed to run claude: ${result.error.message}`
|
|
2762
|
-
);
|
|
2763
|
-
return;
|
|
2764
|
-
}
|
|
2765
|
-
if (result.status !== 0) {
|
|
2766
|
-
const stderr = result.stderr ?? "";
|
|
2767
|
-
console.warn(
|
|
2768
|
-
`Self-improvement: claude exited with code ${result.status}${stderr ? ": " + stderr : ""}`
|
|
2769
|
-
);
|
|
2770
|
-
return;
|
|
2771
|
-
}
|
|
2772
|
-
const response = result.stdout ?? "";
|
|
2773
|
-
let parsed;
|
|
2774
|
-
try {
|
|
2775
|
-
parsed = JSON.parse(extractJson(response));
|
|
2776
|
-
} catch {
|
|
2777
|
-
console.warn(
|
|
2778
|
-
`Self-improvement: could not parse Claude response as JSON.
|
|
2779
|
-
Response: ${response.trim()}`
|
|
2780
|
-
);
|
|
2781
|
-
return;
|
|
2782
|
-
}
|
|
2783
|
-
const zodResult = RetrospectiveOutputSchema.safeParse(parsed);
|
|
2784
|
-
if (!zodResult.success) {
|
|
2785
|
-
console.warn(
|
|
2786
|
-
"Self-improvement: response schema mismatch \u2014 improved YAML not saved."
|
|
2787
|
-
);
|
|
2788
|
-
return;
|
|
2789
|
-
}
|
|
2790
|
-
const improvedYaml = zodResult.data.improved_yaml.trim();
|
|
2791
|
-
const changelog = zodResult.data.changelog.trim() || "No changelog generated.";
|
|
2792
|
-
try {
|
|
2793
|
-
parseYaml2(improvedYaml);
|
|
2794
|
-
} catch (err) {
|
|
2795
|
-
console.warn(
|
|
2796
|
-
`Self-improvement: generated YAML is invalid (${getErrorMessage(err)}), skipping save.`
|
|
2797
|
-
);
|
|
2798
|
-
return;
|
|
2799
|
-
}
|
|
2800
|
-
const startDir = dirname4(resolve3(workflowFilePath));
|
|
2801
|
-
const executantLocal = findExecutantLocalDir(startDir);
|
|
2802
|
-
const backlogDir = executantLocal ? join4(executantLocal, "tasks", "backlog") : join4(startDir, "..", "backlog");
|
|
2803
|
-
mkdirSync4(backlogDir, { recursive: true });
|
|
2804
|
-
const ts = formatTimestamp(/* @__PURE__ */ new Date());
|
|
2805
|
-
const slug = slugify(taskName, 40);
|
|
2806
|
-
const improvedFile = join4(backlogDir, `${ts}-${slug}-improved.yaml`);
|
|
2807
|
-
const changelogFile = join4(backlogDir, `${ts}-${slug}-changelog.md`);
|
|
2808
|
-
writeFileSync4(improvedFile, improvedYaml + "\n", "utf8");
|
|
2809
|
-
writeFileSync4(changelogFile, changelog + "\n", "utf8");
|
|
2810
|
-
console.log(`\u2705 Improved task saved: ${improvedFile}`);
|
|
2811
|
-
console.log(`\u2705 Changelog saved: ${changelogFile}`);
|
|
2812
|
-
console.log(`
|
|
2813
|
-
${divider}`);
|
|
2814
|
-
console.log("Improvement Summary");
|
|
2815
|
-
console.log(`${divider}
|
|
2816
|
-
`);
|
|
2817
|
-
console.log(changelog);
|
|
2818
|
-
}
|
|
2819
|
-
function extractJson(text) {
|
|
2820
|
-
const start = text.indexOf("{");
|
|
2821
|
-
const end = text.lastIndexOf("}");
|
|
2822
|
-
if (start === -1 || end === -1 || end <= start)
|
|
2823
|
-
throw new Error("no JSON object found in response");
|
|
2824
|
-
return text.slice(start, end + 1);
|
|
2825
|
-
}
|
|
2826
|
-
|
|
2827
2484
|
// src/types.ts
|
|
2828
2485
|
var InterjectChannel = class {
|
|
2829
2486
|
_queue = [];
|
|
@@ -2841,8 +2498,8 @@ var InterjectChannel = class {
|
|
|
2841
2498
|
|
|
2842
2499
|
// src/index.ts
|
|
2843
2500
|
var CURRENT_VERSION = JSON.parse(
|
|
2844
|
-
|
|
2845
|
-
|
|
2501
|
+
readFileSync6(
|
|
2502
|
+
join4(dirname4(fileURLToPath2(import.meta.url)), "../package.json"),
|
|
2846
2503
|
"utf-8"
|
|
2847
2504
|
)
|
|
2848
2505
|
).version;
|
|
@@ -3020,36 +2677,17 @@ function errorReplacer(_key, value) {
|
|
|
3020
2677
|
}
|
|
3021
2678
|
return value;
|
|
3022
2679
|
}
|
|
3023
|
-
async function maybeRunRetrospective(filePath2, workflow2, logger2) {
|
|
3024
|
-
if (!logger2) return;
|
|
3025
|
-
try {
|
|
3026
|
-
await runRetrospective(
|
|
3027
|
-
filePath2,
|
|
3028
|
-
workflow2,
|
|
3029
|
-
logger2.getHighlightsDir(),
|
|
3030
|
-
logger2.getTimestamp()
|
|
3031
|
-
);
|
|
3032
|
-
} catch (err) {
|
|
3033
|
-
console.warn(
|
|
3034
|
-
"[executant] retrospective failed (non-fatal):",
|
|
3035
|
-
getErrorMessage(err)
|
|
3036
|
-
);
|
|
3037
|
-
}
|
|
3038
|
-
}
|
|
3039
2680
|
if (ciMode) {
|
|
3040
2681
|
(async () => {
|
|
3041
2682
|
for await (const event of events) {
|
|
3042
2683
|
process.stdout.write(JSON.stringify(event, errorReplacer) + "\n");
|
|
3043
2684
|
}
|
|
3044
|
-
if (workflow.selfImprove) {
|
|
3045
|
-
await maybeRunRetrospective(filePath, workflow, logger);
|
|
3046
|
-
}
|
|
3047
2685
|
})().catch((err) => {
|
|
3048
2686
|
console.error(err);
|
|
3049
2687
|
process.exit(1);
|
|
3050
2688
|
});
|
|
3051
2689
|
} else {
|
|
3052
|
-
|
|
2690
|
+
render(
|
|
3053
2691
|
React3.createElement(App, {
|
|
3054
2692
|
workflow,
|
|
3055
2693
|
events,
|
|
@@ -3058,8 +2696,4 @@ if (ciMode) {
|
|
|
3058
2696
|
interjectChannel: channel
|
|
3059
2697
|
})
|
|
3060
2698
|
);
|
|
3061
|
-
if (workflow.selfImprove) {
|
|
3062
|
-
inkApp.waitUntilExit().then(() => maybeRunRetrospective(filePath, workflow, logger)).catch(() => {
|
|
3063
|
-
});
|
|
3064
|
-
}
|
|
3065
2699
|
}
|
package/package.json
CHANGED
|
@@ -1,304 +0,0 @@
|
|
|
1
|
-
# ============================================================================
|
|
2
|
-
# RETROSPECTIVE ANALYSIS PROMPT
|
|
3
|
-
# ============================================================================
|
|
4
|
-
# Purpose: Analyzes task execution highlights and generates improved task YAML
|
|
5
|
-
# Used by: src/retrospective.ts runRetrospective()
|
|
6
|
-
# Triggered when: A task completes with self_improve: true and has highlights
|
|
7
|
-
#
|
|
8
|
-
# Placeholders:
|
|
9
|
-
# {{TASK_NAME}} - Name of the task that was executed
|
|
10
|
-
# {{ORIGINAL_GOAL}} - The original goal statement (must be preserved)
|
|
11
|
-
# {{ORIGINAL_YAML}} - Complete original task YAML for reference
|
|
12
|
-
# {{HIGHLIGHTS}} - Aggregated highlight markdown files from execution
|
|
13
|
-
# {{METRICS}} - Execution metrics summary (failures, retries, etc.)
|
|
14
|
-
# ============================================================================
|
|
15
|
-
|
|
16
|
-
You are analyzing the execution of an Executant task to identify improvement opportunities.
|
|
17
|
-
|
|
18
|
-
# Task Information
|
|
19
|
-
|
|
20
|
-
**Task Name:** {{TASK_NAME}}
|
|
21
|
-
|
|
22
|
-
**Original Goal:** {{ORIGINAL_GOAL}}
|
|
23
|
-
|
|
24
|
-
# Execution Metrics
|
|
25
|
-
|
|
26
|
-
{{METRICS}}
|
|
27
|
-
|
|
28
|
-
# Execution Highlights
|
|
29
|
-
|
|
30
|
-
The following highlights were captured during execution. Each highlight represents a moment where the system encountered challenges:
|
|
31
|
-
|
|
32
|
-
{{HIGHLIGHTS}}
|
|
33
|
-
|
|
34
|
-
# Original Task YAML
|
|
35
|
-
|
|
36
|
-
```yaml
|
|
37
|
-
{{ORIGINAL_YAML}}
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
# Your Task
|
|
41
|
-
|
|
42
|
-
Analyze the execution highlights and generate an improved version of the task YAML that addresses the problems encountered during execution.
|
|
43
|
-
|
|
44
|
-
## Analysis Guidelines
|
|
45
|
-
|
|
46
|
-
### Interpreting Judge Failures (llm_as_judge: true)
|
|
47
|
-
|
|
48
|
-
Judge failures indicate that Claude's output didn't meet quality standards. Common causes:
|
|
49
|
-
|
|
50
|
-
**Unclear prompts** - The step instructions were too vague
|
|
51
|
-
- Fix: Add specific numbered sub-steps
|
|
52
|
-
- Fix: Define clear success criteria
|
|
53
|
-
- Fix: Specify what to check and how to verify it
|
|
54
|
-
|
|
55
|
-
**Missing criteria** - The prompt didn't explain what "good" looks like
|
|
56
|
-
- Fix: Add examples of expected output
|
|
57
|
-
- Fix: Specify quality thresholds (test coverage %, file count, etc.)
|
|
58
|
-
- Fix: Include validation steps
|
|
59
|
-
|
|
60
|
-
**Steps too large** - One step tried to do too much
|
|
61
|
-
- Fix: Break into smaller, focused steps
|
|
62
|
-
- Fix: Each step should have one clear objective
|
|
63
|
-
|
|
64
|
-
**Example Fix:**
|
|
65
|
-
```
|
|
66
|
-
BEFORE:
|
|
67
|
-
- name: "validate results"
|
|
68
|
-
llm_as_judge: true
|
|
69
|
-
prompt: "Validate the conversion results"
|
|
70
|
-
|
|
71
|
-
AFTER:
|
|
72
|
-
- name: "validate results"
|
|
73
|
-
llm_as_judge: true
|
|
74
|
-
prompt: |
|
|
75
|
-
Validate the TypeScript conversion by checking:
|
|
76
|
-
1. Read the generated .ts file
|
|
77
|
-
2. Verify all functions have type annotations
|
|
78
|
-
3. Check that tests pass (npm test)
|
|
79
|
-
4. Confirm no compilation errors (tsc --noEmit)
|
|
80
|
-
|
|
81
|
-
Success criteria: All 4 checks pass without errors.
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
### Interpreting Self-Healing Events (self_healing: true)
|
|
85
|
-
|
|
86
|
-
Self-healing activations indicate brittle script steps that failed during execution. Common causes:
|
|
87
|
-
|
|
88
|
-
**Missing dependencies** - Command not found, package not installed
|
|
89
|
-
- Fix: Add a script step to install/check dependencies first
|
|
90
|
-
- Fix: Use explicit paths instead of assuming commands are in PATH
|
|
91
|
-
|
|
92
|
-
**Wrong assumptions** - Script assumed files/directories exist
|
|
93
|
-
- Fix: Add checks or create directories in the script
|
|
94
|
-
- Fix: Use `mkdir -p` instead of `mkdir`
|
|
95
|
-
- Fix: Check file existence before operating on it
|
|
96
|
-
|
|
97
|
-
**Environment issues** - PWD, env vars, or paths incorrect
|
|
98
|
-
- Fix: Use absolute paths instead of relative
|
|
99
|
-
- Fix: cd to correct directory in the script
|
|
100
|
-
- Fix: Set required environment variables
|
|
101
|
-
|
|
102
|
-
**Race conditions** - Script ran before previous step completed
|
|
103
|
-
- Fix: Add wait/check logic
|
|
104
|
-
- Fix: Combine dependent commands with && in one script step
|
|
105
|
-
|
|
106
|
-
**Example Fix:**
|
|
107
|
-
```
|
|
108
|
-
BEFORE:
|
|
109
|
-
- name: "run tests"
|
|
110
|
-
type: script
|
|
111
|
-
self_healing: true
|
|
112
|
-
command: npm test
|
|
113
|
-
|
|
114
|
-
AFTER:
|
|
115
|
-
- name: "install dependencies"
|
|
116
|
-
type: script
|
|
117
|
-
command: npm install
|
|
118
|
-
|
|
119
|
-
- name: "run tests"
|
|
120
|
-
type: script
|
|
121
|
-
self_healing: true
|
|
122
|
-
command: npm test
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### Interpreting Complex Tool Sequences
|
|
126
|
-
|
|
127
|
-
Complex tool sequences (3+ tools) indicate that Claude had to work hard to complete a step. Common causes:
|
|
128
|
-
|
|
129
|
-
**Vague instructions** - Step didn't specify what files to operate on
|
|
130
|
-
- Fix: List specific file paths to read/edit
|
|
131
|
-
- Fix: Specify glob patterns for file discovery
|
|
132
|
-
- Fix: Break discovery and operation into separate steps
|
|
133
|
-
|
|
134
|
-
**Exploratory work needed** - Claude had to search to understand the codebase
|
|
135
|
-
- Fix: Add a separate discovery/analysis step first
|
|
136
|
-
- Fix: Provide file paths in the prompt
|
|
137
|
-
- Fix: Include relevant code snippets in the prompt
|
|
138
|
-
|
|
139
|
-
**Multi-phase operations** - One step tried to do research + implementation
|
|
140
|
-
- Fix: Split into "research" step and "implementation" step
|
|
141
|
-
- Fix: First step outputs findings, second step acts on them
|
|
142
|
-
|
|
143
|
-
**Example Fix:**
|
|
144
|
-
```
|
|
145
|
-
BEFORE:
|
|
146
|
-
- name: "update imports"
|
|
147
|
-
prompt: "Update all imports to use the new module structure"
|
|
148
|
-
|
|
149
|
-
AFTER:
|
|
150
|
-
- name: "analyze imports"
|
|
151
|
-
prompt: |
|
|
152
|
-
Search the codebase for all import statements:
|
|
153
|
-
1. Use grep to find all imports in src/
|
|
154
|
-
2. List files that import from old modules
|
|
155
|
-
3. Create a plan for updating each file
|
|
156
|
-
|
|
157
|
-
- name: "update imports"
|
|
158
|
-
prompt: |
|
|
159
|
-
Update imports in the following files based on the analysis:
|
|
160
|
-
- src/components/Button.tsx
|
|
161
|
-
- src/utils/helpers.ts
|
|
162
|
-
- src/services/api.ts
|
|
163
|
-
|
|
164
|
-
Change: import from './old/' to import from '@/new/'
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
## Improvement Principles
|
|
168
|
-
|
|
169
|
-
1. **Preserve the original goal** - The task succeeded, so the goal is correct
|
|
170
|
-
2. **Fix problems shown in highlights** - Only address issues that actually occurred
|
|
171
|
-
3. **Be specific** - Add numbered steps, file paths, and clear criteria
|
|
172
|
-
4. **Break down large steps** - If a step caused many retries or complex tool sequences
|
|
173
|
-
5. **Add prerequisite steps** - If self-healing had to install deps or create files
|
|
174
|
-
6. **Keep self_improve: true** - Allow recursive improvement in future runs
|
|
175
|
-
7. **Document changes** - Explain what you changed and why in the changelog
|
|
176
|
-
|
|
177
|
-
## Improvement Patterns
|
|
178
|
-
|
|
179
|
-
### Pattern: Split Vague Prompt into Specific Sub-Steps
|
|
180
|
-
|
|
181
|
-
When a judge fails or complex tools are needed, make the prompt more specific:
|
|
182
|
-
|
|
183
|
-
```yaml
|
|
184
|
-
# BEFORE: Vague, requires exploration
|
|
185
|
-
- name: "refactor authentication"
|
|
186
|
-
llm_as_judge: true
|
|
187
|
-
prompt: "Refactor the authentication code"
|
|
188
|
-
|
|
189
|
-
# AFTER: Specific numbered steps
|
|
190
|
-
- name: "refactor authentication"
|
|
191
|
-
llm_as_judge: true
|
|
192
|
-
prompt: |
|
|
193
|
-
Refactor authentication by:
|
|
194
|
-
1. Reading src/auth/login.ts and src/auth/session.ts
|
|
195
|
-
2. Extracting common logic into src/auth/helpers.ts
|
|
196
|
-
3. Updating imports in both files
|
|
197
|
-
4. Running tests to verify: npm test src/auth/
|
|
198
|
-
|
|
199
|
-
Success: Tests pass, no code duplication between login.ts and session.ts
|
|
200
|
-
```
|
|
201
|
-
|
|
202
|
-
### Pattern: Add Prerequisite Step
|
|
203
|
-
|
|
204
|
-
When self-healing installs deps or fixes environment:
|
|
205
|
-
|
|
206
|
-
```yaml
|
|
207
|
-
# BEFORE: Brittle, assumes deps installed
|
|
208
|
-
steps:
|
|
209
|
-
- name: "build"
|
|
210
|
-
type: script
|
|
211
|
-
self_healing: true
|
|
212
|
-
command: npm run build
|
|
213
|
-
|
|
214
|
-
# AFTER: Explicit dependency step
|
|
215
|
-
steps:
|
|
216
|
-
- name: "install dependencies"
|
|
217
|
-
type: script
|
|
218
|
-
command: npm install
|
|
219
|
-
|
|
220
|
-
- name: "build"
|
|
221
|
-
type: script
|
|
222
|
-
command: npm run build
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
### Pattern: Split Research from Implementation
|
|
226
|
-
|
|
227
|
-
When complex tool sequences suggest exploratory work:
|
|
228
|
-
|
|
229
|
-
```yaml
|
|
230
|
-
# BEFORE: Combined research + work
|
|
231
|
-
- name: "fix bugs"
|
|
232
|
-
prompt: "Find and fix all bugs in the payment flow"
|
|
233
|
-
|
|
234
|
-
# AFTER: Separated discovery and fixing
|
|
235
|
-
- name: "identify payment bugs"
|
|
236
|
-
prompt: |
|
|
237
|
-
Analyze the payment flow for bugs:
|
|
238
|
-
1. Read src/payment/*.ts files
|
|
239
|
-
2. Check for error handling gaps
|
|
240
|
-
3. List files that need fixes
|
|
241
|
-
|
|
242
|
-
- name: "fix payment bugs"
|
|
243
|
-
llm_as_judge: true
|
|
244
|
-
prompt: |
|
|
245
|
-
Fix bugs identified in previous step:
|
|
246
|
-
- Add error handling in src/payment/checkout.ts
|
|
247
|
-
- Validate input in src/payment/process.ts
|
|
248
|
-
- Update tests in src/payment/__tests__/
|
|
249
|
-
|
|
250
|
-
Success: All payment tests pass
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
### Pattern: Add Explicit Success Criteria
|
|
254
|
-
|
|
255
|
-
When judge fails due to unclear expectations:
|
|
256
|
-
|
|
257
|
-
```yaml
|
|
258
|
-
# BEFORE: No clear success criteria
|
|
259
|
-
- name: "improve test coverage"
|
|
260
|
-
llm_as_judge: true
|
|
261
|
-
prompt: "Improve test coverage for the API module"
|
|
262
|
-
|
|
263
|
-
# AFTER: Explicit threshold and verification
|
|
264
|
-
- name: "improve test coverage"
|
|
265
|
-
llm_as_judge: true
|
|
266
|
-
prompt: |
|
|
267
|
-
Improve test coverage for src/api/ to at least 80%:
|
|
268
|
-
1. Run: npm test -- --coverage src/api/
|
|
269
|
-
2. Identify files with <80% coverage
|
|
270
|
-
3. Write tests for uncovered code paths
|
|
271
|
-
4. Re-run coverage and verify ≥80%
|
|
272
|
-
|
|
273
|
-
Success criteria: Coverage report shows ≥80% for all files in src/api/
|
|
274
|
-
```
|
|
275
|
-
|
|
276
|
-
# Output Format
|
|
277
|
-
|
|
278
|
-
Respond with a single JSON object:
|
|
279
|
-
{
|
|
280
|
-
"improved_yaml": "<complete improved task YAML — no markdown fences, raw YAML only>",
|
|
281
|
-
"changelog": "<markdown: Problems Identified / Changes Applied / Expected Impact>"
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
Output only the JSON object — no prose and no markdown code fences before or after.
|
|
285
|
-
|
|
286
|
-
# Important Requirements
|
|
287
|
-
|
|
288
|
-
1. **Always preserve the original goal** - Do not change the goal statement
|
|
289
|
-
2. **Keep self_improve: true** - This enables recursive improvement
|
|
290
|
-
3. **Only fix problems shown in highlights** - Don't add unnecessary changes
|
|
291
|
-
4. **Be specific in improvements** - Vague fixes won't help
|
|
292
|
-
5. **Generate valid YAML** - The improved task must be parseable
|
|
293
|
-
6. **Explain all changes** - The changelog should justify each modification
|
|
294
|
-
|
|
295
|
-
# Example Response
|
|
296
|
-
|
|
297
|
-
```json
|
|
298
|
-
{
|
|
299
|
-
"improved_yaml": "goal: \"Convert CoffeeScript to TypeScript with validation\"\nself_improve: true\n\nsteps:\n - name: \"install dependencies\"\n type: script\n command: npm install\n\n - name: \"convert to TypeScript\"\n type: script\n command: coffee2ts convert app.coffee\n\n - name: \"validate conversion\"\n llm_as_judge: true\n prompt: |\n Validate the TypeScript conversion by:\n 1. Reading app.ts and checking all functions have type annotations\n 2. Running: tsc --noEmit to check for type errors\n 3. Running: npm test to verify functionality\n\n Success criteria: No type errors, all tests pass",
|
|
300
|
-
"changelog": "## Problems Identified\n- Judge failure in \"validate conversion\": Instructions were too vague\n- Self-healing activation: npm dependencies were missing\n\n## Changes Applied\n\n### Step 1: install dependencies (NEW)\n- Before: Not present\n- After: Added explicit npm install step\n- Rationale: Self-healing had to install deps, do it upfront\n\n### Step 3: validate conversion (MODIFIED)\n- Before: \"Validate the results\"\n- After: Specific 3-step validation with success criteria\n- Rationale: Judge failed because unclear what to validate and how\n\n## Expected Impact\n- Judge retries: 1 → 0 (clearer validation steps)\n- Self-healing activations: 1 → 0 (deps installed first)"
|
|
301
|
-
}
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
Now analyze the highlights and generate the improved task YAML with detailed changelog.
|