executant 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/dist/index.js +444 -199
- package/dist/prompts/plan-decompose.txt +40 -0
- package/dist/prompts/plan-judge.txt +3 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -87,6 +87,24 @@ steps:
|
|
|
87
87
|
command: npx eslint src/{{item}}
|
|
88
88
|
```
|
|
89
89
|
|
|
90
|
+
Use `steps:` inside a `forEach` or `repeat` to run **multiple child steps per iteration**:
|
|
91
|
+
|
|
92
|
+
```yaml
|
|
93
|
+
steps:
|
|
94
|
+
- name: verify each package
|
|
95
|
+
forEach: [packages/api, packages/web, packages/shared]
|
|
96
|
+
steps:
|
|
97
|
+
- name: lint {{item}}
|
|
98
|
+
type: script
|
|
99
|
+
command: npm run lint --workspace={{item}}
|
|
100
|
+
- name: test {{item}}
|
|
101
|
+
type: script
|
|
102
|
+
command: npm test --workspace={{item}}
|
|
103
|
+
- name: build {{item}}
|
|
104
|
+
type: script
|
|
105
|
+
command: npm run build --workspace={{item}}
|
|
106
|
+
```
|
|
107
|
+
|
|
90
108
|
Use `repeat: N` as shorthand when there is no meaningful list — just a count. `{{item}}` is the 1-based iteration number:
|
|
91
109
|
|
|
92
110
|
```yaml
|
|
@@ -110,6 +128,7 @@ steps:
|
|
|
110
128
|
| `hello-world.yaml` | Simple prompt steps |
|
|
111
129
|
| `mixed-workflow.yaml` | Script + prompt steps together |
|
|
112
130
|
| `foreach-demo.yaml` | Inline lists and shell command iteration |
|
|
131
|
+
| `nested-steps-demo.yaml` | Multiple child steps per forEach / repeat iteration |
|
|
113
132
|
| `vars-demo.yaml` | Variable substitution |
|
|
114
133
|
| `judge-demo.yaml` | LLM-as-judge retry loop |
|
|
115
134
|
| `logging-demo.yaml` | Log steps, self-healing, judge |
|
package/dist/index.js
CHANGED
|
@@ -113,22 +113,25 @@ function timestamp() {
|
|
|
113
113
|
|
|
114
114
|
// src/load-workflow.ts
|
|
115
115
|
import { z } from "zod";
|
|
116
|
-
var RawStepSchema = z.
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
116
|
+
var RawStepSchema = z.lazy(
|
|
117
|
+
() => z.object({
|
|
118
|
+
name: z.string(),
|
|
119
|
+
type: z.enum(["prompt", "script", "log", "command"]).optional(),
|
|
120
|
+
prompt: z.string().optional(),
|
|
121
|
+
command: z.string().optional(),
|
|
122
|
+
message: z.string().optional(),
|
|
123
|
+
continue_on_error: z.boolean().optional(),
|
|
124
|
+
self_healing: z.boolean().optional(),
|
|
125
|
+
max_healing_attempts: z.number().int().positive().optional(),
|
|
126
|
+
output: z.string().optional(),
|
|
127
|
+
llm_as_judge: z.boolean().optional(),
|
|
128
|
+
allowed_tools: z.array(z.string()).optional(),
|
|
129
|
+
forEach: z.union([z.array(z.string()), z.string()]).optional(),
|
|
130
|
+
repeat: z.number().int().positive().optional(),
|
|
131
|
+
context: z.array(z.string()).optional(),
|
|
132
|
+
steps: z.array(RawStepSchema).min(1).optional()
|
|
133
|
+
})
|
|
134
|
+
);
|
|
132
135
|
var RawWorkflowSchema = z.object({
|
|
133
136
|
goal: z.string(),
|
|
134
137
|
steps: z.array(RawStepSchema),
|
|
@@ -140,7 +143,9 @@ function loadWorkflow(filePath2) {
|
|
|
140
143
|
try {
|
|
141
144
|
raw = readFileSync2(filePath2, "utf8");
|
|
142
145
|
} catch (err) {
|
|
143
|
-
throw new Error(
|
|
146
|
+
throw new Error(
|
|
147
|
+
`Cannot read workflow file "${filePath2}": ${getErrorMessage(err)}`
|
|
148
|
+
);
|
|
144
149
|
}
|
|
145
150
|
let doc;
|
|
146
151
|
try {
|
|
@@ -164,16 +169,30 @@ function convertStep(step, vars) {
|
|
|
164
169
|
throw new Error(`Step "${name}" cannot have both repeat and forEach`);
|
|
165
170
|
}
|
|
166
171
|
if (step.repeat !== void 0 || step.forEach !== void 0) {
|
|
172
|
+
if (step.steps && (step.command || step.prompt || step.message)) {
|
|
173
|
+
throw new Error(
|
|
174
|
+
`Step "${name}" cannot have both steps and command/prompt/message`
|
|
175
|
+
);
|
|
176
|
+
}
|
|
167
177
|
const forEachValue = step.repeat !== void 0 ? Array.from({ length: step.repeat }, (_, i) => String(i + 1)) : step.forEach;
|
|
168
|
-
const
|
|
178
|
+
const stepWithoutLoop = {
|
|
179
|
+
...step,
|
|
180
|
+
repeat: void 0,
|
|
181
|
+
forEach: void 0,
|
|
182
|
+
steps: void 0
|
|
183
|
+
};
|
|
184
|
+
const inner = step.steps ? step.steps.map((s) => convertStep(s, vars)) : [convertInnerStep(stepWithoutLoop, vars, name, continueOnError)];
|
|
169
185
|
return {
|
|
170
186
|
type: "forEach",
|
|
171
187
|
name,
|
|
172
188
|
continueOnError,
|
|
173
189
|
forEach: forEachValue,
|
|
174
|
-
inner
|
|
190
|
+
inner
|
|
175
191
|
};
|
|
176
192
|
}
|
|
193
|
+
if (step.steps) {
|
|
194
|
+
throw new Error(`Step "${name}" has steps but no forEach or repeat`);
|
|
195
|
+
}
|
|
177
196
|
return convertInnerStep(step, vars, name, continueOnError);
|
|
178
197
|
}
|
|
179
198
|
function convertInnerStep(step, vars, name, continueOnError) {
|
|
@@ -181,7 +200,8 @@ function convertInnerStep(step, vars, name, continueOnError) {
|
|
|
181
200
|
switch (effectiveType) {
|
|
182
201
|
case "script":
|
|
183
202
|
case "command": {
|
|
184
|
-
if (!step.command)
|
|
203
|
+
if (!step.command)
|
|
204
|
+
throw new Error(`Step "${name}" has type script but no command`);
|
|
185
205
|
return {
|
|
186
206
|
type: "command",
|
|
187
207
|
name,
|
|
@@ -189,7 +209,9 @@ function convertInnerStep(step, vars, name, continueOnError) {
|
|
|
189
209
|
continueOnError,
|
|
190
210
|
selfHealing: step.self_healing === true,
|
|
191
211
|
maxHealingAttempts: step.max_healing_attempts,
|
|
192
|
-
...step.output && {
|
|
212
|
+
...step.output && {
|
|
213
|
+
output: resolveOutputFile(step.output, vars, name)
|
|
214
|
+
}
|
|
193
215
|
};
|
|
194
216
|
}
|
|
195
217
|
case "log": {
|
|
@@ -202,7 +224,8 @@ function convertInnerStep(step, vars, name, continueOnError) {
|
|
|
202
224
|
};
|
|
203
225
|
}
|
|
204
226
|
case "prompt": {
|
|
205
|
-
if (!step.prompt)
|
|
227
|
+
if (!step.prompt)
|
|
228
|
+
throw new Error(`Step "${name}" has type prompt but no prompt field`);
|
|
206
229
|
const contextFiles = resolveContextFiles(step.context, vars, name);
|
|
207
230
|
return {
|
|
208
231
|
type: "claude",
|
|
@@ -225,13 +248,17 @@ function inferType(step) {
|
|
|
225
248
|
}
|
|
226
249
|
function resolveVarPath(varName, vars, stepName, label) {
|
|
227
250
|
if (!(varName in vars)) {
|
|
228
|
-
throw new Error(
|
|
251
|
+
throw new Error(
|
|
252
|
+
`Step "${stepName}" ${label} references undefined var "${varName}" \u2014 add it to the vars section`
|
|
253
|
+
);
|
|
229
254
|
}
|
|
230
255
|
return vars[varName];
|
|
231
256
|
}
|
|
232
257
|
function resolveContextFiles(contextVarNames, vars, stepName) {
|
|
233
258
|
if (!contextVarNames || contextVarNames.length === 0) return [];
|
|
234
|
-
return contextVarNames.map(
|
|
259
|
+
return contextVarNames.map(
|
|
260
|
+
(varName) => resolveVarPath(varName, vars, stepName, "context")
|
|
261
|
+
);
|
|
235
262
|
}
|
|
236
263
|
function resolveOutputFile(varName, vars, stepName) {
|
|
237
264
|
return resolveVarPath(varName, vars, stepName, "output");
|
|
@@ -507,7 +534,7 @@ async function* runWorkflow(workflow2, options2 = {}) {
|
|
|
507
534
|
yield { type: "step:start", index: i, name: task.name };
|
|
508
535
|
try {
|
|
509
536
|
for await (const event of runStep(task)) {
|
|
510
|
-
if (event.type === "step:iteration" || event.type === "output:text" || event.type === "output:tool") {
|
|
537
|
+
if (event.type === "step:iteration" || event.type === "step:inner" || event.type === "output:text" || event.type === "output:tool") {
|
|
511
538
|
yield { ...event, index: i };
|
|
512
539
|
} else {
|
|
513
540
|
yield event;
|
|
@@ -568,20 +595,55 @@ async function* runLog(task) {
|
|
|
568
595
|
async function* runForEach(task) {
|
|
569
596
|
const items = await resolveItems(task.forEach);
|
|
570
597
|
const total = items.length;
|
|
598
|
+
const innerTotal = task.inner.length;
|
|
571
599
|
for (const [i, item] of items.entries()) {
|
|
572
600
|
yield { type: "step:iteration", index: -1, item, iteration: i + 1, total };
|
|
573
|
-
const
|
|
574
|
-
|
|
601
|
+
for (const [j, innerTask] of task.inner.entries()) {
|
|
602
|
+
const substituted = substituteItem(innerTask, item);
|
|
603
|
+
if (innerTotal > 1) {
|
|
604
|
+
yield {
|
|
605
|
+
type: "step:inner",
|
|
606
|
+
index: -1,
|
|
607
|
+
iteration: i + 1,
|
|
608
|
+
innerIndex: j,
|
|
609
|
+
innerTotal,
|
|
610
|
+
name: substituted.name
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
try {
|
|
614
|
+
yield* runStep(substituted);
|
|
615
|
+
} catch (err) {
|
|
616
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
617
|
+
if (!substituted.continueOnError) {
|
|
618
|
+
yield {
|
|
619
|
+
type: "log",
|
|
620
|
+
level: "warn",
|
|
621
|
+
text: `[forEach] Step "${substituted.name}" failed \u2014 aborting remaining children and iterations`
|
|
622
|
+
};
|
|
623
|
+
throw error;
|
|
624
|
+
}
|
|
625
|
+
yield {
|
|
626
|
+
type: "log",
|
|
627
|
+
level: "warn",
|
|
628
|
+
text: `[forEach] Step "${substituted.name}" failed (continuing): ${error.message}`
|
|
629
|
+
};
|
|
630
|
+
}
|
|
631
|
+
}
|
|
575
632
|
}
|
|
576
633
|
}
|
|
577
634
|
async function resolveItems(forEach) {
|
|
578
635
|
if (Array.isArray(forEach)) return forEach.filter(Boolean);
|
|
579
636
|
try {
|
|
580
|
-
const { stdout } = await execPromise(forEach, {
|
|
637
|
+
const { stdout } = await execPromise(forEach, {
|
|
638
|
+
shell: "/bin/sh",
|
|
639
|
+
timeout: 3e4
|
|
640
|
+
});
|
|
581
641
|
return stdout.split("\n").filter((l) => l.trim().length > 0);
|
|
582
642
|
} catch (err) {
|
|
583
|
-
throw new Error(
|
|
584
|
-
|
|
643
|
+
throw new Error(
|
|
644
|
+
`forEach shell command failed: ${getErrorMessage(err)}
|
|
645
|
+
Command: ${forEach}`
|
|
646
|
+
);
|
|
585
647
|
}
|
|
586
648
|
}
|
|
587
649
|
function substituteItem(task, item) {
|
|
@@ -590,12 +652,24 @@ function substituteItem(task, item) {
|
|
|
590
652
|
case "command":
|
|
591
653
|
return { ...task, name: sub(task.name), command: sub(task.command) };
|
|
592
654
|
case "claude":
|
|
593
|
-
return {
|
|
655
|
+
return {
|
|
656
|
+
...task,
|
|
657
|
+
name: sub(task.name),
|
|
658
|
+
prompt: sub(task.prompt),
|
|
659
|
+
allowedTools: task.allowedTools?.map(sub)
|
|
660
|
+
};
|
|
594
661
|
case "log":
|
|
595
662
|
return { ...task, name: sub(task.name), message: sub(task.message) };
|
|
663
|
+
case "forEach":
|
|
664
|
+
return {
|
|
665
|
+
...task,
|
|
666
|
+
name: sub(task.name),
|
|
667
|
+
forEach: Array.isArray(task.forEach) ? task.forEach : sub(task.forEach),
|
|
668
|
+
inner: task.inner.map((t) => substituteItem(t, item))
|
|
669
|
+
};
|
|
596
670
|
default: {
|
|
597
671
|
const _ = task;
|
|
598
|
-
throw new Error(`Unknown
|
|
672
|
+
throw new Error(`Unknown task type: ${JSON.stringify(_)}`);
|
|
599
673
|
}
|
|
600
674
|
}
|
|
601
675
|
}
|
|
@@ -607,7 +681,11 @@ async function* runCommandWithHealing(task) {
|
|
|
607
681
|
try {
|
|
608
682
|
yield* collectLines(runCommand(task), lines);
|
|
609
683
|
if (attempt > 0) {
|
|
610
|
-
yield {
|
|
684
|
+
yield {
|
|
685
|
+
type: "log",
|
|
686
|
+
level: "info",
|
|
687
|
+
text: `[self-healing] Command passed after ${attempt + 1} attempts`
|
|
688
|
+
};
|
|
611
689
|
}
|
|
612
690
|
return;
|
|
613
691
|
} catch (err) {
|
|
@@ -615,7 +693,11 @@ async function* runCommandWithHealing(task) {
|
|
|
615
693
|
const output = lines.join("\n");
|
|
616
694
|
const remaining = maxAttempts - attempt - 1;
|
|
617
695
|
if (remaining === 0) {
|
|
618
|
-
yield {
|
|
696
|
+
yield {
|
|
697
|
+
type: "log",
|
|
698
|
+
level: "warn",
|
|
699
|
+
text: `[self-healing] Exhausted ${maxAttempts} attempts`
|
|
700
|
+
};
|
|
619
701
|
throw new Error(
|
|
620
702
|
`Step "${task.name}" failed after ${maxAttempts} self-healing attempts (last exit code: ${exitCode})`
|
|
621
703
|
);
|
|
@@ -626,7 +708,12 @@ async function* runCommandWithHealing(task) {
|
|
|
626
708
|
text: `[self-healing] Attempt ${attempt + 1}/${maxAttempts} failed (exit ${exitCode}), invoking Claude to fix\u2026`
|
|
627
709
|
};
|
|
628
710
|
const historyBlock = buildAttemptHistory(attemptHistory);
|
|
629
|
-
const healPrompt = buildHealingPrompt(
|
|
711
|
+
const healPrompt = buildHealingPrompt(
|
|
712
|
+
task.command,
|
|
713
|
+
exitCode,
|
|
714
|
+
output,
|
|
715
|
+
historyBlock
|
|
716
|
+
);
|
|
630
717
|
const healTask = {
|
|
631
718
|
type: "claude",
|
|
632
719
|
name: `${task.name}:heal-${attempt + 1}`,
|
|
@@ -637,7 +724,8 @@ async function* runCommandWithHealing(task) {
|
|
|
637
724
|
const claudeLines = [];
|
|
638
725
|
for await (const event of runClaude(healTask)) {
|
|
639
726
|
if (event.type === "output:text") claudeLines.push(event.text);
|
|
640
|
-
else if (event.type === "output:tool")
|
|
727
|
+
else if (event.type === "output:tool")
|
|
728
|
+
toolCalls.push(formatToolCall(event.tool, event.input));
|
|
641
729
|
yield event;
|
|
642
730
|
}
|
|
643
731
|
attemptHistory.push({
|
|
@@ -645,7 +733,11 @@ async function* runCommandWithHealing(task) {
|
|
|
645
733
|
exitCode,
|
|
646
734
|
cmdOutput: output
|
|
647
735
|
});
|
|
648
|
-
yield {
|
|
736
|
+
yield {
|
|
737
|
+
type: "log",
|
|
738
|
+
level: "info",
|
|
739
|
+
text: `[self-healing] Re-running command (${remaining} attempt(s) left)\u2026`
|
|
740
|
+
};
|
|
649
741
|
}
|
|
650
742
|
}
|
|
651
743
|
}
|
|
@@ -657,21 +749,37 @@ async function* runClaudeWithJudge(task) {
|
|
|
657
749
|
${fillTemplate(JUDGE_RETRY_CONTEXT, { FEEDBACK: judgeContext })}`;
|
|
658
750
|
const lines = [];
|
|
659
751
|
yield* collectLines(runClaude({ ...task, prompt }), lines);
|
|
660
|
-
yield {
|
|
661
|
-
|
|
752
|
+
yield {
|
|
753
|
+
type: "log",
|
|
754
|
+
level: "info",
|
|
755
|
+
text: `[judge] Evaluating "${task.name}"\u2026`
|
|
756
|
+
};
|
|
757
|
+
const verdict = await evaluateWithJudge(
|
|
758
|
+
task.name,
|
|
759
|
+
task.prompt,
|
|
760
|
+
lines.join("\n")
|
|
761
|
+
);
|
|
662
762
|
if (verdict.pass) {
|
|
663
763
|
yield { type: "log", level: "info", text: "[judge] PASS" };
|
|
664
764
|
return;
|
|
665
765
|
}
|
|
666
766
|
judgeContext = verdict.feedback;
|
|
667
|
-
yield {
|
|
767
|
+
yield {
|
|
768
|
+
type: "log",
|
|
769
|
+
level: "warn",
|
|
770
|
+
text: `[judge] FAIL \u2014 ${verdict.feedback}`
|
|
771
|
+
};
|
|
668
772
|
const remaining = MAX_JUDGE_RETRIES - attempt - 1;
|
|
669
773
|
if (remaining === 0) {
|
|
670
774
|
throw new Error(
|
|
671
775
|
`Step "${task.name}" failed judge evaluation after ${MAX_JUDGE_RETRIES} attempts`
|
|
672
776
|
);
|
|
673
777
|
}
|
|
674
|
-
yield {
|
|
778
|
+
yield {
|
|
779
|
+
type: "log",
|
|
780
|
+
level: "info",
|
|
781
|
+
text: `[judge] Retrying (${remaining} attempt(s) left)\u2026`
|
|
782
|
+
};
|
|
675
783
|
}
|
|
676
784
|
}
|
|
677
785
|
async function evaluateWithJudge(stepName, stepInstructions, output) {
|
|
@@ -698,7 +806,9 @@ function readContextFile(filePath2) {
|
|
|
698
806
|
try {
|
|
699
807
|
return readFileSync3(filePath2, "utf8");
|
|
700
808
|
} catch (err) {
|
|
701
|
-
throw new Error(
|
|
809
|
+
throw new Error(
|
|
810
|
+
`Context file "${filePath2}" could not be read: ${getErrorMessage(err)}`
|
|
811
|
+
);
|
|
702
812
|
}
|
|
703
813
|
}
|
|
704
814
|
function expandContext(task) {
|
|
@@ -712,13 +822,23 @@ ${readContextFile(fp)}
|
|
|
712
822
|
${task.prompt}` };
|
|
713
823
|
}
|
|
714
824
|
function buildHealingPrompt(command, exitCode, output, attemptHistory) {
|
|
715
|
-
return fillTemplate(SELF_HEALING_PROMPT, {
|
|
825
|
+
return fillTemplate(SELF_HEALING_PROMPT, {
|
|
826
|
+
COMMAND: command,
|
|
827
|
+
EXIT_CODE: String(exitCode),
|
|
828
|
+
OUTPUT: output,
|
|
829
|
+
ATTEMPT_HISTORY: attemptHistory
|
|
830
|
+
});
|
|
716
831
|
}
|
|
717
832
|
function buildJudgePrompt(stepName, instructions, output) {
|
|
718
|
-
return fillTemplate(JUDGE_EVALUATION_PROMPT, {
|
|
833
|
+
return fillTemplate(JUDGE_EVALUATION_PROMPT, {
|
|
834
|
+
STEP_NAME: stepName,
|
|
835
|
+
STEP_INSTRUCTIONS: instructions,
|
|
836
|
+
OUTPUT: output
|
|
837
|
+
});
|
|
719
838
|
}
|
|
720
839
|
function formatToolCall(tool, input) {
|
|
721
|
-
if (tool === "Edit" || tool === "Write")
|
|
840
|
+
if (tool === "Edit" || tool === "Write")
|
|
841
|
+
return `${tool}(${String(input["file_path"] ?? "")})`;
|
|
722
842
|
if (tool === "Bash") return `Bash(${String(input["command"] ?? "")})`;
|
|
723
843
|
return tool;
|
|
724
844
|
}
|
|
@@ -835,7 +955,20 @@ function reducer(state, event) {
|
|
|
835
955
|
};
|
|
836
956
|
case "step:iteration":
|
|
837
957
|
return updateTask(state, event.index, {
|
|
838
|
-
iteration: {
|
|
958
|
+
iteration: {
|
|
959
|
+
current: event.iteration,
|
|
960
|
+
total: event.total,
|
|
961
|
+
item: event.item
|
|
962
|
+
},
|
|
963
|
+
inner: void 0
|
|
964
|
+
});
|
|
965
|
+
case "step:inner":
|
|
966
|
+
return updateTask(state, event.index, {
|
|
967
|
+
inner: {
|
|
968
|
+
index: event.innerIndex,
|
|
969
|
+
total: event.innerTotal,
|
|
970
|
+
name: event.name
|
|
971
|
+
}
|
|
839
972
|
});
|
|
840
973
|
case "output:text": {
|
|
841
974
|
const idx = event.index;
|
|
@@ -848,7 +981,10 @@ function reducer(state, event) {
|
|
|
848
981
|
const formatted = formatToolCall2(event.tool, event.input);
|
|
849
982
|
const next = formatted ? appendLine(state, idx, formatted) : state;
|
|
850
983
|
if (event.tool === "Write" && typeof event.input["file_path"] === "string") {
|
|
851
|
-
return {
|
|
984
|
+
return {
|
|
985
|
+
...next,
|
|
986
|
+
writtenFiles: [...next.writtenFiles, event.input["file_path"]]
|
|
987
|
+
};
|
|
852
988
|
}
|
|
853
989
|
return next;
|
|
854
990
|
}
|
|
@@ -871,7 +1007,9 @@ function reducer(state, event) {
|
|
|
871
1007
|
}
|
|
872
1008
|
}
|
|
873
1009
|
function updateTask(state, index, patch) {
|
|
874
|
-
const tasks = state.tasks.map(
|
|
1010
|
+
const tasks = state.tasks.map(
|
|
1011
|
+
(t, i) => i === index ? { ...t, ...patch } : t
|
|
1012
|
+
);
|
|
875
1013
|
return { ...state, tasks };
|
|
876
1014
|
}
|
|
877
1015
|
function appendLine(state, index, line) {
|
|
@@ -936,7 +1074,8 @@ function TaskRow({ taskState, isActive, index, tick }) {
|
|
|
936
1074
|
const color = statusColor(status, isActive);
|
|
937
1075
|
const elapsed = formatTaskElapsed(startTime, endTime, status);
|
|
938
1076
|
const iterInfo = taskState.iteration ? ` (${taskState.iteration.current}/${taskState.iteration.total}) ${taskState.iteration.item}` : "";
|
|
939
|
-
const
|
|
1077
|
+
const innerInfo = taskState.inner ? ` \u2014 ${taskState.inner.name} [${taskState.inner.index + 1}/${taskState.inner.total}]` : "";
|
|
1078
|
+
const label = `${index + 1}. ${task.name}${iterInfo}${innerInfo}`;
|
|
940
1079
|
return /* @__PURE__ */ jsxs(Box, { children: [
|
|
941
1080
|
/* @__PURE__ */ jsxs(Text, { color, children: [
|
|
942
1081
|
icon,
|
|
@@ -1146,25 +1285,9 @@ var PLAN_RETRY_SCHEMA_ERROR = loadPrompt("plan-retry-schema-error");
|
|
|
1146
1285
|
var PLAN_RETRY_JUDGE = loadPrompt("plan-retry-judge");
|
|
1147
1286
|
var MAX_PLAN_RETRIES = 3;
|
|
1148
1287
|
var TOTAL_PLAN_STAGES = 3;
|
|
1149
|
-
var StepSchema = z3.object({
|
|
1150
|
-
name: z3.string(),
|
|
1151
|
-
type: z3.enum(["prompt", "script", "log"]).optional(),
|
|
1152
|
-
prompt: z3.string().optional(),
|
|
1153
|
-
command: z3.string().optional(),
|
|
1154
|
-
message: z3.string().optional(),
|
|
1155
|
-
continue_on_error: z3.boolean().optional(),
|
|
1156
|
-
self_healing: z3.boolean().optional(),
|
|
1157
|
-
max_healing_attempts: z3.number().int().positive().optional(),
|
|
1158
|
-
output: z3.string().optional(),
|
|
1159
|
-
llm_as_judge: z3.boolean().optional(),
|
|
1160
|
-
allowed_tools: z3.array(z3.string()).optional(),
|
|
1161
|
-
forEach: z3.union([z3.array(z3.string()), z3.string()]).optional(),
|
|
1162
|
-
repeat: z3.number().int().positive().optional(),
|
|
1163
|
-
context: z3.array(z3.string()).optional()
|
|
1164
|
-
});
|
|
1165
1288
|
var WorkflowSchema = z3.object({
|
|
1166
1289
|
goal: z3.string(),
|
|
1167
|
-
steps: z3.array(
|
|
1290
|
+
steps: z3.array(RawStepSchema).min(1),
|
|
1168
1291
|
vars: z3.record(z3.string()).optional(),
|
|
1169
1292
|
self_improve: z3.boolean().optional()
|
|
1170
1293
|
});
|
|
@@ -1184,7 +1307,10 @@ function walkUp(startDir, check) {
|
|
|
1184
1307
|
}
|
|
1185
1308
|
}
|
|
1186
1309
|
function findGitRoot(startDir) {
|
|
1187
|
-
return walkUp(
|
|
1310
|
+
return walkUp(
|
|
1311
|
+
startDir,
|
|
1312
|
+
(dir) => existsSync(join2(dir, ".git")) ? dir : null
|
|
1313
|
+
);
|
|
1188
1314
|
}
|
|
1189
1315
|
function findProjectRoot(startDir) {
|
|
1190
1316
|
return walkUp(startDir, (dir) => {
|
|
@@ -1335,34 +1461,27 @@ function normalizeWorkflow(workflow2) {
|
|
|
1335
1461
|
return { ...workflow2, steps: collapseSequentialSteps(steps) };
|
|
1336
1462
|
}
|
|
1337
1463
|
function collapseSequentialSteps(steps) {
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
continue;
|
|
1360
|
-
}
|
|
1361
|
-
const { name: _name, ...rest } = step;
|
|
1362
|
-
result.push({ ...rest, name: `${prefix}_{{item}}`, repeat: n });
|
|
1363
|
-
i += n;
|
|
1364
|
-
}
|
|
1365
|
-
return result;
|
|
1464
|
+
return steps.reduce(
|
|
1465
|
+
({ out, skip }, step, i, arr) => {
|
|
1466
|
+
if (skip > 0) return { out, skip: skip - 1 };
|
|
1467
|
+
if (step.forEach !== void 0 || step.repeat !== void 0 || step.steps !== void 0) {
|
|
1468
|
+
return { out: [...out, step], skip: 0 };
|
|
1469
|
+
}
|
|
1470
|
+
const m = step.name.match(/^(.+?)_1$/);
|
|
1471
|
+
if (!m) return { out: [...out, step], skip: 0 };
|
|
1472
|
+
const prefix = m[1];
|
|
1473
|
+
let n = 1;
|
|
1474
|
+
while (i + n < arr.length && arr[i + n].name === `${prefix}_${n + 1}`)
|
|
1475
|
+
n++;
|
|
1476
|
+
if (n < 2) return { out: [...out, step], skip: 0 };
|
|
1477
|
+
const { name: _name, ...rest } = step;
|
|
1478
|
+
return {
|
|
1479
|
+
out: [...out, { ...rest, name: `${prefix}_{{item}}`, repeat: n }],
|
|
1480
|
+
skip: n - 1
|
|
1481
|
+
};
|
|
1482
|
+
},
|
|
1483
|
+
{ out: [], skip: 0 }
|
|
1484
|
+
).out;
|
|
1366
1485
|
}
|
|
1367
1486
|
async function* streamPlan(args) {
|
|
1368
1487
|
const { description, taskFile } = args;
|
|
@@ -1373,14 +1492,24 @@ async function* streamPlan(args) {
|
|
|
1373
1492
|
yield { type: "plan:stages", names: ["Decompose to Steps", "Validate"] };
|
|
1374
1493
|
researchDoc = "No codebase research performed \u2014 the task is self-contained. Work directly from the user's original goal.";
|
|
1375
1494
|
} else {
|
|
1376
|
-
yield {
|
|
1377
|
-
|
|
1495
|
+
yield {
|
|
1496
|
+
type: "plan:stages",
|
|
1497
|
+
names: ["Research & Planning", "Decompose to Steps", "Validate"]
|
|
1498
|
+
};
|
|
1499
|
+
yield {
|
|
1500
|
+
type: "plan:stage",
|
|
1501
|
+
stage: 1,
|
|
1502
|
+
total: TOTAL_PLAN_STAGES,
|
|
1503
|
+
name: "Research & Planning"
|
|
1504
|
+
};
|
|
1378
1505
|
const researchLines = [];
|
|
1379
1506
|
try {
|
|
1380
1507
|
const researchTask = {
|
|
1381
1508
|
type: "claude",
|
|
1382
1509
|
name: "plan:research",
|
|
1383
|
-
prompt: fillTemplate(PLAN_RESEARCH_PROMPT, {
|
|
1510
|
+
prompt: fillTemplate(PLAN_RESEARCH_PROMPT, {
|
|
1511
|
+
DESCRIPTION: description
|
|
1512
|
+
}),
|
|
1384
1513
|
allowedTools: ["Read", "Glob", "Grep"],
|
|
1385
1514
|
permissionMode: "bypassPermissions",
|
|
1386
1515
|
model: "opus"
|
|
@@ -1394,17 +1523,28 @@ async function* streamPlan(args) {
|
|
|
1394
1523
|
}
|
|
1395
1524
|
}
|
|
1396
1525
|
} catch (err) {
|
|
1397
|
-
yield {
|
|
1526
|
+
yield {
|
|
1527
|
+
type: "plan:error",
|
|
1528
|
+
message: `Research pass failed: ${getErrorMessage(err)}`
|
|
1529
|
+
};
|
|
1398
1530
|
return;
|
|
1399
1531
|
}
|
|
1400
1532
|
researchDoc = researchLines.join("\n");
|
|
1401
1533
|
if (!researchDoc.trim()) {
|
|
1402
|
-
yield {
|
|
1534
|
+
yield {
|
|
1535
|
+
type: "plan:error",
|
|
1536
|
+
message: "Research pass produced no output \u2014 cannot decompose"
|
|
1537
|
+
};
|
|
1403
1538
|
return;
|
|
1404
1539
|
}
|
|
1405
1540
|
}
|
|
1406
1541
|
const stages = skipResearch ? { decompose: 1, validate: 2, total: 2 } : { decompose: 2, validate: 3, total: TOTAL_PLAN_STAGES };
|
|
1407
|
-
yield {
|
|
1542
|
+
yield {
|
|
1543
|
+
type: "plan:stage",
|
|
1544
|
+
stage: stages.decompose,
|
|
1545
|
+
total: stages.total,
|
|
1546
|
+
name: "Decompose to Steps"
|
|
1547
|
+
};
|
|
1408
1548
|
let retryPrefix = "";
|
|
1409
1549
|
for (let attempt = 0; attempt < MAX_PLAN_RETRIES; attempt++) {
|
|
1410
1550
|
if (attempt > 0) {
|
|
@@ -1414,9 +1554,17 @@ async function* streamPlan(args) {
|
|
|
1414
1554
|
maxAttempts: MAX_PLAN_RETRIES,
|
|
1415
1555
|
reason: retryPrefix.replace(/\n/g, " ")
|
|
1416
1556
|
};
|
|
1417
|
-
yield {
|
|
1557
|
+
yield {
|
|
1558
|
+
type: "plan:stage",
|
|
1559
|
+
stage: stages.decompose,
|
|
1560
|
+
total: stages.total,
|
|
1561
|
+
name: "Decompose to Steps"
|
|
1562
|
+
};
|
|
1418
1563
|
}
|
|
1419
|
-
const basePrompt = fillTemplate(PLAN_DECOMPOSE_PROMPT, {
|
|
1564
|
+
const basePrompt = fillTemplate(PLAN_DECOMPOSE_PROMPT, {
|
|
1565
|
+
DESCRIPTION: description,
|
|
1566
|
+
RESEARCH_DOC: researchDoc
|
|
1567
|
+
});
|
|
1420
1568
|
const decomposeTask = {
|
|
1421
1569
|
type: "claude",
|
|
1422
1570
|
name: "plan:decompose",
|
|
@@ -1448,7 +1596,10 @@ ${basePrompt}` : basePrompt,
|
|
|
1448
1596
|
yield { type: "plan:error", message: msg };
|
|
1449
1597
|
return;
|
|
1450
1598
|
}
|
|
1451
|
-
retryPrefix = fillTemplate(PLAN_RETRY_PARSE_ERROR, {
|
|
1599
|
+
retryPrefix = fillTemplate(PLAN_RETRY_PARSE_ERROR, {
|
|
1600
|
+
ERROR: msg,
|
|
1601
|
+
EXCERPT: decomposeTextLines.join("\n")
|
|
1602
|
+
});
|
|
1452
1603
|
continue;
|
|
1453
1604
|
}
|
|
1454
1605
|
if (structuredOutput === void 0) {
|
|
@@ -1464,24 +1615,40 @@ ${basePrompt}` : basePrompt,
|
|
|
1464
1615
|
if (!zodResult.success) {
|
|
1465
1616
|
const issues = formatZodIssues(zodResult.error.issues);
|
|
1466
1617
|
if (attempt === MAX_PLAN_RETRIES - 1) {
|
|
1467
|
-
yield {
|
|
1468
|
-
|
|
1618
|
+
yield {
|
|
1619
|
+
type: "plan:error",
|
|
1620
|
+
message: `Plan did not match expected schema:
|
|
1621
|
+
${issues}`
|
|
1622
|
+
};
|
|
1469
1623
|
return;
|
|
1470
1624
|
}
|
|
1471
1625
|
retryPrefix = fillTemplate(PLAN_RETRY_SCHEMA_ERROR, { ISSUES: issues });
|
|
1472
1626
|
continue;
|
|
1473
1627
|
}
|
|
1474
|
-
yield {
|
|
1628
|
+
yield {
|
|
1629
|
+
type: "plan:stage",
|
|
1630
|
+
stage: stages.validate,
|
|
1631
|
+
total: stages.total,
|
|
1632
|
+
name: "Validate"
|
|
1633
|
+
};
|
|
1475
1634
|
const judgeResult = await runPass3Judge(description, zodResult.data);
|
|
1476
1635
|
if (judgeResult.skipped) {
|
|
1477
|
-
yield {
|
|
1636
|
+
yield {
|
|
1637
|
+
type: "plan:warn",
|
|
1638
|
+
message: "Judge skipped due to error \u2014 proceeding without validation"
|
|
1639
|
+
};
|
|
1478
1640
|
}
|
|
1479
1641
|
if (!judgeResult.pass && attempt < MAX_PLAN_RETRIES - 1) {
|
|
1480
|
-
retryPrefix = fillTemplate(PLAN_RETRY_JUDGE, {
|
|
1642
|
+
retryPrefix = fillTemplate(PLAN_RETRY_JUDGE, {
|
|
1643
|
+
FEEDBACK: judgeResult.feedback
|
|
1644
|
+
});
|
|
1481
1645
|
continue;
|
|
1482
1646
|
}
|
|
1483
1647
|
if (!judgeResult.pass) {
|
|
1484
|
-
yield {
|
|
1648
|
+
yield {
|
|
1649
|
+
type: "plan:warn",
|
|
1650
|
+
message: `Judge rejected plan but retries exhausted: ${judgeResult.feedback}`
|
|
1651
|
+
};
|
|
1485
1652
|
}
|
|
1486
1653
|
const { goal, vars, steps, ...rest } = normalizeWorkflow(zodResult.data);
|
|
1487
1654
|
const ordered = { goal, ...vars && { vars }, steps, ...rest };
|
|
@@ -1497,7 +1664,10 @@ ${issues}` };
|
|
|
1497
1664
|
yield { type: "plan:complete", taskFile, preview };
|
|
1498
1665
|
return;
|
|
1499
1666
|
}
|
|
1500
|
-
yield {
|
|
1667
|
+
yield {
|
|
1668
|
+
type: "plan:error",
|
|
1669
|
+
message: "Plan generation failed after maximum retries"
|
|
1670
|
+
};
|
|
1501
1671
|
}
|
|
1502
1672
|
|
|
1503
1673
|
// src/ui/PlanApp.tsx
|
|
@@ -1724,39 +1894,57 @@ function onWorkflowStart(ctx, s) {
|
|
|
1724
1894
|
mkdirSync3(ctx.logDir, { recursive: true });
|
|
1725
1895
|
mkdirSync3(ctx.highlightsDir, { recursive: true });
|
|
1726
1896
|
const logFile = join3(ctx.logDir, `${ctx.ts}_${ctx.slug}.log`);
|
|
1727
|
-
writeFileSync3(
|
|
1897
|
+
writeFileSync3(
|
|
1898
|
+
logFile,
|
|
1899
|
+
`# Execution Log
|
|
1728
1900
|
Task: ${ctx.slug}
|
|
1729
1901
|
Started: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
1730
1902
|
${"\u2501".repeat(51)}
|
|
1731
1903
|
|
|
1732
|
-
`
|
|
1904
|
+
`
|
|
1905
|
+
);
|
|
1733
1906
|
return { ...s, logFile };
|
|
1734
1907
|
}
|
|
1735
1908
|
function onStepStart(ctx, s, index, name) {
|
|
1736
|
-
const next = {
|
|
1737
|
-
|
|
1909
|
+
const next = {
|
|
1910
|
+
...INIT_STATE,
|
|
1911
|
+
logFile: s.logFile,
|
|
1912
|
+
stepIndex: index,
|
|
1913
|
+
stepName: name,
|
|
1914
|
+
stepStartMs: Date.now()
|
|
1915
|
+
};
|
|
1916
|
+
appendLog(
|
|
1917
|
+
next.logFile,
|
|
1918
|
+
`
|
|
1738
1919
|
${"\u2501".repeat(51)}
|
|
1739
1920
|
Step ${index + 1}: ${name}
|
|
1740
1921
|
Started: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
1741
1922
|
${"\u2501".repeat(51)}
|
|
1742
|
-
`
|
|
1923
|
+
`
|
|
1924
|
+
);
|
|
1743
1925
|
return next;
|
|
1744
1926
|
}
|
|
1745
1927
|
function finalizeComplexSequence(s) {
|
|
1746
1928
|
if (s.toolCount >= 3 && s.complexSequenceFile) {
|
|
1747
|
-
appendFileSync(
|
|
1929
|
+
appendFileSync(
|
|
1930
|
+
s.complexSequenceFile,
|
|
1931
|
+
`
|
|
1748
1932
|
---
|
|
1749
1933
|
|
|
1750
1934
|
*Total tools used: ${s.toolCount}*
|
|
1751
1935
|
|
|
1752
1936
|
*Captured by Executant Logger*
|
|
1753
|
-
`
|
|
1937
|
+
`
|
|
1938
|
+
);
|
|
1754
1939
|
}
|
|
1755
1940
|
}
|
|
1756
1941
|
function onStepComplete(s) {
|
|
1757
|
-
appendLog(
|
|
1942
|
+
appendLog(
|
|
1943
|
+
s.logFile,
|
|
1944
|
+
`
|
|
1758
1945
|
Step completed in ${((Date.now() - s.stepStartMs) / 1e3).toFixed(1)}s
|
|
1759
|
-
`
|
|
1946
|
+
`
|
|
1947
|
+
);
|
|
1760
1948
|
finalizeComplexSequence(s);
|
|
1761
1949
|
return s;
|
|
1762
1950
|
}
|
|
@@ -1794,29 +1982,35 @@ function onTool(ctx, s, tool, input) {
|
|
|
1794
1982
|
const toolCount = s.toolCount + 1;
|
|
1795
1983
|
const complexSequenceFile = toolCount === 3 ? createComplexSequenceFile(ctx, s) : s.complexSequenceFile;
|
|
1796
1984
|
if (toolCount >= 3 && complexSequenceFile) {
|
|
1797
|
-
appendFileSync(
|
|
1798
|
-
|
|
1985
|
+
appendFileSync(
|
|
1986
|
+
complexSequenceFile,
|
|
1987
|
+
`${toolCount}. **${tool}** - ${desc}
|
|
1988
|
+
`
|
|
1989
|
+
);
|
|
1799
1990
|
}
|
|
1800
1991
|
return { ...s, toolCount, complexSequenceFile };
|
|
1801
1992
|
}
|
|
1802
1993
|
function saveJudgeHighlight(ctx, s, verdict, text) {
|
|
1803
|
-
writeFileSync3(
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1994
|
+
writeFileSync3(
|
|
1995
|
+
highlightPath(ctx, s.stepIndex, `judge_${verdict}`),
|
|
1996
|
+
[
|
|
1997
|
+
`# Judge Verdict: ${verdict}`,
|
|
1998
|
+
"",
|
|
1999
|
+
`**Task:** ${ctx.slug}`,
|
|
2000
|
+
`**Step:** ${s.stepName}`,
|
|
2001
|
+
`**Attempt:** ${s.judgeAttempt}`,
|
|
2002
|
+
`**Timestamp:** ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
2003
|
+
"",
|
|
2004
|
+
"---",
|
|
2005
|
+
"",
|
|
2006
|
+
text,
|
|
2007
|
+
"",
|
|
2008
|
+
"---",
|
|
2009
|
+
"",
|
|
2010
|
+
"*Auto-captured*",
|
|
2011
|
+
""
|
|
2012
|
+
].join("\n")
|
|
2013
|
+
);
|
|
1820
2014
|
}
|
|
1821
2015
|
var LOG_MATCHERS = [
|
|
1822
2016
|
{
|
|
@@ -1832,29 +2026,32 @@ var LOG_MATCHERS = [
|
|
|
1832
2026
|
pattern: /\[self-healing\].*failed.*exit\s+(\d+)/i,
|
|
1833
2027
|
apply: (ctx, s, text, match) => {
|
|
1834
2028
|
const selfHealingFile = highlightPath(ctx, s.stepIndex, "self_healing");
|
|
1835
|
-
writeFileSync3(
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
2029
|
+
writeFileSync3(
|
|
2030
|
+
selfHealingFile,
|
|
2031
|
+
[
|
|
2032
|
+
"# Self-Healing Activation",
|
|
2033
|
+
"",
|
|
2034
|
+
`**Task:** ${ctx.slug}`,
|
|
2035
|
+
`**Step:** ${s.stepName}`,
|
|
2036
|
+
`**Timestamp:** ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
2037
|
+
"",
|
|
2038
|
+
"---",
|
|
2039
|
+
"",
|
|
2040
|
+
"## \u274C Failure Detected",
|
|
2041
|
+
"",
|
|
2042
|
+
`**Exit Code:** ${match[1]}`,
|
|
2043
|
+
"",
|
|
2044
|
+
"**Recent Output:**",
|
|
2045
|
+
"```",
|
|
2046
|
+
s.recentOutput.join("\n"),
|
|
2047
|
+
"```",
|
|
2048
|
+
"",
|
|
2049
|
+
"---",
|
|
2050
|
+
"",
|
|
2051
|
+
"## \u{1F527} Claude's Healing Process",
|
|
2052
|
+
""
|
|
2053
|
+
].join("\n")
|
|
2054
|
+
);
|
|
1858
2055
|
return { ...s, selfHealingFile, recentOutput: [] };
|
|
1859
2056
|
}
|
|
1860
2057
|
},
|
|
@@ -1862,21 +2059,24 @@ var LOG_MATCHERS = [
|
|
|
1862
2059
|
pattern: /\[self-healing\].*Re-running/i,
|
|
1863
2060
|
apply: (_ctx, s) => {
|
|
1864
2061
|
if (!s.selfHealingFile) return s;
|
|
1865
|
-
appendFileSync(
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
2062
|
+
appendFileSync(
|
|
2063
|
+
s.selfHealingFile,
|
|
2064
|
+
[
|
|
2065
|
+
"",
|
|
2066
|
+
"*(See full log for Claude's diagnostic process)*",
|
|
2067
|
+
"",
|
|
2068
|
+
"---",
|
|
2069
|
+
"",
|
|
2070
|
+
"## \u2705 Resolution Applied",
|
|
2071
|
+
"",
|
|
2072
|
+
"The self-healing process completed. Check the full execution log to see Claude's analysis and fix.",
|
|
2073
|
+
"",
|
|
2074
|
+
"---",
|
|
2075
|
+
"",
|
|
2076
|
+
"*Auto-captured*",
|
|
2077
|
+
""
|
|
2078
|
+
].join("\n")
|
|
2079
|
+
);
|
|
1880
2080
|
return { ...s, selfHealingFile: "" };
|
|
1881
2081
|
}
|
|
1882
2082
|
}
|
|
@@ -1893,30 +2093,39 @@ function onLogMessage(ctx, s, level, text) {
|
|
|
1893
2093
|
).state;
|
|
1894
2094
|
}
|
|
1895
2095
|
function onWorkflowComplete(ctx, s) {
|
|
1896
|
-
appendLog(
|
|
2096
|
+
appendLog(
|
|
2097
|
+
s.logFile,
|
|
2098
|
+
`
|
|
1897
2099
|
${"\u2501".repeat(51)}
|
|
1898
2100
|
Task Complete: ${ctx.slug}
|
|
1899
2101
|
Finished: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
1900
2102
|
${"\u2501".repeat(51)}
|
|
1901
|
-
`
|
|
2103
|
+
`
|
|
2104
|
+
);
|
|
1902
2105
|
const indexFile = join3(ctx.highlightsDir, "README.md");
|
|
1903
2106
|
if (!existsSync2(indexFile)) {
|
|
1904
|
-
writeFileSync3(
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
2107
|
+
writeFileSync3(
|
|
2108
|
+
indexFile,
|
|
2109
|
+
[
|
|
2110
|
+
"# Execution Highlights",
|
|
2111
|
+
"",
|
|
2112
|
+
"This directory contains automatically extracted highlight moments from task executions.",
|
|
2113
|
+
"",
|
|
2114
|
+
"## Latest Highlights",
|
|
2115
|
+
""
|
|
2116
|
+
].join("\n")
|
|
2117
|
+
);
|
|
1912
2118
|
}
|
|
1913
2119
|
const highlights = readdirSync(ctx.highlightsDir).filter((f) => f.startsWith(ctx.ts) && f.endsWith(".md")).sort();
|
|
1914
2120
|
if (highlights.length > 0) {
|
|
1915
2121
|
const entries = highlights.map((f) => `- [${f.replace(/\.md$/, "")}](./${f})`).join("\n");
|
|
1916
|
-
appendFileSync(
|
|
2122
|
+
appendFileSync(
|
|
2123
|
+
indexFile,
|
|
2124
|
+
`
|
|
1917
2125
|
### ${ctx.slug} (${(/* @__PURE__ */ new Date()).toISOString()})
|
|
1918
2126
|
${entries}
|
|
1919
|
-
`
|
|
2127
|
+
`
|
|
2128
|
+
);
|
|
1920
2129
|
}
|
|
1921
2130
|
return s;
|
|
1922
2131
|
}
|
|
@@ -1934,6 +2143,19 @@ function reduce(ctx, s, event) {
|
|
|
1934
2143
|
return onStepComplete(s);
|
|
1935
2144
|
case "step:error":
|
|
1936
2145
|
return onStepError(s, event.error);
|
|
2146
|
+
case "step:iteration":
|
|
2147
|
+
appendLog(
|
|
2148
|
+
s.logFile,
|
|
2149
|
+
`
|
|
2150
|
+
\u2500\u2500 iteration ${event.iteration}/${event.total}: ${event.item}`
|
|
2151
|
+
);
|
|
2152
|
+
return s;
|
|
2153
|
+
case "step:inner":
|
|
2154
|
+
appendLog(
|
|
2155
|
+
s.logFile,
|
|
2156
|
+
` \u21B3 [${event.innerIndex + 1}/${event.innerTotal}] ${event.name}`
|
|
2157
|
+
);
|
|
2158
|
+
return s;
|
|
1937
2159
|
case "output:text":
|
|
1938
2160
|
return onOutputText(s, event.text);
|
|
1939
2161
|
case "output:tool":
|
|
@@ -2115,16 +2337,21 @@ function extractJson(text) {
|
|
|
2115
2337
|
|
|
2116
2338
|
// src/index.ts
|
|
2117
2339
|
var CURRENT_VERSION = JSON.parse(
|
|
2118
|
-
readFileSync6(
|
|
2340
|
+
readFileSync6(
|
|
2341
|
+
join5(dirname5(fileURLToPath2(import.meta.url)), "../package.json"),
|
|
2342
|
+
"utf-8"
|
|
2343
|
+
)
|
|
2119
2344
|
).version;
|
|
2120
2345
|
var rawArgs = process.argv.slice(2);
|
|
2121
2346
|
if (rawArgs[0] === "plan") {
|
|
2122
2347
|
const planArgs = parsePlanArgs(rawArgs.slice(1));
|
|
2123
2348
|
const planEvents = streamPlan(planArgs);
|
|
2124
|
-
const inkApp = render(
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2349
|
+
const inkApp = render(
|
|
2350
|
+
React3.createElement(PlanApp, {
|
|
2351
|
+
description: planArgs.description,
|
|
2352
|
+
events: planEvents
|
|
2353
|
+
})
|
|
2354
|
+
);
|
|
2128
2355
|
try {
|
|
2129
2356
|
await inkApp.waitUntilExit();
|
|
2130
2357
|
} catch {
|
|
@@ -2173,7 +2400,15 @@ YAML \u2014 step fields (all step types):
|
|
|
2173
2400
|
forEach string or list
|
|
2174
2401
|
Inline YAML array OR a shell command whose newline-split
|
|
2175
2402
|
stdout provides the items. {{item}} is substituted per
|
|
2176
|
-
iteration in
|
|
2403
|
+
iteration in every child step's name, command, and prompt.
|
|
2404
|
+
repeat int Run this step N times; {{item}} is the 1-based
|
|
2405
|
+
iteration number. Mutually exclusive with forEach.
|
|
2406
|
+
steps list Multiple child steps to run per forEach/repeat
|
|
2407
|
+
iteration. Mutually exclusive with command/prompt on the
|
|
2408
|
+
parent step. Requires forEach or repeat.
|
|
2409
|
+
context list Var names whose file-path values are prepended to
|
|
2410
|
+
a prompt step's content at runtime.
|
|
2411
|
+
output string Var name; captures script stdout to that file path.
|
|
2177
2412
|
|
|
2178
2413
|
YAML \u2014 prompt step fields (type: prompt, or inferred when prompt is present):
|
|
2179
2414
|
prompt string (required) Instructions sent to Claude
|
|
@@ -2185,7 +2420,7 @@ YAML \u2014 prompt step fields (type: prompt, or inferred when prompt is present
|
|
|
2185
2420
|
YAML \u2014 script step fields (type: script | command, or inferred when command is present):
|
|
2186
2421
|
command string (required) Bash command to execute
|
|
2187
2422
|
self_healing bool On failure, Claude diagnoses and fixes iteratively
|
|
2188
|
-
up to 5 attempts with accumulated context (default:
|
|
2423
|
+
up to 5 attempts with accumulated context (default: false)
|
|
2189
2424
|
max_healing_attempts int Override max self-healing retries (default: 5)
|
|
2190
2425
|
|
|
2191
2426
|
YAML \u2014 log step fields (type: log, or inferred when message is present and prompt is absent):
|
|
@@ -2263,9 +2498,17 @@ function errorReplacer(_key, value) {
|
|
|
2263
2498
|
async function maybeRunRetrospective(filePath2, workflow2, logger2) {
|
|
2264
2499
|
if (!logger2) return;
|
|
2265
2500
|
try {
|
|
2266
|
-
await runRetrospective(
|
|
2501
|
+
await runRetrospective(
|
|
2502
|
+
filePath2,
|
|
2503
|
+
workflow2,
|
|
2504
|
+
logger2.getHighlightsDir(),
|
|
2505
|
+
logger2.getTimestamp()
|
|
2506
|
+
);
|
|
2267
2507
|
} catch (err) {
|
|
2268
|
-
console.warn(
|
|
2508
|
+
console.warn(
|
|
2509
|
+
"[executant] retrospective failed (non-fatal):",
|
|
2510
|
+
getErrorMessage(err)
|
|
2511
|
+
);
|
|
2269
2512
|
}
|
|
2270
2513
|
}
|
|
2271
2514
|
if (ciMode) {
|
|
@@ -2281,7 +2524,9 @@ if (ciMode) {
|
|
|
2281
2524
|
process.exit(1);
|
|
2282
2525
|
});
|
|
2283
2526
|
} else {
|
|
2284
|
-
const inkApp = render(
|
|
2527
|
+
const inkApp = render(
|
|
2528
|
+
React3.createElement(App, { workflow, events, options, updateCheck })
|
|
2529
|
+
);
|
|
2285
2530
|
if (workflow.selfImprove) {
|
|
2286
2531
|
inkApp.waitUntilExit().then(() => maybeRunRetrospective(filePath, workflow, logger)).catch(() => {
|
|
2287
2532
|
});
|
|
@@ -52,10 +52,27 @@ Complete structure with all available options:
|
|
|
52
52
|
"forEach": "git diff --name-only HEAD~1",
|
|
53
53
|
"prompt": "Review {{item}} for issues and suggest improvements."
|
|
54
54
|
},
|
|
55
|
+
{
|
|
56
|
+
"name": "foreach_multi_step",
|
|
57
|
+
"forEach": ["pkg/api", "pkg/web"],
|
|
58
|
+
"steps": [
|
|
59
|
+
{ "name": "lint {{item}}", "type": "script", "command": "cd {{item}} && npm run lint" },
|
|
60
|
+
{ "name": "test {{item}}", "type": "script", "command": "cd {{item}} && npm test" },
|
|
61
|
+
{ "name": "review {{item}}", "prompt": "Review the test results for {{item}} and summarize any issues." }
|
|
62
|
+
]
|
|
63
|
+
},
|
|
55
64
|
{
|
|
56
65
|
"name": "repeated_audit",
|
|
57
66
|
"repeat": 20,
|
|
58
67
|
"prompt": "Review the codebase for issues. This is pass {{item}} of 20."
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"name": "repeated_multi_step",
|
|
71
|
+
"repeat": 3,
|
|
72
|
+
"steps": [
|
|
73
|
+
{ "name": "build pass {{item}}", "type": "script", "command": "npm run build" },
|
|
74
|
+
{ "name": "test pass {{item}}", "type": "script", "command": "npm test" }
|
|
75
|
+
]
|
|
59
76
|
}
|
|
60
77
|
]
|
|
61
78
|
}
|
|
@@ -94,6 +111,9 @@ or commands.
|
|
|
94
111
|
|
|
95
112
|
**Pre-Output Self-Review — Vars (MANDATORY):**
|
|
96
113
|
Before finalising your JSON, scan every `prompt` and `command` field you wrote — every sentence, every numbered instruction, every parenthetical.
|
|
114
|
+
|
|
115
|
+
**`{{item}}` is NOT a path — never extract it to `vars`.** It is a runtime placeholder that the runner substitutes per iteration. Only treat actual string literals as paths requiring `vars` extraction.
|
|
116
|
+
|
|
97
117
|
For each field, identify ALL occurrences of paths, including:
|
|
98
118
|
- Direct path references (e.g., `src/middleware/rate-limit.ts`)
|
|
99
119
|
- Paths mentioned in narrative context (e.g., "match the style of tests in `src/tests/`")
|
|
@@ -158,11 +178,30 @@ Use the EXACT commands from the research document. Only skip a category if the r
|
|
|
158
178
|
|
|
159
179
|
**REQUIRED: Always use `forEach` instead of enumerating items inline in a prompt.**
|
|
160
180
|
|
|
181
|
+
**Use nested `steps:` inside `forEach` or `repeat` when:**
|
|
182
|
+
- Each iteration requires **two or more** distinct actions (e.g., lint THEN test THEN review) — if there is only one action per item, use `command` or `prompt` directly on the forEach step instead
|
|
183
|
+
- Replace `command`/`prompt` on the forEach step with a `steps` array of child steps
|
|
184
|
+
- Child steps support all standard step fields (`type`, `command`, `prompt`, `llm_as_judge`, etc.)
|
|
185
|
+
- `{{item}}` substitution applies to all child step `name`, `command`, and `prompt` fields
|
|
186
|
+
- Mutually exclusive with `command`/`prompt` on the parent step
|
|
187
|
+
|
|
188
|
+
```json
|
|
189
|
+
{
|
|
190
|
+
"name": "process each package",
|
|
191
|
+
"forEach": ["pkg/api", "pkg/web"],
|
|
192
|
+
"steps": [
|
|
193
|
+
{ "name": "lint {{item}}", "type": "script", "command": "cd {{item}} && npm run lint" },
|
|
194
|
+
{ "name": "test {{item}}", "type": "script", "command": "cd {{item}} && npm test" }
|
|
195
|
+
]
|
|
196
|
+
}
|
|
197
|
+
```
|
|
198
|
+
|
|
161
199
|
**Use `repeat: N` when:**
|
|
162
200
|
- The user asks to run the same prompt or command multiple times ("do this 20 times", "repeat 5 times", "run N iterations")
|
|
163
201
|
- The step is identical each time — only the iteration number ({{item}}) differs
|
|
164
202
|
- Prefer `repeat` over `forEach` when there is no meaningful list of items — just a count
|
|
165
203
|
- NEVER expand "do X N times" into N separate steps — always use `repeat: N`
|
|
204
|
+
- Combine with nested `steps:` when each iteration needs multiple sub-steps
|
|
166
205
|
|
|
167
206
|
## Atomicity (MANDATORY)
|
|
168
207
|
|
|
@@ -231,6 +270,7 @@ Generate a JSON object that:
|
|
|
231
270
|
10. Uses `output:` + `context:` to pass script step results to downstream prompt steps
|
|
232
271
|
11. Declares ALL file paths in `vars` — no hardcoded paths in prompts or commands, including paths in narrative or example context
|
|
233
272
|
12. Places `vars` before `steps` in the JSON output
|
|
273
|
+
13. Uses nested `steps:` inside `forEach`/`repeat` when each iteration needs multiple sequential actions
|
|
234
274
|
|
|
235
275
|
## Critical Rules
|
|
236
276
|
|
|
@@ -35,6 +35,7 @@ Does the workflow contain at least one of: a lint step, a test step, or a build
|
|
|
35
35
|
|
|
36
36
|
- Look for `type: "script"` steps whose `command` runs a linter, test runner, or build tool
|
|
37
37
|
(e.g., `npm run lint`, `npm test`, `npm run build`, `pytest`, `tsc --noEmit`, `go test`, etc.)
|
|
38
|
+
- **This includes script steps nested inside a `forEach` or `repeat`'s `steps:` array** — a lint/test/build step inside a loop's child steps satisfies the verification requirement
|
|
38
39
|
- A workflow with ZERO verification steps automatically FAILS regardless of other criteria
|
|
39
40
|
- A visual check prompt step with `llm_as_judge: true` as the final step is acceptable if no
|
|
40
41
|
lint/test/build commands exist in the project
|
|
@@ -45,6 +46,7 @@ Are steps focused on a single concern?
|
|
|
45
46
|
- Does any step do more than one distinct thing (e.g., "implement AND test")?
|
|
46
47
|
- Could any step be meaningfully split into two smaller steps?
|
|
47
48
|
- Steps that combine unrelated operations are too large
|
|
49
|
+
- **Exception**: A `forEach`/`repeat` step with a nested `steps:` array is NOT an atomicity violation — each child step should itself be atomic, and the parent is a loop container, not a combined action
|
|
48
50
|
|
|
49
51
|
### 3. Goal Coverage
|
|
50
52
|
Do the steps collectively accomplish the stated goal?
|
|
@@ -83,7 +85,7 @@ or
|
|
|
83
85
|
```
|
|
84
86
|
|
|
85
87
|
Rules:
|
|
86
|
-
- `pass` is `true` only if ALL
|
|
88
|
+
- `pass` is `true` only if ALL five criteria above are met
|
|
87
89
|
- `feedback` is an empty string when `pass` is `true`
|
|
88
90
|
- `feedback` must be specific and actionable when `pass` is `false` — say EXACTLY what is wrong
|
|
89
91
|
and what the decomposer must do to fix it
|