executant 1.16.0 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +198 -260
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -268,6 +268,7 @@ function convertInnerStep(step, vars, name, continueOnError) {
|
|
|
268
268
|
continueOnError,
|
|
269
269
|
llmAsJudge: step.llm_as_judge,
|
|
270
270
|
allowedTools: step.allowed_tools,
|
|
271
|
+
model: "sonnet",
|
|
271
272
|
...contextFiles.length > 0 && { contextFiles }
|
|
272
273
|
};
|
|
273
274
|
}
|
|
@@ -432,7 +433,7 @@ function resolveClaudePath() {
|
|
|
432
433
|
);
|
|
433
434
|
}
|
|
434
435
|
}
|
|
435
|
-
async function* runClaude(task
|
|
436
|
+
async function* runClaude(task) {
|
|
436
437
|
yield {
|
|
437
438
|
type: "log",
|
|
438
439
|
level: "info",
|
|
@@ -627,7 +628,7 @@ ${queued.join("\n")}
|
|
|
627
628
|
---
|
|
628
629
|
${expanded.prompt}`
|
|
629
630
|
} : expanded;
|
|
630
|
-
yield* enriched.llmAsJudge ? runClaudeWithJudge(enriched
|
|
631
|
+
yield* enriched.llmAsJudge ? runClaudeWithJudge(enriched) : runClaude(enriched);
|
|
631
632
|
break;
|
|
632
633
|
}
|
|
633
634
|
case "forEach":
|
|
@@ -788,7 +789,8 @@ async function* runCommandWithHealing(task) {
|
|
|
788
789
|
type: "claude",
|
|
789
790
|
name: `${task.name}:heal-${attempt + 1}`,
|
|
790
791
|
prompt: healPrompt,
|
|
791
|
-
allowedTools: ["Bash", "Read", "Write", "Edit", "Glob", "Grep"]
|
|
792
|
+
allowedTools: ["Bash", "Read", "Write", "Edit", "Glob", "Grep"],
|
|
793
|
+
model: "sonnet"
|
|
792
794
|
};
|
|
793
795
|
const toolCalls = [];
|
|
794
796
|
const claudeLines = [];
|
|
@@ -811,14 +813,14 @@ async function* runCommandWithHealing(task) {
|
|
|
811
813
|
}
|
|
812
814
|
}
|
|
813
815
|
}
|
|
814
|
-
async function* runClaudeWithJudge(task
|
|
816
|
+
async function* runClaudeWithJudge(task) {
|
|
815
817
|
let judgeContext = "";
|
|
816
818
|
for (let attempt = 0; attempt < MAX_JUDGE_RETRIES; attempt++) {
|
|
817
819
|
const prompt = attempt === 0 ? task.prompt : `${task.prompt}
|
|
818
820
|
|
|
819
821
|
${fillTemplate(JUDGE_RETRY_CONTEXT, { FEEDBACK: judgeContext })}`;
|
|
820
822
|
const lines = [];
|
|
821
|
-
yield* collectLines(runClaude({ ...task, prompt }
|
|
823
|
+
yield* collectLines(runClaude({ ...task, prompt }), lines);
|
|
822
824
|
yield {
|
|
823
825
|
type: "log",
|
|
824
826
|
level: "info",
|
|
@@ -859,8 +861,9 @@ async function evaluateWithJudge(stepName, stepInstructions, output) {
|
|
|
859
861
|
name: `judge:${stepName}`,
|
|
860
862
|
prompt: buildJudgePrompt(stepName, stepInstructions, output),
|
|
861
863
|
allowedTools: [],
|
|
862
|
-
permissionMode: "default"
|
|
864
|
+
permissionMode: "default",
|
|
863
865
|
// judge only reads text — no tool access needed
|
|
866
|
+
model: "sonnet"
|
|
864
867
|
},
|
|
865
868
|
JudgeOutputSchema
|
|
866
869
|
);
|
|
@@ -1012,18 +1015,18 @@ function formatToolCall2(tool, input) {
|
|
|
1012
1015
|
case "Read":
|
|
1013
1016
|
case "Edit":
|
|
1014
1017
|
case "Write":
|
|
1015
|
-
|
|
1018
|
+
case "Glob":
|
|
1019
|
+
case "Grep":
|
|
1020
|
+
return `[${tool}] ${getToolArg(tool, input)}`;
|
|
1016
1021
|
case "Bash":
|
|
1017
1022
|
return `[Bash] ${input["description"] ?? ""}
|
|
1018
1023
|
$ ${String(input["command"] ?? "").slice(0, 120)}`;
|
|
1019
|
-
case "Glob":
|
|
1020
|
-
return `[Glob] ${input["pattern"] ?? JSON.stringify(input)}`;
|
|
1021
|
-
case "Grep":
|
|
1022
|
-
return `[Grep] ${input["pattern"] ?? JSON.stringify(input)}`;
|
|
1023
1024
|
case "TodoWrite": {
|
|
1024
1025
|
const todos = input["todos"];
|
|
1025
1026
|
if (Array.isArray(todos)) {
|
|
1026
|
-
const inProgress = todos.filter(
|
|
1027
|
+
const inProgress = todos.filter(
|
|
1028
|
+
(t) => typeof t === "object" && t !== null && t["status"] === "in_progress"
|
|
1029
|
+
).map((t) => String(t["content"] ?? ""));
|
|
1027
1030
|
if (inProgress.length > 0) return `[Task] ${inProgress.join(", ")}`;
|
|
1028
1031
|
}
|
|
1029
1032
|
return "";
|
|
@@ -1813,111 +1816,58 @@ function collapseSequentialSteps(steps) {
|
|
|
1813
1816
|
{ out: [], skip: 0 }
|
|
1814
1817
|
).out;
|
|
1815
1818
|
}
|
|
1816
|
-
|
|
1817
|
-
const {
|
|
1818
|
-
const
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
}
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
allowedTools: ["Read", "Glob", "Grep"],
|
|
1844
|
-
permissionMode: "bypassPermissions",
|
|
1845
|
-
model: "opus",
|
|
1846
|
-
appendSystemPrompt: METHODOLOGY
|
|
1847
|
-
};
|
|
1848
|
-
for await (const event of runClaude(researchTask)) {
|
|
1849
|
-
if (event.type === "output:tool") {
|
|
1850
|
-
yield { type: "plan:tool", tool: event.tool, input: event.input };
|
|
1851
|
-
} else if (event.type === "output:text") {
|
|
1852
|
-
researchLines.push(event.text);
|
|
1853
|
-
yield { type: "plan:text", text: event.text };
|
|
1854
|
-
}
|
|
1855
|
-
}
|
|
1856
|
-
} catch (err) {
|
|
1857
|
-
yield {
|
|
1858
|
-
type: "plan:error",
|
|
1859
|
-
message: `Research pass failed: ${getErrorMessage(err)}`
|
|
1860
|
-
};
|
|
1861
|
-
return;
|
|
1862
|
-
}
|
|
1863
|
-
researchDoc = researchLines.join("\n");
|
|
1864
|
-
if (!researchDoc.trim()) {
|
|
1865
|
-
yield {
|
|
1866
|
-
type: "plan:error",
|
|
1867
|
-
message: "Research pass produced no output \u2014 cannot decompose"
|
|
1868
|
-
};
|
|
1869
|
-
return;
|
|
1870
|
-
}
|
|
1871
|
-
}
|
|
1872
|
-
const stages = skipResearch ? { decompose: 1, validate: 2, total: 2 } : { decompose: 2, validate: 3, total: TOTAL_PLAN_STAGES };
|
|
1873
|
-
yield {
|
|
1874
|
-
type: "plan:stage",
|
|
1875
|
-
stage: stages.decompose,
|
|
1876
|
-
total: stages.total,
|
|
1877
|
-
name: "Decompose to Steps"
|
|
1878
|
-
};
|
|
1819
|
+
function writeWorkflowFile(taskFile, workflow2) {
|
|
1820
|
+
const { goal, vars, steps, ...rest } = normalizeWorkflow(workflow2);
|
|
1821
|
+
const ordered = { goal, ...vars && { vars }, steps, ...rest };
|
|
1822
|
+
const yamlContent = dumpYaml(ordered, {
|
|
1823
|
+
lineWidth: -1,
|
|
1824
|
+
noRefs: true,
|
|
1825
|
+
quotingType: '"',
|
|
1826
|
+
forceQuotes: false
|
|
1827
|
+
}).trimEnd();
|
|
1828
|
+
writeFileSync2(taskFile, yamlContent + "\n", "utf8");
|
|
1829
|
+
const lines = yamlContent.split("\n");
|
|
1830
|
+
return lines.slice(0, 30).join("\n") + (lines.length > 30 ? "\n..." : "");
|
|
1831
|
+
}
|
|
1832
|
+
async function* runRetryLoop(config) {
|
|
1833
|
+
const {
|
|
1834
|
+
maxRetries,
|
|
1835
|
+
retryStageName,
|
|
1836
|
+
retryStage,
|
|
1837
|
+
retryTotal,
|
|
1838
|
+
validateStage,
|
|
1839
|
+
validateTotal,
|
|
1840
|
+
schemaErrorLabel,
|
|
1841
|
+
judgeRejectLabel,
|
|
1842
|
+
description,
|
|
1843
|
+
taskFile,
|
|
1844
|
+
buildTask
|
|
1845
|
+
} = config;
|
|
1879
1846
|
let retryPrefix = "";
|
|
1880
|
-
for (let attempt = 0; attempt <
|
|
1847
|
+
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
1881
1848
|
if (attempt > 0) {
|
|
1882
1849
|
yield {
|
|
1883
1850
|
type: "plan:retry",
|
|
1884
1851
|
attempt: attempt + 1,
|
|
1885
|
-
maxAttempts:
|
|
1852
|
+
maxAttempts: maxRetries,
|
|
1886
1853
|
reason: retryPrefix.replace(/\n/g, " ")
|
|
1887
1854
|
};
|
|
1888
1855
|
yield {
|
|
1889
1856
|
type: "plan:stage",
|
|
1890
|
-
stage:
|
|
1891
|
-
total:
|
|
1892
|
-
name:
|
|
1857
|
+
stage: retryStage,
|
|
1858
|
+
total: retryTotal,
|
|
1859
|
+
name: retryStageName
|
|
1893
1860
|
};
|
|
1894
1861
|
}
|
|
1895
|
-
const
|
|
1896
|
-
DESCRIPTION: description,
|
|
1897
|
-
RESEARCH_DOC: researchDoc
|
|
1898
|
-
});
|
|
1899
|
-
const decomposeTask = {
|
|
1900
|
-
type: "claude",
|
|
1901
|
-
name: "plan:decompose",
|
|
1902
|
-
prompt: retryPrefix ? `${retryPrefix}
|
|
1903
|
-
|
|
1904
|
-
${basePrompt}` : basePrompt,
|
|
1905
|
-
allowedTools: [],
|
|
1906
|
-
permissionMode: "bypassPermissions",
|
|
1907
|
-
model: skipResearch ? "sonnet" : "opus",
|
|
1908
|
-
appendSystemPrompt: `${METHODOLOGY}
|
|
1909
|
-
|
|
1910
|
-
${PLAN_SYSTEM_RULES}`,
|
|
1911
|
-
jsonSchema: WORKFLOW_JSON_SCHEMA
|
|
1912
|
-
};
|
|
1862
|
+
const task = buildTask(retryPrefix);
|
|
1913
1863
|
let structuredOutput;
|
|
1914
|
-
const
|
|
1864
|
+
const textLines = [];
|
|
1915
1865
|
try {
|
|
1916
|
-
for await (const event of runClaude(
|
|
1866
|
+
for await (const event of runClaude(task)) {
|
|
1917
1867
|
if (event.type === "output:tool") {
|
|
1918
1868
|
yield { type: "plan:tool", tool: event.tool, input: event.input };
|
|
1919
1869
|
} else if (event.type === "output:text") {
|
|
1920
|
-
|
|
1870
|
+
textLines.push(event.text);
|
|
1921
1871
|
yield { type: "plan:text", text: event.text };
|
|
1922
1872
|
} else if (event.type === "output:structured") {
|
|
1923
1873
|
structuredOutput = event.data;
|
|
@@ -1925,19 +1875,19 @@ ${PLAN_SYSTEM_RULES}`,
|
|
|
1925
1875
|
}
|
|
1926
1876
|
} catch (err) {
|
|
1927
1877
|
const msg = getErrorMessage(err);
|
|
1928
|
-
if (attempt ===
|
|
1878
|
+
if (attempt === maxRetries - 1) {
|
|
1929
1879
|
yield { type: "plan:error", message: msg };
|
|
1930
1880
|
return;
|
|
1931
1881
|
}
|
|
1932
1882
|
retryPrefix = fillTemplate(PLAN_RETRY_PARSE_ERROR, {
|
|
1933
1883
|
ERROR: msg,
|
|
1934
|
-
EXCERPT:
|
|
1884
|
+
EXCERPT: textLines.join("\n")
|
|
1935
1885
|
});
|
|
1936
1886
|
continue;
|
|
1937
1887
|
}
|
|
1938
1888
|
if (structuredOutput === void 0) {
|
|
1939
1889
|
const issues = "No structured output returned \u2014 ensure the response is a JSON object";
|
|
1940
|
-
if (attempt ===
|
|
1890
|
+
if (attempt === maxRetries - 1) {
|
|
1941
1891
|
yield { type: "plan:error", message: issues };
|
|
1942
1892
|
return;
|
|
1943
1893
|
}
|
|
@@ -1947,10 +1897,10 @@ ${PLAN_SYSTEM_RULES}`,
|
|
|
1947
1897
|
const zodResult = WorkflowSchema.safeParse(structuredOutput);
|
|
1948
1898
|
if (!zodResult.success) {
|
|
1949
1899
|
const issues = formatZodIssues(zodResult.error.issues);
|
|
1950
|
-
if (attempt ===
|
|
1900
|
+
if (attempt === maxRetries - 1) {
|
|
1951
1901
|
yield {
|
|
1952
1902
|
type: "plan:error",
|
|
1953
|
-
message:
|
|
1903
|
+
message: `${schemaErrorLabel} did not match expected schema:
|
|
1954
1904
|
${issues}`
|
|
1955
1905
|
};
|
|
1956
1906
|
return;
|
|
@@ -1960,8 +1910,8 @@ ${issues}`
|
|
|
1960
1910
|
}
|
|
1961
1911
|
yield {
|
|
1962
1912
|
type: "plan:stage",
|
|
1963
|
-
stage:
|
|
1964
|
-
total:
|
|
1913
|
+
stage: validateStage,
|
|
1914
|
+
total: validateTotal,
|
|
1965
1915
|
name: "Validate"
|
|
1966
1916
|
};
|
|
1967
1917
|
const judgeResult = await runPass3Judge(description, zodResult.data);
|
|
@@ -1971,7 +1921,7 @@ ${issues}`
|
|
|
1971
1921
|
message: "Judge skipped due to error \u2014 proceeding without validation"
|
|
1972
1922
|
};
|
|
1973
1923
|
}
|
|
1974
|
-
if (!judgeResult.pass && attempt <
|
|
1924
|
+
if (!judgeResult.pass && attempt < maxRetries - 1) {
|
|
1975
1925
|
retryPrefix = fillTemplate(PLAN_RETRY_JUDGE, {
|
|
1976
1926
|
FEEDBACK: judgeResult.feedback
|
|
1977
1927
|
});
|
|
@@ -1980,37 +1930,120 @@ ${issues}`
|
|
|
1980
1930
|
if (!judgeResult.pass) {
|
|
1981
1931
|
yield {
|
|
1982
1932
|
type: "plan:warn",
|
|
1983
|
-
message: `Judge rejected
|
|
1933
|
+
message: `Judge rejected ${judgeRejectLabel} but retries exhausted: ${judgeResult.feedback}`
|
|
1984
1934
|
};
|
|
1985
1935
|
}
|
|
1986
|
-
const
|
|
1987
|
-
const ordered = { goal, ...vars && { vars }, steps, ...rest };
|
|
1988
|
-
const yamlContent = dumpYaml(ordered, {
|
|
1989
|
-
lineWidth: -1,
|
|
1990
|
-
noRefs: true,
|
|
1991
|
-
quotingType: '"',
|
|
1992
|
-
forceQuotes: false
|
|
1993
|
-
}).trimEnd();
|
|
1994
|
-
writeFileSync2(taskFile, yamlContent + "\n", "utf8");
|
|
1995
|
-
const yamlLines = yamlContent.split("\n");
|
|
1996
|
-
const preview = yamlLines.slice(0, 30).join("\n") + (yamlLines.length > 30 ? "\n..." : "");
|
|
1936
|
+
const preview = writeWorkflowFile(taskFile, zodResult.data);
|
|
1997
1937
|
yield { type: "plan:complete", taskFile, preview };
|
|
1998
1938
|
return;
|
|
1999
1939
|
}
|
|
2000
1940
|
yield {
|
|
2001
1941
|
type: "plan:error",
|
|
2002
|
-
message:
|
|
1942
|
+
message: `${schemaErrorLabel} generation failed after maximum retries`
|
|
1943
|
+
};
|
|
1944
|
+
}
|
|
1945
|
+
async function* streamPlan(args) {
|
|
1946
|
+
const { description, taskFile } = args;
|
|
1947
|
+
const skipResearch = args.fast || isSimpleRequest(description);
|
|
1948
|
+
yield { type: "plan:start", description };
|
|
1949
|
+
let researchDoc;
|
|
1950
|
+
if (skipResearch) {
|
|
1951
|
+
yield { type: "plan:stages", names: ["Decompose to Steps", "Validate"] };
|
|
1952
|
+
researchDoc = "No codebase research performed \u2014 the task is self-contained. Work directly from the user's original goal.";
|
|
1953
|
+
} else {
|
|
1954
|
+
yield {
|
|
1955
|
+
type: "plan:stages",
|
|
1956
|
+
names: ["Research & Planning", "Decompose to Steps", "Validate"]
|
|
1957
|
+
};
|
|
1958
|
+
yield {
|
|
1959
|
+
type: "plan:stage",
|
|
1960
|
+
stage: 1,
|
|
1961
|
+
total: TOTAL_PLAN_STAGES,
|
|
1962
|
+
name: "Research & Planning"
|
|
1963
|
+
};
|
|
1964
|
+
const researchLines = [];
|
|
1965
|
+
try {
|
|
1966
|
+
const researchTask = {
|
|
1967
|
+
type: "claude",
|
|
1968
|
+
name: "plan:research",
|
|
1969
|
+
prompt: fillTemplate(PLAN_RESEARCH_PROMPT, {
|
|
1970
|
+
DESCRIPTION: description
|
|
1971
|
+
}),
|
|
1972
|
+
allowedTools: ["Read", "Glob", "Grep"],
|
|
1973
|
+
permissionMode: "bypassPermissions",
|
|
1974
|
+
model: "opus",
|
|
1975
|
+
appendSystemPrompt: METHODOLOGY
|
|
1976
|
+
};
|
|
1977
|
+
for await (const event of runClaude(researchTask)) {
|
|
1978
|
+
if (event.type === "output:tool") {
|
|
1979
|
+
yield { type: "plan:tool", tool: event.tool, input: event.input };
|
|
1980
|
+
} else if (event.type === "output:text") {
|
|
1981
|
+
researchLines.push(event.text);
|
|
1982
|
+
yield { type: "plan:text", text: event.text };
|
|
1983
|
+
}
|
|
1984
|
+
}
|
|
1985
|
+
} catch (err) {
|
|
1986
|
+
yield {
|
|
1987
|
+
type: "plan:error",
|
|
1988
|
+
message: `Research pass failed: ${getErrorMessage(err)}`
|
|
1989
|
+
};
|
|
1990
|
+
return;
|
|
1991
|
+
}
|
|
1992
|
+
researchDoc = researchLines.join("\n");
|
|
1993
|
+
if (!researchDoc.trim()) {
|
|
1994
|
+
yield {
|
|
1995
|
+
type: "plan:error",
|
|
1996
|
+
message: "Research pass produced no output \u2014 cannot decompose"
|
|
1997
|
+
};
|
|
1998
|
+
return;
|
|
1999
|
+
}
|
|
2000
|
+
}
|
|
2001
|
+
const stages = skipResearch ? { decompose: 1, validate: 2, total: 2 } : { decompose: 2, validate: 3, total: TOTAL_PLAN_STAGES };
|
|
2002
|
+
yield {
|
|
2003
|
+
type: "plan:stage",
|
|
2004
|
+
stage: stages.decompose,
|
|
2005
|
+
total: stages.total,
|
|
2006
|
+
name: "Decompose to Steps"
|
|
2003
2007
|
};
|
|
2008
|
+
yield* runRetryLoop({
|
|
2009
|
+
maxRetries: MAX_PLAN_RETRIES,
|
|
2010
|
+
retryStageName: "Decompose to Steps",
|
|
2011
|
+
retryStage: stages.decompose,
|
|
2012
|
+
retryTotal: stages.total,
|
|
2013
|
+
validateStage: stages.validate,
|
|
2014
|
+
validateTotal: stages.total,
|
|
2015
|
+
schemaErrorLabel: "Plan",
|
|
2016
|
+
judgeRejectLabel: "plan",
|
|
2017
|
+
description,
|
|
2018
|
+
taskFile,
|
|
2019
|
+
buildTask: (retryPrefix) => {
|
|
2020
|
+
const basePrompt = fillTemplate(PLAN_DECOMPOSE_PROMPT, {
|
|
2021
|
+
DESCRIPTION: description,
|
|
2022
|
+
RESEARCH_DOC: researchDoc
|
|
2023
|
+
});
|
|
2024
|
+
return {
|
|
2025
|
+
type: "claude",
|
|
2026
|
+
name: "plan:decompose",
|
|
2027
|
+
prompt: retryPrefix ? `${retryPrefix}
|
|
2028
|
+
|
|
2029
|
+
${basePrompt}` : basePrompt,
|
|
2030
|
+
allowedTools: [],
|
|
2031
|
+
permissionMode: "bypassPermissions",
|
|
2032
|
+
model: skipResearch ? "sonnet" : "opus",
|
|
2033
|
+
appendSystemPrompt: `${METHODOLOGY}
|
|
2034
|
+
|
|
2035
|
+
${PLAN_SYSTEM_RULES}`,
|
|
2036
|
+
jsonSchema: WORKFLOW_JSON_SCHEMA
|
|
2037
|
+
};
|
|
2038
|
+
}
|
|
2039
|
+
});
|
|
2004
2040
|
}
|
|
2005
2041
|
|
|
2006
2042
|
// src/refine.ts
|
|
2007
|
-
import { existsSync as existsSync2, readFileSync as readFileSync5
|
|
2008
|
-
import { load as loadYaml
|
|
2043
|
+
import { existsSync as existsSync2, readFileSync as readFileSync5 } from "node:fs";
|
|
2044
|
+
import { load as loadYaml } from "js-yaml";
|
|
2009
2045
|
var PLAN_REFINE_PROMPT = loadPrompt("plan-refine");
|
|
2010
2046
|
var PLAN_SYSTEM_RULES2 = loadPrompt("plan-system-rules");
|
|
2011
|
-
var PLAN_RETRY_PARSE_ERROR2 = loadPrompt("plan-retry-parse-error");
|
|
2012
|
-
var PLAN_RETRY_SCHEMA_ERROR2 = loadPrompt("plan-retry-schema-error");
|
|
2013
|
-
var PLAN_RETRY_JUDGE2 = loadPrompt("plan-retry-judge");
|
|
2014
2047
|
var MAX_REFINE_RETRIES = 3;
|
|
2015
2048
|
function parseRefineArgs(rawArgs2) {
|
|
2016
2049
|
if (rawArgs2[0] === "-h" || rawArgs2[0] === "--help") {
|
|
@@ -2091,122 +2124,39 @@ async function* streamRefine(args) {
|
|
|
2091
2124
|
yield { type: "plan:start", description };
|
|
2092
2125
|
yield { type: "plan:stages", names: ["Refine", "Validate"] };
|
|
2093
2126
|
yield { type: "plan:stage", stage: 1, total: 2, name: "Refine" };
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2127
|
+
yield* runRetryLoop({
|
|
2128
|
+
maxRetries: MAX_REFINE_RETRIES,
|
|
2129
|
+
retryStageName: "Refine",
|
|
2130
|
+
retryStage: 1,
|
|
2131
|
+
retryTotal: 2,
|
|
2132
|
+
validateStage: 2,
|
|
2133
|
+
validateTotal: 2,
|
|
2134
|
+
schemaErrorLabel: "Refined plan",
|
|
2135
|
+
judgeRejectLabel: "refinement",
|
|
2136
|
+
description,
|
|
2137
|
+
taskFile,
|
|
2138
|
+
buildTask: (retryPrefix) => {
|
|
2139
|
+
const basePrompt = fillTemplate(PLAN_REFINE_PROMPT, {
|
|
2140
|
+
DESCRIPTION: description,
|
|
2141
|
+
EXISTING_YAML: existingYaml,
|
|
2142
|
+
INSTRUCTIONS: instructions
|
|
2143
|
+
});
|
|
2144
|
+
return {
|
|
2145
|
+
type: "claude",
|
|
2146
|
+
name: "plan:refine",
|
|
2147
|
+
prompt: retryPrefix ? `${retryPrefix}
|
|
2114
2148
|
|
|
2115
2149
|
${basePrompt}` : basePrompt,
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2150
|
+
allowedTools: [],
|
|
2151
|
+
permissionMode: "bypassPermissions",
|
|
2152
|
+
model: "sonnet",
|
|
2153
|
+
appendSystemPrompt: `${METHODOLOGY}
|
|
2120
2154
|
|
|
2121
2155
|
${PLAN_SYSTEM_RULES2}`,
|
|
2122
|
-
|
|
2123
|
-
};
|
|
2124
|
-
let structuredOutput;
|
|
2125
|
-
const textLines = [];
|
|
2126
|
-
try {
|
|
2127
|
-
for await (const event of runClaude(refineTask)) {
|
|
2128
|
-
if (event.type === "output:tool") {
|
|
2129
|
-
yield { type: "plan:tool", tool: event.tool, input: event.input };
|
|
2130
|
-
} else if (event.type === "output:text") {
|
|
2131
|
-
textLines.push(event.text);
|
|
2132
|
-
yield { type: "plan:text", text: event.text };
|
|
2133
|
-
} else if (event.type === "output:structured") {
|
|
2134
|
-
structuredOutput = event.data;
|
|
2135
|
-
}
|
|
2136
|
-
}
|
|
2137
|
-
} catch (err) {
|
|
2138
|
-
const msg = getErrorMessage(err);
|
|
2139
|
-
if (attempt === MAX_REFINE_RETRIES - 1) {
|
|
2140
|
-
yield { type: "plan:error", message: msg };
|
|
2141
|
-
return;
|
|
2142
|
-
}
|
|
2143
|
-
retryPrefix = fillTemplate(PLAN_RETRY_PARSE_ERROR2, {
|
|
2144
|
-
ERROR: msg,
|
|
2145
|
-
EXCERPT: textLines.join("\n")
|
|
2146
|
-
});
|
|
2147
|
-
continue;
|
|
2148
|
-
}
|
|
2149
|
-
if (structuredOutput === void 0) {
|
|
2150
|
-
const issues = "No structured output returned \u2014 ensure the response is a JSON object";
|
|
2151
|
-
if (attempt === MAX_REFINE_RETRIES - 1) {
|
|
2152
|
-
yield { type: "plan:error", message: issues };
|
|
2153
|
-
return;
|
|
2154
|
-
}
|
|
2155
|
-
retryPrefix = fillTemplate(PLAN_RETRY_SCHEMA_ERROR2, { ISSUES: issues });
|
|
2156
|
-
continue;
|
|
2157
|
-
}
|
|
2158
|
-
const zodResult = WorkflowSchema.safeParse(structuredOutput);
|
|
2159
|
-
if (!zodResult.success) {
|
|
2160
|
-
const issues = formatZodIssues(zodResult.error.issues);
|
|
2161
|
-
if (attempt === MAX_REFINE_RETRIES - 1) {
|
|
2162
|
-
yield {
|
|
2163
|
-
type: "plan:error",
|
|
2164
|
-
message: `Refined plan did not match expected schema:
|
|
2165
|
-
${issues}`
|
|
2166
|
-
};
|
|
2167
|
-
return;
|
|
2168
|
-
}
|
|
2169
|
-
retryPrefix = fillTemplate(PLAN_RETRY_SCHEMA_ERROR2, { ISSUES: issues });
|
|
2170
|
-
continue;
|
|
2171
|
-
}
|
|
2172
|
-
yield { type: "plan:stage", stage: 2, total: 2, name: "Validate" };
|
|
2173
|
-
const judgeResult = await runPass3Judge(description, zodResult.data);
|
|
2174
|
-
if (judgeResult.skipped) {
|
|
2175
|
-
yield {
|
|
2176
|
-
type: "plan:warn",
|
|
2177
|
-
message: "Judge skipped due to error \u2014 proceeding without validation"
|
|
2178
|
-
};
|
|
2179
|
-
}
|
|
2180
|
-
if (!judgeResult.pass && attempt < MAX_REFINE_RETRIES - 1) {
|
|
2181
|
-
retryPrefix = fillTemplate(PLAN_RETRY_JUDGE2, {
|
|
2182
|
-
FEEDBACK: judgeResult.feedback
|
|
2183
|
-
});
|
|
2184
|
-
continue;
|
|
2185
|
-
}
|
|
2186
|
-
if (!judgeResult.pass) {
|
|
2187
|
-
yield {
|
|
2188
|
-
type: "plan:warn",
|
|
2189
|
-
message: `Judge rejected refinement but retries exhausted: ${judgeResult.feedback}`
|
|
2156
|
+
jsonSchema: WORKFLOW_JSON_SCHEMA
|
|
2190
2157
|
};
|
|
2191
2158
|
}
|
|
2192
|
-
|
|
2193
|
-
const ordered = { goal, ...vars && { vars }, steps, ...rest };
|
|
2194
|
-
const yamlContent = dumpYaml2(ordered, {
|
|
2195
|
-
lineWidth: -1,
|
|
2196
|
-
noRefs: true,
|
|
2197
|
-
quotingType: '"',
|
|
2198
|
-
forceQuotes: false
|
|
2199
|
-
}).trimEnd();
|
|
2200
|
-
writeFileSync3(taskFile, yamlContent + "\n", "utf8");
|
|
2201
|
-
const yamlLines = yamlContent.split("\n");
|
|
2202
|
-
const preview = yamlLines.slice(0, 30).join("\n") + (yamlLines.length > 30 ? "\n..." : "");
|
|
2203
|
-
yield { type: "plan:complete", taskFile, preview };
|
|
2204
|
-
return;
|
|
2205
|
-
}
|
|
2206
|
-
yield {
|
|
2207
|
-
type: "plan:error",
|
|
2208
|
-
message: "Refine failed after maximum retries"
|
|
2209
|
-
};
|
|
2159
|
+
});
|
|
2210
2160
|
}
|
|
2211
2161
|
|
|
2212
2162
|
// src/ui/PlanApp.tsx
|
|
@@ -2383,7 +2333,7 @@ import {
|
|
|
2383
2333
|
existsSync as existsSync3,
|
|
2384
2334
|
mkdirSync as mkdirSync3,
|
|
2385
2335
|
readdirSync,
|
|
2386
|
-
writeFileSync as
|
|
2336
|
+
writeFileSync as writeFileSync3
|
|
2387
2337
|
} from "node:fs";
|
|
2388
2338
|
import { dirname as dirname3, join as join3, resolve as resolve2 } from "node:path";
|
|
2389
2339
|
function findExecutantLocalDir(startDir) {
|
|
@@ -2422,7 +2372,7 @@ function onWorkflowStart(ctx, s) {
|
|
|
2422
2372
|
mkdirSync3(ctx.logDir, { recursive: true });
|
|
2423
2373
|
mkdirSync3(ctx.highlightsDir, { recursive: true });
|
|
2424
2374
|
const logFile = join3(ctx.logDir, `${ctx.ts}_${ctx.slug}.log`);
|
|
2425
|
-
|
|
2375
|
+
writeFileSync3(
|
|
2426
2376
|
logFile,
|
|
2427
2377
|
`# Execution Log
|
|
2428
2378
|
Task: ${ctx.slug}
|
|
@@ -2501,7 +2451,7 @@ function complexSequenceHeader(ctx, s) {
|
|
|
2501
2451
|
}
|
|
2502
2452
|
function createComplexSequenceFile(ctx, s) {
|
|
2503
2453
|
const path = highlightPath(ctx, s.stepIndex, "complex_sequence");
|
|
2504
|
-
|
|
2454
|
+
writeFileSync3(path, complexSequenceHeader(ctx, s));
|
|
2505
2455
|
return path;
|
|
2506
2456
|
}
|
|
2507
2457
|
function onTool(ctx, s, tool, input) {
|
|
@@ -2519,7 +2469,7 @@ function onTool(ctx, s, tool, input) {
|
|
|
2519
2469
|
return { ...s, toolCount, complexSequenceFile };
|
|
2520
2470
|
}
|
|
2521
2471
|
function saveJudgeHighlight(ctx, s, verdict, text) {
|
|
2522
|
-
|
|
2472
|
+
writeFileSync3(
|
|
2523
2473
|
highlightPath(ctx, s.stepIndex, `judge_${verdict}`),
|
|
2524
2474
|
buildHighlightHeader(ctx, s, `Judge Verdict: ${verdict}`, [
|
|
2525
2475
|
`**Attempt:** ${s.judgeAttempt}`
|
|
@@ -2540,7 +2490,7 @@ var LOG_MATCHERS = [
|
|
|
2540
2490
|
pattern: /\[self-healing\].*failed.*exit\s+(\d+)/i,
|
|
2541
2491
|
apply: (ctx, s, _text, match) => {
|
|
2542
2492
|
const selfHealingFile = highlightPath(ctx, s.stepIndex, "self_healing");
|
|
2543
|
-
|
|
2493
|
+
writeFileSync3(
|
|
2544
2494
|
selfHealingFile,
|
|
2545
2495
|
buildHighlightHeader(ctx, s, "Self-Healing Activation") + [
|
|
2546
2496
|
"## \u274C Failure Detected",
|
|
@@ -2611,7 +2561,7 @@ ${"\u2501".repeat(51)}
|
|
|
2611
2561
|
);
|
|
2612
2562
|
const indexFile = join3(ctx.highlightsDir, "README.md");
|
|
2613
2563
|
if (!existsSync3(indexFile)) {
|
|
2614
|
-
|
|
2564
|
+
writeFileSync3(
|
|
2615
2565
|
indexFile,
|
|
2616
2566
|
[
|
|
2617
2567
|
"# Execution Highlights",
|
|
@@ -2710,7 +2660,7 @@ import {
|
|
|
2710
2660
|
mkdirSync as mkdirSync4,
|
|
2711
2661
|
readdirSync as readdirSync2,
|
|
2712
2662
|
readFileSync as readFileSync6,
|
|
2713
|
-
writeFileSync as
|
|
2663
|
+
writeFileSync as writeFileSync4
|
|
2714
2664
|
} from "node:fs";
|
|
2715
2665
|
import { basename as basename2, dirname as dirname4, join as join4, resolve as resolve3 } from "node:path";
|
|
2716
2666
|
import { spawnSync } from "node:child_process";
|
|
@@ -2855,8 +2805,8 @@ Response: ${response.trim()}`
|
|
|
2855
2805
|
const slug = slugify(taskName, 40);
|
|
2856
2806
|
const improvedFile = join4(backlogDir, `${ts}-${slug}-improved.yaml`);
|
|
2857
2807
|
const changelogFile = join4(backlogDir, `${ts}-${slug}-changelog.md`);
|
|
2858
|
-
|
|
2859
|
-
|
|
2808
|
+
writeFileSync4(improvedFile, improvedYaml + "\n", "utf8");
|
|
2809
|
+
writeFileSync4(changelogFile, changelog + "\n", "utf8");
|
|
2860
2810
|
console.log(`\u2705 Improved task saved: ${improvedFile}`);
|
|
2861
2811
|
console.log(`\u2705 Changelog saved: ${changelogFile}`);
|
|
2862
2812
|
console.log(`
|
|
@@ -2876,22 +2826,10 @@ function extractJson(text) {
|
|
|
2876
2826
|
|
|
2877
2827
|
// src/types.ts
|
|
2878
2828
|
var InterjectChannel = class {
|
|
2879
|
-
sender = null;
|
|
2880
2829
|
_queue = [];
|
|
2881
|
-
/** Called by
|
|
2882
|
-
register(sender) {
|
|
2883
|
-
this.sender = sender;
|
|
2884
|
-
for (const msg of this._queue) sender(msg);
|
|
2885
|
-
this._queue = [];
|
|
2886
|
-
}
|
|
2887
|
-
/** Called by runClaude when a Claude step ends. */
|
|
2888
|
-
unregister() {
|
|
2889
|
-
this.sender = null;
|
|
2890
|
-
}
|
|
2891
|
-
/** Called by the TUI. Delivers immediately if a Claude step is running, else queues. */
|
|
2830
|
+
/** Called by the TUI when the user submits an interjection message. */
|
|
2892
2831
|
interject(message) {
|
|
2893
|
-
|
|
2894
|
-
else this._queue.push(message);
|
|
2832
|
+
this._queue.push(message);
|
|
2895
2833
|
}
|
|
2896
2834
|
/** Drains and returns any queued messages (for non-Claude steps to consume). */
|
|
2897
2835
|
consumeQueue() {
|