executant 1.9.0 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +173 -62
- package/dist/prompts/dev-approach.txt +16 -0
- package/dist/prompts/development-methodology.txt +71 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -156,6 +156,15 @@ function loadWorkflow(filePath2) {
|
|
|
156
156
|
${detail}`);
|
|
157
157
|
}
|
|
158
158
|
const vars = doc.vars ?? {};
|
|
159
|
+
const seen = /* @__PURE__ */ new Set();
|
|
160
|
+
for (const step of doc.steps) {
|
|
161
|
+
if (seen.has(step.name)) {
|
|
162
|
+
throw new Error(
|
|
163
|
+
`Duplicate step name "${step.name}" \u2014 step names must be unique within a workflow`
|
|
164
|
+
);
|
|
165
|
+
}
|
|
166
|
+
seen.add(step.name);
|
|
167
|
+
}
|
|
159
168
|
return {
|
|
160
169
|
goal: doc.goal,
|
|
161
170
|
vars,
|
|
@@ -370,25 +379,12 @@ async function* runCommand(task) {
|
|
|
370
379
|
// src/tasks/claude.ts
|
|
371
380
|
import { execSync, spawn as spawn2 } from "node:child_process";
|
|
372
381
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
382
|
+
var METHODOLOGY = loadPrompt("development-methodology");
|
|
373
383
|
var DEFAULT_TOOLS = ["Read", "Edit", "Write", "Bash", "Glob", "Grep"];
|
|
374
|
-
function
|
|
375
|
-
try {
|
|
376
|
-
return execSync("which claude", { env: process.env }).toString().trim();
|
|
377
|
-
} catch {
|
|
378
|
-
throw new Error(
|
|
379
|
-
"claude CLI not found. Ensure it is installed and in PATH.\n brew install claude OR npm install -g @anthropic-ai/claude-code"
|
|
380
|
-
);
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
async function* runClaude(task) {
|
|
384
|
+
function buildClaudeArgs(task) {
|
|
384
385
|
const allowedTools = task.allowedTools ?? DEFAULT_TOOLS;
|
|
385
|
-
yield {
|
|
386
|
-
type: "log",
|
|
387
|
-
level: "info",
|
|
388
|
-
text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
|
|
389
|
-
};
|
|
390
386
|
const permissionMode = task.permissionMode ?? "bypassPermissions";
|
|
391
|
-
|
|
387
|
+
return [
|
|
392
388
|
"--print",
|
|
393
389
|
task.prompt,
|
|
394
390
|
"--output-format",
|
|
@@ -402,6 +398,23 @@ async function* runClaude(task) {
|
|
|
402
398
|
...task.appendSystemPrompt ? ["--append-system-prompt", task.appendSystemPrompt] : [],
|
|
403
399
|
...task.jsonSchema ? ["--json-schema", JSON.stringify(task.jsonSchema)] : []
|
|
404
400
|
];
|
|
401
|
+
}
|
|
402
|
+
function resolveClaudePath() {
|
|
403
|
+
try {
|
|
404
|
+
return execSync("which claude", { env: process.env }).toString().trim();
|
|
405
|
+
} catch {
|
|
406
|
+
throw new Error(
|
|
407
|
+
"claude CLI not found. Ensure it is installed and in PATH.\n brew install claude OR npm install -g @anthropic-ai/claude-code"
|
|
408
|
+
);
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
async function* runClaude(task) {
|
|
412
|
+
yield {
|
|
413
|
+
type: "log",
|
|
414
|
+
level: "info",
|
|
415
|
+
text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
|
|
416
|
+
};
|
|
417
|
+
const args = buildClaudeArgs(task);
|
|
405
418
|
const claudeBin = resolveClaudePath();
|
|
406
419
|
let proc;
|
|
407
420
|
try {
|
|
@@ -410,7 +423,9 @@ async function* runClaude(task) {
|
|
|
410
423
|
env: { ...process.env }
|
|
411
424
|
});
|
|
412
425
|
} catch (err) {
|
|
413
|
-
throw new Error(
|
|
426
|
+
throw new Error(
|
|
427
|
+
`Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`
|
|
428
|
+
);
|
|
414
429
|
}
|
|
415
430
|
const cleanup = () => {
|
|
416
431
|
try {
|
|
@@ -480,7 +495,10 @@ function isObject(v) {
|
|
|
480
495
|
return typeof v === "object" && v !== null && !Array.isArray(v);
|
|
481
496
|
}
|
|
482
497
|
function getArray(obj, ...keys) {
|
|
483
|
-
const result = keys.reduce(
|
|
498
|
+
const result = keys.reduce(
|
|
499
|
+
(cur, k) => isObject(cur) ? cur[k] : null,
|
|
500
|
+
obj
|
|
501
|
+
);
|
|
484
502
|
return Array.isArray(result) ? result : [];
|
|
485
503
|
}
|
|
486
504
|
function getString(obj, key) {
|
|
@@ -496,7 +514,9 @@ async function runClaudeStructured(task, schema) {
|
|
|
496
514
|
else if (event.type === "output:text") lines.push(event.text);
|
|
497
515
|
}
|
|
498
516
|
if (structuredOutput === void 0 && process.env["NODE_ENV"] !== "test") {
|
|
499
|
-
console.warn(
|
|
517
|
+
console.warn(
|
|
518
|
+
"[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing"
|
|
519
|
+
);
|
|
500
520
|
}
|
|
501
521
|
const data = structuredOutput ?? JSON.parse(extractJsonObject(lines.join("").trim()));
|
|
502
522
|
return schema.parse(data);
|
|
@@ -611,7 +631,11 @@ async function* runForEach(task) {
|
|
|
611
631
|
};
|
|
612
632
|
}
|
|
613
633
|
try {
|
|
614
|
-
|
|
634
|
+
for await (const event of runStep(substituted)) {
|
|
635
|
+
if (event.type !== "step:iteration" && event.type !== "step:inner") {
|
|
636
|
+
yield event;
|
|
637
|
+
}
|
|
638
|
+
}
|
|
615
639
|
} catch (err) {
|
|
616
640
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
617
641
|
if (!substituted.continueOnError) {
|
|
@@ -863,7 +887,7 @@ init_update();
|
|
|
863
887
|
|
|
864
888
|
// src/ui/App.tsx
|
|
865
889
|
import { useEffect as useEffect2, useReducer, useState } from "react";
|
|
866
|
-
import { Box as Box5, Text as Text5, useApp, useStdin } from "ink";
|
|
890
|
+
import { Box as Box5, Text as Text5, useApp, useStdin, useStdout } from "ink";
|
|
867
891
|
|
|
868
892
|
// src/ui/KeyboardHandler.tsx
|
|
869
893
|
import { useInput } from "ink";
|
|
@@ -998,7 +1022,7 @@ function reducer(state, event) {
|
|
|
998
1022
|
case "output:text": {
|
|
999
1023
|
const idx = event.index;
|
|
1000
1024
|
if (idx >= state.tasks.length) return state;
|
|
1001
|
-
return
|
|
1025
|
+
return appendLines(state, idx, event.text);
|
|
1002
1026
|
}
|
|
1003
1027
|
case "output:tool": {
|
|
1004
1028
|
const idx = event.index;
|
|
@@ -1022,7 +1046,7 @@ function reducer(state, event) {
|
|
|
1022
1046
|
case "log": {
|
|
1023
1047
|
const idx = state.currentIndex;
|
|
1024
1048
|
if (idx >= state.tasks.length) return state;
|
|
1025
|
-
return
|
|
1049
|
+
return appendLines(state, idx, `[${event.level}] ${event.text}`);
|
|
1026
1050
|
}
|
|
1027
1051
|
default: {
|
|
1028
1052
|
const _ = event;
|
|
@@ -1031,6 +1055,11 @@ function reducer(state, event) {
|
|
|
1031
1055
|
}
|
|
1032
1056
|
}
|
|
1033
1057
|
}
|
|
1058
|
+
var ANSI_RE2 = /\x1B(?:\[[0-9;?]*[A-Za-z]|\][^\x07]*\x07)|[\r]/g;
|
|
1059
|
+
var MAX_LOG_LINES = 300;
|
|
1060
|
+
function normalizeLines(text) {
|
|
1061
|
+
return text.replace(ANSI_RE2, "").split("\n");
|
|
1062
|
+
}
|
|
1034
1063
|
function updateTask(state, index, patch) {
|
|
1035
1064
|
const tasks = state.tasks.map(
|
|
1036
1065
|
(t, i) => i === index ? { ...t, ...patch } : t
|
|
@@ -1038,9 +1067,14 @@ function updateTask(state, index, patch) {
|
|
|
1038
1067
|
return { ...state, tasks };
|
|
1039
1068
|
}
|
|
1040
1069
|
function appendLine(state, index, line) {
|
|
1070
|
+
return appendLines(state, index, line);
|
|
1071
|
+
}
|
|
1072
|
+
function appendLines(state, index, text) {
|
|
1073
|
+
const newLines = normalizeLines(text);
|
|
1041
1074
|
const tasks = state.tasks.map((t, i) => {
|
|
1042
1075
|
if (i !== index) return t;
|
|
1043
|
-
const
|
|
1076
|
+
const combined = [...t.lines, ...newLines];
|
|
1077
|
+
const lines = combined.length > MAX_LOG_LINES ? combined.slice(-MAX_LOG_LINES) : combined;
|
|
1044
1078
|
return { ...t, lines };
|
|
1045
1079
|
});
|
|
1046
1080
|
return { ...state, tasks };
|
|
@@ -1198,14 +1232,24 @@ function LogPane({ lines, isActive = false, maxLines = 15 }) {
|
|
|
1198
1232
|
if (visible.length === 0) {
|
|
1199
1233
|
return /* @__PURE__ */ jsx3(Box3, { marginTop: 1, children: /* @__PURE__ */ jsx3(Text3, { dimColor: true, children: isActive ? "\u2838 waiting for output\u2026" : "\u2014 no output yet \u2014" }) });
|
|
1200
1234
|
}
|
|
1201
|
-
return /* @__PURE__ */ jsx3(
|
|
1202
|
-
|
|
1235
|
+
return /* @__PURE__ */ jsx3(
|
|
1236
|
+
Box3,
|
|
1203
1237
|
{
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1238
|
+
flexDirection: "column",
|
|
1239
|
+
marginTop: 1,
|
|
1240
|
+
borderStyle: "single",
|
|
1241
|
+
borderColor: theme.border,
|
|
1242
|
+
paddingX: 1,
|
|
1243
|
+
children: visible.map((line, i) => /* @__PURE__ */ jsx3(
|
|
1244
|
+
LogLine,
|
|
1245
|
+
{
|
|
1246
|
+
text: line,
|
|
1247
|
+
cursor: isActive && i === visible.length - 1
|
|
1248
|
+
},
|
|
1249
|
+
i
|
|
1250
|
+
))
|
|
1251
|
+
}
|
|
1252
|
+
);
|
|
1209
1253
|
}
|
|
1210
1254
|
function LogLine({ text, cursor }) {
|
|
1211
1255
|
const suffix = cursor ? /* @__PURE__ */ jsx3(Text3, { color: theme.primary, children: " \u258C" }) : null;
|
|
@@ -1218,18 +1262,41 @@ function LogLine({ text, cursor }) {
|
|
|
1218
1262
|
suffix
|
|
1219
1263
|
] });
|
|
1220
1264
|
}
|
|
1221
|
-
if (/^\s*\$\s/.test(text))
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1265
|
+
if (/^\s*\$\s/.test(text))
|
|
1266
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
|
|
1267
|
+
text,
|
|
1268
|
+
suffix
|
|
1269
|
+
] });
|
|
1270
|
+
if (text.startsWith("[warn]"))
|
|
1271
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
|
|
1272
|
+
text,
|
|
1273
|
+
suffix
|
|
1274
|
+
] });
|
|
1275
|
+
if (text.startsWith("[error]"))
|
|
1276
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.error, children: [
|
|
1277
|
+
text,
|
|
1278
|
+
suffix
|
|
1279
|
+
] });
|
|
1280
|
+
if (/^[\s]*(✓|✔|✅|done|success|compiled|built|passed)/i.test(text) && !/\b(error|fail|failed|warn|warning)\b/i.test(text))
|
|
1281
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.success, children: [
|
|
1282
|
+
text,
|
|
1283
|
+
suffix
|
|
1284
|
+
] });
|
|
1285
|
+
if (/\b(error|failed|fail)\b/i.test(text))
|
|
1286
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.error, children: [
|
|
1287
|
+
text,
|
|
1288
|
+
suffix
|
|
1289
|
+
] });
|
|
1290
|
+
if (/\b(warn|warning)\b/i.test(text))
|
|
1291
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
|
|
1292
|
+
text,
|
|
1293
|
+
suffix
|
|
1294
|
+
] });
|
|
1295
|
+
if (/^[·…⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]/.test(text))
|
|
1296
|
+
return /* @__PURE__ */ jsxs3(Text3, { color: theme.muted, children: [
|
|
1297
|
+
text,
|
|
1298
|
+
suffix
|
|
1299
|
+
] });
|
|
1233
1300
|
return /* @__PURE__ */ jsxs3(Text3, { children: [
|
|
1234
1301
|
text,
|
|
1235
1302
|
suffix
|
|
@@ -1300,6 +1367,13 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
|
|
|
1300
1367
|
};
|
|
1301
1368
|
}, [events2, exit]);
|
|
1302
1369
|
const { isRawModeSupported } = useStdin();
|
|
1370
|
+
const { stdout } = useStdout();
|
|
1371
|
+
const terminalRows = stdout?.rows ?? 24;
|
|
1372
|
+
const FIXED_OVERHEAD = 12;
|
|
1373
|
+
const logPaneMaxLines = Math.max(
|
|
1374
|
+
5,
|
|
1375
|
+
terminalRows - FIXED_OVERHEAD - state.tasks.length
|
|
1376
|
+
);
|
|
1303
1377
|
const [tick, setTick] = useState(0);
|
|
1304
1378
|
useInterval(() => {
|
|
1305
1379
|
if (!state.endTime) setTick((t) => t + 1);
|
|
@@ -1347,12 +1421,13 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
|
|
|
1347
1421
|
maxVisible: MAX_VISIBLE_ITERATIONS
|
|
1348
1422
|
}
|
|
1349
1423
|
) : null
|
|
1350
|
-
] },
|
|
1424
|
+
] }, i)) }),
|
|
1351
1425
|
activeTask && /* @__PURE__ */ jsx5(
|
|
1352
1426
|
LogPane,
|
|
1353
1427
|
{
|
|
1354
1428
|
lines: activeTask.lines,
|
|
1355
|
-
isActive: activeTask.status === "running"
|
|
1429
|
+
isActive: activeTask.status === "running",
|
|
1430
|
+
maxLines: logPaneMaxLines
|
|
1356
1431
|
}
|
|
1357
1432
|
),
|
|
1358
1433
|
state.endTime !== void 0 && state.writtenFiles.length > 0 && /* @__PURE__ */ jsxs4(Box5, { flexDirection: "column", marginTop: 1, children: [
|
|
@@ -1508,7 +1583,8 @@ async function runPass3Judge(description, workflow2) {
|
|
|
1508
1583
|
}),
|
|
1509
1584
|
allowedTools: [],
|
|
1510
1585
|
permissionMode: "default",
|
|
1511
|
-
model: "sonnet"
|
|
1586
|
+
model: "sonnet",
|
|
1587
|
+
appendSystemPrompt: METHODOLOGY
|
|
1512
1588
|
};
|
|
1513
1589
|
return await runClaudeStructured(task, PlanJudgeOutputSchema);
|
|
1514
1590
|
} catch {
|
|
@@ -1616,7 +1692,8 @@ async function* streamPlan(args) {
|
|
|
1616
1692
|
}),
|
|
1617
1693
|
allowedTools: ["Read", "Glob", "Grep"],
|
|
1618
1694
|
permissionMode: "bypassPermissions",
|
|
1619
|
-
model: "opus"
|
|
1695
|
+
model: "opus",
|
|
1696
|
+
appendSystemPrompt: METHODOLOGY
|
|
1620
1697
|
};
|
|
1621
1698
|
for await (const event of runClaude(researchTask)) {
|
|
1622
1699
|
if (event.type === "output:tool") {
|
|
@@ -1678,7 +1755,9 @@ ${basePrompt}` : basePrompt,
|
|
|
1678
1755
|
allowedTools: [],
|
|
1679
1756
|
permissionMode: "bypassPermissions",
|
|
1680
1757
|
model: skipResearch ? "sonnet" : "opus",
|
|
1681
|
-
appendSystemPrompt:
|
|
1758
|
+
appendSystemPrompt: `${METHODOLOGY}
|
|
1759
|
+
|
|
1760
|
+
${PLAN_SYSTEM_RULES}`,
|
|
1682
1761
|
jsonSchema: WORKFLOW_JSON_SCHEMA
|
|
1683
1762
|
};
|
|
1684
1763
|
let structuredOutput;
|
|
@@ -2302,7 +2381,13 @@ async function* withLogger(gen, logger2) {
|
|
|
2302
2381
|
}
|
|
2303
2382
|
|
|
2304
2383
|
// src/retrospective.ts
|
|
2305
|
-
import {
|
|
2384
|
+
import {
|
|
2385
|
+
existsSync as existsSync3,
|
|
2386
|
+
mkdirSync as mkdirSync4,
|
|
2387
|
+
readdirSync as readdirSync2,
|
|
2388
|
+
readFileSync as readFileSync5,
|
|
2389
|
+
writeFileSync as writeFileSync4
|
|
2390
|
+
} from "node:fs";
|
|
2306
2391
|
import { basename as basename2, dirname as dirname4, join as join4, resolve as resolve3 } from "node:path";
|
|
2307
2392
|
import { spawnSync } from "node:child_process";
|
|
2308
2393
|
import { load as parseYaml2 } from "js-yaml";
|
|
@@ -2314,10 +2399,17 @@ var RetrospectiveOutputSchema = z4.object({
|
|
|
2314
2399
|
var RETROSPECTIVE_PROMPT = loadPrompt("retrospective-analysis");
|
|
2315
2400
|
async function runRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp) {
|
|
2316
2401
|
try {
|
|
2317
|
-
await doRetrospective(
|
|
2402
|
+
await doRetrospective(
|
|
2403
|
+
workflowFilePath,
|
|
2404
|
+
workflow2,
|
|
2405
|
+
highlightsDir,
|
|
2406
|
+
runTimestamp
|
|
2407
|
+
);
|
|
2318
2408
|
} catch (err) {
|
|
2319
|
-
console.warn(
|
|
2320
|
-
|
|
2409
|
+
console.warn(
|
|
2410
|
+
`
|
|
2411
|
+
Self-improvement: retrospective failed: ${getErrorMessage(err)}`
|
|
2412
|
+
);
|
|
2321
2413
|
}
|
|
2322
2414
|
}
|
|
2323
2415
|
async function doRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp) {
|
|
@@ -2328,13 +2420,17 @@ async function doRetrospective(workflowFilePath, workflow2, highlightsDir, runTi
|
|
|
2328
2420
|
const allFiles = readdirSync2(highlightsDir);
|
|
2329
2421
|
const runHighlights = allFiles.filter((f) => f.startsWith(runTimestamp) && f.endsWith(".md")).sort();
|
|
2330
2422
|
if (runHighlights.length === 0) {
|
|
2331
|
-
console.log(
|
|
2423
|
+
console.log(
|
|
2424
|
+
"\nSelf-improvement: no highlights for this run \u2014 task completed without issues, skipping."
|
|
2425
|
+
);
|
|
2332
2426
|
return;
|
|
2333
2427
|
}
|
|
2334
2428
|
const divider = "\u2501".repeat(51);
|
|
2335
2429
|
console.log(`
|
|
2336
2430
|
${divider}`);
|
|
2337
|
-
console.log(
|
|
2431
|
+
console.log(
|
|
2432
|
+
"Self-Improvement: Analyzing execution and generating improvements..."
|
|
2433
|
+
);
|
|
2338
2434
|
console.log(`${divider}
|
|
2339
2435
|
`);
|
|
2340
2436
|
console.log(`Found ${runHighlights.length} highlight(s) to analyze`);
|
|
@@ -2380,15 +2476,23 @@ ${content}`;
|
|
|
2380
2476
|
"--output-format",
|
|
2381
2477
|
"text"
|
|
2382
2478
|
],
|
|
2383
|
-
{
|
|
2479
|
+
{
|
|
2480
|
+
encoding: "utf8",
|
|
2481
|
+
maxBuffer: 10 * 1024 * 1024,
|
|
2482
|
+
stdio: ["ignore", "pipe", "pipe"]
|
|
2483
|
+
}
|
|
2384
2484
|
);
|
|
2385
2485
|
if (result.error) {
|
|
2386
|
-
console.warn(
|
|
2486
|
+
console.warn(
|
|
2487
|
+
`Self-improvement: failed to run claude: ${result.error.message}`
|
|
2488
|
+
);
|
|
2387
2489
|
return;
|
|
2388
2490
|
}
|
|
2389
2491
|
if (result.status !== 0) {
|
|
2390
2492
|
const stderr = result.stderr ?? "";
|
|
2391
|
-
console.warn(
|
|
2493
|
+
console.warn(
|
|
2494
|
+
`Self-improvement: claude exited with code ${result.status}${stderr ? ": " + stderr : ""}`
|
|
2495
|
+
);
|
|
2392
2496
|
return;
|
|
2393
2497
|
}
|
|
2394
2498
|
const response = result.stdout ?? "";
|
|
@@ -2396,13 +2500,17 @@ ${content}`;
|
|
|
2396
2500
|
try {
|
|
2397
2501
|
parsed = JSON.parse(extractJson(response));
|
|
2398
2502
|
} catch {
|
|
2399
|
-
console.warn(
|
|
2400
|
-
|
|
2503
|
+
console.warn(
|
|
2504
|
+
`Self-improvement: could not parse Claude response as JSON.
|
|
2505
|
+
Response: ${response.trim()}`
|
|
2506
|
+
);
|
|
2401
2507
|
return;
|
|
2402
2508
|
}
|
|
2403
2509
|
const zodResult = RetrospectiveOutputSchema.safeParse(parsed);
|
|
2404
2510
|
if (!zodResult.success) {
|
|
2405
|
-
console.warn(
|
|
2511
|
+
console.warn(
|
|
2512
|
+
"Self-improvement: response schema mismatch \u2014 improved YAML not saved."
|
|
2513
|
+
);
|
|
2406
2514
|
return;
|
|
2407
2515
|
}
|
|
2408
2516
|
const improvedYaml = zodResult.data.improved_yaml.trim();
|
|
@@ -2410,7 +2518,9 @@ Response: ${response.trim()}`);
|
|
|
2410
2518
|
try {
|
|
2411
2519
|
parseYaml2(improvedYaml);
|
|
2412
2520
|
} catch (err) {
|
|
2413
|
-
console.warn(
|
|
2521
|
+
console.warn(
|
|
2522
|
+
`Self-improvement: generated YAML is invalid (${getErrorMessage(err)}), skipping save.`
|
|
2523
|
+
);
|
|
2414
2524
|
return;
|
|
2415
2525
|
}
|
|
2416
2526
|
const startDir = dirname4(resolve3(workflowFilePath));
|
|
@@ -2435,7 +2545,8 @@ ${divider}`);
|
|
|
2435
2545
|
function extractJson(text) {
|
|
2436
2546
|
const start = text.indexOf("{");
|
|
2437
2547
|
const end = text.lastIndexOf("}");
|
|
2438
|
-
if (start === -1 || end === -1 || end <= start)
|
|
2548
|
+
if (start === -1 || end === -1 || end <= start)
|
|
2549
|
+
throw new Error("no JSON object found in response");
|
|
2439
2550
|
return text.slice(start, end + 1);
|
|
2440
2551
|
}
|
|
2441
2552
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# ============================================================================
|
|
2
|
+
# DEV APPROACH PROMPT
|
|
3
|
+
# ============================================================================
|
|
4
|
+
# Purpose: Eval-only template for testing development methodology adherence.
|
|
5
|
+
# Asks Claude to verbalize its process so behavioral criteria can be
|
|
6
|
+
# judged against the injected methodology system prompt.
|
|
7
|
+
# Used by: evals/development-methodology.eval.yaml
|
|
8
|
+
# Triggered when: npm run eval evals/development-methodology.eval.yaml
|
|
9
|
+
#
|
|
10
|
+
# Placeholders:
|
|
11
|
+
# {{TASK}} - The programming task to reason about
|
|
12
|
+
# ============================================================================
|
|
13
|
+
|
|
14
|
+
{{TASK}}
|
|
15
|
+
|
|
16
|
+
Before writing any code, briefly describe: what is still unclear and any assumptions you're making, what you need to learn or inspect first, how you would break this into slices, and how you would verify the implementation is correct.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# ============================================================================
|
|
2
|
+
# DEVELOPMENT METHODOLOGY
|
|
3
|
+
# ============================================================================
|
|
4
|
+
# Purpose: Defines the software development loop injected into every Claude
|
|
5
|
+
# step that executant runs.
|
|
6
|
+
# Used by: src/tasks/claude.ts via --append-system-prompt
|
|
7
|
+
# Triggered when: Every Claude step invocation
|
|
8
|
+
# ============================================================================
|
|
9
|
+
|
|
10
|
+
Critical rules — these apply to every task, always:
|
|
11
|
+
|
|
12
|
+
1. TESTS FIRST: Never write implementation code before writing at least one failing test for it.
|
|
13
|
+
Wrong: create rate-limiter.ts → create rate-limiter.test.ts
|
|
14
|
+
Right: create rate-limiter.test.ts (failing) → create rate-limiter.ts to make it pass
|
|
15
|
+
Wrong slice order: Slice 1: write counter → Slice 2: write middleware → Slice 3: write tests
|
|
16
|
+
Right slice order: Slice 1: [test for counter, then counter] → Slice 2: [test for middleware, then middleware]
|
|
17
|
+
The test file always exists and fails before the implementation code for that feature is written.
|
|
18
|
+
|
|
19
|
+
2. VERIFICATION SEQUENCE: After every meaningful code change, run these four steps in exact order and fix all failures before continuing:
|
|
20
|
+
lint → typecheck → test → build
|
|
21
|
+
Never say "run tests" as your only verification step. Always name all four.
|
|
22
|
+
|
|
23
|
+
3. ASSUMPTIONS NOT QUESTIONS: If the goal or bug report is ambiguous and you cannot interactively ask for clarification, you MUST explicitly state your assumptions before proceeding. Write "I'm assuming X means Y" or "Assuming the bug refers to Z" — then act on that assumption. Do not proceed silently on an implicit assumption.
|
|
24
|
+
|
|
25
|
+
4. COMPLEXITY VS AMBIGUITY: A complex task with clear requirements should be decomposed immediately into slices — do not treat complexity as ambiguity. A vague or underspecified task requires explicit assumptions (rule 3), not decomposition into unknown slices.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
Knowledge loop — repeat until sufficient knowledge is acquired. Always in this order:
|
|
30
|
+
- Inspect existing code
|
|
31
|
+
- Inspect architecture and module boundaries
|
|
32
|
+
- Inspect APIs/contracts
|
|
33
|
+
- Inspect similar implementations and conventions/patterns
|
|
34
|
+
- Identify unknowns/risks
|
|
35
|
+
- Read external documentation only when internal inspection is insufficient
|
|
36
|
+
|
|
37
|
+
If uncertainty remains: build experiments/spikes and validate assumptions.
|
|
38
|
+
|
|
39
|
+
Decomposition loop — repeat until solid:
|
|
40
|
+
- Split into independently shippable slices
|
|
41
|
+
- Order by dependency and risk (riskiest first)
|
|
42
|
+
- Choose next smallest shippable slice
|
|
43
|
+
|
|
44
|
+
For each slice:
|
|
45
|
+
|
|
46
|
+
Spec loop — repeat until precise:
|
|
47
|
+
- Write behavior spec
|
|
48
|
+
- Define inputs, outputs, edge cases, failure modes, acceptance criteria
|
|
49
|
+
|
|
50
|
+
Test loop — apply rule 1. Repeat until tests express the full spec:
|
|
51
|
+
- Write failing tests before any implementation code
|
|
52
|
+
- Review coverage against spec
|
|
53
|
+
- Add missing cases
|
|
54
|
+
|
|
55
|
+
Implementation loop — build the smallest implementation that makes the tests pass.
|
|
56
|
+
|
|
57
|
+
Verification loop — apply rule 2 after every meaningful change:
|
|
58
|
+
1. lint
|
|
59
|
+
2. typecheck
|
|
60
|
+
3. test
|
|
61
|
+
4. build
|
|
62
|
+
|
|
63
|
+
Spec-check loop — repeat until implementation matches spec:
|
|
64
|
+
- Compare code against acceptance criteria
|
|
65
|
+
- Add test for any gap → fix gap → rerun lint → typecheck → test → build
|
|
66
|
+
|
|
67
|
+
Refactor loop — repeat until maintainable:
|
|
68
|
+
- Simplify names, remove duplication, improve boundaries
|
|
69
|
+
- Rerun lint → typecheck → test → build after every change
|
|
70
|
+
|
|
71
|
+
Commit — one slice = one commit.
|