executant 1.9.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -156,6 +156,15 @@ function loadWorkflow(filePath2) {
|
|
|
156
156
|
${detail}`);
|
|
157
157
|
}
|
|
158
158
|
const vars = doc.vars ?? {};
|
|
159
|
+
const seen = /* @__PURE__ */ new Set();
|
|
160
|
+
for (const step of doc.steps) {
|
|
161
|
+
if (seen.has(step.name)) {
|
|
162
|
+
throw new Error(
|
|
163
|
+
`Duplicate step name "${step.name}" \u2014 step names must be unique within a workflow`
|
|
164
|
+
);
|
|
165
|
+
}
|
|
166
|
+
seen.add(step.name);
|
|
167
|
+
}
|
|
159
168
|
return {
|
|
160
169
|
goal: doc.goal,
|
|
161
170
|
vars,
|
|
@@ -370,25 +379,12 @@ async function* runCommand(task) {
|
|
|
370
379
|
// src/tasks/claude.ts
|
|
371
380
|
import { execSync, spawn as spawn2 } from "node:child_process";
|
|
372
381
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
382
|
+
var METHODOLOGY = loadPrompt("development-methodology");
|
|
373
383
|
var DEFAULT_TOOLS = ["Read", "Edit", "Write", "Bash", "Glob", "Grep"];
|
|
374
|
-
function resolveClaudePath() {
|
|
375
|
-
try {
|
|
376
|
-
return execSync("which claude", { env: process.env }).toString().trim();
|
|
377
|
-
} catch {
|
|
378
|
-
throw new Error(
|
|
379
|
-
"claude CLI not found. Ensure it is installed and in PATH.\n brew install claude OR npm install -g @anthropic-ai/claude-code"
|
|
380
|
-
);
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
async function* runClaude(task) {
|
|
384
|
+
function buildClaudeArgs(task) {
|
|
384
385
|
const allowedTools = task.allowedTools ?? DEFAULT_TOOLS;
|
|
385
|
-
yield {
|
|
386
|
-
type: "log",
|
|
387
|
-
level: "info",
|
|
388
|
-
text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
|
|
389
|
-
};
|
|
390
386
|
const permissionMode = task.permissionMode ?? "bypassPermissions";
|
|
391
|
-
|
|
387
|
+
return [
|
|
392
388
|
"--print",
|
|
393
389
|
task.prompt,
|
|
394
390
|
"--output-format",
|
|
@@ -402,6 +398,23 @@ async function* runClaude(task) {
|
|
|
402
398
|
...task.appendSystemPrompt ? ["--append-system-prompt", task.appendSystemPrompt] : [],
|
|
403
399
|
...task.jsonSchema ? ["--json-schema", JSON.stringify(task.jsonSchema)] : []
|
|
404
400
|
];
|
|
401
|
+
}
|
|
402
|
+
function resolveClaudePath() {
|
|
403
|
+
try {
|
|
404
|
+
return execSync("which claude", { env: process.env }).toString().trim();
|
|
405
|
+
} catch {
|
|
406
|
+
throw new Error(
|
|
407
|
+
"claude CLI not found. Ensure it is installed and in PATH.\n brew install claude OR npm install -g @anthropic-ai/claude-code"
|
|
408
|
+
);
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
async function* runClaude(task) {
|
|
412
|
+
yield {
|
|
413
|
+
type: "log",
|
|
414
|
+
level: "info",
|
|
415
|
+
text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
|
|
416
|
+
};
|
|
417
|
+
const args = buildClaudeArgs(task);
|
|
405
418
|
const claudeBin = resolveClaudePath();
|
|
406
419
|
let proc;
|
|
407
420
|
try {
|
|
@@ -410,7 +423,9 @@ async function* runClaude(task) {
|
|
|
410
423
|
env: { ...process.env }
|
|
411
424
|
});
|
|
412
425
|
} catch (err) {
|
|
413
|
-
throw new Error(
|
|
426
|
+
throw new Error(
|
|
427
|
+
`Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`
|
|
428
|
+
);
|
|
414
429
|
}
|
|
415
430
|
const cleanup = () => {
|
|
416
431
|
try {
|
|
@@ -480,7 +495,10 @@ function isObject(v) {
|
|
|
480
495
|
return typeof v === "object" && v !== null && !Array.isArray(v);
|
|
481
496
|
}
|
|
482
497
|
function getArray(obj, ...keys) {
|
|
483
|
-
const result = keys.reduce(
|
|
498
|
+
const result = keys.reduce(
|
|
499
|
+
(cur, k) => isObject(cur) ? cur[k] : null,
|
|
500
|
+
obj
|
|
501
|
+
);
|
|
484
502
|
return Array.isArray(result) ? result : [];
|
|
485
503
|
}
|
|
486
504
|
function getString(obj, key) {
|
|
@@ -496,7 +514,9 @@ async function runClaudeStructured(task, schema) {
|
|
|
496
514
|
else if (event.type === "output:text") lines.push(event.text);
|
|
497
515
|
}
|
|
498
516
|
if (structuredOutput === void 0 && process.env["NODE_ENV"] !== "test") {
|
|
499
|
-
console.warn(
|
|
517
|
+
console.warn(
|
|
518
|
+
"[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing"
|
|
519
|
+
);
|
|
500
520
|
}
|
|
501
521
|
const data = structuredOutput ?? JSON.parse(extractJsonObject(lines.join("").trim()));
|
|
502
522
|
return schema.parse(data);
|
|
@@ -611,7 +631,11 @@ async function* runForEach(task) {
|
|
|
611
631
|
};
|
|
612
632
|
}
|
|
613
633
|
try {
|
|
614
|
-
|
|
634
|
+
for await (const event of runStep(substituted)) {
|
|
635
|
+
if (event.type !== "step:iteration" && event.type !== "step:inner") {
|
|
636
|
+
yield event;
|
|
637
|
+
}
|
|
638
|
+
}
|
|
615
639
|
} catch (err) {
|
|
616
640
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
617
641
|
if (!substituted.continueOnError) {
|
|
@@ -1347,7 +1371,7 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
|
|
|
1347
1371
|
maxVisible: MAX_VISIBLE_ITERATIONS
|
|
1348
1372
|
}
|
|
1349
1373
|
) : null
|
|
1350
|
-
] },
|
|
1374
|
+
] }, i)) }),
|
|
1351
1375
|
activeTask && /* @__PURE__ */ jsx5(
|
|
1352
1376
|
LogPane,
|
|
1353
1377
|
{
|
|
@@ -1508,7 +1532,8 @@ async function runPass3Judge(description, workflow2) {
|
|
|
1508
1532
|
}),
|
|
1509
1533
|
allowedTools: [],
|
|
1510
1534
|
permissionMode: "default",
|
|
1511
|
-
model: "sonnet"
|
|
1535
|
+
model: "sonnet",
|
|
1536
|
+
appendSystemPrompt: METHODOLOGY
|
|
1512
1537
|
};
|
|
1513
1538
|
return await runClaudeStructured(task, PlanJudgeOutputSchema);
|
|
1514
1539
|
} catch {
|
|
@@ -1616,7 +1641,8 @@ async function* streamPlan(args) {
|
|
|
1616
1641
|
}),
|
|
1617
1642
|
allowedTools: ["Read", "Glob", "Grep"],
|
|
1618
1643
|
permissionMode: "bypassPermissions",
|
|
1619
|
-
model: "opus"
|
|
1644
|
+
model: "opus",
|
|
1645
|
+
appendSystemPrompt: METHODOLOGY
|
|
1620
1646
|
};
|
|
1621
1647
|
for await (const event of runClaude(researchTask)) {
|
|
1622
1648
|
if (event.type === "output:tool") {
|
|
@@ -1678,7 +1704,9 @@ ${basePrompt}` : basePrompt,
|
|
|
1678
1704
|
allowedTools: [],
|
|
1679
1705
|
permissionMode: "bypassPermissions",
|
|
1680
1706
|
model: skipResearch ? "sonnet" : "opus",
|
|
1681
|
-
appendSystemPrompt: PLAN_SYSTEM_RULES,
|
|
1707
|
+
appendSystemPrompt: `${METHODOLOGY}
|
|
1708
|
+
|
|
1709
|
+
${PLAN_SYSTEM_RULES}`,
|
|
1682
1710
|
jsonSchema: WORKFLOW_JSON_SCHEMA
|
|
1683
1711
|
};
|
|
1684
1712
|
let structuredOutput;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# ============================================================================
|
|
2
|
+
# DEV APPROACH PROMPT
|
|
3
|
+
# ============================================================================
|
|
4
|
+
# Purpose: Eval-only template for testing development methodology adherence.
|
|
5
|
+
# Asks Claude to verbalize its process so behavioral criteria can be
|
|
6
|
+
# judged against the injected methodology system prompt.
|
|
7
|
+
# Used by: evals/development-methodology.eval.yaml
|
|
8
|
+
# Triggered when: npm run eval evals/development-methodology.eval.yaml
|
|
9
|
+
#
|
|
10
|
+
# Placeholders:
|
|
11
|
+
# {{TASK}} - The programming task to reason about
|
|
12
|
+
# ============================================================================
|
|
13
|
+
|
|
14
|
+
{{TASK}}
|
|
15
|
+
|
|
16
|
+
Before writing any code, briefly describe: what is still unclear and any assumptions you're making, what you need to learn or inspect first, how you would break this into slices, and how you would verify the implementation is correct.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# ============================================================================
|
|
2
|
+
# DEVELOPMENT METHODOLOGY
|
|
3
|
+
# ============================================================================
|
|
4
|
+
# Purpose: Defines the software development loop injected into every Claude
|
|
5
|
+
# step that executant runs.
|
|
6
|
+
# Used by: src/tasks/claude.ts via --append-system-prompt
|
|
7
|
+
# Triggered when: Every Claude step invocation
|
|
8
|
+
# ============================================================================
|
|
9
|
+
|
|
10
|
+
Critical rules — these apply to every task, always:
|
|
11
|
+
|
|
12
|
+
1. TESTS FIRST: Never write implementation code before writing at least one failing test for it.
|
|
13
|
+
Wrong: create rate-limiter.ts → create rate-limiter.test.ts
|
|
14
|
+
Right: create rate-limiter.test.ts (failing) → create rate-limiter.ts to make it pass
|
|
15
|
+
Wrong slice order: Slice 1: write counter → Slice 2: write middleware → Slice 3: write tests
|
|
16
|
+
Right slice order: Slice 1: [test for counter, then counter] → Slice 2: [test for middleware, then middleware]
|
|
17
|
+
The test file always exists and fails before the implementation code for that feature is written.
|
|
18
|
+
|
|
19
|
+
2. VERIFICATION SEQUENCE: After every meaningful code change, run these four steps in exact order and fix all failures before continuing:
|
|
20
|
+
lint → typecheck → test → build
|
|
21
|
+
Never say "run tests" as your only verification step. Always name all four.
|
|
22
|
+
|
|
23
|
+
3. ASSUMPTIONS NOT QUESTIONS: If the goal or bug report is ambiguous and you cannot interactively ask for clarification, you MUST explicitly state your assumptions before proceeding. Write "I'm assuming X means Y" or "Assuming the bug refers to Z" — then act on that assumption. Do not proceed silently on an implicit assumption.
|
|
24
|
+
|
|
25
|
+
4. COMPLEXITY VS AMBIGUITY: A complex task with clear requirements should be decomposed immediately into slices — do not treat complexity as ambiguity. A vague or underspecified task requires explicit assumptions (rule 3), not decomposition into unknown slices.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
Knowledge loop — repeat until sufficient knowledge is acquired. Always in this order:
|
|
30
|
+
- Inspect existing code
|
|
31
|
+
- Inspect architecture and module boundaries
|
|
32
|
+
- Inspect APIs/contracts
|
|
33
|
+
- Inspect similar implementations and conventions/patterns
|
|
34
|
+
- Identify unknowns/risks
|
|
35
|
+
- Read external documentation only when internal inspection is insufficient
|
|
36
|
+
|
|
37
|
+
If uncertainty remains: build experiments/spikes and validate assumptions.
|
|
38
|
+
|
|
39
|
+
Decomposition loop — repeat until solid:
|
|
40
|
+
- Split into independently shippable slices
|
|
41
|
+
- Order by dependency and risk (riskiest first)
|
|
42
|
+
- Choose next smallest shippable slice
|
|
43
|
+
|
|
44
|
+
For each slice:
|
|
45
|
+
|
|
46
|
+
Spec loop — repeat until precise:
|
|
47
|
+
- Write behavior spec
|
|
48
|
+
- Define inputs, outputs, edge cases, failure modes, acceptance criteria
|
|
49
|
+
|
|
50
|
+
Test loop — apply rule 1. Repeat until tests express the full spec:
|
|
51
|
+
- Write failing tests before any implementation code
|
|
52
|
+
- Review coverage against spec
|
|
53
|
+
- Add missing cases
|
|
54
|
+
|
|
55
|
+
Implementation loop — build the smallest implementation that makes the tests pass.
|
|
56
|
+
|
|
57
|
+
Verification loop — apply rule 2 after every meaningful change:
|
|
58
|
+
1. lint
|
|
59
|
+
2. typecheck
|
|
60
|
+
3. test
|
|
61
|
+
4. build
|
|
62
|
+
|
|
63
|
+
Spec-check loop — repeat until implementation matches spec:
|
|
64
|
+
- Compare code against acceptance criteria
|
|
65
|
+
- Add test for any gap → fix gap → rerun lint → typecheck → test → build
|
|
66
|
+
|
|
67
|
+
Refactor loop — repeat until maintainable:
|
|
68
|
+
- Simplify names, remove duplication, improve boundaries
|
|
69
|
+
- Rerun lint → typecheck → test → build after every change
|
|
70
|
+
|
|
71
|
+
Commit — one slice = one commit.
|