executant 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -156,6 +156,15 @@ function loadWorkflow(filePath2) {
156
156
  ${detail}`);
157
157
  }
158
158
  const vars = doc.vars ?? {};
159
+ const seen = /* @__PURE__ */ new Set();
160
+ for (const step of doc.steps) {
161
+ if (seen.has(step.name)) {
162
+ throw new Error(
163
+ `Duplicate step name "${step.name}" \u2014 step names must be unique within a workflow`
164
+ );
165
+ }
166
+ seen.add(step.name);
167
+ }
159
168
  return {
160
169
  goal: doc.goal,
161
170
  vars,
@@ -370,25 +379,12 @@ async function* runCommand(task) {
370
379
  // src/tasks/claude.ts
371
380
  import { execSync, spawn as spawn2 } from "node:child_process";
372
381
  import { zodToJsonSchema } from "zod-to-json-schema";
382
+ var METHODOLOGY = loadPrompt("development-methodology");
373
383
  var DEFAULT_TOOLS = ["Read", "Edit", "Write", "Bash", "Glob", "Grep"];
374
- function resolveClaudePath() {
375
- try {
376
- return execSync("which claude", { env: process.env }).toString().trim();
377
- } catch {
378
- throw new Error(
379
- "claude CLI not found. Ensure it is installed and in PATH.\n brew install claude OR npm install -g @anthropic-ai/claude-code"
380
- );
381
- }
382
- }
383
- async function* runClaude(task) {
384
+ function buildClaudeArgs(task) {
384
385
  const allowedTools = task.allowedTools ?? DEFAULT_TOOLS;
385
- yield {
386
- type: "log",
387
- level: "info",
388
- text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
389
- };
390
386
  const permissionMode = task.permissionMode ?? "bypassPermissions";
391
- const args = [
387
+ return [
392
388
  "--print",
393
389
  task.prompt,
394
390
  "--output-format",
@@ -402,6 +398,23 @@ async function* runClaude(task) {
402
398
  ...task.appendSystemPrompt ? ["--append-system-prompt", task.appendSystemPrompt] : [],
403
399
  ...task.jsonSchema ? ["--json-schema", JSON.stringify(task.jsonSchema)] : []
404
400
  ];
401
+ }
402
+ function resolveClaudePath() {
403
+ try {
404
+ return execSync("which claude", { env: process.env }).toString().trim();
405
+ } catch {
406
+ throw new Error(
407
+ "claude CLI not found. Ensure it is installed and in PATH.\n brew install claude OR npm install -g @anthropic-ai/claude-code"
408
+ );
409
+ }
410
+ }
411
+ async function* runClaude(task) {
412
+ yield {
413
+ type: "log",
414
+ level: "info",
415
+ text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
416
+ };
417
+ const args = buildClaudeArgs(task);
405
418
  const claudeBin = resolveClaudePath();
406
419
  let proc;
407
420
  try {
@@ -410,7 +423,9 @@ async function* runClaude(task) {
410
423
  env: { ...process.env }
411
424
  });
412
425
  } catch (err) {
413
- throw new Error(`Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`);
426
+ throw new Error(
427
+ `Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`
428
+ );
414
429
  }
415
430
  const cleanup = () => {
416
431
  try {
@@ -480,7 +495,10 @@ function isObject(v) {
480
495
  return typeof v === "object" && v !== null && !Array.isArray(v);
481
496
  }
482
497
  function getArray(obj, ...keys) {
483
- const result = keys.reduce((cur, k) => isObject(cur) ? cur[k] : null, obj);
498
+ const result = keys.reduce(
499
+ (cur, k) => isObject(cur) ? cur[k] : null,
500
+ obj
501
+ );
484
502
  return Array.isArray(result) ? result : [];
485
503
  }
486
504
  function getString(obj, key) {
@@ -496,7 +514,9 @@ async function runClaudeStructured(task, schema) {
496
514
  else if (event.type === "output:text") lines.push(event.text);
497
515
  }
498
516
  if (structuredOutput === void 0 && process.env["NODE_ENV"] !== "test") {
499
- console.warn("[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing");
517
+ console.warn(
518
+ "[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing"
519
+ );
500
520
  }
501
521
  const data = structuredOutput ?? JSON.parse(extractJsonObject(lines.join("").trim()));
502
522
  return schema.parse(data);
@@ -611,7 +631,11 @@ async function* runForEach(task) {
611
631
  };
612
632
  }
613
633
  try {
614
- yield* runStep(substituted);
634
+ for await (const event of runStep(substituted)) {
635
+ if (event.type !== "step:iteration" && event.type !== "step:inner") {
636
+ yield event;
637
+ }
638
+ }
615
639
  } catch (err) {
616
640
  const error = err instanceof Error ? err : new Error(String(err));
617
641
  if (!substituted.continueOnError) {
@@ -1347,7 +1371,7 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
1347
1371
  maxVisible: MAX_VISIBLE_ITERATIONS
1348
1372
  }
1349
1373
  ) : null
1350
- ] }, taskState.task.name)) }),
1374
+ ] }, i)) }),
1351
1375
  activeTask && /* @__PURE__ */ jsx5(
1352
1376
  LogPane,
1353
1377
  {
@@ -1508,7 +1532,8 @@ async function runPass3Judge(description, workflow2) {
1508
1532
  }),
1509
1533
  allowedTools: [],
1510
1534
  permissionMode: "default",
1511
- model: "sonnet"
1535
+ model: "sonnet",
1536
+ appendSystemPrompt: METHODOLOGY
1512
1537
  };
1513
1538
  return await runClaudeStructured(task, PlanJudgeOutputSchema);
1514
1539
  } catch {
@@ -1616,7 +1641,8 @@ async function* streamPlan(args) {
1616
1641
  }),
1617
1642
  allowedTools: ["Read", "Glob", "Grep"],
1618
1643
  permissionMode: "bypassPermissions",
1619
- model: "opus"
1644
+ model: "opus",
1645
+ appendSystemPrompt: METHODOLOGY
1620
1646
  };
1621
1647
  for await (const event of runClaude(researchTask)) {
1622
1648
  if (event.type === "output:tool") {
@@ -1678,7 +1704,9 @@ ${basePrompt}` : basePrompt,
1678
1704
  allowedTools: [],
1679
1705
  permissionMode: "bypassPermissions",
1680
1706
  model: skipResearch ? "sonnet" : "opus",
1681
- appendSystemPrompt: PLAN_SYSTEM_RULES,
1707
+ appendSystemPrompt: `${METHODOLOGY}
1708
+
1709
+ ${PLAN_SYSTEM_RULES}`,
1682
1710
  jsonSchema: WORKFLOW_JSON_SCHEMA
1683
1711
  };
1684
1712
  let structuredOutput;
@@ -0,0 +1,16 @@
1
+ # ============================================================================
2
+ # DEV APPROACH PROMPT
3
+ # ============================================================================
4
+ # Purpose: Eval-only template for testing development methodology adherence.
5
+ # Asks Claude to verbalize its process so behavioral criteria can be
6
+ # judged against the injected methodology system prompt.
7
+ # Used by: evals/development-methodology.eval.yaml
8
+ # Triggered when: npm run eval evals/development-methodology.eval.yaml
9
+ #
10
+ # Placeholders:
11
+ # {{TASK}} - The programming task to reason about
12
+ # ============================================================================
13
+
14
+ {{TASK}}
15
+
16
+ Before writing any code, briefly describe: what is still unclear and any assumptions you're making, what you need to learn or inspect first, how you would break this into slices, and how you would verify the implementation is correct.
@@ -0,0 +1,71 @@
1
+ # ============================================================================
2
+ # DEVELOPMENT METHODOLOGY
3
+ # ============================================================================
4
+ # Purpose: Defines the software development loop injected into every Claude
5
+ # step that executant runs.
6
+ # Used by: src/tasks/claude.ts via --append-system-prompt
7
+ # Triggered when: Every Claude step invocation
8
+ # ============================================================================
9
+
10
+ Critical rules — these apply to every task, always:
11
+
12
+ 1. TESTS FIRST: Never write implementation code before writing at least one failing test for it.
13
+ Wrong: create rate-limiter.ts → create rate-limiter.test.ts
14
+ Right: create rate-limiter.test.ts (failing) → create rate-limiter.ts to make it pass
15
+ Wrong slice order: Slice 1: write counter → Slice 2: write middleware → Slice 3: write tests
16
+ Right slice order: Slice 1: [test for counter, then counter] → Slice 2: [test for middleware, then middleware]
17
+ The test file always exists and fails before the implementation code for that feature is written.
18
+
19
+ 2. VERIFICATION SEQUENCE: After every meaningful code change, run these four steps in this exact order and fix all failures before continuing:
20
+ lint → typecheck → test → build
21
+ Never say "run tests" as your only verification step. Always name all four.
22
+
23
+ 3. ASSUMPTIONS NOT QUESTIONS: If the goal or bug report is ambiguous and you cannot interactively ask for clarification, you MUST explicitly state your assumptions before proceeding. Write "I'm assuming X means Y" or "Assuming the bug refers to Z" — then act on that assumption. Do not proceed silently on an implicit assumption.
24
+
25
+ 4. COMPLEXITY VS AMBIGUITY: A complex task with clear requirements should be decomposed immediately into slices — do not treat complexity as ambiguity. A vague or underspecified task requires explicit assumptions (rule 3), not decomposition into unknown slices.
26
+
27
+ ---
28
+
29
+ Knowledge loop — repeat until sufficient knowledge is acquired. Always in this order:
30
+ - Inspect existing code
31
+ - Inspect architecture and module boundaries
32
+ - Inspect APIs/contracts
33
+ - Inspect similar implementations and conventions/patterns
34
+ - Identify unknowns/risks
35
+ - Read external documentation only when internal inspection is insufficient
36
+
37
+ If uncertainty remains: build experiments/spikes and validate assumptions.
38
+
39
+ Decomposition loop — repeat until solid:
40
+ - Split into independently shippable slices
41
+ - Order by dependency and risk (riskiest first)
42
+ - Choose next smallest shippable slice
43
+
44
+ For each slice:
45
+
46
+ Spec loop — repeat until precise:
47
+ - Write behavior spec
48
+ - Define inputs, outputs, edge cases, failure modes, acceptance criteria
49
+
50
+ Test loop — apply rule 1. Repeat until tests express the full spec:
51
+ - Write failing tests before any implementation code
52
+ - Review coverage against spec
53
+ - Add missing cases
54
+
55
+ Implementation loop — build the smallest implementation that makes the tests pass.
56
+
57
+ Verification loop — apply rule 2 after every meaningful change:
58
+ 1. lint
59
+ 2. typecheck
60
+ 3. test
61
+ 4. build
62
+
63
+ Spec-check loop — repeat until implementation matches spec:
64
+ - Compare code against acceptance criteria
65
+ - Add a test for any gap → fix the gap → rerun the verification sequence (lint → typecheck → test → build)
66
+
67
+ Refactor loop — repeat until maintainable:
68
+ - Simplify names, remove duplication, improve boundaries
69
+ - Rerun lint → typecheck → test → build after every change
70
+
71
+ Commit — one slice = one commit.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "executant",
3
- "version": "1.9.0",
3
+ "version": "1.10.0",
4
4
  "description": "Harness for YAML-defined workflows that enables stepping through Claude sessions and bash commands",
5
5
  "repository": {
6
6
  "type": "git",