npm - executant - Versions diffs - 1.9.0 → 1.10.0 - Mend

executant 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.js +52 -24
package/dist/prompts/dev-approach.txt +16 -0
package/dist/prompts/development-methodology.txt +71 -0
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -156,6 +156,15 @@ function loadWorkflow(filePath2) {
 ${detail}`);
   }
   const vars = doc.vars ?? {};
+  const seen = /* @__PURE__ */ new Set();
+  for (const step of doc.steps) {
+    if (seen.has(step.name)) {
+      throw new Error(
+        `Duplicate step name "${step.name}" \u2014 step names must be unique within a workflow`
+      );
+    }
+    seen.add(step.name);
+  }
   return {
     goal: doc.goal,
     vars,
@@ -370,25 +379,12 @@ async function* runCommand(task) {
 // src/tasks/claude.ts
 import { execSync, spawn as spawn2 } from "node:child_process";
 import { zodToJsonSchema } from "zod-to-json-schema";
+var METHODOLOGY = loadPrompt("development-methodology");
 var DEFAULT_TOOLS = ["Read", "Edit", "Write", "Bash", "Glob", "Grep"];
-function resolveClaudePath() {
-  try {
-    return execSync("which claude", { env: process.env }).toString().trim();
-  } catch {
-    throw new Error(
-      "claude CLI not found. Ensure it is installed and in PATH.\n  brew install claude  OR  npm install -g @anthropic-ai/claude-code"
-    );
-  }
-}
-async function* runClaude(task) {
+function buildClaudeArgs(task) {
   const allowedTools = task.allowedTools ?? DEFAULT_TOOLS;
-  yield {
-    type: "log",
-    level: "info",
-    text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
-  };
   const permissionMode = task.permissionMode ?? "bypassPermissions";
-  const args = [
+  return [
     "--print",
     task.prompt,
     "--output-format",
@@ -402,6 +398,23 @@ async function* runClaude(task) {
     ...task.appendSystemPrompt ? ["--append-system-prompt", task.appendSystemPrompt] : [],
     ...task.jsonSchema ? ["--json-schema", JSON.stringify(task.jsonSchema)] : []
   ];
+}
+function resolveClaudePath() {
+  try {
+    return execSync("which claude", { env: process.env }).toString().trim();
+  } catch {
+    throw new Error(
+      "claude CLI not found. Ensure it is installed and in PATH.\n  brew install claude  OR  npm install -g @anthropic-ai/claude-code"
+    );
+  }
+}
+async function* runClaude(task) {
+  yield {
+    type: "log",
+    level: "info",
+    text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
+  };
+  const args = buildClaudeArgs(task);
   const claudeBin = resolveClaudePath();
   let proc;
   try {
@@ -410,7 +423,9 @@ async function* runClaude(task) {
       env: { ...process.env }
     });
   } catch (err) {
-    throw new Error(`Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`);
+    throw new Error(
+      `Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`
+    );
   }
   const cleanup = () => {
     try {
@@ -480,7 +495,10 @@ function isObject(v) {
   return typeof v === "object" && v !== null && !Array.isArray(v);
 }
 function getArray(obj, ...keys) {
-  const result = keys.reduce((cur, k) => isObject(cur) ? cur[k] : null, obj);
+  const result = keys.reduce(
+    (cur, k) => isObject(cur) ? cur[k] : null,
+    obj
+  );
   return Array.isArray(result) ? result : [];
 }
 function getString(obj, key) {
@@ -496,7 +514,9 @@ async function runClaudeStructured(task, schema) {
     else if (event.type === "output:text") lines.push(event.text);
   }
   if (structuredOutput === void 0 && process.env["NODE_ENV"] !== "test") {
-    console.warn("[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing");
+    console.warn(
+      "[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing"
+    );
   }
   const data = structuredOutput ?? JSON.parse(extractJsonObject(lines.join("").trim()));
   return schema.parse(data);
@@ -611,7 +631,11 @@ async function* runForEach(task) {
         };
       }
       try {
-        yield* runStep(substituted);
+        for await (const event of runStep(substituted)) {
+          if (event.type !== "step:iteration" && event.type !== "step:inner") {
+            yield event;
+          }
+        }
       } catch (err) {
         const error = err instanceof Error ? err : new Error(String(err));
         if (!substituted.continueOnError) {
@@ -1347,7 +1371,7 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
           maxVisible: MAX_VISIBLE_ITERATIONS
         }
       ) : null
-    ] }, taskState.task.name)) }),
+    ] }, i)) }),
     activeTask && /* @__PURE__ */ jsx5(
       LogPane,
       {
@@ -1508,7 +1532,8 @@ async function runPass3Judge(description, workflow2) {
       }),
       allowedTools: [],
       permissionMode: "default",
-      model: "sonnet"
+      model: "sonnet",
+      appendSystemPrompt: METHODOLOGY
     };
     return await runClaudeStructured(task, PlanJudgeOutputSchema);
   } catch {
@@ -1616,7 +1641,8 @@ async function* streamPlan(args) {
         }),
         allowedTools: ["Read", "Glob", "Grep"],
         permissionMode: "bypassPermissions",
-        model: "opus"
+        model: "opus",
+        appendSystemPrompt: METHODOLOGY
       };
       for await (const event of runClaude(researchTask)) {
         if (event.type === "output:tool") {
@@ -1678,7 +1704,9 @@ ${basePrompt}` : basePrompt,
       allowedTools: [],
       permissionMode: "bypassPermissions",
       model: skipResearch ? "sonnet" : "opus",
-      appendSystemPrompt: PLAN_SYSTEM_RULES,
+      appendSystemPrompt: `${METHODOLOGY}
+${PLAN_SYSTEM_RULES}`,
       jsonSchema: WORKFLOW_JSON_SCHEMA
     };
     let structuredOutput;

package/dist/prompts/dev-approach.txt ADDED Viewed

@@ -0,0 +1,16 @@
+# ============================================================================
+# DEV APPROACH PROMPT
+# ============================================================================
+# Purpose: Eval-only template for testing development methodology adherence.
+#          Asks Claude to verbalize its process so behavioral criteria can be
+#          judged against the injected methodology system prompt.
+# Used by: evals/development-methodology.eval.yaml
+# Triggered when: npm run eval evals/development-methodology.eval.yaml
+#
+# Placeholders:
+#   {{TASK}} - The programming task to reason about
+# ============================================================================
+{{TASK}}
+Before writing any code, briefly describe: what is still unclear and any assumptions you're making, what you need to learn or inspect first, how you would break this into slices, and how you would verify the implementation is correct.

package/dist/prompts/development-methodology.txt ADDED Viewed

@@ -0,0 +1,71 @@
+# ============================================================================
+# DEVELOPMENT METHODOLOGY
+# ============================================================================
+# Purpose: Defines the software development loop injected into every Claude
+#          step that executant runs.
+# Used by: src/tasks/claude.ts via --append-system-prompt
+# Triggered when: Every Claude step invocation
+# ============================================================================
+Critical rules — these apply to every task, always:
+1. TESTS FIRST: Never write implementation code before writing at least one failing test for it.
+   Wrong: create rate-limiter.ts → create rate-limiter.test.ts
+   Right: create rate-limiter.test.ts (failing) → create rate-limiter.ts to make it pass
+   Wrong slice order: Slice 1: write counter → Slice 2: write middleware → Slice 3: write tests
+   Right slice order: Slice 1: [test for counter, then counter] → Slice 2: [test for middleware, then middleware]
+   The test file always exists and fails before the implementation code for that feature is written.
+2. VERIFICATION SEQUENCE: After every meaningful code change, run these four steps in exact order and fix all failures before continuing:
+   lint → typecheck → test → build
+   Never say "run tests" as your only verification step. Always name all four.
+3. ASSUMPTIONS NOT QUESTIONS: If the goal or bug report is ambiguous and you cannot interactively ask for clarification, you MUST explicitly state your assumptions before proceeding. Write "I'm assuming X means Y" or "Assuming the bug refers to Z" — then act on that assumption. Do not proceed silently on an implicit assumption.
+4. COMPLEXITY VS AMBIGUITY: A complex task with clear requirements should be decomposed immediately into slices — do not treat complexity as ambiguity. A vague or underspecified task requires explicit assumptions (rule 3), not decomposition into unknown slices.
+---
+Knowledge loop — repeat until sufficient knowledge is acquired. Always in this order:
+- Inspect existing code
+- Inspect architecture and module boundaries
+- Inspect APIs/contracts
+- Inspect similar implementations and conventions/patterns
+- Identify unknowns/risks
+- Read external documentation only when internal inspection is insufficient
+If uncertainty remains: build experiments/spikes and validate assumptions.
+Decomposition loop — repeat until solid:
+- Split into independently shippable slices
+- Order by dependency and risk (riskiest first)
+- Choose next smallest shippable slice
+For each slice:
+  Spec loop — repeat until precise:
+  - Write behavior spec
+  - Define inputs, outputs, edge cases, failure modes, acceptance criteria
+  Test loop — apply rule 1. Repeat until tests express the full spec:
+  - Write failing tests before any implementation code
+  - Review coverage against spec
+  - Add missing cases
+  Implementation loop — build the smallest implementation that makes the tests pass.
+  Verification loop — apply rule 2 after every meaningful change:
+    1. lint
+    2. typecheck
+    3. test
+    4. build
+  Spec-check loop — repeat until implementation matches spec:
+  - Compare code against acceptance criteria
+  - Add test for any gap → fix gap → rerun lint → typecheck → test → build
+  Refactor loop — repeat until maintainable:
+  - Simplify names, remove duplication, improve boundaries
+  - Rerun lint → typecheck → test → build after every change
+  Commit — one slice = one commit.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "executant",
-  "version": "1.9.0",
+  "version": "1.10.0",
   "description": "Harness for YAML-defined workflows that enables stepping through Claude sessions and bash commands",
   "repository": {
     "type": "git",