npm - executant - Versions diffs - 1.9.0 → 1.10.1 - Mend

executant 1.9.0 → 1.10.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.js +173 -62
package/dist/prompts/dev-approach.txt +16 -0
package/dist/prompts/development-methodology.txt +71 -0
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -156,6 +156,15 @@ function loadWorkflow(filePath2) {
 ${detail}`);
   }
   const vars = doc.vars ?? {};
+  const seen = /* @__PURE__ */ new Set();
+  for (const step of doc.steps) {
+    if (seen.has(step.name)) {
+      throw new Error(
+        `Duplicate step name "${step.name}" \u2014 step names must be unique within a workflow`
+      );
+    }
+    seen.add(step.name);
+  }
   return {
     goal: doc.goal,
     vars,
@@ -370,25 +379,12 @@ async function* runCommand(task) {
 // src/tasks/claude.ts
 import { execSync, spawn as spawn2 } from "node:child_process";
 import { zodToJsonSchema } from "zod-to-json-schema";
+var METHODOLOGY = loadPrompt("development-methodology");
 var DEFAULT_TOOLS = ["Read", "Edit", "Write", "Bash", "Glob", "Grep"];
-function resolveClaudePath() {
-  try {
-    return execSync("which claude", { env: process.env }).toString().trim();
-  } catch {
-    throw new Error(
-      "claude CLI not found. Ensure it is installed and in PATH.\n  brew install claude  OR  npm install -g @anthropic-ai/claude-code"
-    );
-  }
-}
-async function* runClaude(task) {
+function buildClaudeArgs(task) {
   const allowedTools = task.allowedTools ?? DEFAULT_TOOLS;
-  yield {
-    type: "log",
-    level: "info",
-    text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
-  };
   const permissionMode = task.permissionMode ?? "bypassPermissions";
-  const args = [
+  return [
     "--print",
     task.prompt,
     "--output-format",
@@ -402,6 +398,23 @@ async function* runClaude(task) {
     ...task.appendSystemPrompt ? ["--append-system-prompt", task.appendSystemPrompt] : [],
     ...task.jsonSchema ? ["--json-schema", JSON.stringify(task.jsonSchema)] : []
   ];
+}
+function resolveClaudePath() {
+  try {
+    return execSync("which claude", { env: process.env }).toString().trim();
+  } catch {
+    throw new Error(
+      "claude CLI not found. Ensure it is installed and in PATH.\n  brew install claude  OR  npm install -g @anthropic-ai/claude-code"
+    );
+  }
+}
+async function* runClaude(task) {
+  yield {
+    type: "log",
+    level: "info",
+    text: `claude -p "${task.prompt.slice(0, 60).replace(/\n/g, " ")}\u2026"`
+  };
+  const args = buildClaudeArgs(task);
   const claudeBin = resolveClaudePath();
   let proc;
   try {
@@ -410,7 +423,9 @@ async function* runClaude(task) {
       env: { ...process.env }
     });
   } catch (err) {
-    throw new Error(`Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`);
+    throw new Error(
+      `Failed to spawn claude (${claudeBin}): ${getErrorMessage(err)}`
+    );
   }
   const cleanup = () => {
     try {
@@ -480,7 +495,10 @@ function isObject(v) {
   return typeof v === "object" && v !== null && !Array.isArray(v);
 }
 function getArray(obj, ...keys) {
-  const result = keys.reduce((cur, k) => isObject(cur) ? cur[k] : null, obj);
+  const result = keys.reduce(
+    (cur, k) => isObject(cur) ? cur[k] : null,
+    obj
+  );
   return Array.isArray(result) ? result : [];
 }
 function getString(obj, key) {
@@ -496,7 +514,9 @@ async function runClaudeStructured(task, schema) {
     else if (event.type === "output:text") lines.push(event.text);
   }
   if (structuredOutput === void 0 && process.env["NODE_ENV"] !== "test") {
-    console.warn("[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing");
+    console.warn(
+      "[executant] runClaudeStructured: no output:structured event \u2014 falling back to text parsing"
+    );
   }
   const data = structuredOutput ?? JSON.parse(extractJsonObject(lines.join("").trim()));
   return schema.parse(data);
@@ -611,7 +631,11 @@ async function* runForEach(task) {
         };
       }
       try {
-        yield* runStep(substituted);
+        for await (const event of runStep(substituted)) {
+          if (event.type !== "step:iteration" && event.type !== "step:inner") {
+            yield event;
+          }
+        }
       } catch (err) {
         const error = err instanceof Error ? err : new Error(String(err));
         if (!substituted.continueOnError) {
@@ -863,7 +887,7 @@ init_update();
 // src/ui/App.tsx
 import { useEffect as useEffect2, useReducer, useState } from "react";
-import { Box as Box5, Text as Text5, useApp, useStdin } from "ink";
+import { Box as Box5, Text as Text5, useApp, useStdin, useStdout } from "ink";
 // src/ui/KeyboardHandler.tsx
 import { useInput } from "ink";
@@ -998,7 +1022,7 @@ function reducer(state, event) {
     case "output:text": {
       const idx = event.index;
       if (idx >= state.tasks.length) return state;
-      return appendLine(state, idx, event.text);
+      return appendLines(state, idx, event.text);
     }
     case "output:tool": {
       const idx = event.index;
@@ -1022,7 +1046,7 @@ function reducer(state, event) {
     case "log": {
       const idx = state.currentIndex;
       if (idx >= state.tasks.length) return state;
-      return appendLine(state, idx, `[${event.level}] ${event.text}`);
+      return appendLines(state, idx, `[${event.level}] ${event.text}`);
     }
     default: {
       const _ = event;
@@ -1031,6 +1055,11 @@ function reducer(state, event) {
     }
   }
 }
+var ANSI_RE2 = /\x1B(?:\[[0-9;?]*[A-Za-z]|\][^\x07]*\x07)|[\r]/g;
+var MAX_LOG_LINES = 300;
+function normalizeLines(text) {
+  return text.replace(ANSI_RE2, "").split("\n");
+}
 function updateTask(state, index, patch) {
   const tasks = state.tasks.map(
     (t, i) => i === index ? { ...t, ...patch } : t
@@ -1038,9 +1067,14 @@ function updateTask(state, index, patch) {
   return { ...state, tasks };
 }
 function appendLine(state, index, line) {
+  return appendLines(state, index, line);
+}
+function appendLines(state, index, text) {
+  const newLines = normalizeLines(text);
   const tasks = state.tasks.map((t, i) => {
     if (i !== index) return t;
-    const lines = [...t.lines, line];
+    const combined = [...t.lines, ...newLines];
+    const lines = combined.length > MAX_LOG_LINES ? combined.slice(-MAX_LOG_LINES) : combined;
     return { ...t, lines };
   });
   return { ...state, tasks };
@@ -1198,14 +1232,24 @@ function LogPane({ lines, isActive = false, maxLines = 15 }) {
   if (visible.length === 0) {
     return /* @__PURE__ */ jsx3(Box3, { marginTop: 1, children: /* @__PURE__ */ jsx3(Text3, { dimColor: true, children: isActive ? "\u2838 waiting for output\u2026" : "\u2014 no output yet \u2014" }) });
   }
-  return /* @__PURE__ */ jsx3(Box3, { flexDirection: "column", marginTop: 1, borderStyle: "single", borderColor: theme.border, paddingX: 1, children: visible.map((line, i) => /* @__PURE__ */ jsx3(
-    LogLine,
+  return /* @__PURE__ */ jsx3(
+    Box3,
     {
-      text: line,
-      cursor: isActive && i === visible.length - 1
-    },
-    i
-  )) });
+      flexDirection: "column",
+      marginTop: 1,
+      borderStyle: "single",
+      borderColor: theme.border,
+      paddingX: 1,
+      children: visible.map((line, i) => /* @__PURE__ */ jsx3(
+        LogLine,
+        {
+          text: line,
+          cursor: isActive && i === visible.length - 1
+        },
+        i
+      ))
+    }
+  );
 }
 function LogLine({ text, cursor }) {
   const suffix = cursor ? /* @__PURE__ */ jsx3(Text3, { color: theme.primary, children: " \u258C" }) : null;
@@ -1218,18 +1262,41 @@ function LogLine({ text, cursor }) {
       suffix
     ] });
   }
-  if (/^\s*\$\s/.test(text)) return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
-    text,
-    suffix
-  ] });
-  if (text.startsWith("[warn]")) return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
-    text,
-    suffix
-  ] });
-  if (text.startsWith("[error]")) return /* @__PURE__ */ jsxs3(Text3, { color: theme.error, children: [
-    text,
-    suffix
-  ] });
+  if (/^\s*\$\s/.test(text))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
+      text,
+      suffix
+    ] });
+  if (text.startsWith("[warn]"))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
+      text,
+      suffix
+    ] });
+  if (text.startsWith("[error]"))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.error, children: [
+      text,
+      suffix
+    ] });
+  if (/^[\s]*(✓|✔|✅|done|success|compiled|built|passed)/i.test(text) && !/\b(error|fail|failed|warn|warning)\b/i.test(text))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.success, children: [
+      text,
+      suffix
+    ] });
+  if (/\b(error|failed|fail)\b/i.test(text))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.error, children: [
+      text,
+      suffix
+    ] });
+  if (/\b(warn|warning)\b/i.test(text))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.warning, children: [
+      text,
+      suffix
+    ] });
+  if (/^[·…⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏]/.test(text))
+    return /* @__PURE__ */ jsxs3(Text3, { color: theme.muted, children: [
+      text,
+      suffix
+    ] });
   return /* @__PURE__ */ jsxs3(Text3, { children: [
     text,
     suffix
@@ -1300,6 +1367,13 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
     };
   }, [events2, exit]);
   const { isRawModeSupported } = useStdin();
+  const { stdout } = useStdout();
+  const terminalRows = stdout?.rows ?? 24;
+  const FIXED_OVERHEAD = 12;
+  const logPaneMaxLines = Math.max(
+    5,
+    terminalRows - FIXED_OVERHEAD - state.tasks.length
+  );
   const [tick, setTick] = useState(0);
   useInterval(() => {
     if (!state.endTime) setTick((t) => t + 1);
@@ -1347,12 +1421,13 @@ function App({ workflow: workflow2, events: events2, options: options2, updateCh
           maxVisible: MAX_VISIBLE_ITERATIONS
         }
       ) : null
-    ] }, taskState.task.name)) }),
+    ] }, i)) }),
     activeTask && /* @__PURE__ */ jsx5(
       LogPane,
       {
         lines: activeTask.lines,
-        isActive: activeTask.status === "running"
+        isActive: activeTask.status === "running",
+        maxLines: logPaneMaxLines
       }
     ),
     state.endTime !== void 0 && state.writtenFiles.length > 0 && /* @__PURE__ */ jsxs4(Box5, { flexDirection: "column", marginTop: 1, children: [
@@ -1508,7 +1583,8 @@ async function runPass3Judge(description, workflow2) {
       }),
       allowedTools: [],
       permissionMode: "default",
-      model: "sonnet"
+      model: "sonnet",
+      appendSystemPrompt: METHODOLOGY
     };
     return await runClaudeStructured(task, PlanJudgeOutputSchema);
   } catch {
@@ -1616,7 +1692,8 @@ async function* streamPlan(args) {
         }),
         allowedTools: ["Read", "Glob", "Grep"],
         permissionMode: "bypassPermissions",
-        model: "opus"
+        model: "opus",
+        appendSystemPrompt: METHODOLOGY
       };
       for await (const event of runClaude(researchTask)) {
         if (event.type === "output:tool") {
@@ -1678,7 +1755,9 @@ ${basePrompt}` : basePrompt,
       allowedTools: [],
       permissionMode: "bypassPermissions",
       model: skipResearch ? "sonnet" : "opus",
-      appendSystemPrompt: PLAN_SYSTEM_RULES,
+      appendSystemPrompt: `${METHODOLOGY}
+${PLAN_SYSTEM_RULES}`,
       jsonSchema: WORKFLOW_JSON_SCHEMA
     };
     let structuredOutput;
@@ -2302,7 +2381,13 @@ async function* withLogger(gen, logger2) {
 }
 // src/retrospective.ts
-import { existsSync as existsSync3, mkdirSync as mkdirSync4, readdirSync as readdirSync2, readFileSync as readFileSync5, writeFileSync as writeFileSync4 } from "node:fs";
+import {
+  existsSync as existsSync3,
+  mkdirSync as mkdirSync4,
+  readdirSync as readdirSync2,
+  readFileSync as readFileSync5,
+  writeFileSync as writeFileSync4
+} from "node:fs";
 import { basename as basename2, dirname as dirname4, join as join4, resolve as resolve3 } from "node:path";
 import { spawnSync } from "node:child_process";
 import { load as parseYaml2 } from "js-yaml";
@@ -2314,10 +2399,17 @@ var RetrospectiveOutputSchema = z4.object({
 var RETROSPECTIVE_PROMPT = loadPrompt("retrospective-analysis");
 async function runRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp) {
   try {
-    await doRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp);
+    await doRetrospective(
+      workflowFilePath,
+      workflow2,
+      highlightsDir,
+      runTimestamp
+    );
   } catch (err) {
-    console.warn(`
-Self-improvement: retrospective failed: ${getErrorMessage(err)}`);
+    console.warn(
+      `
+Self-improvement: retrospective failed: ${getErrorMessage(err)}`
+    );
   }
 }
 async function doRetrospective(workflowFilePath, workflow2, highlightsDir, runTimestamp) {
@@ -2328,13 +2420,17 @@ async function doRetrospective(workflowFilePath, workflow2, highlightsDir, runTi
   const allFiles = readdirSync2(highlightsDir);
   const runHighlights = allFiles.filter((f) => f.startsWith(runTimestamp) && f.endsWith(".md")).sort();
   if (runHighlights.length === 0) {
-    console.log("\nSelf-improvement: no highlights for this run \u2014 task completed without issues, skipping.");
+    console.log(
+      "\nSelf-improvement: no highlights for this run \u2014 task completed without issues, skipping."
+    );
     return;
   }
   const divider = "\u2501".repeat(51);
   console.log(`
 ${divider}`);
-  console.log("Self-Improvement: Analyzing execution and generating improvements...");
+  console.log(
+    "Self-Improvement: Analyzing execution and generating improvements..."
+  );
   console.log(`${divider}
 `);
   console.log(`Found ${runHighlights.length} highlight(s) to analyze`);
@@ -2380,15 +2476,23 @@ ${content}`;
       "--output-format",
       "text"
     ],
-    { encoding: "utf8", maxBuffer: 10 * 1024 * 1024 }
+    {
+      encoding: "utf8",
+      maxBuffer: 10 * 1024 * 1024,
+      stdio: ["ignore", "pipe", "pipe"]
+    }
   );
   if (result.error) {
-    console.warn(`Self-improvement: failed to run claude: ${result.error.message}`);
+    console.warn(
+      `Self-improvement: failed to run claude: ${result.error.message}`
+    );
     return;
   }
   if (result.status !== 0) {
     const stderr = result.stderr ?? "";
-    console.warn(`Self-improvement: claude exited with code ${result.status}${stderr ? ": " + stderr : ""}`);
+    console.warn(
+      `Self-improvement: claude exited with code ${result.status}${stderr ? ": " + stderr : ""}`
+    );
     return;
   }
   const response = result.stdout ?? "";
@@ -2396,13 +2500,17 @@ ${content}`;
   try {
     parsed = JSON.parse(extractJson(response));
   } catch {
-    console.warn(`Self-improvement: could not parse Claude response as JSON.
-Response: ${response.trim()}`);
+    console.warn(
+      `Self-improvement: could not parse Claude response as JSON.
+Response: ${response.trim()}`
+    );
     return;
   }
   const zodResult = RetrospectiveOutputSchema.safeParse(parsed);
   if (!zodResult.success) {
-    console.warn("Self-improvement: response schema mismatch \u2014 improved YAML not saved.");
+    console.warn(
+      "Self-improvement: response schema mismatch \u2014 improved YAML not saved."
+    );
     return;
   }
   const improvedYaml = zodResult.data.improved_yaml.trim();
@@ -2410,7 +2518,9 @@ Response: ${response.trim()}`);
   try {
     parseYaml2(improvedYaml);
   } catch (err) {
-    console.warn(`Self-improvement: generated YAML is invalid (${getErrorMessage(err)}), skipping save.`);
+    console.warn(
+      `Self-improvement: generated YAML is invalid (${getErrorMessage(err)}), skipping save.`
+    );
     return;
   }
   const startDir = dirname4(resolve3(workflowFilePath));
@@ -2435,7 +2545,8 @@ ${divider}`);
 function extractJson(text) {
   const start = text.indexOf("{");
   const end = text.lastIndexOf("}");
-  if (start === -1 || end === -1 || end <= start) throw new Error("no JSON object found in response");
+  if (start === -1 || end === -1 || end <= start)
+    throw new Error("no JSON object found in response");
   return text.slice(start, end + 1);
 }

package/dist/prompts/dev-approach.txt ADDED Viewed

@@ -0,0 +1,16 @@
+# ============================================================================
+# DEV APPROACH PROMPT
+# ============================================================================
+# Purpose: Eval-only template for testing development methodology adherence.
+#          Asks Claude to verbalize its process so behavioral criteria can be
+#          judged against the injected methodology system prompt.
+# Used by: evals/development-methodology.eval.yaml
+# Triggered when: npm run eval evals/development-methodology.eval.yaml
+#
+# Placeholders:
+#   {{TASK}} - The programming task to reason about
+# ============================================================================
+{{TASK}}
+Before writing any code, briefly describe: what is still unclear and any assumptions you're making, what you need to learn or inspect first, how you would break this into slices, and how you would verify the implementation is correct.

package/dist/prompts/development-methodology.txt ADDED Viewed

@@ -0,0 +1,71 @@
+# ============================================================================
+# DEVELOPMENT METHODOLOGY
+# ============================================================================
+# Purpose: Defines the software development loop injected into every Claude
+#          step that executant runs.
+# Used by: src/tasks/claude.ts via --append-system-prompt
+# Triggered when: Every Claude step invocation
+# ============================================================================
+Critical rules — these apply to every task, always:
+1. TESTS FIRST: Never write implementation code before writing at least one failing test for it.
+   Wrong: create rate-limiter.ts → create rate-limiter.test.ts
+   Right: create rate-limiter.test.ts (failing) → create rate-limiter.ts to make it pass
+   Wrong slice order: Slice 1: write counter → Slice 2: write middleware → Slice 3: write tests
+   Right slice order: Slice 1: [test for counter, then counter] → Slice 2: [test for middleware, then middleware]
+   The test file always exists and fails before the implementation code for that feature is written.
+2. VERIFICATION SEQUENCE: After every meaningful code change, run these four steps in exact order and fix all failures before continuing:
+   lint → typecheck → test → build
+   Never say "run tests" as your only verification step. Always name all four.
+3. ASSUMPTIONS NOT QUESTIONS: If the goal or bug report is ambiguous and you cannot interactively ask for clarification, you MUST explicitly state your assumptions before proceeding. Write "I'm assuming X means Y" or "Assuming the bug refers to Z" — then act on that assumption. Do not proceed silently on an implicit assumption.
+4. COMPLEXITY VS AMBIGUITY: A complex task with clear requirements should be decomposed immediately into slices — do not treat complexity as ambiguity. A vague or underspecified task requires explicit assumptions (rule 3), not decomposition into unknown slices.
+---
+Knowledge loop — repeat until sufficient knowledge is acquired. Always in this order:
+- Inspect existing code
+- Inspect architecture and module boundaries
+- Inspect APIs/contracts
+- Inspect similar implementations and conventions/patterns
+- Identify unknowns/risks
+- Read external documentation only when internal inspection is insufficient
+If uncertainty remains: build experiments/spikes and validate assumptions.
+Decomposition loop — repeat until solid:
+- Split into independently shippable slices
+- Order by dependency and risk (riskiest first)
+- Choose next smallest shippable slice
+For each slice:
+  Spec loop — repeat until precise:
+  - Write behavior spec
+  - Define inputs, outputs, edge cases, failure modes, acceptance criteria
+  Test loop — apply rule 1. Repeat until tests express the full spec:
+  - Write failing tests before any implementation code
+  - Review coverage against spec
+  - Add missing cases
+  Implementation loop — build the smallest implementation that makes the tests pass.
+  Verification loop — apply rule 2 after every meaningful change:
+    1. lint
+    2. typecheck
+    3. test
+    4. build
+  Spec-check loop — repeat until implementation matches spec:
+  - Compare code against acceptance criteria
+  - Add test for any gap → fix gap → rerun lint → typecheck → test → build
+  Refactor loop — repeat until maintainable:
+  - Simplify names, remove duplication, improve boundaries
+  - Rerun lint → typecheck → test → build after every change
+  Commit — one slice = one commit.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "executant",
-  "version": "1.9.0",
+  "version": "1.10.1",
   "description": "Harness for YAML-defined workflows that enables stepping through Claude sessions and bash commands",
   "repository": {
     "type": "git",