npm - codeharness - Versions diffs - 0.30.1 → 0.31.1 - Mend

codeharness 0.30.1 → 0.31.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md +3 -3
package/dist/{chunk-QLY7NJIB.js → chunk-INMK5DZS.js} +1 -1
package/dist/{docker-P65B7Z3S.js → docker-2Z4EIH3U.js} +1 -1
package/dist/index.js +53 -109
package/package.json +1 -1
package/templates/agents/checker.yaml +65 -0
package/templates/workflows/default.yaml +8 -0

package/README.md CHANGED Viewed

@@ -8,7 +8,7 @@ codeharness is an **npm CLI** + **Claude Code plugin** that packages verificatio
 1. **Verifies features work** — not just that tests pass. Black-box verification runs the built CLI inside a Docker container with no source code access. If the feature doesn't work from a user's perspective, verification fails.
 2. **Fixes what it finds** — verification failures with code bugs automatically return to development with specific findings. The dev agent gets told exactly what's broken and why.
-3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, reviews it, verifies it, and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
+3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, checks it (tests + lint), verifies it (agent evaluation), and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
 4. **Makes agents see runtime** — ephemeral VictoriaMetrics stack (logs, metrics, traces) that agents query programmatically during development. No guessing at what the code does at runtime.
 ## Installation
@@ -61,7 +61,7 @@ The plugin provides slash commands that orchestrate the CLI within Claude Code s
 | Command | Purpose |
 |---------|---------|
-| `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → dev → review → verify loop |
+| `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → implement → check → verify loop |
 | `/harness-init` | Interactive project initialization |
 | `/harness-status` | Quick overview of sprint progress and harness health |
 | `/harness-onboard` | Scan project and generate onboarding plan |
@@ -84,7 +84,7 @@ codeharness integrates with [BMAD Method](https://github.com/bmadcode/BMAD-METHO
 ┌─────────────────────────────────────────┐
 │  Claude Code Session                     │
 │  /harness-run picks next story           │
-│  → create-story → dev → review → verify  │
+│  → create-story → implement → check → verify │
 └────────────────────┬────────────────────┘
                      │ verify
                      ▼

package/dist/{chunk-QLY7NJIB.js → chunk-INMK5DZS.js} RENAMED Viewed

@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
 }
 // src/modules/infra/init-project.ts
-var HARNESS_VERSION = true ? "0.30.1" : "0.0.0-dev";
+var HARNESS_VERSION = true ? "0.31.1" : "0.0.0-dev";
 function failResult(opts, error) {
   return {
     status: "fail",

package/dist/{docker-P65B7Z3S.js → docker-2Z4EIH3U.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   stopCollectorOnly,
   stopSharedStack,
   stopStack
-} from "./chunk-QLY7NJIB.js";
+} from "./chunk-INMK5DZS.js";
 export {
   checkRemoteEndpoint,
   cleanupOrphanedContainers,

package/dist/index.js CHANGED Viewed

@@ -40,7 +40,7 @@ import {
   validateDockerfile,
   warn,
   writeState
-} from "./chunk-QLY7NJIB.js";
+} from "./chunk-INMK5DZS.js";
 // src/index.ts
 import { Command } from "commander";
@@ -5101,23 +5101,10 @@ import { Box as Box7, Static, Text as Text7, useInput } from "ink";
 // src/lib/ink-workflow.tsx
 import { Text as Text2, Box as Box2 } from "ink";
 import { jsx as jsx2, jsxs as jsxs2 } from "react/jsx-runtime";
-var termWidth = () => Math.min(process.stdout.columns || 60, 80);
 var SPINNER_FRAMES = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
 function isLoopBlock2(step) {
   return typeof step === "object" && step !== null && "loop" in step;
 }
-function formatCost(costUsd) {
-  if (costUsd == null) return "...";
-  return `$${costUsd.toFixed(2)}`;
-}
-function formatElapsed2(ms) {
-  if (ms == null) return "...";
-  const seconds = Math.round(ms / 1e3);
-  if (seconds >= 60) {
-    return `${Math.floor(seconds / 60)}m`;
-  }
-  return `${seconds}s`;
-}
 function TaskNode({ name, status, spinnerFrame }) {
   const s = status ?? "pending";
   switch (s) {
@@ -5151,17 +5138,6 @@ function loopIteration(tasks, taskStates) {
   });
   return anyStarted ? 1 : 0;
 }
-function collectTaskNames(flow) {
-  const names = [];
-  for (const step of flow) {
-    if (isLoopBlock2(step)) {
-      names.push(...step.loop);
-    } else {
-      names.push(step);
-    }
-  }
-  return names;
-}
 function hasMetaData(taskMeta) {
   if (!taskMeta) return false;
   return Object.keys(taskMeta).length > 0;
@@ -5207,69 +5183,10 @@ function WorkflowGraph({ flow, currentTask, taskStates, taskMeta }) {
       );
     }
   }
-  let driverRow = null;
-  let costRow = null;
-  if (showMeta) {
-    const taskNames = collectTaskNames(flow);
-    const driverParts = [];
-    const costParts = [];
-    let hasAnyCost = false;
-    for (const name of taskNames) {
-      const m = meta[name];
-      const driver = m?.driver ?? "";
-      driverParts.push(driver);
-      const state = taskStates[name];
-      if (state === "done") {
-        const costStr = formatCost(m?.costUsd);
-        const timeStr = formatElapsed2(m?.elapsedMs);
-        costParts.push(`${costStr} / ${timeStr}`);
-        hasAnyCost = true;
-      } else {
-        costParts.push("");
-      }
-    }
-    const hasSomeDriver = driverParts.some((d) => d.length > 0);
-    if (hasSomeDriver) {
-      const driverLabels = [];
-      for (let idx = 0; idx < taskNames.length; idx++) {
-        if (idx > 0) {
-          driverLabels.push(/* @__PURE__ */ jsx2(Text2, { children: "   " }, `drv-sep-${idx}`));
-        }
-        driverLabels.push(
-          /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: driverParts[idx] || " " }, `drv-${idx}`)
-        );
-      }
-      driverRow = /* @__PURE__ */ jsxs2(Text2, { children: [
-        "  ",
-        driverLabels
-      ] });
-    }
-    if (hasAnyCost) {
-      const costLabels = [];
-      for (let idx = 0; idx < taskNames.length; idx++) {
-        if (idx > 0) {
-          costLabels.push(/* @__PURE__ */ jsx2(Text2, { children: "   " }, `cost-sep-${idx}`));
-        }
-        costLabels.push(
-          /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: costParts[idx] || " " }, `cost-${idx}`)
-        );
-      }
-      costRow = /* @__PURE__ */ jsxs2(Text2, { children: [
-        "  ",
-        costLabels
-      ] });
-    }
-  }
-  return /* @__PURE__ */ jsxs2(Box2, { flexDirection: "column", children: [
-    /* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) }),
-    /* @__PURE__ */ jsxs2(Text2, { children: [
-      "  ",
-      elements
-    ] }),
-    driverRow,
-    costRow,
-    /* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) })
-  ] });
+  return /* @__PURE__ */ jsx2(Box2, { flexDirection: "column", children: /* @__PURE__ */ jsxs2(Text2, { children: [
+    "  ",
+    elements
+  ] }) });
 }
 // src/lib/ink-lane-container.tsx
@@ -5447,7 +5364,7 @@ import { jsx as jsx5, jsxs as jsxs5 } from "react/jsx-runtime";
 function formatConflictText(count) {
   return count === 1 ? "1 conflict" : `${count} conflicts`;
 }
-function formatCost2(cost) {
+function formatCost(cost) {
   return `$${cost.toFixed(2)}`;
 }
 function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes }) {
@@ -5473,7 +5390,7 @@ function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes })
       /* @__PURE__ */ jsx5(Text5, { children: " \u2502 " }),
       /* @__PURE__ */ jsx5(Text5, { dimColor: true, children: `Pending: ${pendingSection}` })
     ] }),
-    completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost2(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
+    completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
   ] });
 }
@@ -5606,11 +5523,7 @@ function Separator() {
   const width = process.stdout.columns || 60;
   return /* @__PURE__ */ jsx8(Text8, { children: "\u2501".repeat(width) });
 }
-function shortKey(key) {
-  const m = key.match(/^(\d+-\d+)/);
-  return m ? m[1] : key;
-}
-function formatCost3(cost) {
+function formatCost2(cost) {
   return `$${cost.toFixed(2)}`;
 }
 function Header({ info: info3, laneCount }) {
@@ -5619,7 +5532,7 @@ function Header({ info: info3, laneCount }) {
   if (laneCount != null && laneCount > 1) parts.push(`${laneCount} lanes`);
   if (info3.elapsed) parts.push(`${info3.elapsed} elapsed`);
   const displayCost = laneCount != null && laneCount > 1 && info3.laneTotalCost != null ? info3.laneTotalCost : info3.totalCost;
-  if (displayCost != null) parts.push(`${formatCost3(displayCost)} spent`);
+  if (displayCost != null) parts.push(`${formatCost2(displayCost)} spent`);
   const left = parts.join(" | ");
   const right = "[q to quit]";
   const width = process.stdout.columns || 80;
@@ -5654,9 +5567,9 @@ function EpicInfo({ info: info3 }) {
 function StoryContext({ entries }) {
   if (entries.length === 0) return null;
   return /* @__PURE__ */ jsx8(Box8, { flexDirection: "column", children: entries.map((e, i) => {
-    if (e.role === "prev") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "green", children: `  Prev: ${shortKey(e.key)} \u2713` }) }, i);
-    if (e.role === "current") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "cyan", children: `  This: ${shortKey(e.key)} \u25C6 ${e.task ?? ""}` }) }, i);
-    return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { dimColor: true, children: `  Next: ${shortKey(e.key)}` }) }, i);
+    if (e.role === "prev") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "green", children: `  Prev: ${e.key} \u2713` }) }, i);
+    if (e.role === "current") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "cyan", children: `  This: ${e.key} \u25C6 ${e.task ?? ""}` }) }, i);
+    return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { dimColor: true, children: `  Next: ${e.key}` }) }, i);
   }) });
 }
@@ -10559,7 +10472,7 @@ async function handleDockerCheck(isJson) {
     }
   }
 }
-function formatElapsed3(ms) {
+function formatElapsed2(ms) {
   const s = Math.floor(ms / 1e3);
   const h = Math.floor(s / 3600);
   const m = Math.floor(s % 3600 / 60);
@@ -10579,7 +10492,7 @@ function printWorkflowState() {
   console.log(`  Tasks completed: ${state.tasks_completed.length}`);
   if (state.phase === "executing" && state.started) {
     const elapsed = Date.now() - Date.parse(state.started);
-    console.log(`  Elapsed: ${formatElapsed3(elapsed)}`);
+    console.log(`  Elapsed: ${formatElapsed2(elapsed)}`);
   }
   if (state.evaluator_scores.length > 0) {
     const latest = state.evaluator_scores[state.evaluator_scores.length - 1];
@@ -10604,7 +10517,7 @@ function getWorkflowStateData() {
   };
   if (state.phase === "executing" && state.started) {
     data.elapsed_ms = Date.now() - Date.parse(state.started);
-    data.elapsed = formatElapsed3(data.elapsed_ms);
+    data.elapsed = formatElapsed2(data.elapsed_ms);
   }
   return data;
 }
@@ -11256,7 +11169,7 @@ function registerTeardownCommand(program) {
     } else if (otlpMode === "remote-routed") {
       if (!options.keepDocker) {
         try {
-          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-P65B7Z3S.js");
+          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-2Z4EIH3U.js");
           stopCollectorOnly2();
           result.docker.stopped = true;
           if (!isJson) {
@@ -11288,7 +11201,7 @@ function registerTeardownCommand(program) {
         info("Shared stack: kept running (other projects may use it)");
       }
     } else if (isLegacyStack) {
-      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-P65B7Z3S.js");
+      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-2Z4EIH3U.js");
       let stackRunning = false;
       try {
         stackRunning = isStackRunning2(composeFile);
@@ -13741,6 +13654,36 @@ function parseLine(line) {
     return null;
   }
   const type = parsed.type;
+  const item = parsed.item;
+  if (type === "item.started" && item) {
+    const itemType = item.type;
+    if (itemType === "command_execution") {
+      const cmd = item.command;
+      return { type: "tool-start", name: "Bash", id: item.id ?? "" };
+    }
+    if (itemType === "file_edit") {
+      return { type: "tool-start", name: "Edit", id: item.id ?? "" };
+    }
+    if (itemType === "file_read") {
+      return { type: "tool-start", name: "Read", id: item.id ?? "" };
+    }
+    return null;
+  }
+  if (type === "item.completed" && item) {
+    const itemType = item.type;
+    if (itemType === "command_execution") {
+      const cmd = item.command;
+      return { type: "tool-complete" };
+    }
+    if (itemType === "agent_message") {
+      const text = item.text;
+      if (text) return { type: "text", text };
+    }
+    if (itemType === "file_edit" || itemType === "file_read") {
+      return { type: "tool-complete" };
+    }
+    return null;
+  }
   if (type === "tool_call") {
     const name = parsed.name;
     const callId = parsed.call_id;
@@ -13829,12 +13772,13 @@ var CodexDriver = class {
         opts.plugins
       );
     }
-    const args = [];
-    if (opts.model) {
-      args.push("--model", opts.model);
+    const args = ["exec", "--json"];
+    const model = opts.model && !opts.model.startsWith("claude-") ? opts.model : void 0;
+    if (model) {
+      args.push("--model", model);
     }
     if (opts.cwd) {
-      args.push("--cwd", opts.cwd);
+      args.push("--cd", opts.cwd);
     }
     args.push(opts.prompt);
     let yieldedResult = false;
@@ -14184,7 +14128,7 @@ function registerDriversCommand(program) {
 }
 // src/index.ts
-var VERSION = true ? "0.30.1" : "0.0.0-dev";
+var VERSION = true ? "0.31.1" : "0.0.0-dev";
 function createProgram() {
   const program = new Command();
   program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codeharness",
-  "version": "0.30.1",
+  "version": "0.31.1",
   "type": "module",
   "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
   "bin": {

package/templates/agents/checker.yaml ADDED Viewed

@@ -0,0 +1,65 @@
+name: checker
+role:
+  title: Automated Checker
+  purpose: Run tests, linter, and coverage checks — report pass/fail objectively
+persona:
+  identity: |
+    CI bot that runs the project's test suite, linter, and coverage tool.
+    Reports results objectively — no interpretation, no fixes, just facts.
+  communication_style: "Machine-like. Commands run, output captured, pass/fail reported."
+  principles:
+    - Run the project's actual test command (npm test, pytest, cargo test, etc.)
+    - Run the project's linter if configured (eslint, ruff, clippy, etc.)
+    - Check coverage against target if configured
+    - Report exact command, exit code, and output for each check
+    - Never fix code — only report results
+prompt_template: |
+  ## Role
+  You are running automated checks on the implementation. Run tests, linter, and coverage. Report results.
+  ## Process
+  1. **Detect check commands** from the project (package.json scripts, pyproject.toml, Makefile, etc.)
+  2. **Run tests**: execute the test command, capture output and exit code
+  3. **Run linter**: execute the lint command if available
+  4. **Check coverage**: if a coverage target exists, verify it's met
+  ## Output Format
+  Output a single JSON object:
+  ```json
+  {
+    "verdict": "pass" | "fail",
+    "checks": [
+      {
+        "name": "tests",
+        "command": "npm test",
+        "exit_code": 0,
+        "passed": true,
+        "summary": "42 tests passed"
+      },
+      {
+        "name": "lint",
+        "command": "npm run lint",
+        "exit_code": 0,
+        "passed": true,
+        "summary": "no issues"
+      },
+      {
+        "name": "coverage",
+        "command": "npm run coverage",
+        "exit_code": 0,
+        "passed": true,
+        "summary": "98% (target: 100%)"
+      }
+    ]
+  }
+  ```
+  Verdict is "pass" only if ALL checks pass.
+  ## Output Location
+  Write results to ./verdict/check.json

package/templates/workflows/default.yaml CHANGED Viewed

@@ -11,6 +11,12 @@ tasks:
     session: fresh
     source_access: true
     model: claude-sonnet-4-6
+  check:
+    agent: checker
+    scope: per-story
+    session: fresh
+    source_access: true
+    driver: codex
   review:
     agent: reviewer
     scope: per-story
@@ -39,10 +45,12 @@ tasks:
 flow:
   - create-story
   - implement
+  - check
   - review
   - verify
   - loop:
       - retry
+      - check
       - review
       - verify
   - retro