codeharness 0.30.1 → 0.31.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@ codeharness is an **npm CLI** + **Claude Code plugin** that packages verificatio
8
8
 
9
9
  1. **Verifies features work** — not just that tests pass. Black-box verification runs the built CLI inside a Docker container with no source code access. If the feature doesn't work from a user's perspective, verification fails.
10
10
  2. **Fixes what it finds** — verification failures with code bugs automatically return to development with specific findings. The dev agent gets told exactly what's broken and why.
11
- 3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, reviews it, verifies it, and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
11
+ 3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, checks it (tests + lint), verifies it (agent evaluation), and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
12
12
  4. **Makes agents see runtime** — ephemeral VictoriaMetrics stack (logs, metrics, traces) that agents query programmatically during development. No guessing at what the code does at runtime.
13
13
 
14
14
  ## Installation
@@ -61,7 +61,7 @@ The plugin provides slash commands that orchestrate the CLI within Claude Code s
61
61
 
62
62
  | Command | Purpose |
63
63
  |---------|---------|
64
- | `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → devreview → verify loop |
64
+ | `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → implement → check → verify loop |
65
65
  | `/harness-init` | Interactive project initialization |
66
66
  | `/harness-status` | Quick overview of sprint progress and harness health |
67
67
  | `/harness-onboard` | Scan project and generate onboarding plan |
@@ -84,7 +84,7 @@ codeharness integrates with [BMAD Method](https://github.com/bmadcode/BMAD-METHO
84
84
  ┌─────────────────────────────────────────┐
85
85
  │ Claude Code Session │
86
86
  │ /harness-run picks next story │
87
- │ → create-story → devreview → verify
87
+ │ → create-story → implement → check → verify
88
88
  └────────────────────┬────────────────────┘
89
89
  │ verify
90
90
 
@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
2895
2895
  }
2896
2896
 
2897
2897
  // src/modules/infra/init-project.ts
2898
- var HARNESS_VERSION = true ? "0.30.1" : "0.0.0-dev";
2898
+ var HARNESS_VERSION = true ? "0.31.1" : "0.0.0-dev";
2899
2899
  function failResult(opts, error) {
2900
2900
  return {
2901
2901
  status: "fail",
@@ -16,7 +16,7 @@ import {
16
16
  stopCollectorOnly,
17
17
  stopSharedStack,
18
18
  stopStack
19
- } from "./chunk-QLY7NJIB.js";
19
+ } from "./chunk-INMK5DZS.js";
20
20
  export {
21
21
  checkRemoteEndpoint,
22
22
  cleanupOrphanedContainers,
package/dist/index.js CHANGED
@@ -40,7 +40,7 @@ import {
40
40
  validateDockerfile,
41
41
  warn,
42
42
  writeState
43
- } from "./chunk-QLY7NJIB.js";
43
+ } from "./chunk-INMK5DZS.js";
44
44
 
45
45
  // src/index.ts
46
46
  import { Command } from "commander";
@@ -5101,23 +5101,10 @@ import { Box as Box7, Static, Text as Text7, useInput } from "ink";
5101
5101
  // src/lib/ink-workflow.tsx
5102
5102
  import { Text as Text2, Box as Box2 } from "ink";
5103
5103
  import { jsx as jsx2, jsxs as jsxs2 } from "react/jsx-runtime";
5104
- var termWidth = () => Math.min(process.stdout.columns || 60, 80);
5105
5104
  var SPINNER_FRAMES = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
5106
5105
  function isLoopBlock2(step) {
5107
5106
  return typeof step === "object" && step !== null && "loop" in step;
5108
5107
  }
5109
- function formatCost(costUsd) {
5110
- if (costUsd == null) return "...";
5111
- return `$${costUsd.toFixed(2)}`;
5112
- }
5113
- function formatElapsed2(ms) {
5114
- if (ms == null) return "...";
5115
- const seconds = Math.round(ms / 1e3);
5116
- if (seconds >= 60) {
5117
- return `${Math.floor(seconds / 60)}m`;
5118
- }
5119
- return `${seconds}s`;
5120
- }
5121
5108
  function TaskNode({ name, status, spinnerFrame }) {
5122
5109
  const s = status ?? "pending";
5123
5110
  switch (s) {
@@ -5151,17 +5138,6 @@ function loopIteration(tasks, taskStates) {
5151
5138
  });
5152
5139
  return anyStarted ? 1 : 0;
5153
5140
  }
5154
- function collectTaskNames(flow) {
5155
- const names = [];
5156
- for (const step of flow) {
5157
- if (isLoopBlock2(step)) {
5158
- names.push(...step.loop);
5159
- } else {
5160
- names.push(step);
5161
- }
5162
- }
5163
- return names;
5164
- }
5165
5141
  function hasMetaData(taskMeta) {
5166
5142
  if (!taskMeta) return false;
5167
5143
  return Object.keys(taskMeta).length > 0;
@@ -5207,69 +5183,10 @@ function WorkflowGraph({ flow, currentTask, taskStates, taskMeta }) {
5207
5183
  );
5208
5184
  }
5209
5185
  }
5210
- let driverRow = null;
5211
- let costRow = null;
5212
- if (showMeta) {
5213
- const taskNames = collectTaskNames(flow);
5214
- const driverParts = [];
5215
- const costParts = [];
5216
- let hasAnyCost = false;
5217
- for (const name of taskNames) {
5218
- const m = meta[name];
5219
- const driver = m?.driver ?? "";
5220
- driverParts.push(driver);
5221
- const state = taskStates[name];
5222
- if (state === "done") {
5223
- const costStr = formatCost(m?.costUsd);
5224
- const timeStr = formatElapsed2(m?.elapsedMs);
5225
- costParts.push(`${costStr} / ${timeStr}`);
5226
- hasAnyCost = true;
5227
- } else {
5228
- costParts.push("");
5229
- }
5230
- }
5231
- const hasSomeDriver = driverParts.some((d) => d.length > 0);
5232
- if (hasSomeDriver) {
5233
- const driverLabels = [];
5234
- for (let idx = 0; idx < taskNames.length; idx++) {
5235
- if (idx > 0) {
5236
- driverLabels.push(/* @__PURE__ */ jsx2(Text2, { children: " " }, `drv-sep-${idx}`));
5237
- }
5238
- driverLabels.push(
5239
- /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: driverParts[idx] || " " }, `drv-${idx}`)
5240
- );
5241
- }
5242
- driverRow = /* @__PURE__ */ jsxs2(Text2, { children: [
5243
- " ",
5244
- driverLabels
5245
- ] });
5246
- }
5247
- if (hasAnyCost) {
5248
- const costLabels = [];
5249
- for (let idx = 0; idx < taskNames.length; idx++) {
5250
- if (idx > 0) {
5251
- costLabels.push(/* @__PURE__ */ jsx2(Text2, { children: " " }, `cost-sep-${idx}`));
5252
- }
5253
- costLabels.push(
5254
- /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: costParts[idx] || " " }, `cost-${idx}`)
5255
- );
5256
- }
5257
- costRow = /* @__PURE__ */ jsxs2(Text2, { children: [
5258
- " ",
5259
- costLabels
5260
- ] });
5261
- }
5262
- }
5263
- return /* @__PURE__ */ jsxs2(Box2, { flexDirection: "column", children: [
5264
- /* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) }),
5265
- /* @__PURE__ */ jsxs2(Text2, { children: [
5266
- " ",
5267
- elements
5268
- ] }),
5269
- driverRow,
5270
- costRow,
5271
- /* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) })
5272
- ] });
5186
+ return /* @__PURE__ */ jsx2(Box2, { flexDirection: "column", children: /* @__PURE__ */ jsxs2(Text2, { children: [
5187
+ " ",
5188
+ elements
5189
+ ] }) });
5273
5190
  }
5274
5191
 
5275
5192
  // src/lib/ink-lane-container.tsx
@@ -5447,7 +5364,7 @@ import { jsx as jsx5, jsxs as jsxs5 } from "react/jsx-runtime";
5447
5364
  function formatConflictText(count) {
5448
5365
  return count === 1 ? "1 conflict" : `${count} conflicts`;
5449
5366
  }
5450
- function formatCost2(cost) {
5367
+ function formatCost(cost) {
5451
5368
  return `$${cost.toFixed(2)}`;
5452
5369
  }
5453
5370
  function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes }) {
@@ -5473,7 +5390,7 @@ function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes })
5473
5390
  /* @__PURE__ */ jsx5(Text5, { children: " \u2502 " }),
5474
5391
  /* @__PURE__ */ jsx5(Text5, { dimColor: true, children: `Pending: ${pendingSection}` })
5475
5392
  ] }),
5476
- completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost2(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
5393
+ completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
5477
5394
  ] });
5478
5395
  }
5479
5396
 
@@ -5606,11 +5523,7 @@ function Separator() {
5606
5523
  const width = process.stdout.columns || 60;
5607
5524
  return /* @__PURE__ */ jsx8(Text8, { children: "\u2501".repeat(width) });
5608
5525
  }
5609
- function shortKey(key) {
5610
- const m = key.match(/^(\d+-\d+)/);
5611
- return m ? m[1] : key;
5612
- }
5613
- function formatCost3(cost) {
5526
+ function formatCost2(cost) {
5614
5527
  return `$${cost.toFixed(2)}`;
5615
5528
  }
5616
5529
  function Header({ info: info3, laneCount }) {
@@ -5619,7 +5532,7 @@ function Header({ info: info3, laneCount }) {
5619
5532
  if (laneCount != null && laneCount > 1) parts.push(`${laneCount} lanes`);
5620
5533
  if (info3.elapsed) parts.push(`${info3.elapsed} elapsed`);
5621
5534
  const displayCost = laneCount != null && laneCount > 1 && info3.laneTotalCost != null ? info3.laneTotalCost : info3.totalCost;
5622
- if (displayCost != null) parts.push(`${formatCost3(displayCost)} spent`);
5535
+ if (displayCost != null) parts.push(`${formatCost2(displayCost)} spent`);
5623
5536
  const left = parts.join(" | ");
5624
5537
  const right = "[q to quit]";
5625
5538
  const width = process.stdout.columns || 80;
@@ -5654,9 +5567,9 @@ function EpicInfo({ info: info3 }) {
5654
5567
  function StoryContext({ entries }) {
5655
5568
  if (entries.length === 0) return null;
5656
5569
  return /* @__PURE__ */ jsx8(Box8, { flexDirection: "column", children: entries.map((e, i) => {
5657
- if (e.role === "prev") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "green", children: ` Prev: ${shortKey(e.key)} \u2713` }) }, i);
5658
- if (e.role === "current") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "cyan", children: ` This: ${shortKey(e.key)} \u25C6 ${e.task ?? ""}` }) }, i);
5659
- return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { dimColor: true, children: ` Next: ${shortKey(e.key)}` }) }, i);
5570
+ if (e.role === "prev") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "green", children: ` Prev: ${e.key} \u2713` }) }, i);
5571
+ if (e.role === "current") return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { color: "cyan", children: ` This: ${e.key} \u25C6 ${e.task ?? ""}` }) }, i);
5572
+ return /* @__PURE__ */ jsx8(Text8, { children: /* @__PURE__ */ jsx8(Text8, { dimColor: true, children: ` Next: ${e.key}` }) }, i);
5660
5573
  }) });
5661
5574
  }
5662
5575
 
@@ -10559,7 +10472,7 @@ async function handleDockerCheck(isJson) {
10559
10472
  }
10560
10473
  }
10561
10474
  }
10562
- function formatElapsed3(ms) {
10475
+ function formatElapsed2(ms) {
10563
10476
  const s = Math.floor(ms / 1e3);
10564
10477
  const h = Math.floor(s / 3600);
10565
10478
  const m = Math.floor(s % 3600 / 60);
@@ -10579,7 +10492,7 @@ function printWorkflowState() {
10579
10492
  console.log(` Tasks completed: ${state.tasks_completed.length}`);
10580
10493
  if (state.phase === "executing" && state.started) {
10581
10494
  const elapsed = Date.now() - Date.parse(state.started);
10582
- console.log(` Elapsed: ${formatElapsed3(elapsed)}`);
10495
+ console.log(` Elapsed: ${formatElapsed2(elapsed)}`);
10583
10496
  }
10584
10497
  if (state.evaluator_scores.length > 0) {
10585
10498
  const latest = state.evaluator_scores[state.evaluator_scores.length - 1];
@@ -10604,7 +10517,7 @@ function getWorkflowStateData() {
10604
10517
  };
10605
10518
  if (state.phase === "executing" && state.started) {
10606
10519
  data.elapsed_ms = Date.now() - Date.parse(state.started);
10607
- data.elapsed = formatElapsed3(data.elapsed_ms);
10520
+ data.elapsed = formatElapsed2(data.elapsed_ms);
10608
10521
  }
10609
10522
  return data;
10610
10523
  }
@@ -11256,7 +11169,7 @@ function registerTeardownCommand(program) {
11256
11169
  } else if (otlpMode === "remote-routed") {
11257
11170
  if (!options.keepDocker) {
11258
11171
  try {
11259
- const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-P65B7Z3S.js");
11172
+ const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-2Z4EIH3U.js");
11260
11173
  stopCollectorOnly2();
11261
11174
  result.docker.stopped = true;
11262
11175
  if (!isJson) {
@@ -11288,7 +11201,7 @@ function registerTeardownCommand(program) {
11288
11201
  info("Shared stack: kept running (other projects may use it)");
11289
11202
  }
11290
11203
  } else if (isLegacyStack) {
11291
- const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-P65B7Z3S.js");
11204
+ const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-2Z4EIH3U.js");
11292
11205
  let stackRunning = false;
11293
11206
  try {
11294
11207
  stackRunning = isStackRunning2(composeFile);
@@ -13741,6 +13654,36 @@ function parseLine(line) {
13741
13654
  return null;
13742
13655
  }
13743
13656
  const type = parsed.type;
13657
+ const item = parsed.item;
13658
+ if (type === "item.started" && item) {
13659
+ const itemType = item.type;
13660
+ if (itemType === "command_execution") {
13661
+ const cmd = item.command;
13662
+ return { type: "tool-start", name: "Bash", id: item.id ?? "" };
13663
+ }
13664
+ if (itemType === "file_edit") {
13665
+ return { type: "tool-start", name: "Edit", id: item.id ?? "" };
13666
+ }
13667
+ if (itemType === "file_read") {
13668
+ return { type: "tool-start", name: "Read", id: item.id ?? "" };
13669
+ }
13670
+ return null;
13671
+ }
13672
+ if (type === "item.completed" && item) {
13673
+ const itemType = item.type;
13674
+ if (itemType === "command_execution") {
13675
+ const cmd = item.command;
13676
+ return { type: "tool-complete" };
13677
+ }
13678
+ if (itemType === "agent_message") {
13679
+ const text = item.text;
13680
+ if (text) return { type: "text", text };
13681
+ }
13682
+ if (itemType === "file_edit" || itemType === "file_read") {
13683
+ return { type: "tool-complete" };
13684
+ }
13685
+ return null;
13686
+ }
13744
13687
  if (type === "tool_call") {
13745
13688
  const name = parsed.name;
13746
13689
  const callId = parsed.call_id;
@@ -13829,12 +13772,13 @@ var CodexDriver = class {
13829
13772
  opts.plugins
13830
13773
  );
13831
13774
  }
13832
- const args = [];
13833
- if (opts.model) {
13834
- args.push("--model", opts.model);
13775
+ const args = ["exec", "--json"];
13776
+ const model = opts.model && !opts.model.startsWith("claude-") ? opts.model : void 0;
13777
+ if (model) {
13778
+ args.push("--model", model);
13835
13779
  }
13836
13780
  if (opts.cwd) {
13837
- args.push("--cwd", opts.cwd);
13781
+ args.push("--cd", opts.cwd);
13838
13782
  }
13839
13783
  args.push(opts.prompt);
13840
13784
  let yieldedResult = false;
@@ -14184,7 +14128,7 @@ function registerDriversCommand(program) {
14184
14128
  }
14185
14129
 
14186
14130
  // src/index.ts
14187
- var VERSION = true ? "0.30.1" : "0.0.0-dev";
14131
+ var VERSION = true ? "0.31.1" : "0.0.0-dev";
14188
14132
  function createProgram() {
14189
14133
  const program = new Command();
14190
14134
  program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.30.1",
3
+ "version": "0.31.1",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
@@ -0,0 +1,65 @@
1
+ name: checker
2
+ role:
3
+ title: Automated Checker
4
+ purpose: Run tests, linter, and coverage checks — report pass/fail objectively
5
+ persona:
6
+ identity: |
7
+ CI bot that runs the project's test suite, linter, and coverage tool.
8
+ Reports results objectively — no interpretation, no fixes, just facts.
9
+ communication_style: "Machine-like. Commands run, output captured, pass/fail reported."
10
+ principles:
11
+ - Run the project's actual test command (npm test, pytest, cargo test, etc.)
12
+ - Run the project's linter if configured (eslint, ruff, clippy, etc.)
13
+ - Check coverage against target if configured
14
+ - Report exact command, exit code, and output for each check
15
+ - Never fix code — only report results
16
+ prompt_template: |
17
+ ## Role
18
+
19
+ You are running automated checks on the implementation. Run tests, linter, and coverage. Report results.
20
+
21
+ ## Process
22
+
23
+ 1. **Detect check commands** from the project (package.json scripts, pyproject.toml, Makefile, etc.)
24
+ 2. **Run tests**: execute the test command, capture output and exit code
25
+ 3. **Run linter**: execute the lint command if available
26
+ 4. **Check coverage**: if a coverage target exists, verify it's met
27
+
28
+ ## Output Format
29
+
30
+ Output a single JSON object:
31
+
32
+ ```json
33
+ {
34
+ "verdict": "pass" | "fail",
35
+ "checks": [
36
+ {
37
+ "name": "tests",
38
+ "command": "npm test",
39
+ "exit_code": 0,
40
+ "passed": true,
41
+ "summary": "42 tests passed"
42
+ },
43
+ {
44
+ "name": "lint",
45
+ "command": "npm run lint",
46
+ "exit_code": 0,
47
+ "passed": true,
48
+ "summary": "no issues"
49
+ },
50
+ {
51
+ "name": "coverage",
52
+ "command": "npm run coverage",
53
+ "exit_code": 0,
54
+ "passed": true,
55
+ "summary": "100% (target: 100%)"
56
+ }
57
+ ]
58
+ }
59
+ ```
60
+
61
+ Verdict is "pass" only if ALL checks pass.
62
+
63
+ ## Output Location
64
+
65
+ Write results to ./verdict/check.json
@@ -11,6 +11,12 @@ tasks:
11
11
  session: fresh
12
12
  source_access: true
13
13
  model: claude-sonnet-4-6
14
+ check:
15
+ agent: checker
16
+ scope: per-story
17
+ session: fresh
18
+ source_access: true
19
+ driver: codex
14
20
  review:
15
21
  agent: reviewer
16
22
  scope: per-story
@@ -39,10 +45,12 @@ tasks:
39
45
  flow:
40
46
  - create-story
41
47
  - implement
48
+ - check
42
49
  - review
43
50
  - verify
44
51
  - loop:
45
52
  - retry
53
+ - check
46
54
  - review
47
55
  - verify
48
56
  - retro