codeharness 0.30.1 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@ codeharness is an **npm CLI** + **Claude Code plugin** that packages verificatio
8
8
 
9
9
  1. **Verifies features work** — not just that tests pass. Black-box verification runs the built CLI inside a Docker container with no source code access. If the feature doesn't work from a user's perspective, verification fails.
10
10
  2. **Fixes what it finds** — verification failures with code bugs automatically return to development with specific findings. The dev agent gets told exactly what's broken and why.
11
- 3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, reviews it, verifies it, and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
11
+ 3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, checks it (tests + lint), verifies it (agent evaluation), and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
12
12
  4. **Makes agents see runtime** — ephemeral VictoriaMetrics stack (logs, metrics, traces) that agents query programmatically during development. No guessing at what the code does at runtime.
13
13
 
14
14
  ## Installation
@@ -61,7 +61,7 @@ The plugin provides slash commands that orchestrate the CLI within Claude Code s
61
61
 
62
62
  | Command | Purpose |
63
63
  |---------|---------|
64
- | `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → dev → review → verify loop |
64
+ | `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → implement → check → verify loop |
65
65
  | `/harness-init` | Interactive project initialization |
66
66
  | `/harness-status` | Quick overview of sprint progress and harness health |
67
67
  | `/harness-onboard` | Scan project and generate onboarding plan |
@@ -84,7 +84,7 @@ codeharness integrates with [BMAD Method](https://github.com/bmadcode/BMAD-METHO
84
84
  ┌─────────────────────────────────────────┐
85
85
  │ Claude Code Session │
86
86
  │ /harness-run picks next story │
87
- │ → create-story → dev → review → verify
87
+ │ → create-story → implement → check → verify
88
88
  └────────────────────┬────────────────────┘
89
89
  │ verify
90
90
 
@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
2895
2895
  }
2896
2896
 
2897
2897
  // src/modules/infra/init-project.ts
2898
- var HARNESS_VERSION = true ? "0.30.1" : "0.0.0-dev";
2898
+ var HARNESS_VERSION = true ? "0.31.0" : "0.0.0-dev";
2899
2899
  function failResult(opts, error) {
2900
2900
  return {
2901
2901
  status: "fail",
@@ -16,7 +16,7 @@ import {
16
16
  stopCollectorOnly,
17
17
  stopSharedStack,
18
18
  stopStack
19
- } from "./chunk-QLY7NJIB.js";
19
+ } from "./chunk-ITPLJVAB.js";
20
20
  export {
21
21
  checkRemoteEndpoint,
22
22
  cleanupOrphanedContainers,
package/dist/index.js CHANGED
@@ -40,7 +40,7 @@ import {
40
40
  validateDockerfile,
41
41
  warn,
42
42
  writeState
43
- } from "./chunk-QLY7NJIB.js";
43
+ } from "./chunk-ITPLJVAB.js";
44
44
 
45
45
  // src/index.ts
46
46
  import { Command } from "commander";
@@ -5101,23 +5101,10 @@ import { Box as Box7, Static, Text as Text7, useInput } from "ink";
5101
5101
  // src/lib/ink-workflow.tsx
5102
5102
  import { Text as Text2, Box as Box2 } from "ink";
5103
5103
  import { jsx as jsx2, jsxs as jsxs2 } from "react/jsx-runtime";
5104
- var termWidth = () => Math.min(process.stdout.columns || 60, 80);
5105
5104
  var SPINNER_FRAMES = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
5106
5105
  function isLoopBlock2(step) {
5107
5106
  return typeof step === "object" && step !== null && "loop" in step;
5108
5107
  }
5109
- function formatCost(costUsd) {
5110
- if (costUsd == null) return "...";
5111
- return `$${costUsd.toFixed(2)}`;
5112
- }
5113
- function formatElapsed2(ms) {
5114
- if (ms == null) return "...";
5115
- const seconds = Math.round(ms / 1e3);
5116
- if (seconds >= 60) {
5117
- return `${Math.floor(seconds / 60)}m`;
5118
- }
5119
- return `${seconds}s`;
5120
- }
5121
5108
  function TaskNode({ name, status, spinnerFrame }) {
5122
5109
  const s = status ?? "pending";
5123
5110
  switch (s) {
@@ -5151,17 +5138,6 @@ function loopIteration(tasks, taskStates) {
5151
5138
  });
5152
5139
  return anyStarted ? 1 : 0;
5153
5140
  }
5154
- function collectTaskNames(flow) {
5155
- const names = [];
5156
- for (const step of flow) {
5157
- if (isLoopBlock2(step)) {
5158
- names.push(...step.loop);
5159
- } else {
5160
- names.push(step);
5161
- }
5162
- }
5163
- return names;
5164
- }
5165
5141
  function hasMetaData(taskMeta) {
5166
5142
  if (!taskMeta) return false;
5167
5143
  return Object.keys(taskMeta).length > 0;
@@ -5207,69 +5183,10 @@ function WorkflowGraph({ flow, currentTask, taskStates, taskMeta }) {
5207
5183
  );
5208
5184
  }
5209
5185
  }
5210
- let driverRow = null;
5211
- let costRow = null;
5212
- if (showMeta) {
5213
- const taskNames = collectTaskNames(flow);
5214
- const driverParts = [];
5215
- const costParts = [];
5216
- let hasAnyCost = false;
5217
- for (const name of taskNames) {
5218
- const m = meta[name];
5219
- const driver = m?.driver ?? "";
5220
- driverParts.push(driver);
5221
- const state = taskStates[name];
5222
- if (state === "done") {
5223
- const costStr = formatCost(m?.costUsd);
5224
- const timeStr = formatElapsed2(m?.elapsedMs);
5225
- costParts.push(`${costStr} / ${timeStr}`);
5226
- hasAnyCost = true;
5227
- } else {
5228
- costParts.push("");
5229
- }
5230
- }
5231
- const hasSomeDriver = driverParts.some((d) => d.length > 0);
5232
- if (hasSomeDriver) {
5233
- const driverLabels = [];
5234
- for (let idx = 0; idx < taskNames.length; idx++) {
5235
- if (idx > 0) {
5236
- driverLabels.push(/* @__PURE__ */ jsx2(Text2, { children: " " }, `drv-sep-${idx}`));
5237
- }
5238
- driverLabels.push(
5239
- /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: driverParts[idx] || " " }, `drv-${idx}`)
5240
- );
5241
- }
5242
- driverRow = /* @__PURE__ */ jsxs2(Text2, { children: [
5243
- " ",
5244
- driverLabels
5245
- ] });
5246
- }
5247
- if (hasAnyCost) {
5248
- const costLabels = [];
5249
- for (let idx = 0; idx < taskNames.length; idx++) {
5250
- if (idx > 0) {
5251
- costLabels.push(/* @__PURE__ */ jsx2(Text2, { children: " " }, `cost-sep-${idx}`));
5252
- }
5253
- costLabels.push(
5254
- /* @__PURE__ */ jsx2(Text2, { dimColor: true, children: costParts[idx] || " " }, `cost-${idx}`)
5255
- );
5256
- }
5257
- costRow = /* @__PURE__ */ jsxs2(Text2, { children: [
5258
- " ",
5259
- costLabels
5260
- ] });
5261
- }
5262
- }
5263
- return /* @__PURE__ */ jsxs2(Box2, { flexDirection: "column", children: [
5264
- /* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) }),
5265
- /* @__PURE__ */ jsxs2(Text2, { children: [
5266
- " ",
5267
- elements
5268
- ] }),
5269
- driverRow,
5270
- costRow,
5271
- /* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) })
5272
- ] });
5186
+ return /* @__PURE__ */ jsx2(Box2, { flexDirection: "column", children: /* @__PURE__ */ jsxs2(Text2, { children: [
5187
+ " ",
5188
+ elements
5189
+ ] }) });
5273
5190
  }
5274
5191
 
5275
5192
  // src/lib/ink-lane-container.tsx
@@ -5447,7 +5364,7 @@ import { jsx as jsx5, jsxs as jsxs5 } from "react/jsx-runtime";
5447
5364
  function formatConflictText(count) {
5448
5365
  return count === 1 ? "1 conflict" : `${count} conflicts`;
5449
5366
  }
5450
- function formatCost2(cost) {
5367
+ function formatCost(cost) {
5451
5368
  return `$${cost.toFixed(2)}`;
5452
5369
  }
5453
5370
  function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes }) {
@@ -5473,7 +5390,7 @@ function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes })
5473
5390
  /* @__PURE__ */ jsx5(Text5, { children: " \u2502 " }),
5474
5391
  /* @__PURE__ */ jsx5(Text5, { dimColor: true, children: `Pending: ${pendingSection}` })
5475
5392
  ] }),
5476
- completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost2(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
5393
+ completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
5477
5394
  ] });
5478
5395
  }
5479
5396
 
@@ -5610,7 +5527,7 @@ function shortKey(key) {
5610
5527
  const m = key.match(/^(\d+-\d+)/);
5611
5528
  return m ? m[1] : key;
5612
5529
  }
5613
- function formatCost3(cost) {
5530
+ function formatCost2(cost) {
5614
5531
  return `$${cost.toFixed(2)}`;
5615
5532
  }
5616
5533
  function Header({ info: info3, laneCount }) {
@@ -5619,7 +5536,7 @@ function Header({ info: info3, laneCount }) {
5619
5536
  if (laneCount != null && laneCount > 1) parts.push(`${laneCount} lanes`);
5620
5537
  if (info3.elapsed) parts.push(`${info3.elapsed} elapsed`);
5621
5538
  const displayCost = laneCount != null && laneCount > 1 && info3.laneTotalCost != null ? info3.laneTotalCost : info3.totalCost;
5622
- if (displayCost != null) parts.push(`${formatCost3(displayCost)} spent`);
5539
+ if (displayCost != null) parts.push(`${formatCost2(displayCost)} spent`);
5623
5540
  const left = parts.join(" | ");
5624
5541
  const right = "[q to quit]";
5625
5542
  const width = process.stdout.columns || 80;
@@ -10559,7 +10476,7 @@ async function handleDockerCheck(isJson) {
10559
10476
  }
10560
10477
  }
10561
10478
  }
10562
- function formatElapsed3(ms) {
10479
+ function formatElapsed2(ms) {
10563
10480
  const s = Math.floor(ms / 1e3);
10564
10481
  const h = Math.floor(s / 3600);
10565
10482
  const m = Math.floor(s % 3600 / 60);
@@ -10579,7 +10496,7 @@ function printWorkflowState() {
10579
10496
  console.log(` Tasks completed: ${state.tasks_completed.length}`);
10580
10497
  if (state.phase === "executing" && state.started) {
10581
10498
  const elapsed = Date.now() - Date.parse(state.started);
10582
- console.log(` Elapsed: ${formatElapsed3(elapsed)}`);
10499
+ console.log(` Elapsed: ${formatElapsed2(elapsed)}`);
10583
10500
  }
10584
10501
  if (state.evaluator_scores.length > 0) {
10585
10502
  const latest = state.evaluator_scores[state.evaluator_scores.length - 1];
@@ -10604,7 +10521,7 @@ function getWorkflowStateData() {
10604
10521
  };
10605
10522
  if (state.phase === "executing" && state.started) {
10606
10523
  data.elapsed_ms = Date.now() - Date.parse(state.started);
10607
- data.elapsed = formatElapsed3(data.elapsed_ms);
10524
+ data.elapsed = formatElapsed2(data.elapsed_ms);
10608
10525
  }
10609
10526
  return data;
10610
10527
  }
@@ -11256,7 +11173,7 @@ function registerTeardownCommand(program) {
11256
11173
  } else if (otlpMode === "remote-routed") {
11257
11174
  if (!options.keepDocker) {
11258
11175
  try {
11259
- const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-P65B7Z3S.js");
11176
+ const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-TANMGEDO.js");
11260
11177
  stopCollectorOnly2();
11261
11178
  result.docker.stopped = true;
11262
11179
  if (!isJson) {
@@ -11288,7 +11205,7 @@ function registerTeardownCommand(program) {
11288
11205
  info("Shared stack: kept running (other projects may use it)");
11289
11206
  }
11290
11207
  } else if (isLegacyStack) {
11291
- const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-P65B7Z3S.js");
11208
+ const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-TANMGEDO.js");
11292
11209
  let stackRunning = false;
11293
11210
  try {
11294
11211
  stackRunning = isStackRunning2(composeFile);
@@ -13829,12 +13746,12 @@ var CodexDriver = class {
13829
13746
  opts.plugins
13830
13747
  );
13831
13748
  }
13832
- const args = [];
13749
+ const args = ["exec"];
13833
13750
  if (opts.model) {
13834
13751
  args.push("--model", opts.model);
13835
13752
  }
13836
13753
  if (opts.cwd) {
13837
- args.push("--cwd", opts.cwd);
13754
+ args.push("--cd", opts.cwd);
13838
13755
  }
13839
13756
  args.push(opts.prompt);
13840
13757
  let yieldedResult = false;
@@ -14184,7 +14101,7 @@ function registerDriversCommand(program) {
14184
14101
  }
14185
14102
 
14186
14103
  // src/index.ts
14187
- var VERSION = true ? "0.30.1" : "0.0.0-dev";
14104
+ var VERSION = true ? "0.31.0" : "0.0.0-dev";
14188
14105
  function createProgram() {
14189
14106
  const program = new Command();
14190
14107
  program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.30.1",
3
+ "version": "0.31.0",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
@@ -0,0 +1,65 @@
1
+ name: checker
2
+ role:
3
+ title: Automated Checker
4
+ purpose: Run tests, linter, and coverage checks — report pass/fail objectively
5
+ persona:
6
+ identity: |
7
+ CI bot that runs the project's test suite, linter, and coverage tool.
8
+ Reports results objectively — no interpretation, no fixes, just facts.
9
+ communication_style: "Machine-like. Commands run, output captured, pass/fail reported."
10
+ principles:
11
+ - Run the project's actual test command (npm test, pytest, cargo test, etc.)
12
+ - Run the project's linter if configured (eslint, ruff, clippy, etc.)
13
+ - Check coverage against target if configured
14
+ - Report exact command, exit code, and output for each check
15
+ - Never fix code — only report results
16
+ prompt_template: |
17
+ ## Role
18
+
19
+ You are running automated checks on the implementation. Run tests, linter, and coverage. Report results.
20
+
21
+ ## Process
22
+
23
+ 1. **Detect check commands** from the project (package.json scripts, pyproject.toml, Makefile, etc.)
24
+ 2. **Run tests**: execute the test command, capture output and exit code
25
+ 3. **Run linter**: execute the lint command if available
26
+ 4. **Check coverage**: if a coverage target exists, verify it's met
27
+
28
+ ## Output Format
29
+
30
+ Output a single JSON object:
31
+
32
+ ```json
33
+ {
34
+ "verdict": "pass" | "fail",
35
+ "checks": [
36
+ {
37
+ "name": "tests",
38
+ "command": "npm test",
39
+ "exit_code": 0,
40
+ "passed": true,
41
+ "summary": "42 tests passed"
42
+ },
43
+ {
44
+ "name": "lint",
45
+ "command": "npm run lint",
46
+ "exit_code": 0,
47
+ "passed": true,
48
+ "summary": "no issues"
49
+ },
50
+ {
51
+ "name": "coverage",
52
+ "command": "npm run coverage",
53
+ "exit_code": 0,
54
+ "passed": true,
55
+ "summary": "100% (target: 100%)"
56
+ }
57
+ ]
58
+ }
59
+ ```
60
+
61
+ Verdict is "pass" only if ALL checks pass.
62
+
63
+ ## Output Location
64
+
65
+ Write results to ./verdict/check.json
@@ -11,6 +11,12 @@ tasks:
11
11
  session: fresh
12
12
  source_access: true
13
13
  model: claude-sonnet-4-6
14
+ check:
15
+ agent: checker
16
+ scope: per-story
17
+ session: fresh
18
+ source_access: true
19
+ driver: codex
14
20
  review:
15
21
  agent: reviewer
16
22
  scope: per-story
@@ -39,10 +45,12 @@ tasks:
39
45
  flow:
40
46
  - create-story
41
47
  - implement
48
+ - check
42
49
  - review
43
50
  - verify
44
51
  - loop:
45
52
  - retry
53
+ - check
46
54
  - review
47
55
  - verify
48
56
  - retro