codeharness 0.30.1 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@ codeharness is an **npm CLI** + **Claude Code plugin** that packages verificatio
|
|
|
8
8
|
|
|
9
9
|
1. **Verifies features work** — not just that tests pass. Black-box verification runs the built CLI inside a Docker container with no source code access. If the feature doesn't work from a user's perspective, verification fails.
|
|
10
10
|
2. **Fixes what it finds** — verification failures with code bugs automatically return to development with specific findings. The dev agent gets told exactly what's broken and why.
|
|
11
|
-
3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it,
|
|
11
|
+
3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, checks it (tests + lint), verifies it (agent evaluation), and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
|
|
12
12
|
4. **Makes agents see runtime** — ephemeral VictoriaMetrics stack (logs, metrics, traces) that agents query programmatically during development. No guessing at what the code does at runtime.
|
|
13
13
|
|
|
14
14
|
## Installation
|
|
@@ -61,7 +61,7 @@ The plugin provides slash commands that orchestrate the CLI within Claude Code s
|
|
|
61
61
|
|
|
62
62
|
| Command | Purpose |
|
|
63
63
|
|---------|---------|
|
|
64
|
-
| `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create →
|
|
64
|
+
| `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → implement → check → verify loop |
|
|
65
65
|
| `/harness-init` | Interactive project initialization |
|
|
66
66
|
| `/harness-status` | Quick overview of sprint progress and harness health |
|
|
67
67
|
| `/harness-onboard` | Scan project and generate onboarding plan |
|
|
@@ -84,7 +84,7 @@ codeharness integrates with [BMAD Method](https://github.com/bmadcode/BMAD-METHO
|
|
|
84
84
|
┌─────────────────────────────────────────┐
|
|
85
85
|
│ Claude Code Session │
|
|
86
86
|
│ /harness-run picks next story │
|
|
87
|
-
│ → create-story →
|
|
87
|
+
│ → create-story → implement → check → verify │
|
|
88
88
|
└────────────────────┬────────────────────┘
|
|
89
89
|
│ verify
|
|
90
90
|
▼
|
|
@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
|
|
|
2895
2895
|
}
|
|
2896
2896
|
|
|
2897
2897
|
// src/modules/infra/init-project.ts
|
|
2898
|
-
var HARNESS_VERSION = true ? "0.30.1" : "0.0.0-dev";
|
|
2898
|
+
var HARNESS_VERSION = true ? "0.31.0" : "0.0.0-dev";
|
|
2899
2899
|
function failResult(opts, error) {
|
|
2900
2900
|
return {
|
|
2901
2901
|
status: "fail",
|
package/dist/index.js
CHANGED
|
@@ -40,7 +40,7 @@ import {
|
|
|
40
40
|
validateDockerfile,
|
|
41
41
|
warn,
|
|
42
42
|
writeState
|
|
43
|
-
} from "./chunk-
|
|
43
|
+
} from "./chunk-ITPLJVAB.js";
|
|
44
44
|
|
|
45
45
|
// src/index.ts
|
|
46
46
|
import { Command } from "commander";
|
|
@@ -5101,23 +5101,10 @@ import { Box as Box7, Static, Text as Text7, useInput } from "ink";
|
|
|
5101
5101
|
// src/lib/ink-workflow.tsx
|
|
5102
5102
|
import { Text as Text2, Box as Box2 } from "ink";
|
|
5103
5103
|
import { jsx as jsx2, jsxs as jsxs2 } from "react/jsx-runtime";
|
|
5104
|
-
var termWidth = () => Math.min(process.stdout.columns || 60, 80);
|
|
5105
5104
|
var SPINNER_FRAMES = ["\u280B", "\u2819", "\u2839", "\u2838", "\u283C", "\u2834", "\u2826", "\u2827", "\u2807", "\u280F"];
|
|
5106
5105
|
function isLoopBlock2(step) {
|
|
5107
5106
|
return typeof step === "object" && step !== null && "loop" in step;
|
|
5108
5107
|
}
|
|
5109
|
-
function formatCost(costUsd) {
|
|
5110
|
-
if (costUsd == null) return "...";
|
|
5111
|
-
return `$${costUsd.toFixed(2)}`;
|
|
5112
|
-
}
|
|
5113
|
-
function formatElapsed2(ms) {
|
|
5114
|
-
if (ms == null) return "...";
|
|
5115
|
-
const seconds = Math.round(ms / 1e3);
|
|
5116
|
-
if (seconds >= 60) {
|
|
5117
|
-
return `${Math.floor(seconds / 60)}m`;
|
|
5118
|
-
}
|
|
5119
|
-
return `${seconds}s`;
|
|
5120
|
-
}
|
|
5121
5108
|
function TaskNode({ name, status, spinnerFrame }) {
|
|
5122
5109
|
const s = status ?? "pending";
|
|
5123
5110
|
switch (s) {
|
|
@@ -5151,17 +5138,6 @@ function loopIteration(tasks, taskStates) {
|
|
|
5151
5138
|
});
|
|
5152
5139
|
return anyStarted ? 1 : 0;
|
|
5153
5140
|
}
|
|
5154
|
-
function collectTaskNames(flow) {
|
|
5155
|
-
const names = [];
|
|
5156
|
-
for (const step of flow) {
|
|
5157
|
-
if (isLoopBlock2(step)) {
|
|
5158
|
-
names.push(...step.loop);
|
|
5159
|
-
} else {
|
|
5160
|
-
names.push(step);
|
|
5161
|
-
}
|
|
5162
|
-
}
|
|
5163
|
-
return names;
|
|
5164
|
-
}
|
|
5165
5141
|
function hasMetaData(taskMeta) {
|
|
5166
5142
|
if (!taskMeta) return false;
|
|
5167
5143
|
return Object.keys(taskMeta).length > 0;
|
|
@@ -5207,69 +5183,10 @@ function WorkflowGraph({ flow, currentTask, taskStates, taskMeta }) {
|
|
|
5207
5183
|
);
|
|
5208
5184
|
}
|
|
5209
5185
|
}
|
|
5210
|
-
|
|
5211
|
-
|
|
5212
|
-
|
|
5213
|
-
|
|
5214
|
-
const driverParts = [];
|
|
5215
|
-
const costParts = [];
|
|
5216
|
-
let hasAnyCost = false;
|
|
5217
|
-
for (const name of taskNames) {
|
|
5218
|
-
const m = meta[name];
|
|
5219
|
-
const driver = m?.driver ?? "";
|
|
5220
|
-
driverParts.push(driver);
|
|
5221
|
-
const state = taskStates[name];
|
|
5222
|
-
if (state === "done") {
|
|
5223
|
-
const costStr = formatCost(m?.costUsd);
|
|
5224
|
-
const timeStr = formatElapsed2(m?.elapsedMs);
|
|
5225
|
-
costParts.push(`${costStr} / ${timeStr}`);
|
|
5226
|
-
hasAnyCost = true;
|
|
5227
|
-
} else {
|
|
5228
|
-
costParts.push("");
|
|
5229
|
-
}
|
|
5230
|
-
}
|
|
5231
|
-
const hasSomeDriver = driverParts.some((d) => d.length > 0);
|
|
5232
|
-
if (hasSomeDriver) {
|
|
5233
|
-
const driverLabels = [];
|
|
5234
|
-
for (let idx = 0; idx < taskNames.length; idx++) {
|
|
5235
|
-
if (idx > 0) {
|
|
5236
|
-
driverLabels.push(/* @__PURE__ */ jsx2(Text2, { children: " " }, `drv-sep-${idx}`));
|
|
5237
|
-
}
|
|
5238
|
-
driverLabels.push(
|
|
5239
|
-
/* @__PURE__ */ jsx2(Text2, { dimColor: true, children: driverParts[idx] || " " }, `drv-${idx}`)
|
|
5240
|
-
);
|
|
5241
|
-
}
|
|
5242
|
-
driverRow = /* @__PURE__ */ jsxs2(Text2, { children: [
|
|
5243
|
-
" ",
|
|
5244
|
-
driverLabels
|
|
5245
|
-
] });
|
|
5246
|
-
}
|
|
5247
|
-
if (hasAnyCost) {
|
|
5248
|
-
const costLabels = [];
|
|
5249
|
-
for (let idx = 0; idx < taskNames.length; idx++) {
|
|
5250
|
-
if (idx > 0) {
|
|
5251
|
-
costLabels.push(/* @__PURE__ */ jsx2(Text2, { children: " " }, `cost-sep-${idx}`));
|
|
5252
|
-
}
|
|
5253
|
-
costLabels.push(
|
|
5254
|
-
/* @__PURE__ */ jsx2(Text2, { dimColor: true, children: costParts[idx] || " " }, `cost-${idx}`)
|
|
5255
|
-
);
|
|
5256
|
-
}
|
|
5257
|
-
costRow = /* @__PURE__ */ jsxs2(Text2, { children: [
|
|
5258
|
-
" ",
|
|
5259
|
-
costLabels
|
|
5260
|
-
] });
|
|
5261
|
-
}
|
|
5262
|
-
}
|
|
5263
|
-
return /* @__PURE__ */ jsxs2(Box2, { flexDirection: "column", children: [
|
|
5264
|
-
/* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) }),
|
|
5265
|
-
/* @__PURE__ */ jsxs2(Text2, { children: [
|
|
5266
|
-
" ",
|
|
5267
|
-
elements
|
|
5268
|
-
] }),
|
|
5269
|
-
driverRow,
|
|
5270
|
-
costRow,
|
|
5271
|
-
/* @__PURE__ */ jsx2(Text2, { children: "\u2501".repeat(termWidth()) })
|
|
5272
|
-
] });
|
|
5186
|
+
return /* @__PURE__ */ jsx2(Box2, { flexDirection: "column", children: /* @__PURE__ */ jsxs2(Text2, { children: [
|
|
5187
|
+
" ",
|
|
5188
|
+
elements
|
|
5189
|
+
] }) });
|
|
5273
5190
|
}
|
|
5274
5191
|
|
|
5275
5192
|
// src/lib/ink-lane-container.tsx
|
|
@@ -5447,7 +5364,7 @@ import { jsx as jsx5, jsxs as jsxs5 } from "react/jsx-runtime";
|
|
|
5447
5364
|
function formatConflictText(count) {
|
|
5448
5365
|
return count === 1 ? "1 conflict" : `${count} conflicts`;
|
|
5449
5366
|
}
|
|
5450
|
-
function formatCost2(cost) {
|
|
5367
|
+
function formatCost(cost) {
|
|
5451
5368
|
return `$${cost.toFixed(2)}`;
|
|
5452
5369
|
}
|
|
5453
5370
|
function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes }) {
|
|
@@ -5473,7 +5390,7 @@ function SummaryBar({ doneStories, mergingEpic, pendingEpics, completedLanes })
|
|
|
5473
5390
|
/* @__PURE__ */ jsx5(Text5, { children: " \u2502 " }),
|
|
5474
5391
|
/* @__PURE__ */ jsx5(Text5, { dimColor: true, children: `Pending: ${pendingSection}` })
|
|
5475
5392
|
] }),
|
|
5476
|
-
completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost2(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
|
|
5393
|
+
completedLanes && completedLanes.length > 0 && completedLanes.map((lane) => /* @__PURE__ */ jsx5(Text5, { color: "green", children: `[OK] Lane ${lane.laneIndex}: Epic ${lane.epicId} complete (${lane.storyCount} stories, ${formatCost(lane.cost)}, ${lane.elapsed})` }, `lane-complete-${lane.laneIndex}`))
|
|
5477
5394
|
] });
|
|
5478
5395
|
}
|
|
5479
5396
|
|
|
@@ -5610,7 +5527,7 @@ function shortKey(key) {
|
|
|
5610
5527
|
const m = key.match(/^(\d+-\d+)/);
|
|
5611
5528
|
return m ? m[1] : key;
|
|
5612
5529
|
}
|
|
5613
|
-
function formatCost3(cost) {
|
|
5530
|
+
function formatCost2(cost) {
|
|
5614
5531
|
return `$${cost.toFixed(2)}`;
|
|
5615
5532
|
}
|
|
5616
5533
|
function Header({ info: info3, laneCount }) {
|
|
@@ -5619,7 +5536,7 @@ function Header({ info: info3, laneCount }) {
|
|
|
5619
5536
|
if (laneCount != null && laneCount > 1) parts.push(`${laneCount} lanes`);
|
|
5620
5537
|
if (info3.elapsed) parts.push(`${info3.elapsed} elapsed`);
|
|
5621
5538
|
const displayCost = laneCount != null && laneCount > 1 && info3.laneTotalCost != null ? info3.laneTotalCost : info3.totalCost;
|
|
5622
|
-
if (displayCost != null) parts.push(`${formatCost3(displayCost)} spent`);
|
|
5539
|
+
if (displayCost != null) parts.push(`${formatCost2(displayCost)} spent`);
|
|
5623
5540
|
const left = parts.join(" | ");
|
|
5624
5541
|
const right = "[q to quit]";
|
|
5625
5542
|
const width = process.stdout.columns || 80;
|
|
@@ -10559,7 +10476,7 @@ async function handleDockerCheck(isJson) {
|
|
|
10559
10476
|
}
|
|
10560
10477
|
}
|
|
10561
10478
|
}
|
|
10562
|
-
function formatElapsed3(ms) {
|
|
10479
|
+
function formatElapsed2(ms) {
|
|
10563
10480
|
const s = Math.floor(ms / 1e3);
|
|
10564
10481
|
const h = Math.floor(s / 3600);
|
|
10565
10482
|
const m = Math.floor(s % 3600 / 60);
|
|
@@ -10579,7 +10496,7 @@ function printWorkflowState() {
|
|
|
10579
10496
|
console.log(` Tasks completed: ${state.tasks_completed.length}`);
|
|
10580
10497
|
if (state.phase === "executing" && state.started) {
|
|
10581
10498
|
const elapsed = Date.now() - Date.parse(state.started);
|
|
10582
|
-
console.log(` Elapsed: ${formatElapsed3(elapsed)}`);
|
|
10499
|
+
console.log(` Elapsed: ${formatElapsed2(elapsed)}`);
|
|
10583
10500
|
}
|
|
10584
10501
|
if (state.evaluator_scores.length > 0) {
|
|
10585
10502
|
const latest = state.evaluator_scores[state.evaluator_scores.length - 1];
|
|
@@ -10604,7 +10521,7 @@ function getWorkflowStateData() {
|
|
|
10604
10521
|
};
|
|
10605
10522
|
if (state.phase === "executing" && state.started) {
|
|
10606
10523
|
data.elapsed_ms = Date.now() - Date.parse(state.started);
|
|
10607
|
-
data.elapsed = formatElapsed3(data.elapsed_ms);
|
|
10524
|
+
data.elapsed = formatElapsed2(data.elapsed_ms);
|
|
10608
10525
|
}
|
|
10609
10526
|
return data;
|
|
10610
10527
|
}
|
|
@@ -11256,7 +11173,7 @@ function registerTeardownCommand(program) {
|
|
|
11256
11173
|
} else if (otlpMode === "remote-routed") {
|
|
11257
11174
|
if (!options.keepDocker) {
|
|
11258
11175
|
try {
|
|
11259
|
-
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-
|
|
11176
|
+
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-TANMGEDO.js");
|
|
11260
11177
|
stopCollectorOnly2();
|
|
11261
11178
|
result.docker.stopped = true;
|
|
11262
11179
|
if (!isJson) {
|
|
@@ -11288,7 +11205,7 @@ function registerTeardownCommand(program) {
|
|
|
11288
11205
|
info("Shared stack: kept running (other projects may use it)");
|
|
11289
11206
|
}
|
|
11290
11207
|
} else if (isLegacyStack) {
|
|
11291
|
-
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-
|
|
11208
|
+
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-TANMGEDO.js");
|
|
11292
11209
|
let stackRunning = false;
|
|
11293
11210
|
try {
|
|
11294
11211
|
stackRunning = isStackRunning2(composeFile);
|
|
@@ -13829,12 +13746,12 @@ var CodexDriver = class {
|
|
|
13829
13746
|
opts.plugins
|
|
13830
13747
|
);
|
|
13831
13748
|
}
|
|
13832
|
-
const args = [];
|
|
13749
|
+
const args = ["exec"];
|
|
13833
13750
|
if (opts.model) {
|
|
13834
13751
|
args.push("--model", opts.model);
|
|
13835
13752
|
}
|
|
13836
13753
|
if (opts.cwd) {
|
|
13837
|
-
args.push("--
|
|
13754
|
+
args.push("--cd", opts.cwd);
|
|
13838
13755
|
}
|
|
13839
13756
|
args.push(opts.prompt);
|
|
13840
13757
|
let yieldedResult = false;
|
|
@@ -14184,7 +14101,7 @@ function registerDriversCommand(program) {
|
|
|
14184
14101
|
}
|
|
14185
14102
|
|
|
14186
14103
|
// src/index.ts
|
|
14187
|
-
var VERSION = true ? "0.30.1" : "0.0.0-dev";
|
|
14104
|
+
var VERSION = true ? "0.31.0" : "0.0.0-dev";
|
|
14188
14105
|
function createProgram() {
|
|
14189
14106
|
const program = new Command();
|
|
14190
14107
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
name: checker
|
|
2
|
+
role:
|
|
3
|
+
title: Automated Checker
|
|
4
|
+
purpose: Run tests, linter, and coverage checks — report pass/fail objectively
|
|
5
|
+
persona:
|
|
6
|
+
identity: |
|
|
7
|
+
CI bot that runs the project's test suite, linter, and coverage tool.
|
|
8
|
+
Reports results objectively — no interpretation, no fixes, just facts.
|
|
9
|
+
communication_style: "Machine-like. Commands run, output captured, pass/fail reported."
|
|
10
|
+
principles:
|
|
11
|
+
- Run the project's actual test command (npm test, pytest, cargo test, etc.)
|
|
12
|
+
- Run the project's linter if configured (eslint, ruff, clippy, etc.)
|
|
13
|
+
- Check coverage against target if configured
|
|
14
|
+
- Report exact command, exit code, and output for each check
|
|
15
|
+
- Never fix code — only report results
|
|
16
|
+
prompt_template: |
|
|
17
|
+
## Role
|
|
18
|
+
|
|
19
|
+
You are running automated checks on the implementation. Run tests, linter, and coverage. Report results.
|
|
20
|
+
|
|
21
|
+
## Process
|
|
22
|
+
|
|
23
|
+
1. **Detect check commands** from the project (package.json scripts, pyproject.toml, Makefile, etc.)
|
|
24
|
+
2. **Run tests**: execute the test command, capture output and exit code
|
|
25
|
+
3. **Run linter**: execute the lint command if available
|
|
26
|
+
4. **Check coverage**: if a coverage target exists, verify it's met
|
|
27
|
+
|
|
28
|
+
## Output Format
|
|
29
|
+
|
|
30
|
+
Output a single JSON object:
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"verdict": "pass" | "fail",
|
|
35
|
+
"checks": [
|
|
36
|
+
{
|
|
37
|
+
"name": "tests",
|
|
38
|
+
"command": "npm test",
|
|
39
|
+
"exit_code": 0,
|
|
40
|
+
"passed": true,
|
|
41
|
+
"summary": "42 tests passed"
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"name": "lint",
|
|
45
|
+
"command": "npm run lint",
|
|
46
|
+
"exit_code": 0,
|
|
47
|
+
"passed": true,
|
|
48
|
+
"summary": "no issues"
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"name": "coverage",
|
|
52
|
+
"command": "npm run coverage",
|
|
53
|
+
"exit_code": 0,
|
|
54
|
+
"passed": true,
|
|
55
|
+
"summary": "98% (target: 100%)"
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Verdict is "pass" only if ALL checks pass.
|
|
62
|
+
|
|
63
|
+
## Output Location
|
|
64
|
+
|
|
65
|
+
Write results to ./verdict/check.json
|
|
@@ -11,6 +11,12 @@ tasks:
|
|
|
11
11
|
session: fresh
|
|
12
12
|
source_access: true
|
|
13
13
|
model: claude-sonnet-4-6
|
|
14
|
+
check:
|
|
15
|
+
agent: checker
|
|
16
|
+
scope: per-story
|
|
17
|
+
session: fresh
|
|
18
|
+
source_access: true
|
|
19
|
+
driver: codex
|
|
14
20
|
review:
|
|
15
21
|
agent: reviewer
|
|
16
22
|
scope: per-story
|
|
@@ -39,10 +45,12 @@ tasks:
|
|
|
39
45
|
flow:
|
|
40
46
|
- create-story
|
|
41
47
|
- implement
|
|
48
|
+
- check
|
|
42
49
|
- review
|
|
43
50
|
- verify
|
|
44
51
|
- loop:
|
|
45
52
|
- retry
|
|
53
|
+
- check
|
|
46
54
|
- review
|
|
47
55
|
- verify
|
|
48
56
|
- retro
|