codeharness 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -45
- package/dist/index.js +124 -19
- package/package.json +1 -1
- package/ralph/drivers/claude-code.sh +3 -0
- package/ralph/ralph.sh +25 -0
package/README.md
CHANGED
|
@@ -1,66 +1,118 @@
|
|
|
1
1
|
# codeharness
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Makes autonomous coding agents produce software that actually works — not software that passes tests.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
# Install
|
|
7
|
-
npm install -g codeharness
|
|
5
|
+
codeharness is an **npm CLI** + **Claude Code plugin** that packages verification-driven development as an installable tool: black-box verification via Docker, agent-first observability via VictoriaMetrics, and mechanical enforcement via hooks that make skipping verification architecturally impossible.
|
|
8
6
|
|
|
9
|
-
|
|
10
|
-
codeharness init
|
|
7
|
+
## What it does
|
|
11
8
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
9
|
+
1. **Verifies features work** — not just that tests pass. Black-box verification runs the built CLI inside a Docker container with no source code access. If the feature doesn't work from a user's perspective, verification fails.
|
|
10
|
+
2. **Fixes what it finds** — verification failures with code bugs automatically return to development with specific findings. The dev agent gets told exactly what's broken and why.
|
|
11
|
+
3. **Runs sprints autonomously** — reads your sprint plan, picks the highest-priority story, implements it, reviews it, verifies it, and moves to the next one. Cross-epic prioritization, retry management, and session handoff built in.
|
|
12
|
+
4. **Makes agents see runtime** — ephemeral VictoriaMetrics stack (logs, metrics, traces) that agents query programmatically during development. No guessing at what the code does at runtime.
|
|
15
13
|
|
|
16
14
|
## Installation
|
|
17
15
|
|
|
16
|
+
Two components — install both:
|
|
17
|
+
|
|
18
18
|
```bash
|
|
19
|
+
# CLI (npm package)
|
|
19
20
|
npm install -g codeharness
|
|
20
|
-
```
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
# Claude Code plugin (slash commands, hooks, skills)
|
|
23
|
+
claude plugin install github:iVintik/codeharness
|
|
24
|
+
```
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
## Quick Start
|
|
25
27
|
|
|
26
28
|
```bash
|
|
29
|
+
# Initialize in your project
|
|
27
30
|
codeharness init
|
|
31
|
+
|
|
32
|
+
# Start autonomous sprint execution (inside Claude Code)
|
|
33
|
+
/harness-run
|
|
28
34
|
```
|
|
29
35
|
|
|
30
|
-
|
|
36
|
+
## How it works
|
|
37
|
+
|
|
38
|
+
### As a CLI (`codeharness`)
|
|
39
|
+
|
|
40
|
+
The CLI handles all mechanical work — stack detection, Docker management, verification, coverage, retry state.
|
|
41
|
+
|
|
42
|
+
| Command | Purpose |
|
|
43
|
+
|---------|---------|
|
|
44
|
+
| `codeharness init` | Detect stack, install dependencies, start observability, scaffold docs |
|
|
45
|
+
| `codeharness run` | Execute the autonomous coding loop (Ralph) |
|
|
46
|
+
| `codeharness verify --story <key>` | Run verification pipeline for a story |
|
|
47
|
+
| `codeharness status` | Show harness health, sprint progress, Docker stack |
|
|
48
|
+
| `codeharness coverage` | Run tests with coverage and evaluate against targets |
|
|
49
|
+
| `codeharness onboard epic` | Scan codebase for gaps, generate onboarding stories |
|
|
50
|
+
| `codeharness retry --status` | Show retry counts and flagged stories |
|
|
51
|
+
| `codeharness retry --reset` | Clear retry state for re-verification |
|
|
52
|
+
| `codeharness verify-env build` | Build Docker image for black-box verification |
|
|
53
|
+
| `codeharness stack start` | Start the shared observability stack |
|
|
54
|
+
| `codeharness teardown` | Remove harness from project |
|
|
31
55
|
|
|
32
|
-
|
|
56
|
+
All commands support `--json` for machine-readable output.
|
|
57
|
+
|
|
58
|
+
### As a Claude Code plugin (`/harness-*`)
|
|
59
|
+
|
|
60
|
+
The plugin provides slash commands that orchestrate the CLI within Claude Code sessions:
|
|
61
|
+
|
|
62
|
+
| Command | Purpose |
|
|
63
|
+
|---------|---------|
|
|
64
|
+
| `/harness-run` | Autonomous sprint execution — picks stories by priority, runs create → dev → review → verify loop |
|
|
65
|
+
| `/harness-init` | Interactive project initialization |
|
|
66
|
+
| `/harness-status` | Quick overview of sprint progress and harness health |
|
|
67
|
+
| `/harness-onboard` | Scan project and generate onboarding plan |
|
|
68
|
+
| `/harness-verify` | Verify a story with real-world evidence |
|
|
69
|
+
|
|
70
|
+
### BMAD Method integration
|
|
71
|
+
|
|
72
|
+
codeharness integrates with [BMAD Method](https://github.com/bmadcode/BMAD-METHOD) for structured sprint planning:
|
|
73
|
+
|
|
74
|
+
| Phase | Commands |
|
|
75
|
+
|-------|----------|
|
|
76
|
+
| Analysis | `/create-brief`, `/brainstorm-project`, `/market-research` |
|
|
77
|
+
| Planning | `/create-prd`, `/create-ux` |
|
|
78
|
+
| Solutioning | `/create-architecture`, `/create-epics-stories` |
|
|
79
|
+
| Implementation | `/sprint-planning`, `/create-story`, then `/harness-run` |
|
|
80
|
+
|
|
81
|
+
## Verification architecture
|
|
33
82
|
|
|
34
83
|
```
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
stack Manage the shared observability stack
|
|
58
|
-
query Query observability data (logs, metrics, traces)
|
|
59
|
-
scoped to current project
|
|
60
|
-
retro-import [options] Import retrospective action items as beads issues
|
|
61
|
-
github-import [options] Import GitHub issues labeled for sprint planning into
|
|
62
|
-
beads
|
|
63
|
-
verify-env Manage verification environment (Docker image + clean
|
|
64
|
-
workspace)
|
|
65
|
-
help [command] display help for command
|
|
84
|
+
┌─────────────────────────────────────────┐
|
|
85
|
+
│ Claude Code Session │
|
|
86
|
+
│ /harness-run picks next story │
|
|
87
|
+
│ → create-story → dev → review → verify │
|
|
88
|
+
└────────────────────┬────────────────────┘
|
|
89
|
+
│ verify
|
|
90
|
+
▼
|
|
91
|
+
┌─────────────────────────────────────────┐
|
|
92
|
+
│ Docker Container (no source code) │
|
|
93
|
+
│ - codeharness CLI installed from tarball│
|
|
94
|
+
│ - claude CLI for nested verification │
|
|
95
|
+
│ - curl/jq for observability queries │
|
|
96
|
+
│ Exercises CLI as a real user would │
|
|
97
|
+
└────────────────────┬────────────────────┘
|
|
98
|
+
│ queries
|
|
99
|
+
▼
|
|
100
|
+
┌─────────────────────────────────────────┐
|
|
101
|
+
│ Observability Stack (VictoriaMetrics) │
|
|
102
|
+
│ - VictoriaLogs :9428 (LogsQL) │
|
|
103
|
+
│ - VictoriaMetrics :8428 (PromQL) │
|
|
104
|
+
│ - OTEL Collector :4318 │
|
|
105
|
+
└─────────────────────────────────────────┘
|
|
66
106
|
```
|
|
107
|
+
|
|
108
|
+
When verification finds code bugs → story returns to dev with findings → dev fixes → re-verify. This loop runs up to 10 times per story. Infrastructure failures (timeouts, Docker errors) are retried 3 times and then skipped.
|
|
109
|
+
|
|
110
|
+
## Requirements
|
|
111
|
+
|
|
112
|
+
- Node.js >= 18
|
|
113
|
+
- Docker (for observability and verification)
|
|
114
|
+
- Claude Code (for plugin features)
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT
|
package/dist/index.js
CHANGED
|
@@ -464,6 +464,10 @@ var WEB_OTLP_PACKAGES = [
|
|
|
464
464
|
var AGENT_OTLP_PACKAGES_NODE = ["@traceloop/node-server-sdk"];
|
|
465
465
|
var AGENT_OTLP_PACKAGES_PYTHON = ["traceloop-sdk"];
|
|
466
466
|
var NODE_REQUIRE_FLAG = "--require @opentelemetry/auto-instrumentations-node/register";
|
|
467
|
+
function truncateError(message, maxLength = 200) {
|
|
468
|
+
if (message.length <= maxLength) return message;
|
|
469
|
+
return message.slice(0, maxLength) + "... (truncated)";
|
|
470
|
+
}
|
|
467
471
|
function installNodeOtlp(projectDir) {
|
|
468
472
|
try {
|
|
469
473
|
execFileSync2("npm", ["install", ...NODE_OTLP_PACKAGES], { cwd: projectDir, stdio: "pipe", timeout: 3e5 });
|
|
@@ -480,7 +484,7 @@ function installNodeOtlp(projectDir) {
|
|
|
480
484
|
packages_installed: false,
|
|
481
485
|
start_script_patched: false,
|
|
482
486
|
env_vars_configured: false,
|
|
483
|
-
error: `Failed to install Node.js OTLP packages: ${message}`
|
|
487
|
+
error: `Failed to install Node.js OTLP packages: ${truncateError(message)}`
|
|
484
488
|
};
|
|
485
489
|
}
|
|
486
490
|
}
|
|
@@ -489,8 +493,14 @@ function patchNodeStartScript(projectDir) {
|
|
|
489
493
|
if (!existsSync3(pkgPath)) {
|
|
490
494
|
return false;
|
|
491
495
|
}
|
|
492
|
-
|
|
493
|
-
|
|
496
|
+
let raw;
|
|
497
|
+
let pkg;
|
|
498
|
+
try {
|
|
499
|
+
raw = readFileSync3(pkgPath, "utf-8");
|
|
500
|
+
pkg = JSON.parse(raw);
|
|
501
|
+
} catch {
|
|
502
|
+
return false;
|
|
503
|
+
}
|
|
494
504
|
const scripts = pkg["scripts"];
|
|
495
505
|
if (!scripts) {
|
|
496
506
|
return false;
|
|
@@ -554,6 +564,14 @@ function configureWeb(projectDir, stack) {
|
|
|
554
564
|
} catch {
|
|
555
565
|
}
|
|
556
566
|
}
|
|
567
|
+
let endpoint = "http://localhost:4318";
|
|
568
|
+
try {
|
|
569
|
+
const currentState = readState(projectDir);
|
|
570
|
+
if (currentState.otlp?.endpoint) {
|
|
571
|
+
endpoint = currentState.otlp.endpoint;
|
|
572
|
+
}
|
|
573
|
+
} catch {
|
|
574
|
+
}
|
|
557
575
|
const snippet = `// OpenTelemetry Web SDK initialization \u2014 generated by codeharness
|
|
558
576
|
import { WebTracerProvider } from '@opentelemetry/sdk-trace-web';
|
|
559
577
|
import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
|
|
@@ -563,7 +581,7 @@ import { XMLHttpRequestInstrumentation } from '@opentelemetry/instrumentation-xm
|
|
|
563
581
|
import { registerInstrumentations } from '@opentelemetry/instrumentation';
|
|
564
582
|
|
|
565
583
|
const exporter = new OTLPTraceExporter({
|
|
566
|
-
url: '
|
|
584
|
+
url: '${endpoint}/v1/traces',
|
|
567
585
|
});
|
|
568
586
|
|
|
569
587
|
const provider = new WebTracerProvider();
|
|
@@ -675,12 +693,12 @@ function instrumentProject(projectDir, stack, opts) {
|
|
|
675
693
|
error: "Unsupported stack for OTLP instrumentation"
|
|
676
694
|
};
|
|
677
695
|
}
|
|
696
|
+
configureOtlpEnvVars(projectDir, stack, { appType });
|
|
697
|
+
result.env_vars_configured = true;
|
|
698
|
+
if (!isJson) {
|
|
699
|
+
ok("OTLP: environment variables configured");
|
|
700
|
+
}
|
|
678
701
|
if (result.status === "configured") {
|
|
679
|
-
configureOtlpEnvVars(projectDir, stack, { appType });
|
|
680
|
-
result.env_vars_configured = true;
|
|
681
|
-
if (!isJson) {
|
|
682
|
-
ok("OTLP: environment variables configured");
|
|
683
|
-
}
|
|
684
702
|
if (appType === "cli") {
|
|
685
703
|
configureCli(projectDir);
|
|
686
704
|
if (!isJson) {
|
|
@@ -874,6 +892,11 @@ ${patchContent}
|
|
|
874
892
|
${markers.end}`;
|
|
875
893
|
const startIdx = content.indexOf(markers.start);
|
|
876
894
|
const endIdx = content.indexOf(markers.end);
|
|
895
|
+
if (startIdx !== -1 !== (endIdx !== -1)) {
|
|
896
|
+
throw new Error(
|
|
897
|
+
`Corrupted patch markers for '${patchName}': only ${startIdx !== -1 ? "start" : "end"} marker found in ${filePath}`
|
|
898
|
+
);
|
|
899
|
+
}
|
|
877
900
|
if (startIdx !== -1 && endIdx !== -1) {
|
|
878
901
|
if (endIdx < startIdx) {
|
|
879
902
|
throw new Error(
|
|
@@ -1060,8 +1083,8 @@ var BmadError = class extends Error {
|
|
|
1060
1083
|
};
|
|
1061
1084
|
var PATCH_TARGETS = {
|
|
1062
1085
|
"story-verification": "bmm/workflows/4-implementation/create-story/template.md",
|
|
1063
|
-
"dev-enforcement": "bmm/workflows/4-implementation/dev-story/
|
|
1064
|
-
"review-enforcement": "bmm/workflows/4-implementation/code-review/
|
|
1086
|
+
"dev-enforcement": "bmm/workflows/4-implementation/dev-story/instructions.xml",
|
|
1087
|
+
"review-enforcement": "bmm/workflows/4-implementation/code-review/instructions.xml",
|
|
1065
1088
|
"retro-enforcement": "bmm/workflows/4-implementation/retrospective/instructions.md",
|
|
1066
1089
|
"sprint-beads": "bmm/workflows/4-implementation/sprint-planning/checklist.md",
|
|
1067
1090
|
"sprint-retro": "bmm/workflows/4-implementation/sprint-planning/instructions.md"
|
|
@@ -1110,9 +1133,9 @@ function installBmad(dir) {
|
|
|
1110
1133
|
patches_applied: []
|
|
1111
1134
|
};
|
|
1112
1135
|
}
|
|
1113
|
-
const cmdStr = "npx bmad-method
|
|
1136
|
+
const cmdStr = "npx bmad-method install";
|
|
1114
1137
|
try {
|
|
1115
|
-
execFileSync4("npx", ["bmad-method", "
|
|
1138
|
+
execFileSync4("npx", ["bmad-method", "install"], {
|
|
1116
1139
|
stdio: "pipe",
|
|
1117
1140
|
timeout: 6e4,
|
|
1118
1141
|
cwd: root
|
|
@@ -1121,6 +1144,9 @@ function installBmad(dir) {
|
|
|
1121
1144
|
const message = err instanceof Error ? err.message : String(err);
|
|
1122
1145
|
throw new BmadError(cmdStr, message);
|
|
1123
1146
|
}
|
|
1147
|
+
if (!isBmadInstalled(root)) {
|
|
1148
|
+
throw new BmadError(cmdStr, "_bmad/ directory was not created after successful npx bmad-method install");
|
|
1149
|
+
}
|
|
1124
1150
|
const version = detectBmadVersion(root);
|
|
1125
1151
|
return {
|
|
1126
1152
|
status: "installed",
|
|
@@ -1177,6 +1203,21 @@ function applyAllPatches(dir) {
|
|
|
1177
1203
|
}
|
|
1178
1204
|
return results;
|
|
1179
1205
|
}
|
|
1206
|
+
function detectBmalph(dir) {
|
|
1207
|
+
const root = dir ?? process.cwd();
|
|
1208
|
+
const files = [];
|
|
1209
|
+
const ralphRcPath = join5(root, ".ralph", ".ralphrc");
|
|
1210
|
+
if (existsSync5(ralphRcPath)) {
|
|
1211
|
+
files.push(".ralph/.ralphrc");
|
|
1212
|
+
}
|
|
1213
|
+
const dotRalphDir = join5(root, ".ralph");
|
|
1214
|
+
if (existsSync5(dotRalphDir)) {
|
|
1215
|
+
if (files.length === 0) {
|
|
1216
|
+
files.push(".ralph/");
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
return { detected: files.length > 0, files };
|
|
1220
|
+
}
|
|
1180
1221
|
function generateStoryKey(epicNumber, storyNumber, title) {
|
|
1181
1222
|
const slug = title.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
1182
1223
|
return `${epicNumber}-${storyNumber}-${slug}`;
|
|
@@ -1402,7 +1443,7 @@ function getInstallCommand(stack) {
|
|
|
1402
1443
|
}
|
|
1403
1444
|
|
|
1404
1445
|
// src/commands/init.ts
|
|
1405
|
-
var HARNESS_VERSION = true ? "0.
|
|
1446
|
+
var HARNESS_VERSION = true ? "0.17.0" : "0.0.0-dev";
|
|
1406
1447
|
function getProjectName(projectDir) {
|
|
1407
1448
|
try {
|
|
1408
1449
|
const pkgPath = join6(projectDir, "package.json");
|
|
@@ -1498,7 +1539,7 @@ function generateDocsIndexContent() {
|
|
|
1498
1539
|
}
|
|
1499
1540
|
var DO_NOT_EDIT_HEADER = "<!-- DO NOT EDIT MANUALLY -->\n";
|
|
1500
1541
|
function registerInitCommand(program) {
|
|
1501
|
-
program.command("init").description("Initialize the harness in a project").option("--no-frontend", "Disable frontend enforcement").option("--no-database", "Disable database enforcement").option("--no-api", "Disable API enforcement").option("--otel-endpoint <url>", "Remote OTLP endpoint (skips local Docker stack)").option("--logs-url <url>", "Remote VictoriaLogs URL").option("--metrics-url <url>", "Remote VictoriaMetrics URL").option("--traces-url <url>", "Remote Jaeger/VictoriaTraces URL").action(async (options, cmd) => {
|
|
1542
|
+
program.command("init").description("Initialize the harness in a project").option("--no-frontend", "Disable frontend enforcement").option("--no-database", "Disable database enforcement").option("--no-api", "Disable API enforcement").option("--no-observability", "Skip OTLP package installation").option("--otel-endpoint <url>", "Remote OTLP endpoint (skips local Docker stack)").option("--logs-url <url>", "Remote VictoriaLogs URL").option("--metrics-url <url>", "Remote VictoriaMetrics URL").option("--traces-url <url>", "Remote Jaeger/VictoriaTraces URL").action(async (options, cmd) => {
|
|
1502
1543
|
const globalOpts = cmd.optsWithGlobals();
|
|
1503
1544
|
const isJson = globalOpts.json === true;
|
|
1504
1545
|
const projectDir = process.cwd();
|
|
@@ -1527,6 +1568,47 @@ function registerInitCommand(program) {
|
|
|
1527
1568
|
result.documentation.agents_md = "exists";
|
|
1528
1569
|
result.documentation.docs_scaffold = "exists";
|
|
1529
1570
|
result.documentation.readme = "exists";
|
|
1571
|
+
const depResults = [];
|
|
1572
|
+
for (const spec of DEPENDENCY_REGISTRY) {
|
|
1573
|
+
const check = checkInstalled(spec);
|
|
1574
|
+
const depResult = {
|
|
1575
|
+
name: spec.name,
|
|
1576
|
+
displayName: spec.displayName,
|
|
1577
|
+
status: check.installed ? "already-installed" : "failed",
|
|
1578
|
+
version: check.version
|
|
1579
|
+
};
|
|
1580
|
+
depResults.push(depResult);
|
|
1581
|
+
if (!isJson) {
|
|
1582
|
+
if (check.installed) {
|
|
1583
|
+
const versionStr = check.version ? ` (v${check.version})` : "";
|
|
1584
|
+
ok(`${spec.displayName}: already installed${versionStr}`);
|
|
1585
|
+
} else {
|
|
1586
|
+
fail(`${spec.displayName}: not found`);
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1589
|
+
}
|
|
1590
|
+
result.dependencies = depResults;
|
|
1591
|
+
if (isBmadInstalled(projectDir)) {
|
|
1592
|
+
try {
|
|
1593
|
+
const patchResults = applyAllPatches(projectDir);
|
|
1594
|
+
const patchNames = patchResults.filter((r) => r.applied).map((r) => r.patchName);
|
|
1595
|
+
const version = detectBmadVersion(projectDir);
|
|
1596
|
+
const bmalpHDetection = detectBmalph(projectDir);
|
|
1597
|
+
result.bmad = {
|
|
1598
|
+
status: "already-installed",
|
|
1599
|
+
version,
|
|
1600
|
+
patches_applied: patchNames,
|
|
1601
|
+
bmalph_detected: bmalpHDetection.detected
|
|
1602
|
+
};
|
|
1603
|
+
if (!isJson) {
|
|
1604
|
+
info("BMAD: already installed, patches verified");
|
|
1605
|
+
if (bmalpHDetection.detected) {
|
|
1606
|
+
warn("bmalph detected \u2014 superseded files noted for cleanup");
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
} catch {
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1530
1612
|
if (isJson) {
|
|
1531
1613
|
jsonOutput(result);
|
|
1532
1614
|
} else {
|
|
@@ -1661,7 +1743,8 @@ function registerInitCommand(program) {
|
|
|
1661
1743
|
result.bmad = {
|
|
1662
1744
|
status: "already-installed",
|
|
1663
1745
|
version,
|
|
1664
|
-
patches_applied: patchNames
|
|
1746
|
+
patches_applied: patchNames,
|
|
1747
|
+
bmalph_detected: false
|
|
1665
1748
|
};
|
|
1666
1749
|
if (!isJson) {
|
|
1667
1750
|
info("BMAD: existing installation detected, patches applied");
|
|
@@ -1673,18 +1756,27 @@ function registerInitCommand(program) {
|
|
|
1673
1756
|
result.bmad = {
|
|
1674
1757
|
status: installResult.status,
|
|
1675
1758
|
version: installResult.version,
|
|
1676
|
-
patches_applied: patchNames
|
|
1759
|
+
patches_applied: patchNames,
|
|
1760
|
+
bmalph_detected: false
|
|
1677
1761
|
};
|
|
1678
1762
|
if (!isJson) {
|
|
1679
1763
|
ok(`BMAD: installed (v${installResult.version ?? "unknown"}), harness patches applied`);
|
|
1680
1764
|
}
|
|
1681
1765
|
}
|
|
1766
|
+
const bmalpHDetection = detectBmalph(projectDir);
|
|
1767
|
+
if (bmalpHDetection.detected && result.bmad) {
|
|
1768
|
+
result.bmad.bmalph_detected = true;
|
|
1769
|
+
if (!isJson) {
|
|
1770
|
+
warn("bmalph detected \u2014 superseded files noted for cleanup");
|
|
1771
|
+
}
|
|
1772
|
+
}
|
|
1682
1773
|
} catch (err) {
|
|
1683
1774
|
if (err instanceof BmadError) {
|
|
1684
1775
|
result.bmad = {
|
|
1685
1776
|
status: "failed",
|
|
1686
1777
|
version: null,
|
|
1687
1778
|
patches_applied: [],
|
|
1779
|
+
bmalph_detected: false,
|
|
1688
1780
|
error: err.message
|
|
1689
1781
|
};
|
|
1690
1782
|
if (!isJson) {
|
|
@@ -1757,7 +1849,20 @@ function registerInitCommand(program) {
|
|
|
1757
1849
|
ok("Documentation: README.md created");
|
|
1758
1850
|
}
|
|
1759
1851
|
}
|
|
1760
|
-
|
|
1852
|
+
let otlpResult;
|
|
1853
|
+
if (!options.observability) {
|
|
1854
|
+
otlpResult = {
|
|
1855
|
+
status: "skipped",
|
|
1856
|
+
packages_installed: false,
|
|
1857
|
+
start_script_patched: false,
|
|
1858
|
+
env_vars_configured: false
|
|
1859
|
+
};
|
|
1860
|
+
if (!isJson) {
|
|
1861
|
+
info("OTLP: skipped (--no-observability)");
|
|
1862
|
+
}
|
|
1863
|
+
} else {
|
|
1864
|
+
otlpResult = instrumentProject(projectDir, stack, { json: isJson, appType });
|
|
1865
|
+
}
|
|
1761
1866
|
result.otlp = otlpResult;
|
|
1762
1867
|
try {
|
|
1763
1868
|
const updatedState = readState(projectDir);
|
|
@@ -7675,7 +7780,7 @@ function handleStatus(dir, isJson, filterStory) {
|
|
|
7675
7780
|
}
|
|
7676
7781
|
|
|
7677
7782
|
// src/index.ts
|
|
7678
|
-
var VERSION = true ? "0.
|
|
7783
|
+
var VERSION = true ? "0.17.0" : "0.0.0-dev";
|
|
7679
7784
|
function createProgram() {
|
|
7680
7785
|
const program = new Command();
|
|
7681
7786
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
package/ralph/ralph.sh
CHANGED
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
|
|
8
8
|
set -e
|
|
9
9
|
|
|
10
|
+
# DEBUG: catch unexpected exits from set -e
|
|
11
|
+
# Append a line to ralph_crash.log whenever the ERR trap fires, recording the
# line number and the failing command's exit status.
# $? is saved as the first action of the handler: the $(date ...) substitution
# in the message would otherwise overwrite it, and the log would report the
# status of `date` instead of the command that failed.
# NOTE(review): without `set -E` an ERR trap is not inherited by shell
# functions, so failures inside functions are not logged here; confirm intent.
trap 'ralph_err_status=$?; echo "[$(date "+%Y-%m-%d %H:%M:%S")] [FATAL] ralph.sh died at line $LINENO (exit code: $ralph_err_status)" >> "${LOG_DIR:-ralph/logs}/ralph_crash.log" 2>/dev/null' ERR
|
|
12
|
+
|
|
10
13
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
14
|
source "$SCRIPT_DIR/lib/date_utils.sh"
|
|
12
15
|
source "$SCRIPT_DIR/lib/timeout_utils.sh"
|
|
@@ -581,6 +584,11 @@ execute_iteration() {
|
|
|
581
584
|
local deadline=$(( $(date +%s) + timeout_seconds ))
|
|
582
585
|
echo "$deadline" > "ralph/.iteration_deadline"
|
|
583
586
|
|
|
587
|
+
# DEBUG: log the command being run
|
|
588
|
+
log_status "DEBUG" "Command: ${CLAUDE_CMD_ARGS[*]}"
|
|
589
|
+
log_status "DEBUG" "Output file: $output_file"
|
|
590
|
+
log_status "DEBUG" "LIVE_OUTPUT=$LIVE_OUTPUT, timeout=${timeout_seconds}s"
|
|
591
|
+
|
|
584
592
|
log_status "INFO" "Starting $(driver_display_name) (timeout: ${ITERATION_TIMEOUT_MINUTES}m)..."
|
|
585
593
|
|
|
586
594
|
# Execute with timeout
|
|
@@ -606,6 +614,8 @@ execute_iteration() {
|
|
|
606
614
|
local claude_pid=$!
|
|
607
615
|
local progress_counter=0
|
|
608
616
|
|
|
617
|
+
log_status "DEBUG" "Background PID: $claude_pid"
|
|
618
|
+
|
|
609
619
|
while kill -0 $claude_pid 2>/dev/null; do
|
|
610
620
|
progress_counter=$((progress_counter + 1))
|
|
611
621
|
if [[ -f "$output_file" && -s "$output_file" ]]; then
|
|
@@ -614,8 +624,23 @@ execute_iteration() {
|
|
|
614
624
|
sleep 10
|
|
615
625
|
done
|
|
616
626
|
|
|
627
|
+
# Protect wait from set -e — capture exit code without crashing
|
|
628
|
+
set +e
|
|
617
629
|
wait $claude_pid
|
|
618
630
|
exit_code=$?
|
|
631
|
+
set -e
|
|
632
|
+
log_status "DEBUG" "Claude exited with code: $exit_code, output size: $(wc -c < "$output_file" 2>/dev/null || echo 0) bytes"
|
|
633
|
+
|
|
634
|
+
# If output is empty and exit code is non-zero, log diagnostic info
|
|
635
|
+
if [[ ! -s "$output_file" && $exit_code -ne 0 ]]; then
|
|
636
|
+
log_status "ERROR" "Claude produced no output and exited with code $exit_code"
|
|
637
|
+
log_status "DEBUG" "Checking if claude binary is responsive..."
|
|
638
|
+
if claude --version > /dev/null 2>&1; then
|
|
639
|
+
log_status "DEBUG" "claude binary OK: $(claude --version 2>&1)"
|
|
640
|
+
else
|
|
641
|
+
log_status "ERROR" "claude binary not responding"
|
|
642
|
+
fi
|
|
643
|
+
fi
|
|
619
644
|
fi
|
|
620
645
|
|
|
621
646
|
if [[ $exit_code -eq 0 ]]; then
|