codeharness 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +91 -21
- package/package.json +1 -1
- package/ralph/ralph.sh +22 -1
package/dist/index.js
CHANGED
|
@@ -1348,7 +1348,7 @@ function importStoriesToBeads(stories, opts, beadsFns) {
|
|
|
1348
1348
|
}
|
|
1349
1349
|
|
|
1350
1350
|
// src/commands/init.ts
|
|
1351
|
-
var HARNESS_VERSION = true ? "0.10.0" : "0.0.0-dev";
|
|
1351
|
+
var HARNESS_VERSION = true ? "0.12.0" : "0.0.0-dev";
|
|
1352
1352
|
function getStackLabel(stack) {
|
|
1353
1353
|
if (stack === "nodejs") return "Node.js (package.json)";
|
|
1354
1354
|
if (stack === "python") return "Python";
|
|
@@ -2405,10 +2405,13 @@ function buildSpawnArgs(opts) {
|
|
|
2405
2405
|
if (opts.live) {
|
|
2406
2406
|
args.push("--live");
|
|
2407
2407
|
}
|
|
2408
|
+
if (opts.reset) {
|
|
2409
|
+
args.push("--reset");
|
|
2410
|
+
}
|
|
2408
2411
|
return args;
|
|
2409
2412
|
}
|
|
2410
2413
|
function registerRunCommand(program) {
|
|
2411
|
-
program.command("run").description("Execute the autonomous coding loop").option("--max-iterations <n>", "Maximum loop iterations", "50").option("--timeout <seconds>", "Total loop timeout in seconds", "14400").option("--iteration-timeout <minutes>", "Per-iteration timeout in minutes", "30").option("--live", "Show live output streaming", false).option("--calls <n>", "Max API calls per hour", "100").option("--max-story-retries <n>", "Max retries per story before flagging", "3").action(async (options, cmd) => {
|
|
2414
|
+
program.command("run").description("Execute the autonomous coding loop").option("--max-iterations <n>", "Maximum loop iterations", "50").option("--timeout <seconds>", "Total loop timeout in seconds", "14400").option("--iteration-timeout <minutes>", "Per-iteration timeout in minutes", "30").option("--live", "Show live output streaming", false).option("--calls <n>", "Max API calls per hour", "100").option("--max-story-retries <n>", "Max retries per story before flagging", "3").option("--reset", "Clear retry counters, flagged stories, and circuit breaker before starting", false).action(async (options, cmd) => {
|
|
2412
2415
|
const globalOpts = cmd.optsWithGlobals();
|
|
2413
2416
|
const isJson = !!globalOpts.json;
|
|
2414
2417
|
const outputOpts = { json: isJson };
|
|
@@ -2477,7 +2480,8 @@ function registerRunCommand(program) {
|
|
|
2477
2480
|
iterationTimeout,
|
|
2478
2481
|
calls,
|
|
2479
2482
|
live: options.live,
|
|
2480
|
-
maxStoryRetries
|
|
2483
|
+
maxStoryRetries,
|
|
2484
|
+
reset: options.reset
|
|
2481
2485
|
});
|
|
2482
2486
|
const env = { ...process.env };
|
|
2483
2487
|
if (isJson) {
|
|
@@ -2592,6 +2596,32 @@ var INTEGRATION_KEYWORDS = [
|
|
|
2592
2596
|
"integration test",
|
|
2593
2597
|
"manual verification"
|
|
2594
2598
|
];
|
|
2599
|
+
var DOCKER_SESSION_KEYWORDS = [
|
|
2600
|
+
"agent tool",
|
|
2601
|
+
"subagent",
|
|
2602
|
+
"via agent",
|
|
2603
|
+
"invoke",
|
|
2604
|
+
"/create-story",
|
|
2605
|
+
"/bmad-dev-story",
|
|
2606
|
+
"/bmad-code-review",
|
|
2607
|
+
"/harness-run",
|
|
2608
|
+
"/retrospective",
|
|
2609
|
+
"sprint execution",
|
|
2610
|
+
"fresh context",
|
|
2611
|
+
"spawns",
|
|
2612
|
+
"code review workflow",
|
|
2613
|
+
"dev-story workflow",
|
|
2614
|
+
"automatically proceeds",
|
|
2615
|
+
"retries the current story",
|
|
2616
|
+
"halts with status",
|
|
2617
|
+
"prints summary"
|
|
2618
|
+
];
|
|
2619
|
+
var ESCALATE_KEYWORDS = [
|
|
2620
|
+
"physical hardware",
|
|
2621
|
+
"manual human",
|
|
2622
|
+
"visual inspection by human",
|
|
2623
|
+
"paid external service"
|
|
2624
|
+
];
|
|
2595
2625
|
function classifyVerifiability(description) {
|
|
2596
2626
|
const lower = description.toLowerCase();
|
|
2597
2627
|
for (const kw of INTEGRATION_KEYWORDS) {
|
|
@@ -2599,6 +2629,19 @@ function classifyVerifiability(description) {
|
|
|
2599
2629
|
}
|
|
2600
2630
|
return "cli-verifiable";
|
|
2601
2631
|
}
|
|
2632
|
+
function classifyStrategy(description) {
|
|
2633
|
+
const lower = description.toLowerCase();
|
|
2634
|
+
for (const kw of ESCALATE_KEYWORDS) {
|
|
2635
|
+
if (lower.includes(kw)) return "escalate";
|
|
2636
|
+
}
|
|
2637
|
+
for (const kw of DOCKER_SESSION_KEYWORDS) {
|
|
2638
|
+
if (lower.includes(kw)) return "docker-session";
|
|
2639
|
+
}
|
|
2640
|
+
for (const kw of INTEGRATION_KEYWORDS) {
|
|
2641
|
+
if (lower.includes(kw)) return "docker-session";
|
|
2642
|
+
}
|
|
2643
|
+
return "cli-direct";
|
|
2644
|
+
}
|
|
2602
2645
|
var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required)\s*-->/;
|
|
2603
2646
|
function parseVerificationTag(text) {
|
|
2604
2647
|
const match = VERIFICATION_TAG_PATTERN.exec(text);
|
|
@@ -2655,11 +2698,13 @@ function parseStoryACs(storyFilePath) {
|
|
|
2655
2698
|
if (description) {
|
|
2656
2699
|
const tag = parseVerificationTag(description);
|
|
2657
2700
|
const verifiability = tag ?? classifyVerifiability(description);
|
|
2701
|
+
const strategy = classifyStrategy(description);
|
|
2658
2702
|
acs.push({
|
|
2659
2703
|
id: currentId,
|
|
2660
2704
|
description,
|
|
2661
2705
|
type: classifyAC(description),
|
|
2662
|
-
verifiability
|
|
2706
|
+
verifiability,
|
|
2707
|
+
strategy
|
|
2663
2708
|
});
|
|
2664
2709
|
} else {
|
|
2665
2710
|
warn(`Skipping malformed AC #${currentId}: empty description`);
|
|
@@ -3294,27 +3339,52 @@ function validateProofQuality(proofPath) {
|
|
|
3294
3339
|
return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
|
|
3295
3340
|
}
|
|
3296
3341
|
const content = readFileSync10(proofPath, "utf-8");
|
|
3297
|
-
const acHeaderPattern = /^## AC \d
|
|
3342
|
+
const acHeaderPattern = /^## AC ?(\d+):/gm;
|
|
3298
3343
|
const matches = [...content.matchAll(acHeaderPattern)];
|
|
3299
|
-
if (matches.length === 0) {
|
|
3300
|
-
return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
|
|
3301
|
-
}
|
|
3302
3344
|
let verified = 0;
|
|
3303
3345
|
let pending = 0;
|
|
3304
3346
|
let escalated = 0;
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3347
|
+
if (matches.length > 0) {
|
|
3348
|
+
for (let i = 0; i < matches.length; i++) {
|
|
3349
|
+
const start = matches[i].index;
|
|
3350
|
+
const end = i + 1 < matches.length ? matches[i + 1].index : content.length;
|
|
3351
|
+
const section = content.slice(start, end);
|
|
3352
|
+
if (section.includes("[ESCALATE]")) {
|
|
3353
|
+
escalated++;
|
|
3354
|
+
continue;
|
|
3355
|
+
}
|
|
3356
|
+
const hasEvidence = section.includes("<!-- /showboat exec -->") || section.includes("<!-- showboat image:") || /```(?:bash|shell)\n[\s\S]*?```\n+```output\n/m.test(section);
|
|
3357
|
+
if (hasEvidence) {
|
|
3358
|
+
verified++;
|
|
3359
|
+
} else {
|
|
3360
|
+
pending++;
|
|
3361
|
+
}
|
|
3312
3362
|
}
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3316
|
-
|
|
3317
|
-
|
|
3363
|
+
} else {
|
|
3364
|
+
const inlineAcPattern = /--- AC ?(\d+):/g;
|
|
3365
|
+
const inlineMatches = [...content.matchAll(inlineAcPattern)];
|
|
3366
|
+
const acNumbers = new Set(inlineMatches.map((m) => m[1]));
|
|
3367
|
+
if (acNumbers.size === 0) {
|
|
3368
|
+
return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
|
|
3369
|
+
}
|
|
3370
|
+
for (const acNum of acNumbers) {
|
|
3371
|
+
const acPattern = new RegExp(`--- AC ?${acNum}:`, "g");
|
|
3372
|
+
const acIdx = content.search(acPattern);
|
|
3373
|
+
if (acIdx === -1) {
|
|
3374
|
+
pending++;
|
|
3375
|
+
continue;
|
|
3376
|
+
}
|
|
3377
|
+
const nextAcPattern = new RegExp(`--- AC ?(?!${acNum})\\d+:`, "g");
|
|
3378
|
+
nextAcPattern.lastIndex = acIdx + 1;
|
|
3379
|
+
const nextMatch = nextAcPattern.exec(content);
|
|
3380
|
+
const section = content.slice(acIdx, nextMatch ? nextMatch.index : content.length);
|
|
3381
|
+
if (section.includes("[ESCALATE]")) {
|
|
3382
|
+
escalated++;
|
|
3383
|
+
} else if (/```output\n/m.test(section)) {
|
|
3384
|
+
verified++;
|
|
3385
|
+
} else {
|
|
3386
|
+
pending++;
|
|
3387
|
+
}
|
|
3318
3388
|
}
|
|
3319
3389
|
}
|
|
3320
3390
|
const total = verified + pending + escalated;
|
|
@@ -6783,7 +6853,7 @@ function registerGithubImportCommand(program) {
|
|
|
6783
6853
|
}
|
|
6784
6854
|
|
|
6785
6855
|
// src/index.ts
|
|
6786
|
-
var VERSION = true ? "0.10.0" : "0.0.0-dev";
|
|
6856
|
+
var VERSION = true ? "0.12.0" : "0.0.0-dev";
|
|
6787
6857
|
function createProgram() {
|
|
6788
6858
|
const program = new Command();
|
|
6789
6859
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
package/ralph/ralph.sh
CHANGED
|
@@ -50,6 +50,9 @@ CLAUDE_OUTPUT_FORMAT="${CLAUDE_OUTPUT_FORMAT:-json}"
|
|
|
50
50
|
CLAUDE_ALLOWED_TOOLS="${CLAUDE_ALLOWED_TOOLS:-}"
|
|
51
51
|
CLAUDE_USE_CONTINUE="${CLAUDE_USE_CONTINUE:-false}" # Fresh context per iteration by default
|
|
52
52
|
|
|
53
|
+
# Reset retry state on start
|
|
54
|
+
RESET_RETRIES=false
|
|
55
|
+
|
|
53
56
|
# Live output
|
|
54
57
|
LIVE_OUTPUT=false
|
|
55
58
|
|
|
@@ -722,6 +725,7 @@ Options:
|
|
|
722
725
|
--prompt FILE Prompt file for each iteration
|
|
723
726
|
--progress FILE Progress file (tasks JSON)
|
|
724
727
|
--live Show live output streaming
|
|
728
|
+
--reset Clear retry counters, flagged stories, and circuit breaker before starting
|
|
725
729
|
--reset-circuit Reset circuit breaker and exit
|
|
726
730
|
--status Show current status and exit
|
|
727
731
|
|
|
@@ -805,7 +809,20 @@ main() {
|
|
|
805
809
|
fi
|
|
806
810
|
fi
|
|
807
811
|
|
|
808
|
-
#
|
|
812
|
+
# Reset retry state if --reset flag was passed
|
|
813
|
+
if [[ "$RESET_RETRIES" == "true" ]]; then
|
|
814
|
+
if [[ -f "$STORY_RETRY_FILE" ]]; then
|
|
815
|
+
rm -f "$STORY_RETRY_FILE"
|
|
816
|
+
log_status "INFO" "Cleared story retry counters"
|
|
817
|
+
fi
|
|
818
|
+
if [[ -f "$FLAGGED_STORIES_FILE" ]]; then
|
|
819
|
+
rm -f "$FLAGGED_STORIES_FILE"
|
|
820
|
+
log_status "INFO" "Cleared flagged stories"
|
|
821
|
+
fi
|
|
822
|
+
reset_circuit_breaker "Reset via --reset flag"
|
|
823
|
+
log_status "INFO" "Circuit breaker reset to CLOSED"
|
|
824
|
+
fi
|
|
825
|
+
|
|
809
826
|
# .story_retries and .flagged_stories are file-based — they persist automatically
|
|
810
827
|
|
|
811
828
|
log_status "SUCCESS" "Ralph loop starting"
|
|
@@ -1056,6 +1073,10 @@ while [[ $# -gt 0 ]]; do
|
|
|
1056
1073
|
LIVE_OUTPUT=true
|
|
1057
1074
|
shift
|
|
1058
1075
|
;;
|
|
1076
|
+
--reset)
|
|
1077
|
+
RESET_RETRIES=true
|
|
1078
|
+
shift
|
|
1079
|
+
;;
|
|
1059
1080
|
--reset-circuit)
|
|
1060
1081
|
# Derive state paths so circuit breaker uses the correct directory
|
|
1061
1082
|
HARNESS_STATE_DIR="$(pwd)/.claude"
|