codeharness 0.17.5 → 0.17.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +11 -5
- package/package.json +1 -1
- package/ralph/ralph.sh +23 -3
package/dist/index.js
CHANGED
|
@@ -1443,7 +1443,7 @@ function getInstallCommand(stack) {
|
|
|
1443
1443
|
}
|
|
1444
1444
|
|
|
1445
1445
|
// src/commands/init.ts
|
|
1446
|
-
var HARNESS_VERSION = true ? "0.17.5" : "0.0.0-dev";
|
|
1446
|
+
var HARNESS_VERSION = true ? "0.17.7" : "0.0.0-dev";
|
|
1447
1447
|
function getProjectName(projectDir) {
|
|
1448
1448
|
try {
|
|
1449
1449
|
const pkgPath = join6(projectDir, "package.json");
|
|
@@ -3483,6 +3483,9 @@ function classifyCommand(cmd) {
|
|
|
3483
3483
|
if (/docker\s+exec\b/.test(cmd)) {
|
|
3484
3484
|
return "docker-exec";
|
|
3485
3485
|
}
|
|
3486
|
+
if (/docker\s+(ps|logs|inspect|stats|top|port)\b/.test(cmd)) {
|
|
3487
|
+
return "docker-host";
|
|
3488
|
+
}
|
|
3486
3489
|
if (/curl\b/.test(cmd) && /localhost:(9428|8428|16686)\b/.test(cmd)) {
|
|
3487
3490
|
return "observability";
|
|
3488
3491
|
}
|
|
@@ -3510,8 +3513,10 @@ function checkBlackBoxEnforcement(proofContent) {
|
|
|
3510
3513
|
const section = proofContent.slice(start, end);
|
|
3511
3514
|
if (section.includes("[ESCALATE]")) continue;
|
|
3512
3515
|
const sectionCommands = classifyEvidenceCommands(section);
|
|
3513
|
-
const
|
|
3514
|
-
|
|
3516
|
+
const hasBlackBoxEvidence = sectionCommands.some(
|
|
3517
|
+
(c) => c.type === "docker-exec" || c.type === "docker-host" || c.type === "observability"
|
|
3518
|
+
);
|
|
3519
|
+
if (!hasBlackBoxEvidence) {
|
|
3515
3520
|
acsMissingDockerExec.push(acNum);
|
|
3516
3521
|
}
|
|
3517
3522
|
}
|
|
@@ -3531,7 +3536,8 @@ function checkBlackBoxEnforcement(proofContent) {
|
|
|
3531
3536
|
}
|
|
3532
3537
|
function hasFailVerdict(section) {
|
|
3533
3538
|
const withoutCodeBlocks = section.replace(/```[\s\S]*?```/g, "");
|
|
3534
|
-
|
|
3539
|
+
const withoutInlineCode = withoutCodeBlocks.replace(/`[^`]+`/g, "");
|
|
3540
|
+
return withoutInlineCode.includes("[FAIL]");
|
|
3535
3541
|
}
|
|
3536
3542
|
function checkPreconditions(dir, storyId) {
|
|
3537
3543
|
const state = readState(dir);
|
|
@@ -7820,7 +7826,7 @@ function handleStatus(dir, isJson, filterStory) {
|
|
|
7820
7826
|
}
|
|
7821
7827
|
|
|
7822
7828
|
// src/index.ts
|
|
7823
|
-
var VERSION = true ? "0.17.5" : "0.0.0-dev";
|
|
7829
|
+
var VERSION = true ? "0.17.7" : "0.0.0-dev";
|
|
7824
7830
|
function createProgram() {
|
|
7825
7831
|
const program = new Command();
|
|
7826
7832
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
package/ralph/ralph.sh
CHANGED
|
@@ -37,7 +37,7 @@ LOG_DIR=""
|
|
|
37
37
|
|
|
38
38
|
# Loop limits
|
|
39
39
|
MAX_ITERATIONS=${MAX_ITERATIONS:-50}
|
|
40
|
-
MAX_STORY_RETRIES=${MAX_STORY_RETRIES:-
|
|
40
|
+
MAX_STORY_RETRIES=${MAX_STORY_RETRIES:-10}
|
|
41
41
|
LOOP_TIMEOUT_SECONDS=${LOOP_TIMEOUT_SECONDS:-14400} # 4 hours default
|
|
42
42
|
ITERATION_TIMEOUT_MINUTES=${ITERATION_TIMEOUT_MINUTES:-30}
|
|
43
43
|
|
|
@@ -951,7 +951,7 @@ main() {
|
|
|
951
951
|
fi
|
|
952
952
|
local retry_count
|
|
953
953
|
retry_count=$(increment_story_retry "$skey")
|
|
954
|
-
if [[ $retry_count -
|
|
954
|
+
if [[ $retry_count -ge $MAX_STORY_RETRIES ]]; then
|
|
955
955
|
log_status "WARN" "Story ${skey} exceeded retry limit (${retry_count}) — flagging and moving on"
|
|
956
956
|
flag_story "$skey"
|
|
957
957
|
else
|
|
@@ -998,8 +998,28 @@ main() {
|
|
|
998
998
|
break
|
|
999
999
|
;;
|
|
1000
1000
|
*)
|
|
1001
|
-
# Failure — retry
|
|
1001
|
+
# Failure (timeout or crash) — increment retry for the story that was being worked on
|
|
1002
1002
|
consecutive_failures=$((consecutive_failures + 1))
|
|
1003
|
+
|
|
1004
|
+
# Increment retry for the first non-done, non-flagged story (the one that caused the timeout)
|
|
1005
|
+
local after_snap_fail
|
|
1006
|
+
after_snap_fail=$(snapshot_story_statuses)
|
|
1007
|
+
while IFS=: read -r fkey fstatus; do
|
|
1008
|
+
[[ -z "$fkey" ]] && continue
|
|
1009
|
+
[[ "$fstatus" == "done" ]] && continue
|
|
1010
|
+
if ! is_story_flagged "$fkey"; then
|
|
1011
|
+
local fail_retry
|
|
1012
|
+
fail_retry=$(increment_story_retry "$fkey")
|
|
1013
|
+
if [[ $fail_retry -ge $MAX_STORY_RETRIES ]]; then
|
|
1014
|
+
log_status "WARN" "Story ${fkey} exceeded retry limit (${fail_retry}) after timeout — flagging"
|
|
1015
|
+
flag_story "$fkey"
|
|
1016
|
+
else
|
|
1017
|
+
log_status "WARN" "Story ${fkey} — timeout retry ${fail_retry}/${MAX_STORY_RETRIES}"
|
|
1018
|
+
fi
|
|
1019
|
+
break
|
|
1020
|
+
fi
|
|
1021
|
+
done <<< "$after_snap_fail"
|
|
1022
|
+
|
|
1003
1023
|
if [[ $consecutive_failures -ge $max_consecutive_failures ]]; then
|
|
1004
1024
|
log_status "ERROR" "$max_consecutive_failures consecutive failures — halting"
|
|
1005
1025
|
update_status "$loop_count" "$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")" "consecutive_failures" "halted"
|