codeharness 0.18.1 → 0.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +926 -130
- package/package.json +1 -1
- package/ralph/ralph.sh +36 -0
package/package.json
CHANGED
package/ralph/ralph.sh
CHANGED
|
@@ -560,6 +560,12 @@ execute_iteration() {
|
|
|
560
560
|
loop_start_sha=$(git rev-parse HEAD 2>/dev/null || echo "")
|
|
561
561
|
fi
|
|
562
562
|
|
|
563
|
+
# Snapshot sprint-state.json before iteration (for timeout delta capture)
|
|
564
|
+
local state_snapshot_path="ralph/.state-snapshot.json"
|
|
565
|
+
if [[ -f "sprint-state.json" ]]; then
|
|
566
|
+
cp "sprint-state.json" "$state_snapshot_path" 2>/dev/null || true
|
|
567
|
+
fi
|
|
568
|
+
|
|
563
569
|
log_status "LOOP" "Iteration $iteration — Task: ${task_id:-'(reading from prompt)'}"
|
|
564
570
|
local timeout_seconds=$((ITERATION_TIMEOUT_MINUTES * 60))
|
|
565
571
|
|
|
@@ -686,12 +692,36 @@ execute_iteration() {
|
|
|
686
692
|
return 0
|
|
687
693
|
elif [[ $exit_code -eq 124 ]]; then
|
|
688
694
|
log_status "WARN" "Iteration timed out after ${ITERATION_TIMEOUT_MINUTES}m"
|
|
695
|
+
|
|
696
|
+
# Capture timeout report
|
|
697
|
+
if command -v npx &>/dev/null; then
|
|
698
|
+
log_status "INFO" "Capturing timeout report..."
|
|
699
|
+
npx codeharness timeout-report \
|
|
700
|
+
--story "${task_id:-unknown}" \
|
|
701
|
+
--iteration "$iteration" \
|
|
702
|
+
--duration "$ITERATION_TIMEOUT_MINUTES" \
|
|
703
|
+
--output-file "$output_file" \
|
|
704
|
+
--state-snapshot "$state_snapshot_path" 2>/dev/null && \
|
|
705
|
+
log_status "INFO" "Timeout report saved" || \
|
|
706
|
+
log_status "WARN" "Failed to capture timeout report"
|
|
707
|
+
fi
|
|
708
|
+
|
|
709
|
+
# Verify report file exists with non-zero content
|
|
710
|
+
local report_file="ralph/logs/timeout-report-${iteration}-${task_id:-unknown}.md"
|
|
711
|
+
if [[ -s "$report_file" ]]; then
|
|
712
|
+
log_status "INFO" "Timeout report verified: $report_file"
|
|
713
|
+
fi
|
|
714
|
+
|
|
689
715
|
return 1
|
|
690
716
|
else
|
|
691
717
|
# Check for API limit
|
|
692
718
|
if grep -qi "5.*hour.*limit\|limit.*reached.*try.*back\|usage.*limit.*reached" "$output_file" 2>/dev/null; then
|
|
693
719
|
log_status "ERROR" "Claude API usage limit reached"
|
|
694
720
|
return 2
|
|
721
|
+
# Check for transient API errors (500, 529, overloaded) — don't count against story
|
|
722
|
+
elif grep -qi "Internal server error\|api_error\|overloaded\|529\|503" "$output_file" 2>/dev/null; then
|
|
723
|
+
log_status "WARN" "Transient API error (not story's fault) — will retry"
|
|
724
|
+
return 4
|
|
695
725
|
else
|
|
696
726
|
log_status "ERROR" "$(driver_display_name) execution failed (exit code: $exit_code)"
|
|
697
727
|
return 1
|
|
@@ -997,6 +1027,12 @@ main() {
|
|
|
997
1027
|
update_status "$loop_count" "$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")" "circuit_breaker" "halted"
|
|
998
1028
|
break
|
|
999
1029
|
;;
|
|
1030
|
+
4)
|
|
1031
|
+
# Transient API error — retry after brief pause, don't count against story
|
|
1032
|
+
consecutive_failures=0 # reset — this isn't the story's fault
|
|
1033
|
+
log_status "INFO" "Transient API error — retrying in 30s (not counting against story)"
|
|
1034
|
+
sleep 30
|
|
1035
|
+
;;
|
|
1000
1036
|
*)
|
|
1001
1037
|
# Failure (timeout or crash) — increment retry for the story that was being worked on
|
|
1002
1038
|
consecutive_failures=$((consecutive_failures + 1))
|