codeharness 0.18.1 → 0.19.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.18.1",
3
+ "version": "0.19.2",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
package/ralph/ralph.sh CHANGED
@@ -560,6 +560,12 @@ execute_iteration() {
560
560
  loop_start_sha=$(git rev-parse HEAD 2>/dev/null || echo "")
561
561
  fi
562
562
 
563
+ # Snapshot sprint-state.json before iteration (for timeout delta capture)
564
+ local state_snapshot_path="ralph/.state-snapshot.json"
565
+ if [[ -f "sprint-state.json" ]]; then
566
+ cp "sprint-state.json" "$state_snapshot_path" 2>/dev/null || true
567
+ fi
568
+
563
569
  log_status "LOOP" "Iteration $iteration — Task: ${task_id:-'(reading from prompt)'}"
564
570
  local timeout_seconds=$((ITERATION_TIMEOUT_MINUTES * 60))
565
571
 
@@ -686,12 +692,36 @@ execute_iteration() {
686
692
  return 0
687
693
  elif [[ $exit_code -eq 124 ]]; then
688
694
  log_status "WARN" "Iteration timed out after ${ITERATION_TIMEOUT_MINUTES}m"
695
+
696
+ # Capture timeout report
697
+ if command -v npx &>/dev/null; then
698
+ log_status "INFO" "Capturing timeout report..."
699
+ npx codeharness timeout-report \
700
+ --story "${task_id:-unknown}" \
701
+ --iteration "$iteration" \
702
+ --duration "$ITERATION_TIMEOUT_MINUTES" \
703
+ --output-file "$output_file" \
704
+ --state-snapshot "$state_snapshot_path" 2>/dev/null && \
705
+ log_status "INFO" "Timeout report saved" || \
706
+ log_status "WARN" "Failed to capture timeout report"
707
+ fi
708
+
709
+ # Verify report file exists with non-zero content
710
+ local report_file="ralph/logs/timeout-report-${iteration}-${task_id:-unknown}.md"
711
+ if [[ -s "$report_file" ]]; then
712
+ log_status "INFO" "Timeout report verified: $report_file"
713
+ fi
714
+
689
715
  return 1
690
716
  else
691
717
  # Check for API limit
692
718
  if grep -qi "5.*hour.*limit\|limit.*reached.*try.*back\|usage.*limit.*reached" "$output_file" 2>/dev/null; then
693
719
  log_status "ERROR" "Claude API usage limit reached"
694
720
  return 2
721
+ # Check for transient API errors (500, 529, overloaded) — don't count against story
722
+ elif grep -qi "Internal server error\|api_error\|overloaded\|529\|503" "$output_file" 2>/dev/null; then
723
+ log_status "WARN" "Transient API error (not story's fault) — will retry"
724
+ return 4
695
725
  else
696
726
  log_status "ERROR" "$(driver_display_name) execution failed (exit code: $exit_code)"
697
727
  return 1
@@ -997,6 +1027,12 @@ main() {
997
1027
  update_status "$loop_count" "$(cat "$CALL_COUNT_FILE" 2>/dev/null || echo "0")" "circuit_breaker" "halted"
998
1028
  break
999
1029
  ;;
1030
+ 4)
1031
+ # Transient API error — retry after brief pause, don't count against story
1032
+ consecutive_failures=0 # reset — this isn't the story's fault
1033
+ log_status "INFO" "Transient API error — retrying in 30s (not counting against story)"
1034
+ sleep 30
1035
+ ;;
1000
1036
  *)
1001
1037
  # Failure (timeout or crash) — increment retry for the story that was being worked on
1002
1038
  consecutive_failures=$((consecutive_failures + 1))