codeharness 0.19.5 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +3149 -2656
- package/dist/modules/observability/index.d.ts +295 -0
- package/dist/modules/observability/index.js +366 -0
- package/package.json +6 -1
- package/patches/AGENTS.md +19 -1
- package/patches/observability/AGENTS.md +27 -0
- package/patches/observability/__tests__/catch-without-logging.ts +36 -0
- package/patches/observability/__tests__/error-path-no-log.ts +47 -0
- package/patches/observability/__tests__/function-no-debug-log.ts +54 -0
- package/patches/observability/catch-without-logging.ts +36 -0
- package/patches/observability/catch-without-logging.yaml +35 -0
- package/patches/observability/error-path-no-log.ts +47 -0
- package/patches/observability/error-path-no-log.yaml +68 -0
- package/patches/observability/function-no-debug-log.ts +54 -0
- package/patches/observability/function-no-debug-log.yaml +114 -0
- package/ralph/drivers/claude-code.sh +2 -28
- package/ralph/ralph.sh +153 -9
- package/templates/Dockerfile.verify +8 -1
package/ralph/ralph.sh
CHANGED
|
@@ -47,7 +47,7 @@ RATE_LIMIT_SLEEP=3600 # 1 hour
|
|
|
47
47
|
|
|
48
48
|
# Driver
|
|
49
49
|
PLATFORM_DRIVER="${PLATFORM_DRIVER:-claude-code}"
|
|
50
|
-
CLAUDE_OUTPUT_FORMAT="${CLAUDE_OUTPUT_FORMAT:-json}"
|
|
50
|
+
CLAUDE_OUTPUT_FORMAT="${CLAUDE_OUTPUT_FORMAT:-stream-json}"
|
|
51
51
|
CLAUDE_ALLOWED_TOOLS="${CLAUDE_ALLOWED_TOOLS:-}"
|
|
52
52
|
CLAUDE_USE_CONTINUE="${CLAUDE_USE_CONTINUE:-false}" # Fresh context per iteration by default
|
|
53
53
|
|
|
@@ -98,6 +98,14 @@ log_status() {
|
|
|
98
98
|
"LOOP") color=$PURPLE ;;
|
|
99
99
|
esac
|
|
100
100
|
|
|
101
|
+
# DEBUG level: log file only, no terminal output
|
|
102
|
+
if [[ "$level" == "DEBUG" ]]; then
|
|
103
|
+
if [[ -n "$LOG_DIR" ]]; then
|
|
104
|
+
echo "[$timestamp] [$level] $message" >> "$LOG_DIR/ralph.log"
|
|
105
|
+
fi
|
|
106
|
+
return
|
|
107
|
+
fi
|
|
108
|
+
|
|
101
109
|
echo -e "${color}[$timestamp] [$level] $message${NC}" >&2
|
|
102
110
|
if [[ -n "$LOG_DIR" ]]; then
|
|
103
111
|
echo "[$timestamp] [$level] $message" >> "$LOG_DIR/ralph.log"
|
|
@@ -444,6 +452,58 @@ detect_story_changes() {
|
|
|
444
452
|
done <<< "$after_snapshot"
|
|
445
453
|
}
|
|
446
454
|
|
|
455
|
+
# ─── Sprint State Progress Polling ─────────────────────────────────────────
|
|
456
|
+
|
|
457
|
+
# Last-seen sprint-state progress fields. poll_sprint_state_progress compares
# fresh values against these to decide whether an update line is printed.
PREV_STORY=
PREV_PHASE=
PREV_AC_PROGRESS=
PREV_LAST_ACTION=
|
|
462
|
+
|
|
463
|
+
# Poll sprint-state.json for progress changes during background execution.
#
# Reads .run.currentStory, .run.currentPhase, .run.lastAction and
# .run.acProgress from ./sprint-state.json and compares them with the PREV_*
# globals. At most one INFO line is emitted through log_status per call:
#   - story or phase changed      -> "Story <s>: <phase>[ (<action>)]"
#   - else AC progress changed    -> "Story <s>: verify (AC <progress>)"
#   - else last action changed    -> "Story <s>: <phase> (<action>)"
# Afterwards the PREV_* globals are overwritten with the current values.
#
# A missing state file, a failing jq call, empty jq output, or no active
# story all return 0 without printing or touching the PREV_* globals.
poll_sprint_state_progress() {
    local state_file="sprint-state.json"
    [[ -f "$state_file" ]] || return 0

    # Single jq call to extract all fields (avoids 4 process spawns per poll
    # cycle). Fields are joined with the ASCII unit separator (0x1f), not a
    # tab: tab is IFS *whitespace*, so `read` collapses adjacent tabs and an
    # empty middle field (e.g. no lastAction yet) would shift acProgress into
    # cur_action. A non-whitespace delimiter preserves empty fields.
    local raw
    raw=$(jq -r '[.run.currentStory // "", .run.currentPhase // "", .run.lastAction // "", .run.acProgress // ""] | join("\u001f")' "$state_file" 2>/dev/null) || return 0
    [[ -n "$raw" ]] || return 0

    local cur_story cur_phase cur_action cur_ac
    IFS=$'\x1f' read -r cur_story cur_phase cur_action cur_ac <<< "$raw"

    # Nothing to report if no story is active
    [[ -z "$cur_story" ]] && return 0

    # Detect changes and print structured updates (first matching rule wins)
    if [[ "$cur_story" != "$PREV_STORY" || "$cur_phase" != "$PREV_PHASE" ]]; then
        if [[ -n "$cur_action" && "$cur_action" != "null" ]]; then
            log_status "INFO" "Story ${cur_story}: ${cur_phase} (${cur_action})"
        else
            log_status "INFO" "Story ${cur_story}: ${cur_phase}"
        fi
    elif [[ "$cur_ac" != "$PREV_AC_PROGRESS" && -n "$cur_ac" && "$cur_ac" != "null" ]]; then
        log_status "INFO" "Story ${cur_story}: verify (AC ${cur_ac})"
    elif [[ "$cur_action" != "$PREV_LAST_ACTION" && -n "$cur_action" && "$cur_action" != "null" ]]; then
        log_status "INFO" "Story ${cur_story}: ${cur_phase} (${cur_action})"
    fi

    # Remember what was seen so the next poll only reports differences
    PREV_STORY="$cur_story"
    PREV_PHASE="$cur_phase"
    PREV_AC_PROGRESS="$cur_ac"
    PREV_LAST_ACTION="$cur_action"
}
|
|
498
|
+
|
|
499
|
+
# Clear the four PREV_* change-tracking globals so the next poll starts from
# a blank baseline (called before each iteration's polling loop).
reset_poll_state() {
    local tracked
    for tracked in PREV_STORY PREV_PHASE PREV_AC_PROGRESS PREV_LAST_ACTION; do
        # printf -v assigns to the variable whose name is held in $tracked
        printf -v "$tracked" '%s' ""
    done
}
|
|
506
|
+
|
|
447
507
|
# ─── Progress Summary ───────────────────────────────────────────────────────
|
|
448
508
|
|
|
449
509
|
print_progress_summary() {
|
|
@@ -463,7 +523,51 @@ print_progress_summary() {
|
|
|
463
523
|
elapsed_fmt="${elapsed}s"
|
|
464
524
|
fi
|
|
465
525
|
|
|
466
|
-
|
|
526
|
+
# Read cost and failed stories from sprint-state.json (single jq call)
|
|
527
|
+
local cost=""
|
|
528
|
+
local cost_fmt=""
|
|
529
|
+
local failed_stories=""
|
|
530
|
+
if [[ -f "sprint-state.json" ]]; then
|
|
531
|
+
local state_data
|
|
532
|
+
state_data=$(jq -r '(.run.cost // 0 | tostring) + "\n" + ((.run.failed // []) | join("\n"))' "sprint-state.json" 2>/dev/null) || state_data=""
|
|
533
|
+
if [[ -n "$state_data" ]]; then
|
|
534
|
+
cost=$(head -1 <<< "$state_data")
|
|
535
|
+
failed_stories=$(tail -n +2 <<< "$state_data")
|
|
536
|
+
if [[ -n "$cost" && "$cost" != "0" && "$cost" != "null" ]]; then
|
|
537
|
+
cost_fmt=", cost: \$${cost}"
|
|
538
|
+
fi
|
|
539
|
+
fi
|
|
540
|
+
fi
|
|
541
|
+
|
|
542
|
+
log_status "INFO" "Progress: ${completed}/${total} done, ${remaining} remaining (iterations: ${loop_count}, elapsed: ${elapsed_fmt}${cost_fmt})"
|
|
543
|
+
|
|
544
|
+
# Show completed stories with ✓
|
|
545
|
+
if [[ -f "$SPRINT_STATUS_FILE" ]]; then
|
|
546
|
+
while IFS=: read -r key value; do
|
|
547
|
+
key=$(echo "$key" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
|
|
548
|
+
value=$(echo "$value" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
|
|
549
|
+
[[ -z "$key" || "$key" == \#* ]] && continue
|
|
550
|
+
if [[ "$key" =~ ^[0-9]+-[0-9]+- && "$value" == "done" ]]; then
|
|
551
|
+
log_status "SUCCESS" " ✓ ${key}"
|
|
552
|
+
fi
|
|
553
|
+
done < "$SPRINT_STATUS_FILE"
|
|
554
|
+
fi
|
|
555
|
+
|
|
556
|
+
# Show failed stories with ✗ from sprint-state.json
|
|
557
|
+
if [[ -n "$failed_stories" ]]; then
|
|
558
|
+
while IFS= read -r fkey; do
|
|
559
|
+
[[ -z "$fkey" ]] && continue
|
|
560
|
+
log_status "ERROR" " ✗ ${fkey}"
|
|
561
|
+
done <<< "$failed_stories"
|
|
562
|
+
fi
|
|
563
|
+
|
|
564
|
+
# Show flagged/blocked stories with ✕
|
|
565
|
+
if [[ -f "$FLAGGED_STORIES_FILE" ]]; then
|
|
566
|
+
while IFS= read -r bkey; do
|
|
567
|
+
[[ -z "$bkey" ]] && continue
|
|
568
|
+
log_status "WARN" " ✕ ${bkey} (blocked)"
|
|
569
|
+
done < "$FLAGGED_STORIES_FILE"
|
|
570
|
+
fi
|
|
467
571
|
|
|
468
572
|
# Show the next story in line (first non-done, non-flagged)
|
|
469
573
|
if [[ -f "$SPRINT_STATUS_FILE" ]]; then
|
|
@@ -541,7 +645,7 @@ load_platform_driver() {
|
|
|
541
645
|
CLAUDE_ALLOWED_TOOLS=$(IFS=','; echo "${VALID_TOOL_PATTERNS[*]}")
|
|
542
646
|
fi
|
|
543
647
|
|
|
544
|
-
log_status "
|
|
648
|
+
log_status "DEBUG" "Platform driver: $(driver_display_name) ($(driver_cli_binary))"
|
|
545
649
|
}
|
|
546
650
|
|
|
547
651
|
# ─── Execution ───────────────────────────────────────────────────────────────
|
|
@@ -624,11 +728,13 @@ execute_iteration() {
|
|
|
624
728
|
|
|
625
729
|
log_status "DEBUG" "Background PID: $claude_pid"
|
|
626
730
|
|
|
731
|
+
reset_poll_state
|
|
627
732
|
while kill -0 $claude_pid 2>/dev/null; do
|
|
628
733
|
progress_counter=$((progress_counter + 1))
|
|
629
734
|
if [[ -f "$output_file" && -s "$output_file" ]]; then
|
|
630
735
|
cp "$output_file" "$LIVE_LOG_FILE" 2>/dev/null
|
|
631
736
|
fi
|
|
737
|
+
poll_sprint_state_progress
|
|
632
738
|
sleep 10
|
|
633
739
|
done
|
|
634
740
|
|
|
@@ -723,7 +829,8 @@ execute_iteration() {
|
|
|
723
829
|
log_status "ERROR" "Claude API usage limit reached"
|
|
724
830
|
return 2
|
|
725
831
|
# Check for transient API errors (500, 529, overloaded) — don't count against story
|
|
726
|
-
|
|
832
|
+
# Status code patterns exclude decimal prefixes (e.g., cost_usd=0.503 ≠ HTTP 503)
|
|
833
|
+
elif grep -qiE 'Internal server error|api_error|overloaded|(^|[^0-9.])529([^0-9]|$)|(^|[^0-9.])503([^0-9]|$)' "$output_file" 2>/dev/null; then
|
|
727
834
|
log_status "WARN" "Transient API error (not story's fault) — will retry"
|
|
728
835
|
return 4
|
|
729
836
|
else
|
|
@@ -792,6 +899,41 @@ The loop:
|
|
|
792
899
|
HELPEOF
|
|
793
900
|
}
|
|
794
901
|
|
|
902
|
+
# ─── Sprint Summary ──────────────────────────────────────────────────────────
|
|
903
|
+
|
|
904
|
+
# Print a compact sprint summary at startup.
#
# Takes the output of get_task_counts and parses it as "<total> ... <completed>"
# (text before the first space / after the last space), then scans
# $SPRINT_STATUS_FILE for the first "key: value" entry whose key looks like a
# story id (N-N-...), whose value is not "done", and which is_story_flagged
# does not report as flagged. Emits exactly one INFO line through log_status,
# with a "next: <story> (<status>)" suffix when such a story was found.
print_sprint_summary() {
    local counts
    counts=$(get_task_counts)
    local total=${counts%% *}
    local completed=${counts##* }
    local remaining=$((total - completed))

    # Find next story
    local next_story=""
    local next_status=""
    if [[ -f "$SPRINT_STATUS_FILE" ]]; then
        # key/value are loop scratch; keep them out of the global namespace
        local key value
        # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline;
        # plain `while read` would silently drop it.
        while IFS=: read -r key value || [[ -n "$key$value" ]]; do
            # Trim leading/trailing whitespace with parameter expansion
            # instead of spawning echo|sed twice per line.
            key="${key#"${key%%[![:space:]]*}"}"
            key="${key%"${key##*[![:space:]]}"}"
            value="${value#"${value%%[![:space:]]*}"}"
            value="${value%"${value##*[![:space:]]}"}"
            # Skip blank lines and comments
            [[ -z "$key" || "$key" == \#* ]] && continue
            if [[ "$key" =~ ^[0-9]+-[0-9]+- && "$value" != "done" ]]; then
                if ! is_story_flagged "$key"; then
                    next_story="$key"
                    next_status="$value"
                    break
                fi
            fi
        done < "$SPRINT_STATUS_FILE"
    fi

    if [[ -n "$next_story" ]]; then
        log_status "INFO" "Sprint: ${completed}/${total} done, ${remaining} remaining — next: ${next_story} (${next_status})"
    else
        log_status "INFO" "Sprint: ${completed}/${total} done, ${remaining} remaining"
    fi
}
|
|
936
|
+
|
|
795
937
|
# ─── Main ────────────────────────────────────────────────────────────────────
|
|
796
938
|
|
|
797
939
|
main() {
|
|
@@ -880,15 +1022,17 @@ main() {
|
|
|
880
1022
|
# .story_retries and .flagged_stories are file-based — they persist automatically
|
|
881
1023
|
|
|
882
1024
|
log_status "SUCCESS" "Ralph loop starting"
|
|
883
|
-
log_status "
|
|
884
|
-
log_status "
|
|
885
|
-
log_status "
|
|
886
|
-
log_status "
|
|
887
|
-
log_status "
|
|
1025
|
+
log_status "DEBUG" "Plugin: $PLUGIN_DIR"
|
|
1026
|
+
log_status "DEBUG" "Max iterations: $MAX_ITERATIONS | Timeout: $((LOOP_TIMEOUT_SECONDS / 3600))h"
|
|
1027
|
+
log_status "DEBUG" "Prompt: $PROMPT_FILE"
|
|
1028
|
+
log_status "DEBUG" "Sprint status: $SPRINT_STATUS_FILE"
|
|
1029
|
+
log_status "DEBUG" "Max story retries: $MAX_STORY_RETRIES"
|
|
888
1030
|
|
|
889
1031
|
# Record loop start time for timeout
|
|
890
1032
|
loop_start_time=$(date +%s)
|
|
891
1033
|
|
|
1034
|
+
print_sprint_summary
|
|
1035
|
+
|
|
892
1036
|
local consecutive_failures=0
|
|
893
1037
|
local max_consecutive_failures=3
|
|
894
1038
|
|
|
@@ -5,12 +5,19 @@ FROM node:20-slim
|
|
|
5
5
|
|
|
6
6
|
ARG TARBALL=package.tgz
|
|
7
7
|
|
|
8
|
-
# System utilities
|
|
8
|
+
# System utilities + Python for Semgrep
|
|
9
9
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
10
10
|
curl \
|
|
11
11
|
jq \
|
|
12
|
+
python3 \
|
|
13
|
+
python3-pip \
|
|
14
|
+
pipx \
|
|
12
15
|
&& rm -rf /var/lib/apt/lists/*
|
|
13
16
|
|
|
17
|
+
# Semgrep for static analysis verification
|
|
18
|
+
RUN pipx install semgrep && pipx ensurepath
|
|
19
|
+
ENV PATH="/root/.local/bin:${PATH}"
|
|
20
|
+
|
|
14
21
|
# Verification tools + Claude Code CLI
|
|
15
22
|
RUN npm install -g showboat @anthropic-ai/claude-code
|
|
16
23
|
|