@ai-dev-methodologies/rlp-desk 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -0
- package/docs/architecture.md +1 -1
- package/docs/protocol-reference.md +90 -3
- package/package.json +1 -1
- package/src/commands/rlp-desk.md +97 -10
- package/src/governance.md +87 -10
- package/src/scripts/init_ralph_desk.zsh +22 -6
- package/src/scripts/run_ralph_desk.zsh +753 -124
|
@@ -7,7 +7,7 @@ set -uo pipefail
|
|
|
7
7
|
# Ralph Desk Tmux Runner
|
|
8
8
|
#
|
|
9
9
|
# Implements the Leader loop from governance.md section 7 as a shell script.
|
|
10
|
-
# Uses
|
|
10
|
+
# Uses tmux proven patterns: write-then-notify, pane IDs (%N),
|
|
11
11
|
# copy-mode guards, verification-based retry, heartbeat monitoring,
|
|
12
12
|
# idle pane nudging, exponential backoff restarts, atomic file writes.
|
|
13
13
|
#
|
|
@@ -30,6 +30,7 @@ set -uo pipefail
|
|
|
30
30
|
# MAX_NUDGES - max nudges per pane per iteration (default: 3)
|
|
31
31
|
#
|
|
32
32
|
# Dependencies: tmux, claude CLI, jq
|
|
33
|
+
# Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
|
|
33
34
|
# =============================================================================
|
|
34
35
|
|
|
35
36
|
# --- Environment Variables ---
|
|
@@ -45,6 +46,17 @@ MAX_RESTARTS="${MAX_RESTARTS:-3}"
|
|
|
45
46
|
IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
|
|
46
47
|
MAX_NUDGES="${MAX_NUDGES:-3}"
|
|
47
48
|
|
|
49
|
+
# --- Engine Selection ---
|
|
50
|
+
WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
|
|
51
|
+
VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
|
|
52
|
+
CODEX_MODEL="${CODEX_MODEL:-gpt-5.4}"
|
|
53
|
+
CODEX_REASONING="${CODEX_REASONING:-high}" # low|medium|high
|
|
54
|
+
CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
55
|
+
|
|
56
|
+
# --- Verify Mode ---
|
|
57
|
+
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
58
|
+
VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
|
|
59
|
+
|
|
48
60
|
# --- Derived Paths ---
|
|
49
61
|
DESK="$ROOT/.claude/ralph-desk"
|
|
50
62
|
PROMPTS_DIR="$DESK/prompts"
|
|
@@ -80,6 +92,9 @@ CONSECUTIVE_FAILURES=0
|
|
|
80
92
|
PREV_CONTEXT_HASH=""
|
|
81
93
|
ITERATION=0
|
|
82
94
|
START_TIME=$(date +%s)
|
|
95
|
+
VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
|
|
96
|
+
CONSENSUS_ROUND=0 # current consensus round for current US
|
|
97
|
+
US_LIST="" # comma-separated US IDs from PRD (per-us mode)
|
|
83
98
|
|
|
84
99
|
# =============================================================================
|
|
85
100
|
# Utility Functions
|
|
@@ -103,7 +118,7 @@ log_error() {
|
|
|
103
118
|
echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
|
|
104
119
|
}
|
|
105
120
|
|
|
106
|
-
# --- governance.md s7: Atomic file writes (
|
|
121
|
+
# --- governance.md s7: Atomic file writes (tmux pattern) ---
|
|
107
122
|
# All file writes by the Leader use tmp+mv to prevent corruption.
|
|
108
123
|
atomic_write() {
|
|
109
124
|
local target="$1"
|
|
@@ -135,6 +150,19 @@ check_dependencies() {
|
|
|
135
150
|
missing=1
|
|
136
151
|
fi
|
|
137
152
|
|
|
153
|
+
# Codex binary required only when engine=codex or consensus verification is enabled
|
|
154
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
155
|
+
if ! command -v codex >/dev/null 2>&1; then
|
|
156
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
157
|
+
log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
|
|
158
|
+
else
|
|
159
|
+
log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
|
|
160
|
+
fi
|
|
161
|
+
log_error "Install with: npm install -g @openai/codex"
|
|
162
|
+
missing=1
|
|
163
|
+
fi
|
|
164
|
+
fi
|
|
165
|
+
|
|
138
166
|
if (( missing )); then
|
|
139
167
|
exit 1
|
|
140
168
|
fi
|
|
@@ -142,6 +170,12 @@ check_dependencies() {
|
|
|
142
170
|
# Resolve full path to claude binary for reliable launches
|
|
143
171
|
CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
|
|
144
172
|
log " Claude binary: $CLAUDE_BIN"
|
|
173
|
+
|
|
174
|
+
# Resolve codex binary if needed
|
|
175
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
176
|
+
CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
|
|
177
|
+
log " Codex binary: $CODEX_BIN"
|
|
178
|
+
fi
|
|
145
179
|
}
|
|
146
180
|
|
|
147
181
|
# =============================================================================
|
|
@@ -180,7 +214,7 @@ validate_scaffold() {
|
|
|
180
214
|
}
|
|
181
215
|
|
|
182
216
|
# =============================================================================
|
|
183
|
-
# Session Management (
|
|
217
|
+
# Session Management (tmux pattern: pane IDs)
|
|
184
218
|
# =============================================================================
|
|
185
219
|
|
|
186
220
|
# --- governance.md s7 step 1: Check for existing sessions ---
|
|
@@ -205,7 +239,7 @@ check_existing_sessions() {
|
|
|
205
239
|
create_session() {
|
|
206
240
|
log "Creating tmux session: $SESSION_NAME"
|
|
207
241
|
|
|
208
|
-
#
|
|
242
|
+
# tmux split-pane pattern
|
|
209
243
|
if [[ -n "${TMUX:-}" ]]; then
|
|
210
244
|
# Inside tmux: split CURRENT pane in place
|
|
211
245
|
# Current pane stays as-is (leader/user stays here)
|
|
@@ -220,7 +254,7 @@ create_session() {
|
|
|
220
254
|
VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
221
255
|
else
|
|
222
256
|
# Outside tmux: wrap current terminal into a new tmux session and attach
|
|
223
|
-
#
|
|
257
|
+
# tmux pattern: user sees panes immediately, no separate attach needed
|
|
224
258
|
tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT"
|
|
225
259
|
LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
|
|
226
260
|
WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
@@ -263,7 +297,7 @@ create_session() {
|
|
|
263
297
|
}
|
|
264
298
|
|
|
265
299
|
# =============================================================================
|
|
266
|
-
# Copy-Mode Guard (
|
|
300
|
+
# Copy-Mode Guard (tmux pattern)
|
|
267
301
|
# =============================================================================
|
|
268
302
|
|
|
269
303
|
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
|
|
@@ -278,7 +312,7 @@ check_copy_mode() {
|
|
|
278
312
|
}
|
|
279
313
|
|
|
280
314
|
# =============================================================================
|
|
281
|
-
# Verification-Based Send Retry (
|
|
315
|
+
# Verification-Based Send Retry (tmux pattern)
|
|
282
316
|
# =============================================================================
|
|
283
317
|
|
|
284
318
|
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
|
|
@@ -286,7 +320,7 @@ safe_send_keys() {
|
|
|
286
320
|
local pane_id="$1"
|
|
287
321
|
local text="$2"
|
|
288
322
|
|
|
289
|
-
# --- Exact
|
|
323
|
+
# --- Exact tmux sendToWorker pattern (tmux-session.js:527-626) ---
|
|
290
324
|
|
|
291
325
|
# Guard: copy-mode captures keys; skip entirely
|
|
292
326
|
if ! check_copy_mode "$pane_id"; then
|
|
@@ -305,15 +339,24 @@ safe_send_keys() {
|
|
|
305
339
|
log_debug " Trust prompt detected, dismissing"
|
|
306
340
|
tmux send-keys -t "$pane_id" C-m
|
|
307
341
|
sleep 0.12
|
|
308
|
-
|
|
342
|
+
fi
|
|
343
|
+
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
344
|
+
if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
345
|
+
log_debug " Permission prompt detected, auto-approving"
|
|
346
|
+
tmux send-keys -t "$pane_id" Enter
|
|
347
|
+
sleep 0.3
|
|
348
|
+
fi
|
|
349
|
+
# Auto-dismiss codex update prompt (select Skip)
|
|
350
|
+
if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
351
|
+
log_debug " Codex update prompt detected, selecting Skip"
|
|
352
|
+
tmux send-keys -t "$pane_id" "2" Enter
|
|
309
353
|
sleep 0.2
|
|
310
354
|
fi
|
|
311
|
-
|
|
312
355
|
# Send text in literal mode with -- separator
|
|
313
356
|
log_debug " Sending text to pane $pane_id (${#text} chars)"
|
|
314
357
|
tmux send-keys -t "$pane_id" -l -- "$text"
|
|
315
358
|
|
|
316
|
-
# Allow input buffer to settle (
|
|
359
|
+
# Allow input buffer to settle (tmux: 150ms)
|
|
317
360
|
sleep 0.15
|
|
318
361
|
|
|
319
362
|
# Submit: up to 6 rounds of C-m double-press
|
|
@@ -321,7 +364,7 @@ safe_send_keys() {
|
|
|
321
364
|
while (( round < 6 )); do
|
|
322
365
|
sleep 0.1
|
|
323
366
|
if (( round == 0 && pane_busy )); then
|
|
324
|
-
# Busy pane: Tab+C-m queue semantics (
|
|
367
|
+
# Busy pane: Tab+C-m queue semantics (tmux pattern)
|
|
325
368
|
tmux send-keys -t "$pane_id" Tab
|
|
326
369
|
sleep 0.08
|
|
327
370
|
tmux send-keys -t "$pane_id" C-m
|
|
@@ -349,7 +392,7 @@ safe_send_keys() {
|
|
|
349
392
|
return 1
|
|
350
393
|
fi
|
|
351
394
|
|
|
352
|
-
# Adaptive fallback: C-u clear line, resend (
|
|
395
|
+
# Adaptive fallback: C-u clear line, resend (tmux pattern)
|
|
353
396
|
log_debug " Adaptive retry — clearing line and resending"
|
|
354
397
|
tmux send-keys -t "$pane_id" C-u
|
|
355
398
|
sleep 0.08
|
|
@@ -385,19 +428,19 @@ safe_send_keys() {
|
|
|
385
428
|
}
|
|
386
429
|
|
|
387
430
|
# =============================================================================
|
|
388
|
-
# Wait for Pane Ready (
|
|
431
|
+
# Wait for Pane Ready (tmux pattern: paneLooksReady)
|
|
389
432
|
# =============================================================================
|
|
390
433
|
|
|
391
434
|
wait_for_pane_ready() {
|
|
392
435
|
local pane_id="$1"
|
|
393
|
-
local timeout="${2:-10}" #
|
|
436
|
+
local timeout="${2:-10}" # tmux default: 10s
|
|
394
437
|
local start=$(date +%s)
|
|
395
438
|
log " Waiting for pane $pane_id ready..."
|
|
396
439
|
while (( $(date +%s) - start < timeout )); do
|
|
397
440
|
local captured
|
|
398
441
|
captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
|
|
399
442
|
|
|
400
|
-
# Auto-dismiss trust prompt (
|
|
443
|
+
# Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
|
|
401
444
|
if echo "$captured" | grep -q "Do you trust" 2>/dev/null; then
|
|
402
445
|
log " Trust prompt detected, auto-dismissing..."
|
|
403
446
|
tmux send-keys -t "$pane_id" Enter
|
|
@@ -407,7 +450,23 @@ wait_for_pane_ready() {
|
|
|
407
450
|
continue
|
|
408
451
|
fi
|
|
409
452
|
|
|
410
|
-
#
|
|
453
|
+
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
454
|
+
if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
|
|
455
|
+
log " Permission prompt detected, auto-approving..."
|
|
456
|
+
tmux send-keys -t "$pane_id" Enter
|
|
457
|
+
sleep 0.5
|
|
458
|
+
continue
|
|
459
|
+
fi
|
|
460
|
+
|
|
461
|
+
# Auto-dismiss codex update prompt (select Skip = option 2)
|
|
462
|
+
if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
463
|
+
log " Codex update prompt detected, selecting Skip..."
|
|
464
|
+
tmux send-keys -t "$pane_id" "2" Enter
|
|
465
|
+
sleep 0.5
|
|
466
|
+
continue
|
|
467
|
+
fi
|
|
468
|
+
|
|
469
|
+
# tmux paneLooksReady: check each line for prompt char at line start
|
|
411
470
|
local ready=0
|
|
412
471
|
echo "$captured" | while IFS= read -r line; do
|
|
413
472
|
local trimmed="${line## }"
|
|
@@ -437,7 +496,7 @@ wait_for_pane_ready() {
|
|
|
437
496
|
}
|
|
438
497
|
|
|
439
498
|
# =============================================================================
|
|
440
|
-
# Heartbeat Monitoring (
|
|
499
|
+
# Heartbeat Monitoring (tmux pattern)
|
|
441
500
|
# =============================================================================
|
|
442
501
|
|
|
443
502
|
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
|
|
@@ -473,7 +532,7 @@ check_heartbeat_exited() {
|
|
|
473
532
|
}
|
|
474
533
|
|
|
475
534
|
# =============================================================================
|
|
476
|
-
# Idle Pane Nudging (
|
|
535
|
+
# Idle Pane Nudging (tmux pattern)
|
|
477
536
|
# =============================================================================
|
|
478
537
|
|
|
479
538
|
# --- governance.md s7 step 5+6: Nudge idle panes ---
|
|
@@ -503,7 +562,7 @@ check_and_nudge_idle_pane() {
|
|
|
503
562
|
}
|
|
504
563
|
|
|
505
564
|
# =============================================================================
|
|
506
|
-
# Exponential Backoff Restart (
|
|
565
|
+
# Exponential Backoff Restart (tmux pattern)
|
|
507
566
|
# =============================================================================
|
|
508
567
|
|
|
509
568
|
# --- governance.md s7 step 5: Restart dead workers with backoff ---
|
|
@@ -529,14 +588,18 @@ restart_worker() {
|
|
|
529
588
|
tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
|
|
530
589
|
sleep 2
|
|
531
590
|
|
|
532
|
-
# Re-launch
|
|
533
|
-
|
|
591
|
+
# Re-launch worker (tmux interactive pattern)
|
|
592
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
593
|
+
safe_send_keys "$pane_id" "${CODEX_BIN:-codex} -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
594
|
+
else
|
|
595
|
+
safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
596
|
+
fi
|
|
534
597
|
WORKER_RESTARTS[$iter]=$((restart_count + 1))
|
|
535
598
|
return 0
|
|
536
599
|
}
|
|
537
600
|
|
|
538
601
|
# =============================================================================
|
|
539
|
-
# Write-Then-Notify: Trigger Script Generation (
|
|
602
|
+
# Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
|
|
540
603
|
# =============================================================================
|
|
541
604
|
|
|
542
605
|
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
|
|
@@ -575,9 +638,64 @@ write_worker_trigger() {
|
|
|
575
638
|
echo ""
|
|
576
639
|
cat "$fix_contract_file"
|
|
577
640
|
fi
|
|
641
|
+
|
|
642
|
+
# Per-US mode: tell Worker exactly which US to work on
|
|
643
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
|
|
644
|
+
# Find next unverified US
|
|
645
|
+
local next_us=""
|
|
646
|
+
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
647
|
+
if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
|
|
648
|
+
next_us="$us"
|
|
649
|
+
break
|
|
650
|
+
fi
|
|
651
|
+
done
|
|
652
|
+
|
|
653
|
+
if [[ -n "$next_us" ]]; then
|
|
654
|
+
echo ""
|
|
655
|
+
echo "---"
|
|
656
|
+
echo "## PER-US SCOPE LOCK (this iteration)"
|
|
657
|
+
echo "You MUST implement ONLY **${next_us}** in this iteration."
|
|
658
|
+
echo "Do NOT implement any other user stories."
|
|
659
|
+
echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
|
|
660
|
+
echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
|
|
661
|
+
elif [[ -n "$VERIFIED_US" ]]; then
|
|
662
|
+
# All individual US verified — this is the final full verify iteration
|
|
663
|
+
echo ""
|
|
664
|
+
echo "---"
|
|
665
|
+
echo "## FINAL VERIFICATION ITERATION"
|
|
666
|
+
echo "All individual US have been verified: $VERIFIED_US"
|
|
667
|
+
echo "Run all tests and verification commands to confirm everything works together."
|
|
668
|
+
echo "Signal verify with us_id=\"ALL\" for the final full verification."
|
|
669
|
+
fi
|
|
670
|
+
elif [[ "$VERIFY_MODE" = "batch" ]]; then
|
|
671
|
+
echo ""
|
|
672
|
+
echo "---"
|
|
673
|
+
echo "## BATCH MODE OVERRIDE"
|
|
674
|
+
echo "Ignore any per-US signal instructions above. In batch mode:"
|
|
675
|
+
echo "- Implement ALL user stories in this iteration"
|
|
676
|
+
echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
|
|
677
|
+
echo "- Do NOT signal verify after individual stories"
|
|
678
|
+
fi
|
|
578
679
|
} | atomic_write "$prompt_file"
|
|
579
680
|
|
|
580
681
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
682
|
+
# Engine-specific launch command (expanded at write time)
|
|
683
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
684
|
+
local engine_cmd="${CODEX_BIN:-codex} -m $CODEX_MODEL \\
|
|
685
|
+
-c model_reasoning_effort=\"$CODEX_REASONING\" \\
|
|
686
|
+
--dangerously-bypass-approvals-and-sandbox \\
|
|
687
|
+
\"\$(cat $prompt_file)\" \\
|
|
688
|
+
2>&1 | tee $output_log"
|
|
689
|
+
local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
|
|
690
|
+
else
|
|
691
|
+
local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
|
|
692
|
+
--model $WORKER_MODEL \\
|
|
693
|
+
--dangerously-skip-permissions \\
|
|
694
|
+
--output-format text \\
|
|
695
|
+
2>&1 | tee $output_log"
|
|
696
|
+
local engine_comment="# Run claude with fresh context (governance.md s7 step 5)"
|
|
697
|
+
fi
|
|
698
|
+
|
|
581
699
|
{
|
|
582
700
|
cat <<TRIGGER_EOF
|
|
583
701
|
#!/bin/zsh
|
|
@@ -586,7 +704,7 @@ write_worker_trigger() {
|
|
|
586
704
|
|
|
587
705
|
HEARTBEAT_FILE="$WORKER_HEARTBEAT"
|
|
588
706
|
|
|
589
|
-
# Background heartbeat writer (
|
|
707
|
+
# Background heartbeat writer (tmux pattern)
|
|
590
708
|
(
|
|
591
709
|
while true; do
|
|
592
710
|
echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
|
|
@@ -596,12 +714,8 @@ HEARTBEAT_FILE="$WORKER_HEARTBEAT"
|
|
|
596
714
|
) &
|
|
597
715
|
HEARTBEAT_PID=\$!
|
|
598
716
|
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
--model $WORKER_MODEL \\
|
|
602
|
-
--dangerously-skip-permissions \\
|
|
603
|
-
--output-format text \\
|
|
604
|
-
2>&1 | tee $output_log
|
|
717
|
+
$engine_comment
|
|
718
|
+
$engine_cmd
|
|
605
719
|
|
|
606
720
|
# Cleanup heartbeat writer
|
|
607
721
|
kill \$HEARTBEAT_PID 2>/dev/null
|
|
@@ -618,11 +732,20 @@ TRIGGER_EOF
|
|
|
618
732
|
|
|
619
733
|
write_verifier_trigger() {
|
|
620
734
|
local iter="$1"
|
|
621
|
-
local
|
|
622
|
-
local
|
|
623
|
-
local
|
|
735
|
+
local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
|
|
736
|
+
local verifier_model="${3:-$VERIFIER_MODEL}"
|
|
737
|
+
local suffix="${4:-}" # optional suffix for consensus (e.g., "-claude", "-codex")
|
|
738
|
+
local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
|
|
739
|
+
local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
|
|
740
|
+
local output_log="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-output.log"
|
|
741
|
+
|
|
742
|
+
# Read us_id from iter-signal.json for per-US scoping
|
|
743
|
+
local us_id=""
|
|
744
|
+
if [[ -f "$SIGNAL_FILE" ]]; then
|
|
745
|
+
us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
746
|
+
fi
|
|
624
747
|
|
|
625
|
-
# Build verifier prompt from base
|
|
748
|
+
# Build verifier prompt from base with US scope
|
|
626
749
|
{
|
|
627
750
|
cat "$VERIFIER_PROMPT_BASE"
|
|
628
751
|
echo ""
|
|
@@ -630,18 +753,45 @@ write_verifier_trigger() {
|
|
|
630
753
|
echo "## Verification Context"
|
|
631
754
|
echo "- **Iteration**: $iter"
|
|
632
755
|
echo "- **Done Claim**: $DONE_CLAIM_FILE"
|
|
756
|
+
echo "- **Verify Mode**: $VERIFY_MODE"
|
|
757
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$us_id" ]]; then
|
|
758
|
+
if [[ "$us_id" = "ALL" ]]; then
|
|
759
|
+
echo "- **Scope**: FINAL FULL VERIFY — check ALL acceptance criteria from the PRD"
|
|
760
|
+
echo "- **Previously verified US**: $VERIFIED_US"
|
|
761
|
+
else
|
|
762
|
+
echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
|
|
763
|
+
echo "- **Previously verified US**: $VERIFIED_US"
|
|
764
|
+
fi
|
|
765
|
+
fi
|
|
633
766
|
} | atomic_write "$prompt_file"
|
|
634
767
|
|
|
635
768
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
769
|
+
# Engine-specific launch command (expanded at write time)
|
|
770
|
+
if [[ "$verifier_engine" = "codex" ]]; then
|
|
771
|
+
local engine_cmd="${CODEX_BIN:-codex} -m $CODEX_MODEL \\
|
|
772
|
+
-c model_reasoning_effort=\"$CODEX_REASONING\" \\
|
|
773
|
+
--dangerously-bypass-approvals-and-sandbox \\
|
|
774
|
+
\"\$(cat $prompt_file)\" \\
|
|
775
|
+
2>&1 | tee $output_log"
|
|
776
|
+
local engine_comment="# Run codex with fresh context (governance.md s7 step 7)"
|
|
777
|
+
else
|
|
778
|
+
local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
|
|
779
|
+
--model $verifier_model \\
|
|
780
|
+
--dangerously-skip-permissions \\
|
|
781
|
+
--output-format text \\
|
|
782
|
+
2>&1 | tee $output_log"
|
|
783
|
+
local engine_comment="# Run claude with fresh context (governance.md s7 step 7)"
|
|
784
|
+
fi
|
|
785
|
+
|
|
636
786
|
{
|
|
637
787
|
cat <<TRIGGER_EOF
|
|
638
788
|
#!/bin/zsh
|
|
639
|
-
# Trigger for iteration $iter verifier - generated by run_ralph_desk.zsh
|
|
789
|
+
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
|
|
640
790
|
# DO NOT use exec here -- it breaks heartbeat cleanup
|
|
641
791
|
|
|
642
792
|
HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
|
|
643
793
|
|
|
644
|
-
# Background heartbeat writer (
|
|
794
|
+
# Background heartbeat writer (tmux pattern)
|
|
645
795
|
(
|
|
646
796
|
while true; do
|
|
647
797
|
echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
|
|
@@ -651,12 +801,8 @@ HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
|
|
|
651
801
|
) &
|
|
652
802
|
HEARTBEAT_PID=\$!
|
|
653
803
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
--model $VERIFIER_MODEL \\
|
|
657
|
-
--dangerously-skip-permissions \\
|
|
658
|
-
--output-format text \\
|
|
659
|
-
2>&1 | tee $output_log
|
|
804
|
+
$engine_comment
|
|
805
|
+
$engine_cmd
|
|
660
806
|
|
|
661
807
|
# Cleanup heartbeat writer
|
|
662
808
|
kill \$HEARTBEAT_PID 2>/dev/null
|
|
@@ -680,6 +826,21 @@ update_status() {
|
|
|
680
826
|
local phase="$1"
|
|
681
827
|
local last_result="$2"
|
|
682
828
|
|
|
829
|
+
# Build verified_us as JSON array
|
|
830
|
+
local verified_us_json="[]"
|
|
831
|
+
if [[ -n "$VERIFIED_US" ]]; then
|
|
832
|
+
verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
|
|
833
|
+
fi
|
|
834
|
+
|
|
835
|
+
# Build consensus fields
|
|
836
|
+
local consensus_json=""
|
|
837
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
838
|
+
consensus_json=',
|
|
839
|
+
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
840
|
+
"claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
|
|
841
|
+
"codex_verdict": "'"${CODEX_VERDICT:-}"'"'
|
|
842
|
+
fi
|
|
843
|
+
|
|
683
844
|
echo '{
|
|
684
845
|
"slug": "'"$SLUG"'",
|
|
685
846
|
"iteration": '"$ITERATION"',
|
|
@@ -687,8 +848,13 @@ update_status() {
|
|
|
687
848
|
"phase": "'"$phase"'",
|
|
688
849
|
"worker_model": "'"$WORKER_MODEL"'",
|
|
689
850
|
"verifier_model": "'"$VERIFIER_MODEL"'",
|
|
851
|
+
"worker_engine": "'"$WORKER_ENGINE"'",
|
|
852
|
+
"verifier_engine": "'"$VERIFIER_ENGINE"'",
|
|
853
|
+
"verify_mode": "'"$VERIFY_MODE"'",
|
|
854
|
+
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
690
855
|
"last_result": "'"$last_result"'",
|
|
691
856
|
"consecutive_failures": '"$CONSECUTIVE_FAILURES"',
|
|
857
|
+
"verified_us": '"$verified_us_json"''"$consensus_json"',
|
|
692
858
|
"updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
|
|
693
859
|
}' | atomic_write "$STATUS_FILE"
|
|
694
860
|
}
|
|
@@ -753,6 +919,9 @@ Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
|
|
|
753
919
|
cleanup() {
|
|
754
920
|
log "Cleaning up..."
|
|
755
921
|
|
|
922
|
+
# Remove lockfile
|
|
923
|
+
rm -f "$DESK/logs/.rlp-desk-$SLUG.lock" 2>/dev/null
|
|
924
|
+
|
|
756
925
|
# Kill claude processes then kill panes
|
|
757
926
|
log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
|
|
758
927
|
if [[ -n "${WORKER_PANE:-}" ]]; then
|
|
@@ -764,10 +933,14 @@ cleanup() {
|
|
|
764
933
|
tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
|
|
765
934
|
fi
|
|
766
935
|
sleep 2
|
|
767
|
-
# Kill
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
936
|
+
# Kill panes on completion
|
|
937
|
+
if [[ -n "${WORKER_PANE:-}" ]]; then
|
|
938
|
+
tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
|
|
939
|
+
fi
|
|
940
|
+
if [[ -n "${VERIFIER_PANE:-}" ]]; then
|
|
941
|
+
tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
|
|
942
|
+
fi
|
|
943
|
+
log " Panes cleaned up."
|
|
771
944
|
|
|
772
945
|
# Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
|
|
773
946
|
setopt local_options nonomatch 2>/dev/null
|
|
@@ -780,6 +953,70 @@ cleanup() {
|
|
|
780
953
|
local minutes=$(( elapsed / 60 ))
|
|
781
954
|
local seconds=$(( elapsed % 60 ))
|
|
782
955
|
|
|
956
|
+
local final_status="UNKNOWN"
|
|
957
|
+
if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
|
|
958
|
+
elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
|
|
959
|
+
else final_status="TIMEOUT"; fi
|
|
960
|
+
|
|
961
|
+
if (( DEBUG )); then
|
|
962
|
+
local end_ts=$(date +%s)
|
|
963
|
+
local elapsed=$((end_ts - START_TIME))
|
|
964
|
+
|
|
965
|
+
log_debug "[EXEC] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
|
|
966
|
+
|
|
967
|
+
# --- Validation ---
|
|
968
|
+
log_debug "[VALIDATE] === Execution Validation ==="
|
|
969
|
+
|
|
970
|
+
# 1. Did the correct verify mode run?
|
|
971
|
+
log_debug "[VALIDATE] verify_mode=$VERIFY_MODE configured=true"
|
|
972
|
+
|
|
973
|
+
# 2. Per-US: were all US individually verified?
|
|
974
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
975
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
976
|
+
local expected_us=""
|
|
977
|
+
if [[ -f "$prd_file" ]]; then
|
|
978
|
+
expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
979
|
+
fi
|
|
980
|
+
local verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
|
|
981
|
+
local expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
|
|
982
|
+
|
|
983
|
+
if [[ "$final_status" = "COMPLETE" ]]; then
|
|
984
|
+
if (( verified_count >= expected_count )); then
|
|
985
|
+
log_debug "[VALIDATE] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
|
|
986
|
+
else
|
|
987
|
+
log_debug "[VALIDATE] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
|
|
988
|
+
fi
|
|
989
|
+
else
|
|
990
|
+
log_debug "[VALIDATE] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
|
|
991
|
+
fi
|
|
992
|
+
fi
|
|
993
|
+
|
|
994
|
+
# 3. Consensus: were both engines used?
|
|
995
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
996
|
+
if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
|
|
997
|
+
log_debug "[VALIDATE] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
998
|
+
else
|
|
999
|
+
log_debug "[VALIDATE] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1000
|
+
fi
|
|
1001
|
+
fi
|
|
1002
|
+
|
|
1003
|
+
# 4. Engine match: did the configured engines actually run?
|
|
1004
|
+
local worker_dispatches=$(grep -c '\[EXEC\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1005
|
+
local verifier_dispatches=$(grep -c '\[EXEC\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1006
|
+
log_debug "[VALIDATE] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
|
|
1007
|
+
|
|
1008
|
+
# 5. Fix loops: how many fix contracts were generated?
|
|
1009
|
+
local fix_count=$(grep -c '\[EXEC\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1010
|
+
log_debug "[VALIDATE] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
|
|
1011
|
+
|
|
1012
|
+
# 6. Circuit breakers: any triggered?
|
|
1013
|
+
local cb_count=$(grep -c '\[EXEC\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1014
|
+
log_debug "[VALIDATE] circuit_breakers_triggered=$cb_count"
|
|
1015
|
+
|
|
1016
|
+
# 7. Overall result
|
|
1017
|
+
log_debug "[VALIDATE] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
|
|
1018
|
+
fi
|
|
1019
|
+
|
|
783
1020
|
echo ""
|
|
784
1021
|
echo "============================================================"
|
|
785
1022
|
echo " Ralph Desk Tmux Runner - Session Complete"
|
|
@@ -841,7 +1078,7 @@ poll_for_signal() {
|
|
|
841
1078
|
return 0 # success
|
|
842
1079
|
fi
|
|
843
1080
|
|
|
844
|
-
# Check heartbeat freshness (
|
|
1081
|
+
# Check heartbeat freshness (tmux pattern)
|
|
845
1082
|
if [[ -f "$heartbeat_file" ]]; then
|
|
846
1083
|
if check_heartbeat_exited "$heartbeat_file"; then
|
|
847
1084
|
# Process exited but no signal file -- give a brief grace period
|
|
@@ -870,6 +1107,7 @@ poll_for_signal() {
|
|
|
870
1107
|
(( HEARTBEAT_STALE_COUNT++ ))
|
|
871
1108
|
# Circuit breaker: 3 consecutive heartbeat stale events
|
|
872
1109
|
if (( HEARTBEAT_STALE_COUNT >= 3 )); then
|
|
1110
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
|
|
873
1111
|
log_error "Circuit breaker: 3 consecutive heartbeat stale events"
|
|
874
1112
|
return 1
|
|
875
1113
|
fi
|
|
@@ -887,7 +1125,17 @@ poll_for_signal() {
|
|
|
887
1125
|
fi
|
|
888
1126
|
fi
|
|
889
1127
|
|
|
890
|
-
#
|
|
1128
|
+
# Auto-approve permission prompts during poll
|
|
1129
|
+
local poll_capture
|
|
1130
|
+
poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
|
|
1131
|
+
if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
1132
|
+
log " Permission prompt detected during poll, auto-approving..."
|
|
1133
|
+
log_debug "[EXEC] iter=$ITERATION permission_prompt_auto_approved=true"
|
|
1134
|
+
tmux send-keys -t "$pane_id" Enter
|
|
1135
|
+
sleep 0.5
|
|
1136
|
+
fi
|
|
1137
|
+
|
|
1138
|
+
# Idle pane nudging (tmux pattern)
|
|
891
1139
|
check_and_nudge_idle_pane "$pane_id" "nudge_count"
|
|
892
1140
|
|
|
893
1141
|
sleep "$POLL_INTERVAL"
|
|
@@ -926,6 +1174,218 @@ check_stale_context() {
|
|
|
926
1174
|
return 0
|
|
927
1175
|
}
|
|
928
1176
|
|
|
1177
|
+
# =============================================================================
|
|
1178
|
+
# Consensus Verification (run two verifiers sequentially in same pane)
|
|
1179
|
+
# =============================================================================
|
|
1180
|
+
|
|
1181
|
+
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
|
|
1182
|
+
#######################################
# Run ONE verifier engine in the Verifier pane and wait for its verdict.
#
# Globals (read): LOGS_DIR, VERIFIER_PANE, VERDICT_FILE, VERIFIER_HEARTBEAT,
#                 CLAUDE_BIN, CODEX_BIN (optional, defaults to "codex"),
#                 CODEX_MODEL, CODEX_REASONING, ITER_TIMEOUT, POLL_INTERVAL
# Arguments:
#   $1 - iteration number
#   $2 - engine: claude|codex
#   $3 - model (forwarded to write_verifier_trigger; used for the claude launch)
#   $4 - file-name suffix, e.g. "-claude" or "-codex"
#   $5 - destination path the verdict file is copied to
# Returns:
#   0 - verdict produced and copied to $5
#   1 - verifier failed to start, poll failed / timed out, or copy failed
#######################################
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Write trigger (and prompt) for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"

  local iter_tag prompt_file
  iter_tag=$(printf '%03d' "$iter")
  prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"

  # Clean previous Verifier session: interrupt and exit any engine still
  # occupying the pane.
  local pane_cmd
  pane_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  case "$pane_cmd" in
    node|claude|codex)
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
      sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
      sleep 2
      ;;
  esac

  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file so the poll below only sees a fresh one
  rm -f -- "$VERDICT_FILE" 2>/dev/null

  # Declared once, outside every loop: in zsh, re-running a bare `local name`
  # for an already-set local prints "name=value" (unless TYPESET_SILENT is set).
  local verifier_launch=""
  local pane_text=""
  local submit_checks=0
  local started=0

  # Launch verifier
  if [[ "$engine" = "codex" ]]; then
    # Codex: use non-interactive exec mode in pane (more reliable than TUI for
    # sequential runs). The prompt path is quoted inside the typed command so
    # a path containing spaces is still read as one file.
    local codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat \"$prompt_file\")\" -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
    log " Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix codex exec sent directly"
  else
    # Claude: use interactive TUI
    verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
    log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
    tmux send-keys -t "$VERIFIER_PANE" Enter

    if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi

    sleep 3
    local verifier_instruction="Read and execute the instructions in $prompt_file"
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix instruction sent directly"

    # Verify claude actually started working: look for an activity marker in
    # the pane; if none is visible, re-send C-m and check again (max 10 times).
    while (( submit_checks < 10 )); do
      sleep 2
      pane_text=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
      if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" <<<"$pane_text" 2>/dev/null; then
        log_debug "Verifier$suffix started working after $((submit_checks + 1)) checks"
        started=1
        break
      fi
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      sleep 0.3
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      (( submit_checks++ ))
    done
    if (( ! started )); then
      # Not fatal: the poll below still decides success or failure.
      log " WARNING: Could not confirm Verifier$suffix started working after 10 attempts"
    fi
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: simple file poll (non-interactive, no heartbeat/nudge needed)
    log " Polling for verify-verdict.json ($suffix, codex exec)..."
    local poll_started_at elapsed
    poll_started_at=$(date +%s)
    while true; do
      # Require a non-empty file AND a parsed JSON value: plain `jq .` exits 0
      # on an empty file, which would accept a verdict that is still being
      # written.
      if [[ -s "$VERDICT_FILE" ]] && jq -e . "$VERDICT_FILE" >/dev/null 2>&1; then
        log " Verdict file detected: $VERDICT_FILE"
        break
      fi
      elapsed=$(( $(date +%s) - poll_started_at ))
      if (( elapsed >= ITER_TIMEOUT )); then
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log " Polling for verify-verdict.json ($suffix)..."
    if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination; a failed copy must not be reported as success
  if ! cp -- "$VERDICT_FILE" "$verdict_dest"; then
    log_error "Verifier$suffix could not save verdict to $verdict_dest"
    return 1
  fi
  log " Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
|
|
1290
|
+
|
|
1291
|
+
# --- US-004: Run consensus verification (claude + codex sequentially) ---
|
|
1292
|
+
# Print one markdown bullet per entry of .issues[] in the verdict file $1.
# Falls back to a placeholder bullet when jq cannot process the file.
_consensus_issue_lines() {
  jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$1" 2>/dev/null \
    || echo "- (no structured issues)"
}

# Write a merged "fail" verdict to VERDICT_FILE.
#   $1 - summary text, $2 - recommended_state_transition
# Built with jq so arbitrary verdict strings cannot produce invalid JSON.
_write_consensus_fail_verdict() {
  jq -n \
    --arg verified_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg summary "$1" \
    --arg transition "$2" \
    --arg claude "$CLAUDE_VERDICT" \
    --arg codex "$CODEX_VERDICT" \
    --argjson round "$CONSENSUS_ROUND" \
    '{verdict: "fail", verified_at_utc: $verified_at, summary: $summary, issues: [], recommended_state_transition: $transition, consensus: {claude: $claude, codex: $codex, round: $round}}' \
    | atomic_write "$VERDICT_FILE"
}

#######################################
# US-004: Run ONE consensus round (claude verifier, then codex verifier).
#
# CONSENSUS_ROUND is a running counter that persists across calls: each call
# advances it by one, and the caller resets it to 0 after a passing verdict.
# It is deliberately NOT reset here; resetting it on entry would make every
# call "round 1" and the 3-round limit below could never trigger.
#
# Globals (read):    LOGS_DIR, VERIFIER_MODEL, CODEX_MODEL, VERDICT_FILE
# Globals (written): CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT
# Arguments: $1 - iteration number
# Outputs:   per-engine verdict copies, a combined fix contract on
#            disagreement, and the merged verdict in VERDICT_FILE
# Returns:
#   0 - both verifiers passed (claude verdict copied to VERDICT_FILE)
#   2 - disagreement/failure with rounds remaining (merged verdict "fail",
#       recommended_state_transition "continue")
#   1 - a verifier could not be run, or round 3 still did not reach consensus
#       (merged verdict "fail", recommended_state_transition "blocked")
#######################################
run_consensus_verification() {
  local iter="$1"
  local max_rounds=3
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local claude_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-codex.json"
  local fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"

  CLAUDE_VERDICT=""
  CODEX_VERDICT=""
  CONSENSUS_ROUND=$(( ${CONSENSUS_ROUND:-0} + 1 ))
  log " Consensus round $CONSENSUS_ROUND/3..."

  # Run claude verifier first
  if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
    log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
    return 1
  fi
  CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)

  # Run codex verifier second
  if ! run_single_verifier "$iter" "codex" "$CODEX_MODEL" "-codex" "$codex_verdict_file"; then
    log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
    return 1
  fi
  CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)

  log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"

  local combined_action="retry"
  if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
    combined_action="pass"
  elif (( CONSENSUS_ROUND >= max_rounds )); then
    combined_action="blocked"
  fi
  log_debug "[EXEC] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$combined_action"

  # Both pass -> success. Use the claude verdict as the primary verdict.
  if [[ "$combined_action" = "pass" ]]; then
    cp -- "$claude_verdict_file" "$VERDICT_FILE"
    return 0
  fi

  # Either fails -> build combined fix contract
  {
    echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
    echo ""
    echo "## Claude Verdict: $CLAUDE_VERDICT"
    if [[ "$CLAUDE_VERDICT" = "fail" ]]; then
      echo "### Claude Issues"
      _consensus_issue_lines "$claude_verdict_file"
    fi
    echo ""
    echo "## Codex Verdict: $CODEX_VERDICT"
    if [[ "$CODEX_VERDICT" = "fail" ]]; then
      echo "### Codex Issues"
      _consensus_issue_lines "$codex_verdict_file"
    fi
    echo ""
    echo "## Traceability"
    echo "Only changes that resolve a listed issue are allowed."
  } | atomic_write "$fix_contract"

  log " Combined fix contract: $fix_contract"

  # Rounds remaining: hand a merged fail verdict back so the main loop can
  # run its fix loop and call us again for the next round.
  if [[ "$combined_action" = "retry" ]]; then
    _write_consensus_fail_verdict \
      "Consensus disagreement (round ${CONSENSUS_ROUND}/3): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
      "continue"
    return 2 # special return: consensus disagreement, needs retry
  fi

  # Max consensus rounds exceeded
  log_error "Consensus failed after 3 rounds"
  _write_consensus_fail_verdict \
    "Consensus failed after 3 rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
    "blocked"
  return 1
}
|
|
1388
|
+
|
|
929
1389
|
# =============================================================================
|
|
930
1390
|
# Security Warning
|
|
931
1391
|
# =============================================================================
|
|
@@ -947,6 +1407,21 @@ print_security_warning() {
|
|
|
947
1407
|
# =============================================================================
|
|
948
1408
|
|
|
949
1409
|
main() {
|
|
1410
|
+
# --- Lockfile: prevent duplicate execution ---
|
|
1411
|
+
local lockfile="$DESK/logs/.rlp-desk-$SLUG.lock"
|
|
1412
|
+
mkdir -p "$(dirname "$lockfile")" 2>/dev/null
|
|
1413
|
+
if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
|
|
1414
|
+
local lock_pid
|
|
1415
|
+
lock_pid=$(cat "$lockfile" 2>/dev/null)
|
|
1416
|
+
if kill -0 "$lock_pid" 2>/dev/null; then
|
|
1417
|
+
log_error "Another instance is already running (PID $lock_pid)"
|
|
1418
|
+
exit 1
|
|
1419
|
+
fi
|
|
1420
|
+
# Stale lock — overwrite
|
|
1421
|
+
echo $$ > "$lockfile"
|
|
1422
|
+
fi
|
|
1423
|
+
mkdir -p "$LOGS_DIR" 2>/dev/null
|
|
1424
|
+
|
|
950
1425
|
# --- Startup ---
|
|
951
1426
|
log "Ralph Desk Tmux Runner starting..."
|
|
952
1427
|
log " Slug: $SLUG"
|
|
@@ -954,8 +1429,49 @@ main() {
|
|
|
954
1429
|
log " Max iterations: $MAX_ITER"
|
|
955
1430
|
log " Worker model: $WORKER_MODEL"
|
|
956
1431
|
log " Verifier model: $VERIFIER_MODEL"
|
|
1432
|
+
log " Verify mode: $VERIFY_MODE"
|
|
1433
|
+
log " Verify consensus:$VERIFY_CONSENSUS"
|
|
957
1434
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
958
1435
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
1436
|
+
# --- Debug: Log execution plan ---
|
|
1437
|
+
if (( DEBUG )); then
|
|
1438
|
+
# Extract US IDs from PRD
|
|
1439
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1440
|
+
local us_list=""
|
|
1441
|
+
if [[ -f "$prd_file" ]]; then
|
|
1442
|
+
us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1443
|
+
fi
|
|
1444
|
+
local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
|
|
1445
|
+
|
|
1446
|
+
log_debug "[PLAN] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
1447
|
+
log_debug "[PLAN] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
1448
|
+
log_debug "[PLAN] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
1449
|
+
log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS max_iter=$MAX_ITER"
|
|
1450
|
+
|
|
1451
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1452
|
+
# Build expected flow
|
|
1453
|
+
local expected_flow=""
|
|
1454
|
+
for us in $(echo "$us_list" | tr ',' ' '); do
|
|
1455
|
+
expected_flow="${expected_flow}worker->verify($us)->"
|
|
1456
|
+
done
|
|
1457
|
+
expected_flow="${expected_flow}verify(ALL)->COMPLETE"
|
|
1458
|
+
log_debug "[PLAN] expected_flow=$expected_flow"
|
|
1459
|
+
else
|
|
1460
|
+
log_debug "[PLAN] expected_flow=worker(all)->verify(ALL)->COMPLETE"
|
|
1461
|
+
fi
|
|
1462
|
+
|
|
1463
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1464
|
+
log_debug "[PLAN] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
|
|
1465
|
+
fi
|
|
1466
|
+
fi
|
|
1467
|
+
|
|
1468
|
+
# Extract US list for per-US sequencing
|
|
1469
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1470
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1471
|
+
if [[ -f "$prd_file" ]]; then
|
|
1472
|
+
US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1473
|
+
fi
|
|
1474
|
+
fi
|
|
959
1475
|
|
|
960
1476
|
# Dependency checks
|
|
961
1477
|
check_dependencies
|
|
@@ -982,6 +1498,9 @@ main() {
|
|
|
982
1498
|
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
983
1499
|
log ""
|
|
984
1500
|
log "========== Iteration $ITERATION / $MAX_ITER =========="
|
|
1501
|
+
local _iter_contract=""
|
|
1502
|
+
_iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
|
|
1503
|
+
log_debug "[EXEC] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
|
|
985
1504
|
|
|
986
1505
|
# --- governance.md s7 step 1: Check sentinels ---
|
|
987
1506
|
if [[ -f "$COMPLETE_SENTINEL" ]]; then
|
|
@@ -1021,14 +1540,21 @@ main() {
|
|
|
1021
1540
|
|
|
1022
1541
|
update_status "worker" "running"
|
|
1023
1542
|
|
|
1024
|
-
# --- governance.md s7 step 5: Execute Worker (interactive
|
|
1025
|
-
# Step 5a: Launch interactive
|
|
1026
|
-
local worker_launch
|
|
1027
|
-
|
|
1543
|
+
# --- governance.md s7 step 5: Execute Worker (interactive TUI, tmux pattern) ---
|
|
1544
|
+
# Step 5a: Launch interactive worker engine in Worker pane
|
|
1545
|
+
local worker_launch
|
|
1546
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
1547
|
+
worker_launch="${CODEX_BIN:-codex} -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1548
|
+
log " Launching Worker codex in pane $WORKER_PANE..."
|
|
1549
|
+
else
|
|
1550
|
+
worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
1551
|
+
log " Launching Worker claude in pane $WORKER_PANE..."
|
|
1552
|
+
fi
|
|
1028
1553
|
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
|
|
1029
1554
|
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1555
|
+
log_debug "[EXEC] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
1030
1556
|
|
|
1031
|
-
# Step 5b: Wait for claude TUI to be ready (
|
|
1557
|
+
# Step 5b: Wait for claude TUI to be ready (tmux pattern)
|
|
1032
1558
|
if ! wait_for_pane_ready "$WORKER_PANE" 30; then
|
|
1033
1559
|
log_error "Worker claude failed to start"
|
|
1034
1560
|
write_blocked_sentinel "Worker claude failed to start in pane"
|
|
@@ -1036,39 +1562,76 @@ main() {
|
|
|
1036
1562
|
return 1
|
|
1037
1563
|
fi
|
|
1038
1564
|
|
|
1039
|
-
# Step 5c: Wait for claude to fully initialize, then send instruction
|
|
1565
|
+
# Step 5c: Wait for claude to fully initialize, then send instruction directly
|
|
1040
1566
|
sleep 3
|
|
1041
1567
|
local worker_instruction="Read and execute the instructions in $worker_prompt"
|
|
1042
|
-
|
|
1043
|
-
|
|
1568
|
+
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1569
|
+
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1570
|
+
log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
|
|
1571
|
+
|
|
1572
|
+
# Verify claude actually started working — keep sending C-m until activity detected
|
|
1573
|
+
local submit_attempts=0
|
|
1574
|
+
while (( submit_attempts < 10 )); do
|
|
1575
|
+
sleep 2
|
|
1576
|
+
local pane_check
|
|
1577
|
+
pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
|
|
1578
|
+
if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1579
|
+
log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
|
|
1580
|
+
log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
|
|
1581
|
+
break
|
|
1582
|
+
fi
|
|
1583
|
+
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1584
|
+
sleep 0.3
|
|
1585
|
+
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1586
|
+
(( submit_attempts++ ))
|
|
1587
|
+
done
|
|
1588
|
+
if (( submit_attempts >= 10 )); then
|
|
1589
|
+
log " WARNING: Could not confirm Worker started working after 10 attempts"
|
|
1590
|
+
log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=10"
|
|
1044
1591
|
fi
|
|
1045
|
-
# Extra C-m to ensure submission (long text may false-positive the consumed check)
|
|
1046
|
-
sleep 0.5
|
|
1047
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1048
|
-
sleep 0.3
|
|
1049
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1050
1592
|
|
|
1051
1593
|
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
1052
1594
|
log " Polling for iter-signal.json..."
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1595
|
+
local worker_poll_done=0
|
|
1596
|
+
while (( ! worker_poll_done )); do
|
|
1597
|
+
if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
|
|
1598
|
+
worker_poll_done=1
|
|
1599
|
+
log_debug "[EXEC] iter=$ITERATION poll_signal_received=true"
|
|
1600
|
+
else
|
|
1601
|
+
# Check if Worker is still actively running (not stuck)
|
|
1602
|
+
local worker_cmd
|
|
1603
|
+
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1604
|
+
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
1605
|
+
log " Worker timed out but still active ($worker_cmd). Extending poll..."
|
|
1606
|
+
log_debug "[EXEC] iter=$ITERATION timeout_active=true process=$worker_cmd"
|
|
1607
|
+
log_debug "[EXEC] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
1608
|
+
update_status "worker" "slow"
|
|
1609
|
+
# Loop continues — re-poll same iteration
|
|
1610
|
+
else
|
|
1611
|
+
# Worker is truly dead/stuck
|
|
1612
|
+
(( MONITOR_FAILURE_COUNT++ ))
|
|
1613
|
+
log_debug "[EXEC] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
|
|
1614
|
+
if (( MONITOR_FAILURE_COUNT >= 3 )); then
|
|
1615
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
|
|
1616
|
+
write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
|
|
1617
|
+
update_status "blocked" "monitor_failures"
|
|
1618
|
+
return 1
|
|
1619
|
+
fi
|
|
1620
|
+
log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
|
|
1621
|
+
update_status "worker" "poll_failed"
|
|
1622
|
+
worker_poll_done=1 # exit poll loop, continue to next iteration
|
|
1623
|
+
log_debug "[EXEC] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
|
|
1624
|
+
# Kill dead worker session so next iteration starts fresh
|
|
1625
|
+
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
1626
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
|
|
1627
|
+
sleep 1
|
|
1628
|
+
fi
|
|
1069
1629
|
fi
|
|
1070
|
-
|
|
1071
|
-
|
|
1630
|
+
done
|
|
1631
|
+
|
|
1632
|
+
if [[ ! -f "$SIGNAL_FILE" ]]; then
|
|
1633
|
+
log_debug "[EXEC] iter=$ITERATION no_signal_after_poll=true continuing"
|
|
1634
|
+
# No signal — monitor failure, go to next iteration
|
|
1072
1635
|
continue
|
|
1073
1636
|
fi
|
|
1074
1637
|
|
|
@@ -1083,6 +1646,11 @@ main() {
|
|
|
1083
1646
|
|
|
1084
1647
|
log " Worker signal: status=$signal_status summary=\"$signal_summary\""
|
|
1085
1648
|
|
|
1649
|
+
# Read us_id early for EXEC logging (also used later in verify branch)
|
|
1650
|
+
local signal_us_id_early=""
|
|
1651
|
+
signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
1652
|
+
log_debug "[EXEC] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
|
|
1653
|
+
|
|
1086
1654
|
case "$signal_status" in
|
|
1087
1655
|
continue)
|
|
1088
1656
|
# --- governance.md s7 step 6: continue -> go to step 8 ---
|
|
@@ -1091,52 +1659,94 @@ main() {
|
|
|
1091
1659
|
;;
|
|
1092
1660
|
verify)
|
|
1093
1661
|
# --- governance.md s7 step 7: Execute Verifier ---
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1662
|
+
# Read us_id from signal for per-US scoping
|
|
1663
|
+
local signal_us_id=""
|
|
1664
|
+
signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
1665
|
+
log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
|
|
1098
1666
|
|
|
1099
1667
|
update_status "verifier" "running"
|
|
1100
1668
|
|
|
1101
|
-
#
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1669
|
+
# --- Consensus vs single verification ---
|
|
1670
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1671
|
+
# US-004: Run consensus verification (claude + codex sequentially)
|
|
1672
|
+
local consensus_rc=0
|
|
1673
|
+
run_consensus_verification "$ITERATION" || consensus_rc=$?
|
|
1674
|
+
|
|
1675
|
+
if (( consensus_rc == 2 )); then
|
|
1676
|
+
# Consensus disagreement — treat as fail, fix loop will handle
|
|
1677
|
+
log " Consensus disagreement, treating as fail."
|
|
1678
|
+
elif (( consensus_rc != 0 )); then
|
|
1679
|
+
# Consensus verification failed entirely
|
|
1680
|
+
log_error "Consensus verification failed"
|
|
1681
|
+
write_blocked_sentinel "Consensus verification failed after max rounds"
|
|
1682
|
+
update_status "blocked" "consensus_failed"
|
|
1683
|
+
return 1
|
|
1684
|
+
fi
|
|
1685
|
+
else
|
|
1686
|
+
# Standard single-engine verification
|
|
1687
|
+
write_verifier_trigger "$ITERATION"
|
|
1688
|
+
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
|
|
1689
|
+
|
|
1690
|
+
# Step 7a: Clean previous Verifier session if running
|
|
1691
|
+
local verifier_cmd
|
|
1692
|
+
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1693
|
+
if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1694
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1695
|
+
sleep 0.5
|
|
1696
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
|
|
1697
|
+
sleep 2
|
|
1698
|
+
wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
|
|
1699
|
+
fi
|
|
1700
|
+
|
|
1701
|
+
local verifier_launch
|
|
1702
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
1703
|
+
verifier_launch="${CODEX_BIN:-codex} -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1704
|
+
log " Launching Verifier codex in pane $VERIFIER_PANE..."
|
|
1705
|
+
else
|
|
1706
|
+
verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
1707
|
+
log " Launching Verifier claude in pane $VERIFIER_PANE..."
|
|
1708
|
+
fi
|
|
1709
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
|
|
1710
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1711
|
+
log_debug "[EXEC] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
1712
|
+
|
|
1713
|
+
# Step 7b: Wait for TUI to be ready
|
|
1714
|
+
if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
|
|
1715
|
+
log_error "Verifier failed to start"
|
|
1716
|
+
update_status "verifier" "start_failed"
|
|
1717
|
+
continue
|
|
1718
|
+
fi
|
|
1719
|
+
|
|
1720
|
+
# Step 7c: Send instruction
|
|
1721
|
+
sleep 3
|
|
1722
|
+
local verifier_instruction="Read and execute the instructions in $verifier_prompt"
|
|
1723
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1724
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1725
|
+
log_debug "Verifier instruction sent directly"
|
|
1726
|
+
|
|
1727
|
+
# Verify verifier actually started working
|
|
1728
|
+
local vs_submit=0
|
|
1729
|
+
while (( vs_submit < 10 )); do
|
|
1730
|
+
sleep 2
|
|
1731
|
+
local vs_check
|
|
1732
|
+
vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
1733
|
+
if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1734
|
+
log_debug "Verifier started working after $((vs_submit + 1)) checks"
|
|
1735
|
+
break
|
|
1736
|
+
fi
|
|
1737
|
+
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1738
|
+
sleep 0.3
|
|
1739
|
+
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1740
|
+
(( vs_submit++ ))
|
|
1741
|
+
done
|
|
1742
|
+
|
|
1743
|
+
# Poll for verify-verdict.json
|
|
1744
|
+
log " Polling for verify-verdict.json..."
|
|
1745
|
+
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
|
|
1746
|
+
log_error "Verifier poll failed"
|
|
1747
|
+
update_status "verifier" "poll_failed"
|
|
1748
|
+
continue
|
|
1749
|
+
fi
|
|
1140
1750
|
fi
|
|
1141
1751
|
|
|
1142
1752
|
# --- governance.md s7 step 7: Read verdict via jq ---
|
|
@@ -1149,12 +1759,28 @@ main() {
|
|
|
1149
1759
|
|
|
1150
1760
|
log " Verifier: verdict=$verdict recommended=$recommended"
|
|
1151
1761
|
log " Verifier summary: \"$verdict_summary\""
|
|
1762
|
+
local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
|
|
1763
|
+
log_debug "[EXEC] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
|
|
1152
1764
|
|
|
1153
1765
|
case "$verdict" in
|
|
1154
1766
|
pass)
|
|
1155
1767
|
CONSECUTIVE_FAILURES=0
|
|
1156
|
-
|
|
1157
|
-
|
|
1768
|
+
CONSENSUS_ROUND=0
|
|
1769
|
+
|
|
1770
|
+
# --- Per-US tracking ---
|
|
1771
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
|
|
1772
|
+
# Add this US to verified list
|
|
1773
|
+
if [[ -n "$VERIFIED_US" ]]; then
|
|
1774
|
+
VERIFIED_US="${VERIFIED_US},${signal_us_id}"
|
|
1775
|
+
else
|
|
1776
|
+
VERIFIED_US="$signal_us_id"
|
|
1777
|
+
fi
|
|
1778
|
+
log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
|
|
1779
|
+
log_debug "[EXEC] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
|
|
1780
|
+
update_status "verifier" "pass_us"
|
|
1781
|
+
# Worker will do next US on next iteration
|
|
1782
|
+
elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
|
|
1783
|
+
# Final full verify passed or complete recommended
|
|
1158
1784
|
write_complete_sentinel "$verdict_summary"
|
|
1159
1785
|
update_status "complete" "pass"
|
|
1160
1786
|
return 0
|
|
@@ -1185,9 +1811,11 @@ main() {
|
|
|
1185
1811
|
jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
|
|
1186
1812
|
} | atomic_write "$fix_contract"
|
|
1187
1813
|
log " Fix contract: $fix_contract"
|
|
1814
|
+
log_debug "[EXEC] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
|
|
1188
1815
|
|
|
1189
1816
|
# Circuit breaker: consecutive failures
|
|
1190
1817
|
if (( CONSECUTIVE_FAILURES >= 3 )); then
|
|
1818
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"3 consecutive verification failures\""
|
|
1191
1819
|
log_error "Circuit breaker: 3 consecutive verification failures"
|
|
1192
1820
|
write_blocked_sentinel "3 consecutive verification failures"
|
|
1193
1821
|
update_status "blocked" "consecutive_failures"
|
|
@@ -1233,6 +1861,7 @@ main() {
|
|
|
1233
1861
|
|
|
1234
1862
|
# --- governance.md s7 step 8: Circuit breaker - stale context check ---
|
|
1235
1863
|
if ! check_stale_context; then
|
|
1864
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
|
|
1236
1865
|
write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
|
|
1237
1866
|
update_status "blocked" "stale_context"
|
|
1238
1867
|
return 1
|