@ai-dev-methodologies/rlp-desk 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1259 @@
1
#!/bin/zsh
# Shell options: -u makes references to unset variables an error and pipefail
# makes a pipeline report a failing stage. -e is deliberately NOT enabled
# because the main loop uses explicit error checks throughout.
set -uo pipefail

# =============================================================================
# Ralph Desk Tmux Runner
#
# Shell implementation of the Leader loop from governance.md section 7.
# Built on the omc-teams proven patterns: write-then-notify, pane IDs (%N),
# copy-mode guards, verification-based retry, heartbeat monitoring,
# idle pane nudging, exponential backoff restarts, atomic file writes.
#
# Usage:
#   LOOP_NAME=<slug> ./run_ralph_desk.zsh
#
# Required env:
#   LOOP_NAME                  slug identifier for the campaign
#
# Optional env (default in parentheses):
#   ROOT                       project root ($PWD)
#   MAX_ITER                   max iterations (20)
#   WORKER_MODEL               claude model for Worker (sonnet)
#   VERIFIER_MODEL             claude model for Verifier (opus)
#   POLL_INTERVAL              seconds between signal checks (5)
#   ITER_TIMEOUT               per-iteration timeout in seconds (600)
#   HEARTBEAT_STALE_THRESHOLD  seconds before heartbeat is stale (120)
#   MAX_RESTARTS               max restart attempts per worker (3)
#   IDLE_NUDGE_THRESHOLD       seconds of idle before nudge (30)
#   MAX_NUDGES                 max nudges per pane per iteration (3)
#   DEBUG                      non-zero appends debug lines to debug.log (0)
#
# Dependencies: tmux, claude CLI, jq
# =============================================================================

# --- Environment Variables ---
# The :? expansion aborts the script with this message when LOOP_NAME is
# unset or empty.
SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
: "${ROOT:=$PWD}"
: "${MAX_ITER:=20}"
: "${WORKER_MODEL:=sonnet}"
: "${VERIFIER_MODEL:=opus}"
: "${POLL_INTERVAL:=5}"
: "${ITER_TIMEOUT:=600}"
: "${HEARTBEAT_STALE_THRESHOLD:=120}"
: "${MAX_RESTARTS:=3}"
: "${IDLE_NUDGE_THRESHOLD:=30}"
: "${MAX_NUDGES:=3}"

# --- Derived Paths ---
# Everything lives under $ROOT/.claude/ralph-desk; per-campaign files are
# keyed by the slug.
DESK="$ROOT/.claude/ralph-desk"
PROMPTS_DIR="$DESK/prompts"
CONTEXT_DIR="$DESK/context"
MEMOS_DIR="$DESK/memos"
LOGS_DIR="$DESK/logs/$SLUG"

# Inputs validated by validate_scaffold
WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
MEMORY_FILE="$MEMOS_DIR/${SLUG}-memory.md"

# Signal / verdict / sentinel files in the memos directory
SIGNAL_FILE="$MEMOS_DIR/${SLUG}-iter-signal.json"
DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"

# Leader-owned bookkeeping in the per-campaign log directory
STATUS_FILE="$LOGS_DIR/status.json"
SESSION_CONFIG="$LOGS_DIR/session-config.json"
WORKER_HEARTBEAT="$LOGS_DIR/worker-heartbeat.json"
VERIFIER_HEARTBEAT="$LOGS_DIR/verifier-heartbeat.json"

# --- Session Naming ---
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"

# --- State Tracking ---
# Associative arrays: the first two are keyed by tmux pane id, the third by
# iteration number (see check_and_nudge_idle_pane and restart_worker).
typeset -A LAST_PANE_CONTENT
typeset -A PANE_IDLE_SINCE
typeset -A WORKER_RESTARTS
ITERATION=0
CONSECUTIVE_FAILURES=0
STALE_CONTEXT_COUNT=0
HEARTBEAT_STALE_COUNT=0
MONITOR_FAILURE_COUNT=0
PREV_CONTEXT_HASH=""
START_TIME=$(date +%s)

# =============================================================================
# Utility Functions
# =============================================================================

# DEBUG is evaluated arithmetically by log_debug; DEBUG_LOG is the file it
# appends to.
: "${DEBUG:=0}"
DEBUG_LOG="$DESK/logs/${LOOP_NAME:-unknown}/debug.log"
90
+
91
# Print an informational message to stdout, prefixed with a local
# "[YYYY-MM-DD HH:MM:SS]" timestamp. All arguments are joined into one line.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] $*"
}
94
+
95
# Append a timestamped "DEBUG:" line to $DEBUG_LOG, but only when $DEBUG
# evaluates to non-zero. The log directory is created on demand.
log_debug() {
  # Debugging disabled: nothing to do.
  (( DEBUG )) || return 0

  mkdir -p "$(dirname "$DEBUG_LOG")" 2>/dev/null
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG: $*" >> "$DEBUG_LOG"
}
101
+
102
# Print a timestamped "ERROR:" message to stderr. All arguments are joined
# into one line.
log_error() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${stamp}] ERROR: $*" >&2
}
105
+
106
# --- governance.md s7: Atomic file writes (omc-teams pattern) ---
# All file writes by the Leader use tmp+mv to prevent corruption.
#
# Reads stdin into "<target>.tmp.<pid>" and then renames it over the target,
# so readers never observe a half-written file.
#
# Arguments: $1 - destination path
# Input:     file content on stdin
# Returns:   0 on success; 1 if the temp file could not be written or the
#            rename failed (the temp file is removed in both cases and the
#            previous target, if any, is left untouched).
atomic_write() {
  local target="$1"
  local tmp="${target}.tmp.$$"

  # Do not rename a partial or missing temp file over the target.
  if ! cat > "$tmp"; then
    rm -f -- "$tmp" 2>/dev/null
    return 1
  fi

  if ! mv -- "$tmp" "$target"; then
    rm -f -- "$tmp" 2>/dev/null
    return 1
  fi
  return 0
}
114
+
115
+ # =============================================================================
116
+ # Dependency Checks
117
+ # =============================================================================
118
+
119
# --- governance.md s7 step 1: Validate prerequisites before starting ---
# Verifies that tmux, the claude CLI and jq are on PATH. Every missing tool is
# reported through log_error before the script exits with status 1.
# On success, sets the global CLAUDE_BIN to what `command -v claude` reports
# (falling back to the bare name "claude") and logs it.
check_dependencies() {
  local missing=0
  local spec tool message

  # Each entry is "<command>|<message logged when it is missing>".
  for spec in \
    "tmux|tmux is required but not found. Install with: brew install tmux" \
    "claude|claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli" \
    "jq|jq is required but not found. Install with: brew install jq"; do
    tool="${spec%%|*}"
    message="${spec#*|}"
    if ! command -v "$tool" >/dev/null 2>&1; then
      log_error "$message"
      missing=1
    fi
  done

  if (( missing )); then
    exit 1
  fi

  # Resolve full path to claude binary for reliable launches
  CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
  log " Claude binary: $CLAUDE_BIN"
}
146
+
147
+ # =============================================================================
148
+ # Scaffold Validation
149
+ # =============================================================================
150
+
151
# Confirms that the campaign scaffold exists: worker prompt, verifier prompt,
# context file and memory file. Every missing file is reported; if any is
# missing the script exits with status 1. On success the per-campaign log
# directory ($LOGS_DIR) is created.
validate_scaffold() {
  local errors=0
  local entry label required_file

  # Each entry is "<label used in the error message>|<path to check>".
  for entry in \
    "Worker prompt|$WORKER_PROMPT_BASE" \
    "Verifier prompt|$VERIFIER_PROMPT_BASE" \
    "Context file|$CONTEXT_FILE" \
    "Memory file|$MEMORY_FILE"; do
    label="${entry%%|*}"
    required_file="${entry#*|}"
    if [[ ! -f "$required_file" ]]; then
      log_error "$label not found: $required_file"
      errors=1
    fi
  done

  if (( errors )); then
    log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
    exit 1
  fi

  mkdir -p "$LOGS_DIR"
}
181
+
182
+ # =============================================================================
183
+ # Session Management (omc-teams pattern: pane IDs)
184
+ # =============================================================================
185
+
186
# --- governance.md s7 step 1: Check for existing sessions ---
# Refuses to start when another tmux session named "rlp-desk-<slug>-*" already
# exists. The session this script is running in (if any) is excluded from the
# match. When a conflict is found, the sessions are listed together with a
# kill-session hint and the script exits with status 1.
check_existing_sessions() {
  local current_session existing session

  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")
  # `|| true` keeps a no-match grep from counting as a failure under pipefail.
  existing=$(tmux list-sessions -F '#{session_name}' 2>/dev/null \
    | grep "^rlp-desk-${SLUG}-" \
    | grep -v "^${current_session}$" || true)

  # Nothing conflicting: carry on.
  if [[ -z "$existing" ]]; then
    return 0
  fi

  log_error "Existing tmux session(s) found for slug '$SLUG':"
  echo "$existing" | while read -r session; do
    echo " - $session"
  done
  echo ""
  echo "Kill existing session first:"
  echo " tmux kill-session -t <session-name>"
  exit 1
}
203
+
204
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
# Builds the three-pane layout (leader | worker over verifier) and records it
# in $SESSION_CONFIG.
#
# Inside tmux ($TMUX set) the current pane becomes the leader and SESSION_NAME
# is replaced by the current session's name. Outside tmux a new detached
# session named $SESSION_NAME is created.
#
# Globals written: LEADER_PANE, WORKER_PANE, VERIFIER_PANE, SESSION_NAME
# Returns: 0 on success; 1 if the session/panes could not be created or the
#          config JSON could not be rendered.
create_session() {
  log "Creating tmux session: $SESSION_NAME"

  # omc-teams split-pane pattern
  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: the current pane stays as-is (leader/user stays here).
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"
  else
    # Outside tmux: create a detached session to host the panes.
    if ! tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT"; then
      log_error "Failed to create tmux session: $SESSION_NAME"
      return 1
    fi
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
  fi

  # Never split with an empty -t target: stop as soon as a pane id is missing.
  if [[ -z "$LEADER_PANE" ]]; then
    log_error "Could not determine the leader pane id"
    return 1
  fi

  # -h off leader pane -> right column (worker)
  WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  if [[ -z "$WORKER_PANE" ]]; then
    log_error "Failed to create the worker pane"
    return 1
  fi

  # -v off worker -> stacked below on right (verifier)
  VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  if [[ -z "$VERIFIER_PANE" ]]; then
    log_error "Failed to create the verifier pane"
    return 1
  fi

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # Render the config with jq so quotes/backslashes in paths and names are
  # escaped. Numeric settings are emitted as JSON numbers when they parse as
  # numbers and as strings otherwise, so the file is always valid JSON.
  local config_json
  if ! config_json=$(jq -n \
    --arg session_name "$SESSION_NAME" \
    --arg slug "$SLUG" \
    --arg created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg leader "$LEADER_PANE" \
    --arg worker "$WORKER_PANE" \
    --arg verifier "$VERIFIER_PANE" \
    --arg pid "$$" \
    --arg root "$ROOT" \
    --arg worker_model "$WORKER_MODEL" \
    --arg verifier_model "$VERIFIER_MODEL" \
    --arg max_iter "$MAX_ITER" \
    --arg poll_interval "$POLL_INTERVAL" \
    --arg iter_timeout "$ITER_TIMEOUT" \
    --arg heartbeat_stale_threshold "$HEARTBEAT_STALE_THRESHOLD" \
    --arg max_restarts "$MAX_RESTARTS" \
    --arg idle_nudge_threshold "$IDLE_NUDGE_THRESHOLD" \
    --arg max_nudges "$MAX_NUDGES" \
    'def num: (tonumber? // .);
     {
       session_name: $session_name,
       slug: $slug,
       created_at: $created_at,
       panes: {leader: $leader, worker: $worker, verifier: $verifier},
       pid: ($pid | num),
       root: $root,
       models: {worker: $worker_model, verifier: $verifier_model},
       config: {
         max_iter: ($max_iter | num),
         poll_interval: ($poll_interval | num),
         iter_timeout: ($iter_timeout | num),
         heartbeat_stale_threshold: ($heartbeat_stale_threshold | num),
         max_restarts: ($max_restarts | num),
         idle_nudge_threshold: ($idle_nudge_threshold | num),
         max_nudges: ($max_nudges | num)
       }
     }'); then
    log_error "Failed to render session config JSON"
    return 1
  fi

  # Write session config (atomic write)
  printf '%s\n' "$config_json" | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
264
+
265
+ # =============================================================================
266
+ # Copy-Mode Guard (omc-teams pattern)
267
+ # =============================================================================
268
+
269
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
# Reports whether keys can be sent to a pane.
#
# Arguments: $1 - tmux pane id
# Returns:   0 when the pane is not in a mode (safe to send keys);
#            1 when tmux reports pane_in_mode=1 or the tmux query fails.
check_copy_mode() {
  local pane_id="$1"
  local mode_flag

  if ! mode_flag=$(tmux display-message -p -t "$pane_id" '#{pane_in_mode}' 2>/dev/null); then
    return 1
  fi

  # pane is in copy mode, cannot send keys
  [[ "$mode_flag" -eq 1 ]] && return 1
  return 0
}
279
+
280
+ # =============================================================================
281
+ # Verification-Based Send Retry (omc-teams pattern)
282
+ # =============================================================================
283
+
284
# Helper for safe_send_keys: succeeds while the literal text is still visible
# in the last 5 lines of the pane (i.e. it has not been submitted yet).
_safe_send_text_visible() {
  local pane_id="$1"
  local text="$2"
  local tail_capture
  tail_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
  # -- keeps a text starting with "-" from being parsed as a grep option.
  echo "$tail_capture" | grep -qF -- "$text" 2>/dev/null
}

# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
# Types text into a pane and presses Enter until the text is no longer visible
# on the pane's last lines (exact omc-teams sendToWorker pattern,
# tmux-session.js:527-626).
#
# Arguments: $1 - tmux pane id
#            $2 - text to type; an empty string means "just press Enter"
#                 (used for idle nudges)
# Returns:   0 when the text was submitted, or after the final fail-open
#            attempt; 1 when the pane is (or enters) copy mode.
#
# An empty text cannot be verified by looking for it on screen (the empty
# pattern matches every line), so it is submitted with a single round of
# key presses instead of running every retry stage.
safe_send_keys() {
  local pane_id="$1"
  local text="$2"
  # Declared once up front: re-declaring a local inside a loop makes zsh
  # print the existing value.
  local initial_capture
  local pane_busy=0
  local attempt=0
  local retry_attempt=0

  # Guard: copy-mode captures keys; skip entirely
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  # Check for trust prompt and auto-dismiss
  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
  if echo "$initial_capture" | grep -q "esc to interrupt" 2>/dev/null; then
    pane_busy=1
  fi
  if echo "$initial_capture" | grep -q "Do you trust" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
    tmux send-keys -t "$pane_id" C-m
    sleep 0.2
  fi

  # Send text in literal mode with -- separator
  log_debug " Sending text to pane $pane_id (${#text} chars)"
  if [[ -n "$text" ]]; then
    tmux send-keys -t "$pane_id" -l -- "$text"
    # Allow input buffer to settle (omc-teams: 150ms)
    sleep 0.15
  fi

  # Submit: up to 6 rounds of C-m double-press
  while (( attempt < 6 )); do
    sleep 0.1
    if (( attempt == 0 && pane_busy )); then
      # Busy pane: Tab+C-m queue semantics (omc-teams pattern)
      tmux send-keys -t "$pane_id" Tab
      sleep 0.08
      tmux send-keys -t "$pane_id" C-m
    else
      tmux send-keys -t "$pane_id" C-m
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # Nothing was typed, so there is nothing to verify: one round is enough.
    if [[ -z "$text" ]]; then
      log_debug " Empty text submitted (nudge)"
      return 0
    fi

    # Check if text was consumed
    if ! _safe_send_text_visible "$pane_id" "$text"; then
      log_debug " Text consumed after round $((attempt + 1))"
      return 0
    fi
    sleep 0.14
    attempt=$(( attempt + 1 ))
  done

  # Safety gate: copy-mode check
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: C-u clear line, resend (omc-teams pattern)
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" -l -- "$text"
  sleep 0.12
  while (( retry_attempt < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    if ! _safe_send_text_visible "$pane_id" "$text"; then
      log_debug " Text consumed after adaptive retry round $((retry_attempt + 1))"
      return 0
    fi
    retry_attempt=$(( retry_attempt + 1 ))
  done

  # Fail-open: one last nudge
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
386
+
387
+ # =============================================================================
388
+ # Wait for Pane Ready (omc-teams pattern: paneLooksReady)
389
+ # =============================================================================
390
+
391
# Polls a pane until it shows an input prompt and no running task
# (omc-teams paneLooksReady pattern).
#
# Arguments: $1 - tmux pane id
#            $2 - timeout in seconds (default 10, the omc-teams default)
# Returns:   always 0. On timeout it logs and proceeds anyway (fail-open),
#            leaving it to safe_send_keys to cope with a pane that is not
#            ready.
#
# A "Do you trust" prompt seen while waiting is dismissed by pressing Enter
# twice.
wait_for_pane_ready() {
  local pane_id="$1"
  local timeout="${2:-10}" # omc-teams default: 10s
  # Declared once, outside the loop: re-declaring an existing local without a
  # value makes zsh print it to stdout on every pass.
  local start captured line trimmed ready
  start=$(date +%s)
  log " Waiting for pane $pane_id ready..."

  while (( $(date +%s) - start < timeout )); do
    captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Auto-dismiss trust prompt (omc-teams pattern: paneHasTrustPrompt)
    if grep -q "Do you trust" <<< "$captured" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" Enter
      sleep 0.12
      tmux send-keys -t "$pane_id" Enter
      sleep 2
      continue
    fi

    # omc-teams paneLooksReady: check each line for prompt char at line start.
    # The loop reads from a here-string so that it runs in the current shell
    # and the `ready` flag survives the loop.
    ready=0
    while IFS= read -r line; do
      trimmed="${line## }" # strips a single leading space
      if [[ "$trimmed" == ❯* || "$trimmed" == \>* || "$trimmed" == ›* || "$trimmed" == »* ]]; then
        ready=1
        break
      fi
    done <<< "$captured"

    # Fallbacks that tolerate any leading whitespace on the last few lines.
    if (( ! ready )); then
      if tail -5 <<< "$captured" | grep -qE '^\s*[❯›]' 2>/dev/null; then
        ready=1
      elif tail -3 <<< "$captured" | grep -qE '^\s*[❯›>]' 2>/dev/null; then
        ready=1
      fi
    fi

    if (( ready )); then
      # Check no active task running
      if ! grep -q "esc to interrupt" <<< "$captured" 2>/dev/null; then
        log " Pane $pane_id is ready."
        return 0
      fi
    fi
    sleep 0.25
  done

  # Timeout — return success anyway (fail-open, let safe_send_keys handle it)
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
438
+
439
+ # =============================================================================
440
+ # Heartbeat Monitoring (omc-teams pattern)
441
+ # =============================================================================
442
+
443
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
# Arguments: $1 - heartbeat JSON file (expects an "epoch" field)
# Returns:   0 when the recorded epoch is less than
#            $HEARTBEAT_STALE_THRESHOLD seconds old; non-zero when the file is
#            missing, unreadable by jq, has no epoch, or is stale.
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"
  local hb_epoch now_epoch

  [[ -f "$hb_file" ]] || return 1

  # Read epoch seconds directly (avoids timezone parsing bugs)
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1
  [[ -n "$hb_epoch" ]] || return 1

  now_epoch=$(date +%s)
  # The arithmetic result is the function's exit status.
  (( now_epoch - hb_epoch < threshold ))
}
463
+
464
# Check if heartbeat indicates process has exited.
# Arguments: $1 - heartbeat JSON file
# Returns:   0 only when the file exists and its "status" field is exactly
#            "exited"; non-zero otherwise.
check_heartbeat_exited() {
  local hb_file="$1"
  local hb_status

  [[ -f "$hb_file" ]] || return 1

  hb_status=$(jq -r '.status // empty' "$hb_file" 2>/dev/null)
  [[ "$hb_status" == "exited" ]]
}
474
+
475
+ # =============================================================================
476
+ # Idle Pane Nudging (omc-teams pattern)
477
+ # =============================================================================
478
+
479
# --- governance.md s7 step 5+6: Nudge idle panes ---
# Compares the last 3 lines of a pane with the snapshot from the previous
# call. When they are unchanged for more than $IDLE_NUDGE_THRESHOLD seconds
# and fewer than $MAX_NUDGES nudges have been sent, an empty send (Enter) is
# issued through safe_send_keys and the caller's counter is incremented.
#
# Arguments: $1 - tmux pane id
#            $2 - NAME of the caller's nudge counter variable; it is read and
#                 updated by name
# Globals:   LAST_PANE_CONTENT, PANE_IDLE_SINCE (per-pane snapshot and the
#            time it was taken)
check_and_nudge_idle_pane() {
  local pane_id="$1"
  local nudge_count_var="$2"
  local snapshot
  snapshot=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -3)

  # Output changed since the last look: remember it and restart the idle clock.
  if [[ "$snapshot" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    LAST_PANE_CONTENT[$pane_id]="$snapshot"
    PANE_IDLE_SINCE[$pane_id]=$(date +%s)
    return
  fi

  local idle_since="${PANE_IDLE_SINCE[$pane_id]:-$(date +%s)}"
  local now
  now=$(date +%s)

  # Not idle for long enough yet.
  (( now - idle_since > IDLE_NUDGE_THRESHOLD )) || return 0

  # Nudge budget for this poll already spent.
  local count=${(P)nudge_count_var}
  (( count < MAX_NUDGES )) || return 0

  log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
  safe_send_keys "$pane_id" ""
  # Write the incremented value back into the caller's variable.
  eval "$nudge_count_var=$(( count + 1 ))"
}
504
+
505
+ # =============================================================================
506
+ # Exponential Backoff Restart (omc-teams pattern)
507
+ # =============================================================================
508
+
509
# --- governance.md s7 step 5: Restart dead workers with backoff ---
# Interrupts whatever runs in the pane and relaunches the claude CLI there,
# after an exponential backoff. Despite the name it is used for both roles
# (poll_for_signal calls it with whichever pane it is watching), so the model
# is chosen from the pane: $VERIFIER_MODEL for $VERIFIER_PANE, otherwise
# $WORKER_MODEL.
#
# Arguments: $1 - tmux pane id
#            $2 - iteration number (restart attempts are counted per
#                 iteration in WORKER_RESTARTS)
#            $3 - trigger file path (accepted for interface compatibility;
#                 not used by this function)
# Returns:   0 after the relaunch command was sent; 1 when MAX_RESTARTS is
#            already reached for this iteration (caller writes BLOCKED).
restart_worker() {
  local pane_id="$1"
  local iter="$2"
  local trigger_file="${3:-}"
  local restart_count="${WORKER_RESTARTS[$iter]:-0}"

  if (( restart_count >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1 # caller writes BLOCKED
  fi

  # Exponential backoff: 5s, 10s, 20s, 60s (cap)
  local delay
  case "$restart_count" in
    0) delay=5 ;;
    1) delay=10 ;;
    2) delay=20 ;;
    *) delay=60 ;;
  esac
  log " Restarting worker (attempt $((restart_count + 1))/$MAX_RESTARTS) after ${delay}s backoff..."
  sleep "$delay"

  # Kill existing claude, wait for shell prompt
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
  sleep 2

  # Pick the model that belongs to the pane being restarted.
  local model="$WORKER_MODEL"
  if [[ -n "${VERIFIER_PANE:-}" && "$pane_id" == "$VERIFIER_PANE" ]]; then
    model="$VERIFIER_MODEL"
  fi

  # Re-launch claude (omc-teams interactive pattern)
  safe_send_keys "$pane_id" "$CLAUDE_BIN --model $model --dangerously-skip-permissions"
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
  return 0
}
537
+
538
+ # =============================================================================
539
+ # Write-Then-Notify: Trigger Script Generation (omc-teams CRITICAL pattern)
540
+ # =============================================================================
541
+
542
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
# NEVER send prompt content through tmux send-keys.
# Write payloads to files, send only short trigger commands (<200 chars).
#
# Generates two files in $LOGS_DIR for the given iteration:
#   iter-NNN.worker-prompt.md   base worker prompt + iteration context
#                               (+ the previous iteration's fix contract,
#                               when iter-(N-1).fix-contract.md exists)
#   iter-NNN.worker-trigger.sh  executable script that runs claude on that
#                               prompt, tees output to
#                               iter-NNN.worker-output.log, and maintains
#                               $WORKER_HEARTBEAT while it runs
#
# Every path, the claude binary and the model are shell-quoted (printf %q)
# before being embedded in the generated script, so locations containing
# spaces or shell metacharacters keep working.
#
# Arguments: $1 - iteration number
write_worker_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"

  # Build the worker prompt: base prompt + iteration context.
  # The contract is at most the first 5 lines under the
  # "## Next Iteration Contract" heading of the memory file.
  local contract
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -5)

  # Check for fix contract from previous verifier failure
  local prev_iter=$((iter - 1))
  local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' "$prev_iter").fix-contract.md"

  {
    cat "$WORKER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Iteration Context"
    echo "- **Iteration**: $iter"
    echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
    echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"

    # Include fix contract if previous verifier failed
    if [[ -f "$fix_contract_file" ]]; then
      echo ""
      echo "---"
      echo "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)"
      echo "The Verifier REJECTED your previous work. You MUST fix the issues below."
      echo "Do NOT just resubmit — actually change the code to address each issue."
      echo ""
      cat "$fix_contract_file"
    fi
  } | atomic_write "$prompt_file"

  # Shell-quote everything that is expanded into the generated script.
  local q_heartbeat q_prompt q_output q_claude q_model
  q_heartbeat=$(printf '%q' "$WORKER_HEARTBEAT")
  q_prompt=$(printf '%q' "$prompt_file")
  q_output=$(printf '%q' "$output_log")
  q_claude=$(printf '%q' "${CLAUDE_BIN:-claude}")
  q_model=$(printf '%q' "$WORKER_MODEL")

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # In the here-document, \$ is emitted as a literal $ (evaluated when the
  # trigger runs) while unescaped $vars are expanded now.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE=$q_heartbeat

# Background heartbeat writer (omc-teams pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

# Run claude with fresh context (governance.md s7 step 5)
$q_claude -p "\$(cat $q_prompt)" \\
  --model $q_model \\
  --dangerously-skip-permissions \\
  --output-format text \\
  2>&1 | tee $q_output

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Worker prompt: $prompt_file"
  log " Worker trigger: $trigger_file"
}
618
+
619
# Verifier counterpart of the worker trigger writer. Generates two files in
# $LOGS_DIR for the given iteration:
#   iter-NNN.verifier-prompt.md   base verifier prompt + iteration number and
#                                 the path of the done-claim file
#   iter-NNN.verifier-trigger.sh  executable script that runs claude on that
#                                 prompt, tees output to
#                                 iter-NNN.verifier-output.log, and maintains
#                                 $VERIFIER_HEARTBEAT while it runs
#
# Every path, the claude binary and the model are shell-quoted (printf %q)
# before being embedded in the generated script, so locations containing
# spaces or shell metacharacters keep working.
#
# Arguments: $1 - iteration number
write_verifier_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.verifier-output.log"

  # Build verifier prompt from base
  {
    cat "$VERIFIER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Verification Context"
    echo "- **Iteration**: $iter"
    echo "- **Done Claim**: $DONE_CLAIM_FILE"
  } | atomic_write "$prompt_file"

  # Shell-quote everything that is expanded into the generated script.
  local q_heartbeat q_prompt q_output q_claude q_model
  q_heartbeat=$(printf '%q' "$VERIFIER_HEARTBEAT")
  q_prompt=$(printf '%q' "$prompt_file")
  q_output=$(printf '%q' "$output_log")
  q_claude=$(printf '%q' "${CLAUDE_BIN:-claude}")
  q_model=$(printf '%q' "$VERIFIER_MODEL")

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # In the here-document, \$ is emitted as a literal $ (evaluated when the
  # trigger runs) while unescaped $vars are expanded now.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter verifier - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE=$q_heartbeat

# Background heartbeat writer (omc-teams pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

# Run claude with fresh context (governance.md s7 step 7)
$q_claude -p "\$(cat $q_prompt)" \\
  --model $q_model \\
  --dangerously-skip-permissions \\
  --output-format text \\
  2>&1 | tee $q_output

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Verifier prompt: $prompt_file"
  log " Verifier trigger: $trigger_file"
}
673
+
674
+ # =============================================================================
675
+ # Status Updates
676
+ # =============================================================================
677
+
678
# --- governance.md s7 step 8: Update status.json ---
# Atomically rewrites $STATUS_FILE with the current campaign state.
#
# Arguments: $1 - phase label
#            $2 - last result text
# Globals read: SLUG, ITERATION, MAX_ITER, WORKER_MODEL, VERIFIER_MODEL,
#               CONSECUTIVE_FAILURES
# Returns: 1 when the JSON could not be rendered (the previous status file is
#          left untouched); otherwise the status of the atomic write.
#
# The document is rendered with jq so that quotes, backslashes or newlines in
# the phase/result text cannot produce invalid JSON. Counters are emitted as
# JSON numbers when they parse as numbers and as strings otherwise.
update_status() {
  local phase="$1"
  local last_result="$2"
  local status_json

  if ! status_json=$(jq -n \
    --arg slug "$SLUG" \
    --arg iteration "$ITERATION" \
    --arg max_iter "$MAX_ITER" \
    --arg phase "$phase" \
    --arg worker_model "$WORKER_MODEL" \
    --arg verifier_model "$VERIFIER_MODEL" \
    --arg last_result "$last_result" \
    --arg consecutive_failures "$CONSECUTIVE_FAILURES" \
    --arg updated_at_utc "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    'def num: (tonumber? // .);
     {
       slug: $slug,
       iteration: ($iteration | num),
       max_iter: ($max_iter | num),
       phase: $phase,
       worker_model: $worker_model,
       verifier_model: $verifier_model,
       last_result: $last_result,
       consecutive_failures: ($consecutive_failures | num),
       updated_at_utc: $updated_at_utc
     }'); then
    log_error "Failed to render status JSON"
    return 1
  fi

  printf '%s\n' "$status_json" | atomic_write "$STATUS_FILE"
}
695
+
696
# --- governance.md s7 step 8: Write result log ---
# Writes $LOGS_DIR/iter-NNN.result.md containing the leader-measured result,
# the `git diff --stat` of the most recent commit, and a UTC timestamp.
#
# Arguments: $1 - iteration number
#            $2 - result text
#
# git is run with -C "$ROOT" so the diff always describes the project
# repository, even when the runner was started from another directory.
# When git fails (no repository, no previous commit, ...) a placeholder line
# is recorded instead.
write_result_log() {
  local iter="$1"
  local result="$2"
  local result_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").result.md"

  local git_diff=""
  git_diff=$(git -C "${ROOT:-.}" diff --stat HEAD~1 HEAD 2>/dev/null || echo "(no git diff available)")

  {
    echo "# Iteration $iter Result"
    echo ""
    echo "## Status"
    echo "$result [leader-measured]"
    echo ""
    echo "## Files Changed"
    echo '```'
    echo "$git_diff"
    echo '```'
    echo "[git-measured]"
    echo ""
    echo "## Timestamp"
    date -u +%Y-%m-%dT%H:%M:%SZ
  } | atomic_write "$result_file"
}
721
+
722
+ # =============================================================================
723
+ # Sentinel Writers
724
+ # =============================================================================
725
+
726
# --- governance.md s7: Only the Leader writes sentinels ---
# Atomically writes the COMPLETE sentinel ($COMPLETE_SENTINEL) recording the
# iteration at which the campaign finished, the given summary and a UTC
# timestamp, then logs the sentinel path.
#
# Arguments: $1 - summary text
write_complete_sentinel() {
  local summary="$1"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  {
    echo "# Campaign Complete"
    echo ""
    echo "Completed at iteration $ITERATION."
    echo "$summary"
    echo ""
    echo "Timestamp: $stamp"
  } | atomic_write "$COMPLETE_SENTINEL"

  log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
}
737
+
738
# Atomically writes the BLOCKED sentinel ($BLOCKED_SENTINEL) recording the
# iteration at which the campaign stopped, the given reason and a UTC
# timestamp, then logs the sentinel path.
#
# Arguments: $1 - reason text
write_blocked_sentinel() {
  local reason="$1"
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  {
    echo "# Campaign Blocked"
    echo ""
    echo "Blocked at iteration $ITERATION."
    echo "Reason: $reason"
    echo ""
    echo "Timestamp: $stamp"
  } | atomic_write "$BLOCKED_SENTINEL"

  log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
}
748
+
749
+ # =============================================================================
750
+ # Cleanup (trap handler)
751
+ # =============================================================================
752
+
753
# Trap handler: stops whatever runs in the worker/verifier panes, kills those
# panes, removes leftover "*.tmp.*" files from the log and memo directories
# and prints a session summary.
#
# Safe to run before create_session has assigned the pane variables: the
# script runs under `set -u`, so every use of WORKER_PANE / VERIFIER_PANE is
# guarded and panes that were never created are simply skipped.
cleanup() {
  log "Cleaning up..."

  local worker_pane="${WORKER_PANE:-}"
  local verifier_pane="${VERIFIER_PANE:-}"
  local pane line

  # Kill claude processes then kill panes
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
  for pane in "$worker_pane" "$verifier_pane"; do
    [[ -n "$pane" ]] || continue
    tmux send-keys -t "$pane" C-c 2>/dev/null
    tmux send-keys -t "$pane" "/exit" Enter 2>/dev/null
  done

  # Give the interrupted processes a moment, but only if any pane was signalled.
  if [[ -n "${worker_pane}${verifier_pane}" ]]; then
    sleep 2
  fi

  # Kill the panes themselves
  log_debug "cleanup: killing panes $worker_pane $verifier_pane"
  if [[ -n "$worker_pane" ]]; then
    tmux kill-pane -t "$worker_pane" 2>&1 | while read -r line; do log_debug "kill worker: $line"; done
  fi
  if [[ -n "$verifier_pane" ]]; then
    tmux kill-pane -t "$verifier_pane" 2>&1 | while read -r line; do log_debug "kill verifier: $line"; done
  fi

  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
  setopt local_options nonomatch 2>/dev/null
  rm -f -- "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null

  # Print summary
  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  echo ""
  echo "============================================================"
  echo " Ralph Desk Tmux Runner - Session Complete"
  echo "============================================================"
  echo " Session: $SESSION_NAME"
  echo " Slug: $SLUG"
  echo " Iterations: $ITERATION / $MAX_ITER"
  echo " Elapsed: ${minutes}m ${seconds}s"
  echo ""

  # The final state is derived from which sentinel file exists.
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    echo " Final State: COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    echo " Final State: BLOCKED"
  else
    echo " Final State: STOPPED (interrupted or timeout)"
  fi

  echo ""
  echo " Tmux session left alive for inspection:"
  echo " tmux attach -t $SESSION_NAME"
  echo " tmux kill-session -t $SESSION_NAME"
  echo "============================================================"
}
807
+
808
+ # =============================================================================
809
+ # Poll Loop (used for both Worker and Verifier)
810
+ # =============================================================================
811
+
812
# --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
# Waits until the signal file appears, watching the role's heartbeat and
# nudging the pane when it looks idle. Used for both Worker and Verifier.
#
# Arguments: $1 - signal file to wait for
#            $2 - heartbeat file written by the role's trigger script
#            $3 - tmux pane id running the role
#            $4 - trigger file path (passed through to restart_worker)
#            $5 - role label used in log messages ("worker" or "verifier")
# Returns:   0 when the signal file exists; 1 on per-iteration timeout, when
#            restarts are exhausted, or when the heartbeat circuit breaker
#            (3 consecutive stale readings) trips.
#
# After a successful restart the heartbeat file that triggered it is removed.
# That file still says "exited" (or is still stale), so keeping it would make
# the very next pass restart again and burn through MAX_RESTARTS before the
# relaunched process had any chance to run.
poll_for_signal() {
  local signal_file="$1"
  local heartbeat_file="$2"
  local pane_id="$3"
  local trigger_file="$4"
  local role="$5" # "worker" or "verifier"
  # NOTE: check_and_nudge_idle_pane updates this counter by NAME, so the
  # variable must keep the name passed to it below.
  local nudge_count=0
  # Declared once, outside the loop: re-declaring an existing local without a
  # value makes zsh print it to stdout on every pass.
  local poll_start now elapsed
  poll_start=$(date +%s)

  # Initialize idle tracking for this pane
  LAST_PANE_CONTENT[$pane_id]=""
  PANE_IDLE_SINCE[$pane_id]=$(date +%s)

  while true; do
    now=$(date +%s)
    elapsed=$(( now - poll_start ))

    # Per-iteration timeout check
    if (( elapsed >= ITER_TIMEOUT )); then
      log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
      return 1 # timeout
    fi

    # Check if signal file appeared
    if [[ -f "$signal_file" ]]; then
      log " Signal file detected: $signal_file"
      return 0 # success
    fi

    # Check heartbeat freshness (omc-teams pattern)
    if [[ -f "$heartbeat_file" ]]; then
      if check_heartbeat_exited "$heartbeat_file"; then
        # Process exited but no signal file -- give a brief grace period
        sleep 3
        if [[ -f "$signal_file" ]]; then
          log " Signal file detected after process exit: $signal_file"
          return 0
        fi
        log_error "$role exited without writing signal file"

        # Attempt restart with exponential backoff
        if ! restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          return 1 # max restarts exceeded
        fi

        # Drop the consumed "exited" record and start a fresh poll window.
        rm -f -- "$heartbeat_file"
        poll_start=$(date +%s)
        nudge_count=0
        LAST_PANE_CONTENT[$pane_id]=""
        PANE_IDLE_SINCE[$pane_id]=$(date +%s)
        sleep "$POLL_INTERVAL"
        continue
      fi

      if ! check_heartbeat "$heartbeat_file"; then
        log " WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
        HEARTBEAT_STALE_COUNT=$(( HEARTBEAT_STALE_COUNT + 1 ))
        # Circuit breaker: 3 consecutive heartbeat stale events
        if (( HEARTBEAT_STALE_COUNT >= 3 )); then
          log_error "Circuit breaker: 3 consecutive heartbeat stale events"
          return 1
        fi

        # Attempt restart
        if ! restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          return 1
        fi

        # Drop the stale record and start a fresh poll window.
        rm -f -- "$heartbeat_file"
        poll_start=$(date +%s)
        nudge_count=0
        LAST_PANE_CONTENT[$pane_id]=""
        PANE_IDLE_SINCE[$pane_id]=$(date +%s)
        continue
      fi

      # Heartbeat is fresh, reset stale counter
      HEARTBEAT_STALE_COUNT=0
    fi

    # Idle pane nudging (omc-teams pattern)
    check_and_nudge_idle_pane "$pane_id" "nudge_count"

    sleep "$POLL_INTERVAL"
  done
}
896
+
897
+ # =============================================================================
898
+ # Circuit Breaker: Stale Context Detection
899
+ # =============================================================================
900
+
901
+ # --- governance.md s7 step 8: Stale context detection ---
902
# Print a content digest of $CONTEXT_FILE on stdout.
#
# Globals read: CONTEXT_FILE
# Outputs:
#   "no-context"            when $CONTEXT_FILE is not a regular file
#   the md5 hex digest      via `md5 -q` or, failing that, `md5sum`
#   "cksum-<crc>-<size>"    when neither md5 tool yields a digest
# Returns: 0 when something was printed, 1 when no digest could be produced.
#
# The cksum fallback matters because an empty digest compares equal to
# itself on every iteration, which would make check_stale_context report
# an unchanged context even while the file is changing.
compute_context_hash() {
  if [[ ! -f "$CONTEXT_FILE" ]]; then
    echo "no-context"
    return 0
  fi

  local digest=""
  digest=$(md5 -q "$CONTEXT_FILE" 2>/dev/null)
  if [[ -z "$digest" ]]; then
    digest=$(md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1)
  fi
  if [[ -z "$digest" ]]; then
    # POSIX cksum reading stdin prints "<crc> <size>"; join the fields.
    digest=$(cksum < "$CONTEXT_FILE" 2>/dev/null)
    if [[ -n "$digest" ]]; then
      digest="cksum-${digest// /-}"
    fi
  fi

  if [[ -z "$digest" ]]; then
    return 1
  fi
  echo "$digest"
}
909
+
910
# Compare the current context digest with the one recorded on the previous
# call and maintain the stale-iteration counter.
#
# Globals read/written: PREV_CONTEXT_HASH, STALE_CONTEXT_COUNT
# Returns: 1 once the digest has been unchanged for 3 consecutive calls,
#          0 otherwise.
check_stale_context() {
  local hash_now
  hash_now=$(compute_context_hash)

  # Context moved on: clear the counter and remember the new digest.
  if [[ "$hash_now" != "$PREV_CONTEXT_HASH" ]]; then
    STALE_CONTEXT_COUNT=0
    PREV_CONTEXT_HASH="$hash_now"
    return 0
  fi

  # Digest identical to last time (so PREV_CONTEXT_HASH needs no update).
  STALE_CONTEXT_COUNT=$(( STALE_CONTEXT_COUNT + 1 ))
  log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"

  if (( STALE_CONTEXT_COUNT < 3 )); then
    return 0
  fi

  log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
  return 1
}
928
+
929
+ # =============================================================================
930
+ # Security Warning
931
+ # =============================================================================
932
+
933
# Write the --dangerously-skip-permissions banner to stdout:
# a blank line, a ruled box with the warning text, and a trailing blank line.
print_security_warning() {
  local rule="================================================================"

  # One printf call, one argument per output line.
  printf '%s\n' \
    "" \
    "$rule" \
    " WARNING: Running with --dangerously-skip-permissions" \
    "" \
    " The claude CLI will execute tools (file writes, shell commands)" \
    " without asking for confirmation. Only run this on code you" \
    " trust in an environment you control." \
    "$rule" \
    ""
}
944
+
945
+ # =============================================================================
946
+ # Main Leader Loop
947
+ # =============================================================================
948
+
949
# Leader loop: run up to MAX_ITER Worker (and, on request, Verifier)
# iterations inside tmux panes, reacting to the JSON signal/verdict files
# they write.
#
# Globals read:    SLUG, ROOT, MAX_ITER, WORKER_MODEL, VERIFIER_MODEL,
#                  POLL_INTERVAL, ITER_TIMEOUT, CLAUDE_BIN, LOGS_DIR,
#                  WORKER_PANE, VERIFIER_PANE, SIGNAL_FILE, DONE_CLAIM_FILE,
#                  VERDICT_FILE, WORKER_HEARTBEAT, VERIFIER_HEARTBEAT,
#                  COMPLETE_SENTINEL, BLOCKED_SENTINEL
# Globals written: ITERATION, PREV_CONTEXT_HASH, MONITOR_FAILURE_COUNT,
#                  CONSECUTIVE_FAILURES
# Returns: 0 when the campaign completes (COMPLETE sentinel found or
#          Verifier passes with recommended "complete"); 1 when blocked,
#          a circuit breaker trips, or MAX_ITER is reached.
main() {
  # All loop-scoped locals are declared once here. In zsh, re-running a bare
  # `local name` for a parameter that already exists prints "name=value" to
  # stdout (unless TYPESET_SILENT is set), so declaring them inside the loop
  # would emit noise from the second iteration onward.
  local worker_nudge_count verifier_nudge_count
  local worker_prompt worker_launch worker_instruction
  local signal_status signal_summary
  local verifier_prompt verifier_cmd verifier_launch verifier_instruction
  local verdict recommended verdict_summary
  local fix_contract

  # --- Startup ---
  log "Ralph Desk Tmux Runner starting..."
  log " Slug: $SLUG"
  log " Root: $ROOT"
  log " Max iterations: $MAX_ITER"
  log " Worker model: $WORKER_MODEL"
  log " Verifier model: $VERIFIER_MODEL"
  log " Poll interval: ${POLL_INTERVAL}s"
  log " Iter timeout: ${ITER_TIMEOUT}s"

  # Dependency checks
  check_dependencies

  # Print security warning (governance.md s7: --dangerously-skip-permissions)
  print_security_warning

  # Validate scaffold
  validate_scaffold

  # Check for existing sessions
  check_existing_sessions

  # Create tmux session with pane IDs (governance.md s7 step 1)
  create_session

  # Set trap for cleanup on exit/error
  trap cleanup EXIT

  # Initialize context hash for stale detection
  PREV_CONTEXT_HASH=$(compute_context_hash)

  # --- governance.md s7: Leader Loop ---
  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
    log ""
    log "========== Iteration $ITERATION / $MAX_ITER =========="

    # --- governance.md s7 step 1: Check sentinels ---
    if [[ -f "$COMPLETE_SENTINEL" ]]; then
      log "COMPLETE sentinel found. Campaign succeeded."
      update_status "complete" "complete"
      return 0
    fi
    if [[ -f "$BLOCKED_SENTINEL" ]]; then
      log "BLOCKED sentinel found. Campaign blocked."
      update_status "blocked" "blocked"
      return 1
    fi

    # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
    rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
    rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null

    # --- Clean previous claude session in panes (one-shot lifecycle) ---
    # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
    if (( ITERATION > 1 )); then
      # Send C-c first (in case claude is mid-task), then /exit
      tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
      sleep 1
      tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
      sleep 2
      # Wait for shell prompt before proceeding
      wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
    fi

    # Reset per-iteration state
    worker_nudge_count=0
    verifier_nudge_count=0

    # --- governance.md s7 step 4: Build worker prompt + trigger ---
    write_worker_trigger "$ITERATION"
    worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"

    update_status "worker" "running"

    # --- governance.md s7 step 5: Execute Worker (interactive claude, omc-teams pattern) ---
    # Step 5a: Launch interactive claude in Worker pane
    worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
    log " Launching Worker claude in pane $WORKER_PANE..."
    tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
    tmux send-keys -t "$WORKER_PANE" Enter

    # Step 5b: Wait for claude TUI to be ready (omc-teams pattern)
    if ! wait_for_pane_ready "$WORKER_PANE" 30; then
      log_error "Worker claude failed to start"
      write_blocked_sentinel "Worker claude failed to start in pane"
      update_status "blocked" "worker_start_failed"
      return 1
    fi

    # Step 5c: Wait for claude to fully initialize, then send instruction
    sleep 3
    worker_instruction="Read and execute the instructions in $worker_prompt"
    if ! safe_send_keys "$WORKER_PANE" "$worker_instruction"; then
      log_error "Failed to send instruction to Worker"
    fi
    # Extra C-m to ensure submission (long text may false-positive the consumed check)
    sleep 0.5
    tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null

    # --- governance.md s7 step 5+6: Poll for Worker completion ---
    log " Polling for iter-signal.json..."
    if ! poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
      # Monitor failure or timeout
      (( MONITOR_FAILURE_COUNT++ ))
      if (( MONITOR_FAILURE_COUNT >= 3 )); then
        write_blocked_sentinel "3 consecutive monitor failures"
        update_status "blocked" "monitor_failures"
        return 1
      fi
      log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
      update_status "worker" "poll_failed"
      continue
    fi

    # Reset monitor failure count on success
    MONITOR_FAILURE_COUNT=0

    # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
    signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
    signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)

    log " Worker signal: status=$signal_status summary=\"$signal_summary\""

    case "$signal_status" in
      continue)
        # --- governance.md s7 step 6: continue -> go to step 8 ---
        log " Worker requests continue. Moving to next iteration."
        update_status "worker" "continue"
        ;;
      verify)
        # --- governance.md s7 step 7: Execute Verifier ---
        log " Worker claims done. Dispatching Verifier..."

        write_verifier_trigger "$ITERATION"
        verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"

        update_status "verifier" "running"

        # Step 7a: Clean previous Verifier session if claude is running
        verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
        if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" ]]; then
          tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
          sleep 0.5
          tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
          sleep 2
          wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
        fi

        verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
        log " Launching Verifier claude in pane $VERIFIER_PANE..."
        tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
        tmux send-keys -t "$VERIFIER_PANE" Enter

        # Step 7b: Wait for claude TUI to be ready
        if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
          log_error "Verifier claude failed to start"
          update_status "verifier" "start_failed"
          continue
        fi

        # Step 7c: Wait for claude to fully initialize, then send instruction
        sleep 3
        verifier_instruction="Read and execute the instructions in $verifier_prompt"
        safe_send_keys "$VERIFIER_PANE" "$verifier_instruction"
        # Extra C-m to ensure submission
        sleep 0.5
        tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
        sleep 0.3
        tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null

        # Poll for verify-verdict.json
        log " Polling for verify-verdict.json..."
        if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
          log_error "Verifier poll failed"
          update_status "verifier" "poll_failed"
          continue
        fi

        # --- governance.md s7 step 7: Read verdict via jq ---
        verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
        recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
        verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)

        log " Verifier: verdict=$verdict recommended=$recommended"
        log " Verifier summary: \"$verdict_summary\""

        case "$verdict" in
          pass)
            CONSECUTIVE_FAILURES=0
            if [[ "$recommended" == "complete" ]]; then
              # Write COMPLETE sentinel (only Leader writes sentinels)
              write_complete_sentinel "$verdict_summary"
              update_status "complete" "pass"
              return 0
            else
              log " Verifier passed but did not recommend complete. Continuing."
              update_status "verifier" "pass_continue"
            fi
            ;;
          fail)
            # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
            (( CONSECUTIVE_FAILURES++ ))
            log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."

            # Extract issues from verdict for next Worker's fix contract.
            # $verdict_summary was read from the same verdict file just above.
            fix_contract="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).fix-contract.md"
            {
              echo "# Fix Contract (from Verifier iteration $ITERATION)"
              echo ""
              echo "## Summary"
              echo "$verdict_summary"
              echo ""
              echo "## Issues (from verify-verdict.json)"
              jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null || echo "- (no structured issues available)"
              echo ""
              echo "## Next Iteration Contract"
              jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
            } | atomic_write "$fix_contract"
            log " Fix contract: $fix_contract"

            # Circuit breaker: consecutive failures
            if (( CONSECUTIVE_FAILURES >= 3 )); then
              log_error "Circuit breaker: 3 consecutive verification failures"
              write_blocked_sentinel "3 consecutive verification failures"
              update_status "blocked" "consecutive_failures"
              return 1
            fi

            update_status "verifier" "fail"
            ;;
          request_info)
            # --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
            log " Verifier requests info (degraded in tmux lean mode)."
            log " Questions: \"$verdict_summary\""
            log " Treating as soft fail — Worker will see verdict in next iteration."
            update_status "verifier" "request_info"
            ;;
          blocked)
            write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary"
            update_status "blocked" "verifier_blocked"
            return 1
            ;;
          *)
            log_error "Unknown verdict: $verdict"
            update_status "verifier" "unknown_verdict"
            ;;
        esac
        ;;
      blocked)
        # --- governance.md s7 step 6: blocked -> write sentinel ---
        write_blocked_sentinel "Worker reported blocked: $signal_summary"
        update_status "blocked" "worker_blocked"
        return 1
        ;;
      *)
        log_error "Unknown signal status: $signal_status"
        update_status "worker" "unknown_status"
        ;;
    esac

    # --- governance.md s7 step 8: Write result log ---
    write_result_log "$ITERATION" "$signal_status"

    # --- governance.md s7 step 8: Circuit breaker - stale context check ---
    if ! check_stale_context; then
      write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
      update_status "blocked" "stale_context"
      return 1
    fi

    # --- governance.md s7 step 8: Update status ---
    update_status "idle" "${signal_status:-unknown}"
  done

  # Max iterations reached
  log "Max iterations ($MAX_ITER) reached."
  update_status "timeout" "max_iter"
  return 1
}
1241
+
1242
+ # =============================================================================
1243
+ # Entry Point
1244
+ # =============================================================================
1245
+
1246
+ # Require tmux — tmux mode only works inside an active tmux session
1247
# Inside a tmux session: hand off to main and exit with its status.
if [[ -n "${TMUX:-}" ]]; then
  main "$@"
  exit $?
fi

# Not inside tmux: explain how to proceed, then fail.
echo "ERROR: tmux mode requires running inside a tmux session."
echo ""
echo " Start tmux first, then retry:"
echo " tmux"
echo " LOOP_NAME=$SLUG $0"
echo ""
echo " Or use Agent() mode instead (no tmux needed):"
echo " /rlp-desk run $SLUG"
exit 1