@ai-dev-methodologies/rlp-desk 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2750 +0,0 @@
1
- #!/bin/zsh
2
- set -uo pipefail
3
- # NOTE: We use set -u (undefined var check) and pipefail, but NOT set -e
4
- # because the main loop uses explicit error checks throughout.
5
-
6
- # =============================================================================
7
- # Ralph Desk Tmux Runner
8
- #
9
- # Implements the Leader loop from governance.md section 7 as a shell script.
10
- # Uses tmux proven patterns: write-then-notify, pane IDs (%N),
11
- # copy-mode guards, verification-based retry, heartbeat monitoring,
12
- # idle pane nudging, exponential backoff restarts, atomic file writes.
13
- #
14
- # Usage:
15
- # LOOP_NAME=<slug> ./run_ralph_desk.zsh
16
- #
17
- # Required env:
18
- # LOOP_NAME - slug identifier for the campaign
19
- #
20
- # Optional env:
21
- # ROOT - project root (default: $PWD)
22
- # MAX_ITER - max iterations (default: 20)
23
- # WORKER_MODEL - claude model for Worker (default: haiku)
24
- # VERIFIER_MODEL - claude model for Verifier (default: sonnet)
25
- # POLL_INTERVAL - seconds between signal checks (default: 5)
26
- # ITER_TIMEOUT - per-iteration timeout in seconds (default: 600)
27
- # HEARTBEAT_STALE_THRESHOLD - seconds before heartbeat is stale (default: 120)
28
- # MAX_RESTARTS - max restart attempts per worker (default: 3)
29
- # IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
30
- # MAX_NUDGES - max nudges per pane per iteration (default: 3)
31
- #
32
- # Per-role codex config:
33
- # WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.4)
34
- # WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
35
- # VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.4)
36
- # VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
37
- #
38
- # Consensus scope:
39
- # CONSENSUS_SCOPE - when consensus applies (default: all)
40
- # all=every verify, final-only=final ALL only
41
- #
42
- # Dependencies: tmux, claude CLI, jq
43
- # Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
44
- # =============================================================================
45
-
46
- # --- Environment Variables ---
47
- SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
48
- ROOT="${ROOT:-$PWD}"
49
- MAX_ITER="${MAX_ITER:-20}"
50
- WORKER_MODEL="${WORKER_MODEL:-haiku}"
51
- VERIFIER_MODEL="${VERIFIER_MODEL:-sonnet}"
52
- FINAL_VERIFIER_MODEL="${FINAL_VERIFIER_MODEL:-opus}"
53
- POLL_INTERVAL="${POLL_INTERVAL:-5}"
54
- ITER_TIMEOUT="${ITER_TIMEOUT:-600}"
55
- HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
56
- MAX_RESTARTS="${MAX_RESTARTS:-3}"
57
- IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
58
- MAX_NUDGES="${MAX_NUDGES:-3}"
59
- WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
60
- AUTONOMOUS_MODE="${AUTONOMOUS_MODE:-0}" # 1=don't stop on ambiguity, PRD is authoritative
61
-
62
# --- Engine Selection (auto-detect from model format) ---
# _auto_detect_engine() — split a "model:level" spec and route it to an engine.
#   haiku|sonnet|opus + ":level"  → engine=claude, level stored as effort
#   any other name    + ":level"  → engine=codex, level stored as reasoning
#                                   ("spark" expands to gpt-5.3-codex-spark)
#   plain name (no colon)         → nothing is modified
# Every argument is a variable NAME, not a value; results are written back to
# the named global variables.
# Args: $1=model_var $2=engine_var $3=codex_model_var (may be "")
#       $4=codex_reasoning_var (may be "") $5=effort_var (optional)
# Returns: 0 always
_auto_detect_engine() {
  local model_var="$1" engine_var="$2" codex_model_var="$3" codex_reasoning_var="$4" effort_var="${5:-}"
  local model_val="${(P)model_var}"

  # No ":level" suffix — leave everything exactly as the caller configured it.
  [[ "$model_val" == *:* ]] || return 0

  local model_part="${model_val%%:*}"   # text before the first colon
  local level_part="${model_val##*:}"   # text after the last colon

  # Assignments use `typeset -g "name=value"` instead of eval: the value is
  # stored literally, so a spec containing spaces, quotes or `;` can neither
  # break the assignment nor be executed as shell code.
  case "$model_part" in
    haiku|sonnet|opus)
      # Claude model with effort — engine is claude, level is the effort
      typeset -g "$engine_var=claude"
      typeset -g "$model_var=$model_part"
      if [[ -n "$effort_var" ]]; then
        typeset -g "$effort_var=$level_part"
      fi
      ;;
    *)
      # Codex model with reasoning
      if [[ "$model_part" == "spark" ]]; then
        model_part="gpt-5.3-codex-spark"
      fi
      typeset -g "$engine_var=codex"
      typeset -g "$model_var=$model_part"
      if [[ -n "$codex_model_var" ]]; then
        typeset -g "$codex_model_var=$model_part"
      fi
      if [[ -n "$codex_reasoning_var" ]]; then
        typeset -g "$codex_reasoning_var=$level_part"
      fi
      ;;
  esac
  return 0
}
90
-
91
- WORKER_ENGINE="${WORKER_ENGINE:-claude}"
92
- VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}"
93
- FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}"
94
-
95
- # Effort levels for Claude models (set by _auto_detect_engine or CLI --worker-model opus:max)
96
- WORKER_EFFORT="${WORKER_EFFORT:-}"
97
- VERIFIER_EFFORT="${VERIFIER_EFFORT:-}"
98
- FINAL_VERIFIER_EFFORT="${FINAL_VERIFIER_EFFORT:-}"
99
-
100
- # Auto-detect engine from model format for env var path (CLI path uses parse_model_flag)
101
- _auto_detect_engine WORKER_MODEL WORKER_ENGINE WORKER_CODEX_MODEL WORKER_CODEX_REASONING WORKER_EFFORT
102
- _auto_detect_engine VERIFIER_MODEL VERIFIER_ENGINE VERIFIER_CODEX_MODEL VERIFIER_CODEX_REASONING VERIFIER_EFFORT
103
- _auto_detect_engine FINAL_VERIFIER_MODEL FINAL_VERIFIER_ENGINE "" "" FINAL_VERIFIER_EFFORT
104
- WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
105
- WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
106
- VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
107
- VERIFIER_CODEX_REASONING="${VERIFIER_CODEX_REASONING:-high}" # low|medium|high
108
- CODEX_BIN="" # resolved by check_dependencies when engine=codex
109
-
110
# --- Verify Mode ---
: "${VERIFY_MODE:=per-us}"                        # per-us|batch
# Consensus is a single switch (off|all|final-only); it supersedes the older
# VERIFY_CONSENSUS / FINAL_CONSENSUS / CONSENSUS_SCOPE trio.
: "${CONSENSUS_MODE:=off}"
: "${CONSENSUS_MODEL:=gpt-5.4:medium}"            # per-US cross-verifier (lighter)
: "${FINAL_CONSENSUS_MODEL:=gpt-5.4:high}"        # final cross-verifier (stricter)

# Legacy flags still win over CONSENSUS_MODE when they are switched on.
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
  CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
  CONSENSUS_MODE="final-only"
fi
: "${CONSENSUS_SCOPE:=${CONSENSUS_MODE}}"

# Circuit breaker: consecutive failures before BLOCKED (default 6); the
# effective limit is doubled whenever consensus verification is active.
: "${CB_THRESHOLD:=6}"
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
if [[ "$CONSENSUS_MODE" != "off" ]]; then
  EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
fi

: "${_API_MAX_RETRIES:=5}"
: "${_API_RETRY_INTERVAL_S:=30}"
132
-
133
- # --- Derived Paths ---
134
- DESK="$ROOT/.claude/ralph-desk"
135
- PROMPTS_DIR="$DESK/prompts"
136
- CONTEXT_DIR="$DESK/context"
137
- MEMOS_DIR="$DESK/memos"
138
- LOGS_DIR="$DESK/logs/$SLUG"
139
- RUNTIME_DIR="$LOGS_DIR/runtime"
140
- PRD_FILE="$DESK/plans/prd-$SLUG.md"
141
- TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
142
- # --- Analytics Directory (user-level, cross-project) ---
143
- ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
144
- ANALYTICS_DIR="$HOME/.claude/ralph-desk/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
145
- CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
146
- METADATA_FILE="$ANALYTICS_DIR/metadata.json"
147
- WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
148
- VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
149
- CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
150
- MEMORY_FILE="$MEMOS_DIR/${SLUG}-memory.md"
151
- SIGNAL_FILE="$MEMOS_DIR/${SLUG}-iter-signal.json"
152
- DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
153
- VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
154
- COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
155
- BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
156
- LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
157
- STATUS_FILE="$RUNTIME_DIR/status.json"
158
- SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
159
- WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
160
- VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
161
- COST_LOG="$LOGS_DIR/cost-log.jsonl"
162
-
163
- # --- Session Naming ---
164
- TIMESTAMP=$(date +%Y%m%d-%H%M%S)
165
- SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"
166
-
167
- # --- State Tracking ---
168
- typeset -A LAST_PANE_CONTENT
169
- typeset -A PANE_IDLE_SINCE
170
- typeset -A WORKER_RESTARTS
171
- typeset -A US_FAIL_HISTORY
172
- STALE_CONTEXT_COUNT=0
173
- HEARTBEAT_STALE_COUNT=0
174
- MONITOR_FAILURE_COUNT=0
175
- CONSECUTIVE_FAILURES=0
176
- PREV_CONTEXT_HASH=""
177
- PREV_PRD_HASH=""
178
- PREV_PRD_US_LIST=""
179
- _PRD_CHANGED=0
180
- ITERATION=0
181
- START_TIME=$(date +%s)
182
- BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
183
- CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
184
- SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
185
- VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
186
- CONSENSUS_ROUND=0 # current consensus round for current US
187
- US_LIST="" # comma-separated US IDs from PRD (per-us mode)
188
- LOCKFILE_ACQUIRED=0
189
- LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
190
- _SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
191
- _LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
192
- _MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
193
- _ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
194
- _ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
195
-
196
- # =============================================================================
197
- # Utility Functions
198
- # =============================================================================
199
-
200
- DEBUG="${DEBUG:-0}"
201
- DEBUG_LOG="$ANALYTICS_DIR/debug.log"
202
-
203
- # Source shared business logic
204
- LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
205
- source "$LIB_DIR/lib_ralph_desk.zsh"
206
-
207
- # A16: Warn if running in foreground (may conflict with Claude Code pane)
208
- if [[ -z "${RLP_BACKGROUND:-}" ]]; then
209
- echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
210
- echo " Recommended: launch via Bash tool with run_in_background: true" >&2
211
- echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
212
- fi
213
-
214
# check_dead_pane() — decide whether a pane's foreground command means the
# agent process is gone and only a shell is left.
#   empty command → dead
#   zsh           → dead for every engine
#   bash          → dead unless engine is codex (codex runs its trigger in bash)
#   anything else → alive
# Args: $1=pane_current_command $2=engine (claude|codex, default claude)
#       $3=role (worker|verifier, default worker; accepted but not consulted)
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local pane_cmd="$1"
  local engine="${2:-claude}"
  local role="${3:-worker}"

  case "$pane_cmd" in
    ""|zsh)
      return 0
      ;;
    bash)
      if [[ "$engine" != "codex" ]]; then
        return 0
      fi
      ;;
  esac
  return 1
}
233
-
234
# launch_worker_codex() — start the codex Worker TUI in a pane, paste the
# instruction, then poll the pane until it shows signs of activity.
# Mirrors launch_worker_claude() so both engines run visibly inside tmux.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 once the instruction has been sent (even if activity was never
#          confirmed — a warning is logged in that case), 1 if the TUI never
#          became ready
launch_worker_codex() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  local max_attempts=15

  log " Launching Worker codex TUI in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for codex TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker codex failed to start"
    return 1
  fi

  # Send instruction to codex TUI
  sleep 3
  local worker_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker codex instruction sent (${#worker_instruction} chars)"

  # Submit loop — verify codex started working.
  # pane_check is declared once, outside the loop: re-running `local name`
  # for an existing local makes zsh print its value on every later iteration.
  local submit_attempts=0
  local started=0
  local pane_check=""
  while (( submit_attempts < max_attempts )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo | grep -q`: under pipefail an early grep
    # exit can SIGPIPE the writer and turn a real match into a miss.
    if grep -qi "working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing" <<< "$pane_check" 2>/dev/null; then
      log_debug "Worker codex started working after $((submit_attempts + 1)) checks"
      started=1
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge the prompt with two extra Enter presses before the next check
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done

  if (( ! started )); then
    log " WARNING: Worker codex activity not confirmed after $max_attempts checks (iter=$iter) — continuing"
  fi
  return 0
}
285
-
286
# launch_worker_claude() — start the claude Worker TUI in a pane, paste the
# instruction, and poll the pane until it shows signs of activity.
# If 15 checks pass without activity, claude is interrupted, exited,
# relaunched, and the instruction is sent once more.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 1 if the TUI never became ready on first launch, otherwise 0
#          (a failed restart recovery is logged but still returns 0)
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  local worker_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working.
  # pane_check is declared once, outside the loop: re-running `local name`
  # for an existing local makes zsh print its value on every later iteration.
  local submit_attempts=0
  local pane_check=""
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo | grep -q`: under pipefail an early grep
    # exit can SIGPIPE the writer and turn a real match into a miss.
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob" <<< "$pane_check" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    # Nudge the prompt with two extra Enter presses before the next check
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
    sleep 2
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" C-m
    if wait_for_pane_ready "$pane_id" 30; then
      sleep 3
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
      log " Worker restarted and instruction re-sent"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
    else
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
    fi
  fi

  return 0
}
366
-
367
# launch_verifier_codex() — start the codex Verifier TUI in a pane, paste the
# instruction, then poll the pane until it shows signs of activity.
# Mirrors launch_verifier_claude() so both engines run visibly inside tmux.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction has been sent (even if activity was never
#          confirmed — a warning is logged in that case), 1 if the TUI never
#          became ready
launch_verifier_codex() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  local max_attempts=15

  log " Launching Verifier codex TUI in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier codex failed to start"
    return 1
  fi

  sleep 3
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier codex instruction sent"

  # Submit loop — verify codex started working.
  # vs_check is declared once, outside the loop: re-running `local name`
  # for an existing local makes zsh print its value on every later iteration.
  local submit_attempts=0
  local started=0
  local vs_check=""
  while (( submit_attempts < max_attempts )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo | grep -q`: under pipefail an early grep
    # exit can SIGPIPE the writer and turn a real match into a miss.
    if grep -qi "working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing" <<< "$vs_check" 2>/dev/null; then
      log_debug "Verifier codex started working after $((submit_attempts + 1)) checks"
      started=1
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge the prompt with two extra Enter presses before the next check
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done

  if (( ! started )); then
    log " WARNING: Verifier codex activity not confirmed after $max_attempts checks (iter=$iter) — continuing"
  fi
  return 0
}
416
-
417
# launch_verifier_claude() — start the claude Verifier TUI in a pane, paste
# the instruction, then poll the pane until it shows signs of activity.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction has been sent (even if activity was never
#          confirmed — a warning is logged in that case), 1 if the TUI never
#          became ready
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  local max_attempts=15

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working.
  # vs_check is declared once, outside the loop: re-running `local name`
  # for an existing local makes zsh print its value on every later iteration.
  local submit_attempts=0
  local started=0
  local vs_check=""
  while (( submit_attempts < max_attempts )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo | grep -q`: under pipefail an early grep
    # exit can SIGPIPE the writer and turn a real match into a miss.
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" <<< "$vs_check" 2>/dev/null; then
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      started=1
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge the prompt with two extra Enter presses before the next check
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    (( submit_attempts++ ))
  done

  if (( ! started )); then
    log " WARNING: Verifier activity not confirmed after $max_attempts checks (iter=$iter) — continuing"
  fi
  return 0
}
465
-
466
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec).
# Writes a "verify" iter-signal on the worker's behalf:
#   - done-claim present → us_id is taken from the done-claim
#   - done-claim missing → us_id comes from the previous iter-signal
#     (fallback US-001), overridden by the first "Next ... US-NNN" line of
#     the campaign memory file when one exists
# The signal is rendered by jq so values are always escaped as valid JSON.
# Args: $1=iteration $2=signal_file
# Returns: 0 (signal generated), 1 (signal could not be written)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"
  local us_id=""
  local summary=""

  log " Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # Unreadable or empty done-claim: jq prints nothing, so fall back explicitly
    [[ -n "$us_id" ]] || us_id="unknown"
    log " Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
    summary="auto-generated after codex exit"
  else
    log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
    [[ -n "$us_id" ]] || us_id="US-001"
    local mem_us
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    if [[ -n "$mem_us" ]]; then
      us_id="$mem_us"
    fi
    summary="auto-generated after codex exit (no done-claim)"
  fi

  # -c keeps the signal on a single line, matching the hand-built format
  if ! jq -cn \
      --argjson iteration "$iter" \
      --arg us_id "$us_id" \
      --arg summary "$summary" \
      --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{iteration: $iteration, status: "verify", us_id: $us_id, summary: $summary, timestamp: $timestamp}' \
      > "$signal_file"; then
    log_error "Failed to write iter-signal for iteration $iter to $signal_file"
    return 1
  fi
  return 0
}
491
-
492
# handle_worker_exit_claude() — the claude worker exited before writing its
# signal file; log the failure and delegate recovery to restart_worker.
# Args: $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 when restart_worker succeeds, 1 when it fails
handle_worker_exit_claude() {
  local pane_id="$1" iter="$2" trigger_file="$3"

  log_error "Worker exited without writing signal file"
  restart_worker "$pane_id" "$iter" "$trigger_file" || return 1
  return 0
}
507
-
508
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
# replace_worker_pane() — kill a dead pane and open a fresh one in its slot,
# keeping the layout worker(top-right) / verifier(bottom-right).
#   verifier → split below the worker pane
#   worker   → split above the verifier pane
#   if the sibling pane is gone too → split to the right of the leader pane
# The new pane id is recorded in session-config.json (when that file exists)
# and printed on stdout for the caller to capture.
# NOTE(review): log() is called while stdout carries the return value —
# confirm it does not write to stdout, or captured pane ids will be polluted.
# Args: $1=old_pane_id $2=role (worker|verifier)
# Outputs: new pane id on stdout
# Returns: 0 on success, 1 if no replacement pane could be created
replace_worker_pane() {
  local old_pane="$1"
  local role="$2"   # "worker" or "verifier"

  log " Replacing dead $role pane $old_pane..."
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Pick the sibling pane to split from and the direction of the split
  local anchor_pane
  local -a split_flags
  if [[ "$role" == "verifier" ]]; then
    anchor_pane="$WORKER_PANE"
    split_flags=(-v)
  else
    anchor_pane="$VERIFIER_PANE"
    split_flags=(-v -b)
  fi

  local new_pane
  if tmux display-message -t "$anchor_pane" -p '#{pane_id}' &>/dev/null; then
    new_pane=$(tmux split-window "${split_flags[@]}" -d -t "$anchor_pane" -P -F '#{pane_id}' -c "$ROOT")
  else
    # Fallback: sibling pane also dead, split horizontally from leader
    new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  fi

  # A failed split must not be recorded as an (empty) pane id
  if [[ -z "$new_pane" ]]; then
    log_error "Failed to create replacement $role pane (old pane $old_pane)"
    return 1
  fi

  log " New $role pane: $new_pane (replaced $old_pane)"
  log_debug "[FLOW] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"

  # Update session-config.json with new pane ID. jq output is captured first
  # so a jq failure cannot overwrite the config with empty content.
  if [[ -f "$SESSION_CONFIG" ]]; then
    local updated_config
    if updated_config=$(jq --arg role "$role" --arg pane "$new_pane" \
        '.panes[$role] = $pane' "$SESSION_CONFIG"); then
      printf '%s\n' "$updated_config" | atomic_write "$SESSION_CONFIG"
      log_debug "Updated session-config.json: $role pane → $new_pane"
    else
      log_error "Could not update session-config.json with new $role pane $new_pane"
    fi
  fi

  echo "$new_pane"
}
548
-
549
- # =============================================================================
550
- # Dependency Checks
551
- # =============================================================================
552
-
553
# --- governance.md s7 step 1: Validate prerequisites before starting ---
# check_dependencies() — verify the required CLIs are on PATH, then resolve
# the agent binaries into CLAUDE_BIN / CODEX_BIN.
#   tmux, jq : always required
#   claude   : required unless both Worker and Verifier run on codex
#   codex    : required when either engine is codex or consensus is enabled
# Exits the script with status 1 if anything required is missing.
check_dependencies() {
  local missing=0
  local needs_claude=0
  local needs_codex=0

  # Evaluate the engine conditions once; they gate both the presence checks
  # and the binary resolution below.
  if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
    needs_claude=1
  fi
  if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
    needs_codex=1
  fi

  command -v tmux >/dev/null 2>&1 || {
    log_error "tmux is required but not found. Install with: brew install tmux"
    missing=1
  }

  if (( needs_claude )); then
    command -v claude >/dev/null 2>&1 || {
      log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
      missing=1
    }
  fi

  command -v jq >/dev/null 2>&1 || {
    log_error "jq is required but not found. Install with: brew install jq"
    missing=1
  }

  if (( needs_codex )); then
    command -v codex >/dev/null 2>&1 || {
      log_error "codex CLI not found. Install: npm install -g @openai/codex"
      missing=1
    }
  fi

  if (( missing )); then
    exit 1
  fi

  # Resolve full paths to the agent binaries that are actually in use
  if (( needs_claude )); then
    CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
    log " Claude binary: $CLAUDE_BIN"
  fi

  if (( needs_codex )); then
    CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
    log " Codex binary: $CODEX_BIN"
  fi
}
600
-
601
- # =============================================================================
602
- # Session Management (tmux pattern: pane IDs)
603
- # =============================================================================
604
-
605
# --- governance.md s7 step 1: Check for existing sessions ---
# check_existing_sessions() — refuse to start while another tmux session for
# the same campaign slug exists.
# A session belongs to this slug only when its name is exactly
# "rlp-desk-<slug>-<YYYYMMDD>-<HHMMSS>"; a plain prefix match would also catch
# other slugs that merely start with this one (e.g. "api" vs "api-v2").
# The session this script is running in is never counted.
# Exits the script with status 1 when a conflicting session is found.
check_existing_sessions() {
  local current_session
  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")

  local prefix="rlp-desk-${SLUG}-"
  local stamp_re='^[0-9]{8}-[0-9]{6}$'
  local -a existing
  existing=()
  local name
  while IFS= read -r name; do
    [[ -n "$name" && "$name" != "$current_session" ]] || continue
    # Literal prefix comparison: the slug is never interpreted as a regex
    [[ "$name" == "$prefix"* ]] || continue
    [[ "${name#"$prefix"}" =~ $stamp_re ]] || continue
    existing+=("$name")
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null)

  if (( ${#existing[@]} > 0 )); then
    log_error "Existing tmux session(s) found for slug '$SLUG':"
    for name in "${existing[@]}"; do
      echo " - $name"
    done
    echo ""
    echo "Kill existing session first:"
    echo " tmux kill-session -t <session-name>"
    exit 1
  fi
}
622
-
623
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
# create_session() — build the three-pane layout and persist session metadata.
#   inside tmux  : the current pane becomes the leader and SESSION_NAME is
#                  replaced by the current session's name
#   outside tmux : a detached 200x50 session named SESSION_NAME is created
# In both cases the worker pane is split to the right of the leader and the
# verifier pane is split below the worker.
# Globals written: LEADER_PANE, WORKER_PANE, VERIFIER_PANE, SESSION_NAME,
#                  BASELINE_COMMIT
# Files written:   COST_LOG (truncated), SESSION_CONFIG (via atomic_write)
# Returns: 0 on success, 1 if the session config could not be rendered
create_session() {
  log "Creating tmux session: $SESSION_NAME"

  # tmux split-pane pattern
  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: split CURRENT pane in place; the leader/user stays here
    # and Worker/Verifier appear on the RIGHT, visible immediately.
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"
  else
    # Outside tmux: create a new detached session to host the panes
    tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT"
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
  fi
  # -h off leader → right column (worker); -v off worker → stacked below (verifier)
  WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")

  # Set pane titles and enable border labels for visual distinction
  local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
  local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
  if [[ "$CONSENSUS_MODE" != "off" ]]; then
    verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
  fi
  tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
  tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
  tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
  # Color-coded pane borders: green=leader, blue=worker, yellow=verifier
  tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
  tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
  tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
  # Show pane titles in border
  tmux set-option pane-border-status top 2>/dev/null
  tmux set-option pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # AC12: Capture baseline commit before writing session config
  BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")

  # Truncate cost-log for fresh run (previous data in versioned campaign reports).
  # The explicit `:` matters in zsh: a redirection with no command runs
  # $NULLCMD (cat by default), which would sit reading stdin.
  : > "$COST_LOG"

  # SV flag warning for tmux mode
  if (( WITH_SELF_VERIFICATION )); then
    log " NOTE: --with-self-verification recorded but SV report generation is Agent-mode only"
  fi

  # Render the session config with jq so every string (paths, labels, commit)
  # is escaped correctly; numeric settings go through --argjson to stay numbers.
  local session_json
  if ! session_json=$(jq -n \
      --arg session_name "$SESSION_NAME" \
      --arg slug "$SLUG" \
      --arg created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      --arg baseline_commit "$BASELINE_COMMIT" \
      --arg leader_pane "$LEADER_PANE" \
      --arg worker_pane "$WORKER_PANE" \
      --arg verifier_pane "$VERIFIER_PANE" \
      --argjson pid "$$" \
      --arg root "$ROOT" \
      --arg worker_model "$WORKER_MODEL" \
      --arg verifier_model "$VERIFIER_MODEL" \
      --arg worker_engine "$WORKER_ENGINE" \
      --arg verifier_engine "$VERIFIER_ENGINE" \
      --arg worker_codex_model "$WORKER_CODEX_MODEL" \
      --arg worker_codex_reasoning "$WORKER_CODEX_REASONING" \
      --arg verifier_codex_model "$VERIFIER_CODEX_MODEL" \
      --arg verifier_codex_reasoning "$VERIFIER_CODEX_REASONING" \
      --arg verify_mode "$VERIFY_MODE" \
      --arg consensus_mode "$CONSENSUS_MODE" \
      --argjson max_iter "$MAX_ITER" \
      --argjson poll_interval "$POLL_INTERVAL" \
      --argjson iter_timeout "$ITER_TIMEOUT" \
      --argjson heartbeat_stale_threshold "$HEARTBEAT_STALE_THRESHOLD" \
      --argjson max_restarts "$MAX_RESTARTS" \
      --argjson idle_nudge_threshold "$IDLE_NUDGE_THRESHOLD" \
      --argjson max_nudges "$MAX_NUDGES" \
      --argjson cb_threshold "$CB_THRESHOLD" \
      --argjson effective_cb_threshold "$EFFECTIVE_CB_THRESHOLD" \
      --argjson with_self_verification "$WITH_SELF_VERIFICATION" \
      --argjson autonomous_mode "$AUTONOMOUS_MODE" \
      '{
        session_name: $session_name,
        slug: $slug,
        created_at: $created_at,
        baseline_commit: $baseline_commit,
        panes: {
          leader: $leader_pane,
          worker: $worker_pane,
          verifier: $verifier_pane
        },
        pid: $pid,
        root: $root,
        models: {
          worker: $worker_model,
          verifier: $verifier_model
        },
        engines: {
          worker: $worker_engine,
          verifier: $verifier_engine,
          worker_codex_model: $worker_codex_model,
          worker_codex_reasoning: $worker_codex_reasoning,
          verifier_codex_model: $verifier_codex_model,
          verifier_codex_reasoning: $verifier_codex_reasoning
        },
        verification: {
          verify_mode: $verify_mode,
          consensus_mode: $consensus_mode
        },
        config: {
          max_iter: $max_iter,
          poll_interval: $poll_interval,
          iter_timeout: $iter_timeout,
          heartbeat_stale_threshold: $heartbeat_stale_threshold,
          max_restarts: $max_restarts,
          idle_nudge_threshold: $idle_nudge_threshold,
          max_nudges: $max_nudges,
          cb_threshold: $cb_threshold,
          effective_cb_threshold: $effective_cb_threshold,
          with_self_verification: $with_self_verification,
          autonomous_mode: $autonomous_mode
        }
      }'); then
    log_error "Failed to render session config (check that numeric settings such as MAX_ITER are valid numbers)"
    return 1
  fi

  # Write session config (atomic write)
  printf '%s\n' "$session_json" | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
726
-
727
- # =============================================================================
728
- # Copy-Mode Guard (tmux pattern)
729
- # =============================================================================
730
-
731
- # --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
732
# Copy-mode guard used before every send-keys.
# Arguments: $1 - tmux pane id (e.g. %3)
# Returns:   0 when the pane does not report being in a mode;
#            1 when #{pane_in_mode} is "1" or the tmux query itself fails.
check_copy_mode() {
  local pane_id="$1"
  local in_mode
  in_mode=$(tmux display-message -p -t "$pane_id" '#{pane_in_mode}' 2>/dev/null) || return 1
  # Compare as a string: an arithmetic test (-eq) would evaluate unexpected
  # non-numeric output as a parameter name and abort under `set -u`.
  if [[ "$in_mode" == "1" ]]; then
    return 1  # pane is in copy mode, cannot send keys
  fi
  return 0
}
741
-
742
- # =============================================================================
743
- # Verification-Based Send Retry (tmux pattern)
744
- # =============================================================================
745
-
746
- # --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
747
# Paste text into a pane through a named tmux buffer.
# Arguments: $1 - tmux pane id
#            $2 - text to paste (delivered byte-for-byte)
# Returns:   1 if the buffer could not be loaded, otherwise the status of
#            `tmux paste-buffer`.
paste_to_pane() {
  local pane_id="$1"
  local text="$2"
  # printf '%s' keeps backslashes and leading "-n"/"-e" intact (echo does not),
  # and streaming over stdin avoids a predictable temp file under /tmp.
  printf '%s' "$text" | tmux load-buffer -b rlp-paste - 2>/dev/null || return 1
  # -d deletes the buffer once it has been pasted.
  tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null
}
756
-
757
- # --- governance.md s7 step 5: Send with copy-mode guard and retry ---
758
# Send text to a pane and press Enter until the pane appears to have consumed it.
#
# Flow: copy-mode guard -> dismiss known prompts seen in the last 20 lines ->
# paste -> up to 6 submit rounds -> C-u + re-paste + up to 4 rounds ->
# fail-open final Enter.
#
# Arguments: $1 - tmux pane id
#            $2 - text to send; an empty string means "just press Enter once"
# Returns:   0 when the text was consumed (or on fail-open), 1 when the pane
#            is in copy mode at any of the guard points.
safe_send_keys() {
  local pane_id="$1"
  local text="$2"
  # Declared once up front: re-running a bare `local name` inside a loop makes
  # zsh print the existing value unless TYPESET_SILENT is set.
  local initial_capture pane_tail
  local pane_busy=0 round=0 retry_round=0

  # Guard: copy-mode captures keys; skip entirely
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
  if grep -q "esc to interrupt" <<< "$initial_capture" 2>/dev/null; then
    pane_busy=1
  fi

  # Check for trust prompt and auto-dismiss
  if grep -q "Do you trust" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
  fi
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
  if grep -q "Do you want to" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Permission prompt detected, auto-approving"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.3
  fi
  # Auto-dismiss codex update prompt (select Skip)
  if grep -qi "new version\|update.*codex\|codex.*update" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Codex update prompt detected, selecting Skip"
    tmux send-keys -t "$pane_id" "2" C-m
    sleep 0.2
  fi

  # An empty payload can never be observed as "consumed" (an empty fixed-string
  # pattern matches every line), so the verification loops below would run to
  # exhaustion and spam Enter/C-u. Press Enter once instead.
  if [[ -z "$text" ]]; then
    log_debug " Empty payload for pane $pane_id, sending a single Enter"
    tmux send-keys -t "$pane_id" C-m
    return 0
  fi

  # Send text via buffer paste (reliable for long strings)
  log_debug " Pasting text to pane $pane_id (${#text} chars)"
  paste_to_pane "$pane_id" "$text"

  # Allow input buffer to settle (tmux: 150ms)
  sleep 0.15

  # Submit: up to 6 rounds of C-m double-press
  while (( round < 6 )); do
    sleep 0.1
    tmux send-keys -t "$pane_id" C-m
    # Busy pane on the first round: a single C-m only
    # (DO NOT send Tab — it toggles Claude Code permission mode)
    if ! (( round == 0 && pane_busy )); then
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # Text no longer visible in the last 5 lines => treated as consumed
    pane_tail=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$pane_tail" 2>/dev/null; then
      log_debug " Text consumed after round $((round + 1))"
      return 0
    fi
    sleep 0.14
    round=$(( round + 1 ))
  done

  # Safety gate: copy-mode check
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: C-u clear line, resend (tmux pattern)
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  paste_to_pane "$pane_id" "$text"
  sleep 0.12
  while (( retry_round < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    pane_tail=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$pane_tail" 2>/dev/null; then
      log_debug " Text consumed after adaptive retry round $((retry_round + 1))"
      return 0
    fi
    retry_round=$(( retry_round + 1 ))
  done

  # Fail-open: one last nudge
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
866
-
867
- # =============================================================================
868
- # Wait for Pane Ready (tmux pattern: paneLooksReady)
869
- # =============================================================================
870
-
871
# Poll a pane until it shows an input prompt and no running task.
#
# While waiting, known interstitial prompts (trust, permission, codex update)
# are answered automatically.
#
# Arguments: $1 - tmux pane id
#            $2 - timeout in seconds (default: 10)
# Returns:   always 0; on timeout it logs and proceeds (fail-open) so that
#            safe_send_keys can deal with the pane.
wait_for_pane_ready() {
  local pane_id="$1"
  local timeout="${2:-10}" # tmux default: 10s
  # Declared once, outside the loop: a repeated bare `local name` prints the
  # current value in zsh unless TYPESET_SILENT is set.
  local start captured ready
  start=$(date +%s)
  log " Waiting for pane $pane_id ready..."
  while (( $(date +%s) - start < timeout )); do
    captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
    if grep -q "Do you trust" <<< "$captured" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.12
      tmux send-keys -t "$pane_id" C-m
      sleep 2
      continue
    fi

    # Auto-approve permission prompts ("Do you want to create/overwrite X?")
    if grep -q "Do you want to" <<< "$captured" 2>/dev/null; then
      log " Permission prompt detected, auto-approving..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
      continue
    fi

    # Auto-dismiss codex update prompt (select Skip = option 2)
    if grep -qi "new version\|update.*codex\|codex.*update" <<< "$captured" 2>/dev/null; then
      log " Codex update prompt detected, selecting Skip..."
      tmux send-keys -t "$pane_id" "2" C-m
      sleep 0.5
      continue
    fi

    # Prompt detection. Alternation is used instead of a bracket expression so
    # the multibyte glyphs match regardless of locale.
    ready=0
    if grep -qE '^ ?(❯|>|›|»)' <<< "$captured" 2>/dev/null; then
      # any captured line starting (after at most one space) with a prompt glyph
      ready=1
    elif tail -5 <<< "$captured" | grep -qE '^[[:space:]]*(❯|›)' 2>/dev/null; then
      ready=1
    elif tail -3 <<< "$captured" | grep -qE '^[[:space:]]*(❯|›|>)' 2>/dev/null; then
      ready=1
    fi

    # Ready only if no task is currently running in the pane
    if (( ready )) && ! grep -q "esc to interrupt" <<< "$captured" 2>/dev/null; then
      log " Pane $pane_id is ready."
      return 0
    fi
    sleep 0.25
  done
  # Timeout — return success anyway (fail-open, let safe_send_keys handle it)
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
934
-
935
- # =============================================================================
936
- # Heartbeat Monitoring (tmux pattern)
937
- # =============================================================================
938
-
939
- # --- governance.md s7 step 5+6: Check heartbeat freshness ---
940
# Check whether a heartbeat file is fresh.
# Globals:   HEARTBEAT_STALE_THRESHOLD (read) - max age in seconds
# Arguments: $1 - path to the heartbeat JSON file
# Returns:   0 when `.epoch` is younger than the threshold; 1 when the file
#            is missing, unreadable by jq, has no integer `.epoch`, or is stale.
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"
  local hb_epoch now_epoch

  [[ -f "$hb_file" ]] || return 1

  # Read epoch seconds directly (avoids timezone parsing bugs)
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1

  # Reject empty or non-integer values before arithmetic: inside (( )) a stray
  # word would be evaluated as a parameter name (math error / `set -u` abort).
  if [[ ! "$hb_epoch" =~ ^[0-9]+$ ]]; then
    return 1
  fi

  now_epoch=$(date +%s)
  (( now_epoch - hb_epoch < threshold ))
}
959
-
960
- # Check if heartbeat indicates process has exited
961
# Report whether the heartbeat file records that the process has exited.
# Arguments: $1 - path to the heartbeat JSON file
# Returns:   0 when the file exists and its `.status` is "exited", else 1.
check_heartbeat_exited() {
  local hb_file="$1"
  local recorded_status=""

  [[ -f "$hb_file" ]] || return 1

  # jq failures are swallowed; an empty status simply fails the comparison.
  recorded_status=$(jq -r '.status // empty' "$hb_file" 2>/dev/null)
  if [[ "$recorded_status" == "exited" ]]; then
    return 0
  fi
  return 1
}
970
-
971
- # =============================================================================
972
- # Idle Pane Nudging (tmux pattern)
973
- # =============================================================================
974
-
975
- # --- governance.md s7 step 5+6: Nudge idle panes ---
976
# Nudge a pane whose last three visible lines have not changed for longer
# than IDLE_NUDGE_THRESHOLD seconds.
# Globals:   LAST_PANE_CONTENT, PANE_IDLE_SINCE (read/written, keyed by pane id),
#            IDLE_NUDGE_THRESHOLD, MAX_NUDGES (read)
# Arguments: $1 - tmux pane id
#            $2 - NAME of the caller's nudge counter variable; it is read and
#                 incremented in place
check_and_nudge_idle_pane() {
  local pane_id="$1"
  local nudge_count_var="$2"
  local current_content
  current_content=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -3)

  if [[ "$current_content" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    # Content changed: remember it and restart the idle clock.
    LAST_PANE_CONTENT[$pane_id]="$current_content"
    PANE_IDLE_SINCE[$pane_id]=$(date +%s)
    return 0
  fi

  local idle_since="${PANE_IDLE_SINCE[$pane_id]:-$(date +%s)}"
  local now
  now=$(date +%s)
  if (( now - idle_since <= IDLE_NUDGE_THRESHOLD )); then
    return 0
  fi

  # A12 fix: NEVER nudge if pane is busy (thinking/working) — nudge interrupts claude
  local _nudge_capture
  _nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
  if echo "$_nudge_capture" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" 2>/dev/null; then
    log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
    return 0
  fi

  local count=${(P)nudge_count_var}
  if (( count < MAX_NUDGES )); then
    log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
    safe_send_keys "$pane_id" ""
    count=$(( count + 1 ))
    # Indirect assignment without eval: in zsh, -g makes typeset act on the
    # existing parameter from the enclosing function rather than creating a
    # new local, and the argument is never re-parsed as shell code.
    typeset -g "${nudge_count_var}=${count}"
  fi
  return 0
}
1007
-
1008
- # =============================================================================
1009
- # Exponential Backoff Restart (tmux pattern)
1010
- # =============================================================================
1011
-
1012
- # --- governance.md s7 step 5: Restart dead workers with backoff ---
1013
# Restart a dead claude Worker in its pane with exponential backoff.
# Globals:   WORKER_ENGINE, MAX_RESTARTS, WORKER_MODEL, WORKER_EFFORT (read),
#            WORKER_RESTARTS (read/written, keyed by iteration)
# Arguments: $1 - tmux pane id
#            $2 - iteration number (key into WORKER_RESTARTS)
#            $3 - trigger file path (accepted for interface compatibility; unused)
# Returns:   0 after a relaunch was sent; 1 for codex workers (no-op) or when
#            MAX_RESTARTS has been reached (caller writes BLOCKED).
restart_worker() {
  local pane_id="$1"
  local iter="$2"

  # Codex workers are 1-shot exec; restart is not applicable
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
    return 1
  fi

  local restart_count="${WORKER_RESTARTS[$iter]:-0}"

  if (( restart_count >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1 # caller writes BLOCKED
  fi

  # Exponential backoff: 5s, 10s, 20s, 60s (cap).
  # zsh arrays are 1-indexed, so attempt N (restart_count + 1) reads delays[N].
  local -a delays=(5 10 20 60)
  local delay=${delays[$((restart_count + 1))]:-60}
  log " Restarting worker (attempt $((restart_count + 1))/$MAX_RESTARTS) after ${delay}s backoff..."
  sleep "$delay"

  # Kill existing claude, wait for shell prompt
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
  sleep 2

  # Re-launch worker (tmux interactive pattern). Only the claude engine can
  # reach this point; codex returned early above.
  safe_send_keys "$pane_id" "$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
  return 0
}
1051
-
1052
- # =============================================================================
1053
- # Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
1054
- # =============================================================================
1055
-
1056
- # Per-US PRD injection helper
1057
- # Substitutes the full PRD path with a per-US split path in the Worker prompt base.
1058
- # Falls back to the full PRD with a stderr warning if the split file is missing.
1059
- # Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
1060
# Print the Worker prompt base, optionally pointing it at a per-US PRD.
#
# Every literal occurrence of the full PRD path is replaced by the per-US path.
# The replacement is done with awk index()/substr() rather than a sed program
# built from the paths, so characters such as '.', '&' and '|' in either path
# are treated as plain text.
#
# Arguments: $1 - prompt base file
#            $2 - full PRD path to look for
#            $3 - per-US PRD path (empty = no substitution)
# Outputs:   the (possibly rewritten) prompt on stdout; a warning on stderr
#            when $3 is given but the file does not exist.
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # Paths travel through the environment: `awk -v` would interpret
    # backslash escapes in them.
    RLP_PRD_FROM="$full_prd" RLP_PRD_TO="$per_us_prd" awk '
      BEGIN {
        from = ENVIRON["RLP_PRD_FROM"]
        to = ENVIRON["RLP_PRD_TO"]
        n = length(from)
      }
      {
        rest = $0
        out = ""
        while (n > 0 && (i = index(rest, from)) > 0) {
          out = out substr(rest, 1, i - 1) to
          rest = substr(rest, i + n)
        }
        print out rest
      }
    ' < "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
1074
-
1075
- # --- governance.md s7 step 4+5: Write prompt and trigger to files ---
1076
- # NEVER send prompt content through tmux send-keys.
1077
- # Write payloads to files, send only short trigger commands (<200 chars).
1078
# Write the Worker prompt and trigger script for one iteration.
#
# NEVER send prompt content through tmux send-keys: the payload is written to
# files and only a short trigger command is sent to the pane.
#
# Globals (read): LOGS_DIR, MEMORY_FILE, VERIFY_MODE, US_LIST, VERIFIED_US,
#                 DESK, SLUG, WORKER_PROMPT_BASE, _PRD_CHANGED, AUTONOMOUS_MODE,
#                 WORKER_ENGINE, WORKER_MODEL, WORKER_EFFORT, CODEX_BIN,
#                 WORKER_CODEX_MODEL, WORKER_CODEX_REASONING, WORKER_HEARTBEAT
# Arguments: $1 - iteration number
# Outputs:   iter-NNN.worker-prompt.md and an executable
#            iter-NNN.worker-trigger.sh under LOGS_DIR (both via atomic_write).
write_worker_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"

  # Build the worker prompt: base prompt + iteration context
  local contract
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -5)

  # Check for fix contract from previous verifier failure
  local prev_iter=$((iter - 1))
  local fix_contract_file
  fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' "$prev_iter").fix-contract.md"

  # Compute next unverified US before prompt assembly (required for per-US PRD injection).
  # `us` is declared local so the loop variable does not leak into global scope.
  local next_us="" us
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
    for us in $(echo "$US_LIST" | tr ',' ' '); do
      # Literal membership test against the comma-delimited verified list
      if [[ ",$VERIFIED_US," != *",$us,"* ]]; then
        next_us="$us"
        break
      fi
    done
  fi

  # Per-US PRD injection: substitute full PRD path with per-US split path when available
  local per_us_prd=""
  [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
  local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"

  {
    inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
    echo ""
    echo "---"
    echo "## Iteration Context"
    echo "- **Iteration**: $iter"
    echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
    echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
    if (( _PRD_CHANGED )); then
      echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
    fi

    # Include fix contract if previous verifier failed
    if [[ -f "$fix_contract_file" ]]; then
      echo ""
      echo "---"
      echo "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)"
      echo "The Verifier REJECTED your previous work. You MUST fix the issues below."
      echo "Do NOT just resubmit — actually change the code to address each issue."
      echo ""
      cat "$fix_contract_file"
    fi

    # Per-US mode: tell Worker exactly which US to work on
    if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
      if [[ -n "$next_us" ]]; then
        echo ""
        echo "---"
        echo "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract"
        echo "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**"
        echo "The Leader has determined that **${next_us}** is the next unverified story."
        echo "You MUST implement ONLY **${next_us}** in this iteration."
        echo "Do NOT implement any other user stories."
        # Per-US test-spec injection: point Worker to scoped test-spec if available
        if [[ -f "$per_us_test_spec" ]]; then
          echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
        else
          echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
        fi
        echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
        echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
        echo ""
        echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
      elif [[ -n "$VERIFIED_US" ]]; then
        # All individual US verified — this is the final full verify iteration
        echo ""
        echo "---"
        echo "## FINAL VERIFICATION ITERATION"
        echo "All individual US have been verified: $VERIFIED_US"
        echo "Run all tests and verification commands to confirm everything works together."
        echo "Signal verify with us_id=\"ALL\" for the final full verification."
      fi
    elif [[ "$VERIFY_MODE" = "batch" ]]; then
      echo ""
      echo "---"
      if [[ -n "$VERIFIED_US" ]]; then
        echo "## BATCH MODE — CONTINUE FROM PARTIAL PROGRESS"
        echo "The following US have already been verified: **$VERIFIED_US**"
        echo "- Do NOT re-implement these — they are done."
        echo "- Focus ONLY on the remaining unverified user stories."
        echo '- Signal verify with us_id="ALL" when the remaining stories are complete.'
      else
        echo "## BATCH MODE OVERRIDE"
        echo "Ignore any per-US signal instructions above. In batch mode:"
        echo "- Implement ALL user stories in this iteration"
        echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
        echo "- Do NOT signal verify after individual stories"
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict you find by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep working."
    fi
  } | atomic_write "$prompt_file"

  # Paths are embedded in a generated script, so shell-quote them once here;
  # otherwise a space or metacharacter in LOGS_DIR breaks the trigger.
  local quoted_prompt quoted_heartbeat
  quoted_prompt=$(printf '%q' "$prompt_file")
  quoted_heartbeat=$(printf '%q' "$WORKER_HEARTBEAT")

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
  # Engine-specific launch command (expanded at write time)
  local engine_cmd engine_comment
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    engine_cmd="${CODEX_BIN:-codex} \\
  -m $WORKER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $quoted_prompt)\""
    engine_comment="# Run codex with fresh context (fallback trigger — TUI primary launch via launch_worker_codex)"
  else
    engine_cmd=$(build_claude_cmd print "$WORKER_MODEL" "$prompt_file" "$output_log" "$WORKER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 5)"
  fi

  # Unquoted heredoc: $iter, paths and the engine command expand now;
  # everything escaped with \$ is evaluated when the trigger runs.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE=$quoted_heartbeat

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Worker prompt: $prompt_file"
  log " Worker trigger: $trigger_file"
}
1239
-
1240
# Write the Verifier prompt and trigger script for one iteration.
#
# Globals (read): LOGS_DIR, SIGNAL_FILE, VERIFIER_PROMPT_BASE, DONE_CLAIM_FILE,
#                 VERIFY_MODE, VERIFIED_US, AUTONOMOUS_MODE, VERIFIER_ENGINE,
#                 VERIFIER_MODEL, VERIFIER_EFFORT, CODEX_BIN,
#                 VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING,
#                 VERIFIER_HEARTBEAT
# Arguments: $1 - iteration number
#            $2 - engine override (default: $VERIFIER_ENGINE), used for consensus
#            $3 - claude model override (default: $VERIFIER_MODEL)
#            $4 - optional file-name suffix for consensus (e.g. "-claude", "-codex")
# Outputs:   iter-NNN.verifier<suffix>-prompt.md and an executable
#            iter-NNN.verifier<suffix>-trigger.sh under LOGS_DIR (via atomic_write).
write_verifier_trigger() {
  local iter="$1"
  local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
  local verifier_model="${3:-$VERIFIER_MODEL}"
  local suffix="${4:-}"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-output.log"

  # Read us_id from iter-signal.json for per-US scoping
  local us_id=""
  if [[ -f "$SIGNAL_FILE" ]]; then
    us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
  fi

  # Build verifier prompt from base with US scope
  {
    cat "$VERIFIER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Verification Context"
    echo "- **Iteration**: $iter"
    echo "- **Done Claim**: $DONE_CLAIM_FILE"
    echo "- **Verify Mode**: $VERIFY_MODE"
    if [[ -n "$us_id" ]]; then
      if [[ "$us_id" = "ALL" ]]; then
        echo "- **Scope**: FULL VERIFY — check ALL acceptance criteria from the PRD"
      else
        echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
      fi
      if [[ -n "$VERIFIED_US" ]]; then
        echo "- **Previously verified US**: $VERIFIED_US"
        echo "- **Note**: Skip re-verifying the above US. Focus on unverified stories."
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep verifying."
    fi
  } | atomic_write "$prompt_file"

  # Paths are embedded in a generated script, so shell-quote them once here;
  # otherwise a space or metacharacter in LOGS_DIR breaks the trigger.
  local quoted_prompt quoted_output quoted_heartbeat
  quoted_prompt=$(printf '%q' "$prompt_file")
  quoted_output=$(printf '%q' "$output_log")
  quoted_heartbeat=$(printf '%q' "$VERIFIER_HEARTBEAT")

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
  # Engine-specific launch command (expanded at write time)
  local engine_cmd engine_comment
  if [[ "$verifier_engine" = "codex" ]]; then
    engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $quoted_prompt)\" \\
  > >(tee $quoted_output) 2>&1"
    engine_comment="# Run codex with fresh context (governance.md s7 step 7) — process substitution preserves tty"
  else
    engine_cmd=$(build_claude_cmd print "$verifier_model" "$prompt_file" "$output_log" "$VERIFIER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 7)"
  fi

  # Unquoted heredoc: $iter, paths and the engine command expand now;
  # everything escaped with \$ is evaluated when the trigger runs.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE=$quoted_heartbeat

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Verifier prompt: $prompt_file"
  log " Verifier trigger: $trigger_file"
}
1338
-
1339
- # =============================================================================
1340
- # Cleanup (trap handler)
1341
- # =============================================================================
1342
-
1343
# Count lines matching a basic regular expression.
# Arguments: $1 - pattern; $2 - optional file (stdin is read when omitted)
# Outputs:   exactly one integer on stdout; 0 when nothing matches or the
#            file cannot be read.
# `grep -c` already prints "0" (and exits 1) when nothing matches, so the
# common `grep -c … || echo 0` idiom would emit two lines; this avoids that.
_cleanup_count_matches() {
  local pattern="$1"
  local n=""
  if (( $# >= 2 )); then
    n=$(grep -c -- "$pattern" "$2" 2>/dev/null) || true
  else
    n=$(grep -c -- "$pattern" 2>/dev/null) || true
  fi
  printf '%s\n' "${n:-0}"
}

# Trap handler: release the lockfile, stop and remove the Worker/Verifier
# panes, delete leftover tmp files, generate reports, record the final status
# in metadata.json, emit debug validation lines, and print the summary.
# Reads the runner's globals; takes no arguments.
cleanup() {
  log "Cleaning up..."

  # Remove lockfile
  if (( LOCKFILE_ACQUIRED )); then
    rm -f "$LOCKFILE_PATH" 2>/dev/null
  else
    log_debug "cleanup: lockfile not owned by this process, skipping removal"
  fi

  # Kill claude processes then kill panes
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
  fi
  sleep 2
  # Kill panes on completion
  if [[ -n "${WORKER_PANE:-}" ]]; then
    tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
  fi
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
    tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
  fi
  log " Panes cleaned up."

  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
  setopt local_options nonomatch 2>/dev/null
  rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null

  # AC4: Generate campaign report on all terminal states (always-on)
  generate_campaign_report

  # US-001: Generate SV report after campaign report (tmux mode)
  generate_sv_report

  # Elapsed time for the summary and the debug lines
  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  local final_status="TIMEOUT"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  fi

  # --- Update metadata.json with final status ---
  if [[ -f "$METADATA_FILE" ]]; then
    if jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '.campaign_status = $status | .end_time = $end_time' \
      "$METADATA_FILE" > "${METADATA_FILE}.tmp"; then
      mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
    else
      # Do not leave a truncated tmp file behind when jq fails
      rm -f "${METADATA_FILE}.tmp" 2>/dev/null
    fi
  fi

  if (( DEBUG )); then
    log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"

    # --- Validation ---
    log_debug "[FLOW] === Execution Validation ==="

    # 1. Did the correct verify mode run?
    log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"

    # 2. Per-US: were all US individually verified?
    if [[ "$VERIFY_MODE" = "per-us" ]]; then
      local prd_file="$DESK/plans/prd-$SLUG.md"
      local expected_us=""
      if [[ -f "$prd_file" ]]; then
        expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
      fi
      local verified_count expected_count
      verified_count=$(printf '%s\n' "$VERIFIED_US" | tr ',' '\n' | _cleanup_count_matches 'US-')
      expected_count=$(printf '%s\n' "$expected_us" | tr ',' '\n' | _cleanup_count_matches 'US-')

      if [[ "$final_status" = "COMPLETE" ]]; then
        if (( verified_count >= expected_count )); then
          log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
        else
          log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
        fi
      else
        log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
      fi
    fi

    # 3. Consensus: were both engines used?
    if [[ "$CONSENSUS_MODE" != "off" ]]; then
      if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
        log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
      else
        log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
      fi
    fi

    # 4. Engine match: did the configured engines actually run?
    local worker_dispatches verifier_dispatches
    worker_dispatches=$(_cleanup_count_matches '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG")
    verifier_dispatches=$(_cleanup_count_matches '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG")
    log_debug "[FLOW] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"

    # 5. Fix loops: how many fix contracts were generated?
    local fix_count
    fix_count=$(_cleanup_count_matches '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG")
    log_debug "[FLOW] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"

    # 6. Circuit breakers: any triggered?
    local cb_count
    cb_count=$(_cleanup_count_matches '\[GOV\].*circuit_breaker=' "$DEBUG_LOG")
    log_debug "[FLOW] circuit_breakers_triggered=$cb_count"

    # 7. Overall result
    log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
  fi

  echo ""
  echo "============================================================"
  echo " Ralph Desk Tmux Runner - Session Complete"
  echo "============================================================"
  echo " Session: $SESSION_NAME"
  echo " Slug: $SLUG"
  echo " Iterations: $ITERATION / $MAX_ITER"
  echo " Elapsed: ${minutes}m ${seconds}s"
  echo ""

  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    echo " Final State: COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    echo " Final State: BLOCKED"
  else
    echo " Final State: STOPPED (interrupted or timeout)"
  fi

  echo ""
  echo " Tmux session left alive for inspection:"
  echo " tmux attach -t $SESSION_NAME"
  echo " tmux kill-session -t $SESSION_NAME"
  echo "============================================================"
}
1485
-
1486
- # =============================================================================
1487
- # Poll Loop (used for both Worker and Verifier)
1488
- # =============================================================================
1489
-
1490
# --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
#
# Loops until the signal file appears or a failure condition is hit. Each pass
# checks, in order: the per-iteration timeout, the signal file, the worker-only
# A4 fallback (done-claim without signal), API-error text in the pane, heartbeat
# state, a dead pane, a pending permission prompt, and finally idle nudging.
#
# Arguments:
#   $1 signal_file    - file whose existence ends the poll successfully
#   $2 heartbeat_file - passed to check_heartbeat_exited / check_heartbeat
#   $3 pane_id        - tmux pane to inspect and send keys to
#   $4 trigger_file   - forwarded to handle_worker_exit_claude / restart_worker
#   $5 role           - label used in log lines; any value matching *erifier*
#                       disables the worker-only branches (A4, codex exit handler)
# Globals read:  ITER_TIMEOUT, ITERATION, DONE_CLAIM_FILE, WORKER_ENGINE,
#                POLL_INTERVAL, HEARTBEAT_STALE_THRESHOLD,
#                _API_MAX_RETRIES, _API_RETRY_INTERVAL_S
# Globals written: LAST_PANE_CONTENT, PANE_IDLE_SINCE, HEARTBEAT_STALE_COUNT
# Returns:
#   0 - signal file present (found, synthesized by A4, or after the codex
#       exit handler ran)
#   1 - timeout, dead pane, heartbeat circuit breaker, or restart failure
#   2 - API retries exhausted (a BLOCKED sentinel has been written)
poll_for_signal() {
  local signal_file="$1"
  local heartbeat_file="$2"
  local pane_id="$3"
  local trigger_file="$4"
  local role="$5"  # "worker" or "verifier"
  local nudge_count=0
  local api_retry_count=0
  local poll_start
  poll_start=$(date +%s)

  # Loop-scoped variables are declared once here. Re-running a value-less
  # `local name` on every pass makes zsh print "name=value" for the already
  # existing parameter unless TYPESET_SILENT is set.
  local now elapsed dc_us_id
  local pane_output_for_retry is_api_text_retry
  local poll_cmd poll_capture
  # Union of the transient API error markers: a standalone 500 or 529,
  # or one of the overload / throttling phrases (matched case-insensitively).
  local api_error_re='(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable'

  # Initialize idle tracking for this pane
  LAST_PANE_CONTENT[$pane_id]=""
  PANE_IDLE_SINCE[$pane_id]=$(date +%s)

  while true; do
    now=$(date +%s)
    elapsed=$(( now - poll_start ))

    # Per-iteration timeout check
    if (( elapsed >= ITER_TIMEOUT )); then
      log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
      return 1  # timeout
    fi

    # Check if signal file appeared
    if [[ -f "$signal_file" ]]; then
      log " Signal file detected: $signal_file"
      return 0  # success
    fi

    # A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
    # ONLY for Worker polling — Verifier waits for verdict file, not done-claim
    if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
      dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
      if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
        log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
        log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
        # Build the signal with jq so a us_id containing quotes or backslashes
        # (it comes from the Worker-written done-claim) is escaped properly.
        jq -c -n \
          --arg iteration "$ITERATION" \
          --arg us_id "$dc_us_id" \
          --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
          '{iteration: ($iteration | tonumber? // $iteration), status: "verify", us_id: $us_id, summary: "auto-generated by A4 fallback (done-claim without signal)", timestamp: $timestamp}' \
          > "$signal_file"
        return 0
      fi
    fi

    # API transient-error recovery with bounded backoff
    pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    is_api_text_retry=0
    if [[ -n "$pane_output_for_retry" ]] \
      && grep -qiE "$api_error_re" <<<"$pane_output_for_retry"; then
      is_api_text_retry=1
    fi

    if (( is_api_text_retry )) || is_api_error "$pane_id"; then
      (( api_retry_count++ ))
      log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
      if (( api_retry_count >= _API_MAX_RETRIES )); then
        log_error "API unavailable after ${_API_MAX_RETRIES} retries"
        write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries"
        return 2
      fi
      # A5: If pane shows "queued messages" or rate-limit corruption, restart pane
      if grep -qi 'queued messages' <<<"$pane_output_for_retry"; then
        log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
        log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
        tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
        tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
        wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
      fi
      sleep "$_API_RETRY_INTERVAL_S"
      continue
    else
      # Only consecutive API-error passes count toward the retry limit.
      api_retry_count=0
    fi

    # Check heartbeat freshness (tmux pattern)
    if [[ -f "$heartbeat_file" ]]; then
      if check_heartbeat_exited "$heartbeat_file"; then
        # Process exited but no signal file -- give a brief grace period
        sleep 3
        if [[ -f "$signal_file" ]]; then
          log " Signal file detected after process exit: $signal_file"
          return 0
        fi
        # Dispatch to engine-specific exit handler
        if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
          handle_worker_exit_codex "$ITERATION" "$signal_file"
          return 0
        fi
        # Claude path (or verifier of any engine)
        if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
          # Reset poll timer for the restart
          poll_start=$(date +%s)
          nudge_count=0
          LAST_PANE_CONTENT[$pane_id]=""
          PANE_IDLE_SINCE[$pane_id]=$(date +%s)
          sleep "$POLL_INTERVAL"
          continue
        else
          return 1  # max restarts exceeded
        fi
      fi

      if ! check_heartbeat "$heartbeat_file"; then
        log " WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
        (( HEARTBEAT_STALE_COUNT++ ))
        # Circuit breaker: 3 consecutive heartbeat stale events
        if (( HEARTBEAT_STALE_COUNT >= 3 )); then
          log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
          log_error "Circuit breaker: 3 consecutive heartbeat stale events"
          return 1
        fi
        # Attempt restart
        if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          poll_start=$(date +%s)
          nudge_count=0
          continue
        else
          return 1
        fi
      else
        # Heartbeat is fresh, reset stale counter
        HEARTBEAT_STALE_COUNT=0
      fi
    fi

    # Dead pane detection during poll: check if claude/codex process died.
    # Delegates to check_dead_pane() for engine-aware logic.
    poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
    if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
      log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
      log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
      # Return failure so caller can handle recovery
      return 1
    fi

    # Auto-approve permission prompts during poll
    poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if [[ "$poll_capture" == *"Do you want to"* ]]; then
      log " Permission prompt detected during poll, auto-approving..."
      log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
    fi

    # Idle pane nudging (tmux pattern); the counter is passed by name.
    check_and_nudge_idle_pane "$pane_id" "nudge_count"

    sleep "$POLL_INTERVAL"
  done
}
1649
-
1650
- # =============================================================================
1651
- # Consensus Verification (run two verifiers sequentially in same pane)
1652
- # =============================================================================
1653
-
1654
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
#
# Writes the verifier trigger, brings the Verifier pane back to a clean shell,
# launches the requested engine, waits for $VERDICT_FILE and copies it to the
# caller-supplied destination.
#
# Arguments:
#   $1 iter         - iteration number (used in log file names)
#   $2 engine       - claude|codex
#   $3 model        - model for this verifier (used on the claude path)
#   $4 suffix       - "-claude" or "-codex"; part of trigger/prompt file names
#   $5 verdict_dest - where to copy the verdict file
# Globals read:  LOGS_DIR, VERDICT_FILE, VERIFIER_HEARTBEAT, SESSION_CONFIG,
#                VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING, VERIFIER_EFFORT,
#                CODEX_BIN, ITER_TIMEOUT, POLL_INTERVAL
# Globals written: VERIFIER_PANE (re-read from SESSION_CONFIG if the pane was replaced)
# Returns: 0 once the verdict has been copied; 1 on launch failure, poll
#          failure, or timeout without a verdict.
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Write trigger for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
  local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"

  # Clean previous Verifier session (with dead pane detection)
  local verifier_cmd
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ -z "$verifier_cmd" ]]; then
    # tmux reported no command for the pane: treat the pane as gone.
    log " Verifier pane $VERIFIER_PANE is gone — replacing..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
    replace_worker_pane "$VERIFIER_PANE" "verifier"
    VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
    log " New verifier pane: $VERIFIER_PANE"
  elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
    log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
    tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
    sleep 0.2
    tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
    sleep 0.3
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    # A previous verifier TUI is still running: interrupt it, then ask it to exit.
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier — dispatch to engine-specific function
  local verifier_launch
  if [[ "$engine" = "codex" ]]; then
    verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
    launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
    log_debug "Verifier$suffix codex TUI dispatched"
  else
    verifier_launch="$(build_claude_cmd tui "$model" "" "" "$VERIFIER_EFFORT")"
    if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi
    log_debug "Verifier$suffix claude dispatched"
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: file poll + wait for process exit to avoid reading partial results
    log " Polling for verify-verdict.json ($suffix, codex TUI)..."
    local codex_poll_start
    codex_poll_start=$(date +%s)
    local _verdict_detected=0
    # Declared outside the loop: a repeated value-less `local` makes zsh print
    # the existing parameter unless TYPESET_SILENT is set.
    local _pane_cmd codex_elapsed
    while true; do
      # Phase 1: wait for verdict file (must already parse as JSON)
      if (( ! _verdict_detected )) && [[ -f "$VERDICT_FILE" ]]; then
        if jq . "$VERDICT_FILE" >/dev/null 2>&1; then
          log " Verdict file detected, waiting for codex process to finish..."
          _verdict_detected=1
        fi
      fi
      # Phase 2: verdict exists, wait for codex to exit (pane returns to shell)
      if (( _verdict_detected )); then
        _pane_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null || echo "")
        if [[ "$_pane_cmd" = "zsh" || "$_pane_cmd" = "bash" || -z "$_pane_cmd" ]]; then
          log " Codex verifier$suffix process exited. Proceeding."
          # Re-read verdict in case codex updated it before exiting
          if jq . "$VERDICT_FILE" >/dev/null 2>&1; then
            break
          fi
        fi
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        if (( _verdict_detected )); then
          log " Codex verifier$suffix timed out waiting for exit, but verdict exists. Proceeding."
          break
        fi
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log " Polling for verify-verdict.json ($suffix)..."
    # Capture the status with `|| rc=$?`. Reading $? inside `if ! cmd; then`
    # yields the status of the negation (always 0), which hid the rc=2 case.
    local verifier_poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix" || verifier_poll_rc=$?
    if (( verifier_poll_rc == 2 )); then
      # rc=2 means API retries were exhausted; poll_for_signal has already
      # logged the error and written the BLOCKED sentinel.
      return 1
    elif (( verifier_poll_rc != 0 )); then
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination
  cp "$VERDICT_FILE" "$verdict_dest"
  log " Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
1769
-
1770
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
#
# For every story in the comma-separated US_LIST: rewrite the signal file so it
# names that story, launch the configured verifier, wait for its verdict and
# stop at the first story whose verdict is not "pass". If all stories pass and
# VERIFICATION_CMD is set, run it as an integration check.
#
# Arguments:
#   $1 iter - iteration number (used in log/archive file names)
# Globals read:  US_LIST, VERIFY_MODE, SIGNAL_FILE, VERDICT_FILE, LOGS_DIR,
#                VERIFIER_PANE, VERIFIER_ENGINE, VERIFIER_MODEL, VERIFIER_EFFORT,
#                VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING, CODEX_BIN,
#                VERIFIER_HEARTBEAT, VERIFICATION_CMD
# Globals written: FAILED_US (failing story id, or "integration"), SIGNAL_FILE contents
# Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
run_sequential_final_verify() {
  local iter="$1"
  # Loop-scoped variables are declared once; a repeated value-less `local`
  # inside the loop makes zsh print the existing parameter unless
  # TYPESET_SILENT is set. `us` is local so it no longer leaks as a global.
  local us orig_signal verifier_prompt verifier_cmd verifier_launch verdict
  local poll_rc
  FAILED_US=""

  log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  for us in $(printf '%s\n' "$US_LIST" | tr ',' ' '); do
    log " Final verify: checking $us..."

    # Override signal file to scope verifier to this US.
    # NOTE(review): the previous signal content is captured here but never
    # written back anywhere in this function — confirm whether a restore was intended.
    orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
    printf '%s\n' "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"

    # Write scoped verifier trigger
    write_verifier_trigger "$iter"
    verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"

    # Clean verifier pane
    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
    fi
    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

    # Drop the previous story's verdict BEFORE launching, so a verdict written
    # by the new verifier can never be deleted by this cleanup.
    rm -f "$VERDICT_FILE"

    # Launch verifier
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
    else
      verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
      launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
        log_error "Failed to launch verifier for $us"
        FAILED_US="$us"
        return 1
      }
    fi

    # Poll for verdict
    poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
    if (( poll_rc != 0 )); then
      log_error "Verifier poll failed for $us (rc=$poll_rc)"
      FAILED_US="$us"
      return 1
    fi

    # Check verdict: anything other than the literal "pass" counts as failure
    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
    if [[ "$verdict" != "pass" ]]; then
      FAILED_US="$us"
      log " Sequential final verify FAILED at $us"
      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
      return 1
    fi
    log " Sequential final verify: $us PASSED"

    # Archive per-US final verdict
    cp "$VERDICT_FILE" "$LOGS_DIR/iter-$(printf '%03d' $iter).final-verdict-${us}.json" 2>/dev/null
  done

  # Integration check: run tests if VERIFICATION_CMD is set.
  # The command string is evaluated as shell code with its output discarded.
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log " Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log " Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log " Integration test suite PASSED"
  fi

  log " Sequential final verify: ALL PASSED"
  return 0
}
1855
-
1856
# --- US-005: Determine whether consensus verification should run for this signal ---
# Uses unified CONSENSUS_MODE: off|all|final-only
#
# Arguments:
#   $1 signal_us_id - us_id from the signal (optional; defaults to empty)
# Returns 0 (use consensus) or 1 (single engine):
#   all        -> always 0
#   final-only -> 0 only when the us_id is exactly "ALL"
#   anything else (including off, empty, or unset) -> 1
_should_use_consensus() {
  local signal_us_id="${1:-}"
  # Default to "off" so an unset CONSENSUS_MODE does not abort under `set -u`.
  case "${CONSENSUS_MODE:-off}" in
    all)
      return 0
      ;;
    final-only)
      if [[ "$signal_us_id" == "ALL" ]]; then
        return 0
      fi
      return 1
      ;;
    *)
      return 1
      ;;
  esac
}
1867
-
1868
# Print the merged issue list of both per-engine verdict files as one compact
# JSON array, tagging every issue with its "source" engine. Falls back to []
# for a file that jq cannot process or when the merge itself fails.
#   $1 claude verdict file, $2 codex verdict file
_consensus_merged_issues() {
  local claude_file="$1" codex_file="$2"
  local claude_issues codex_issues merged
  claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_file" 2>/dev/null || echo '[]')
  codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_file" 2>/dev/null || echo '[]')
  # printf '%s' passes the JSON text through untouched. zsh's builtin echo
  # would expand backslash escapes such as \n inside issue descriptions and
  # hand jq invalid JSON.
  merged=$(printf '%s\n%s\n' "$claude_issues" "$codex_issues" | jq -c -s 'add // []' 2>/dev/null)
  printf '%s\n' "${merged:-[]}"
}

# Write a merged "fail" verdict to $VERDICT_FILE via atomic_write.
#   $1 summary text, $2 recommended_state_transition, $3 round number,
#   $4 issues as a JSON array
# Reads CLAUDE_VERDICT / CODEX_VERDICT for the consensus section.
_consensus_write_fail_verdict() {
  local summary="$1" transition="$2" round="$3" issues_json="$4"
  jq -n \
    --arg verified_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg summary "$summary" \
    --arg transition "$transition" \
    --arg claude "$CLAUDE_VERDICT" \
    --arg codex "$CODEX_VERDICT" \
    --argjson round "$round" \
    --argjson issues "$issues_json" \
    '{
      verdict: "fail",
      verified_at_utc: $verified_at,
      summary: $summary,
      issues: $issues,
      recommended_state_transition: $transition,
      consensus: {claude: $claude, codex: $codex, round: $round}
    }' | atomic_write "$VERDICT_FILE"
}

# Print one engine's section of the fix contract: the verdict heading and,
# for a "fail" verdict, one bullet per structured issue.
#   $1 display name ("Claude" / "Codex"), $2 verdict string, $3 verdict file
_consensus_contract_section() {
  local engine_label="$1" engine_verdict="$2" engine_file="$3"
  printf '%s\n' "## $engine_label Verdict: $engine_verdict"
  if [[ "$engine_verdict" = "fail" ]]; then
    printf '%s\n' "### $engine_label Issues"
    jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$engine_file" 2>/dev/null || echo "- (no structured issues)"
  fi
}

# --- US-004: Run consensus verification (claude + codex sequentially) ---
#
# Runs the claude verifier, then the codex verifier, in the Verifier pane.
# Both must report "pass" for the consensus to pass; otherwise a combined fix
# contract and a merged fail verdict are written.
#
# Arguments:
#   $1 iter - iteration number (used in log file names)
# Globals read:  LOGS_DIR, VERDICT_FILE, VERIFIER_MODEL, VERIFIER_CODEX_MODEL,
#                VERIFIER_CODEX_REASONING
# Globals written: CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT,
#                  ITER_VERIFIER_CLAUDE_DURATION_S, ITER_VERIFIER_CODEX_DURATION_S,
#                  $VERDICT_FILE contents
# Returns:
#   0 - both engines passed (merged pass verdict written)
#   1 - a verifier run failed, the claude verdict stayed null after one retry,
#       or the round limit was exhausted
#   2 - consensus disagreement; fix contract and merged fail verdict written
#
# NOTE(review): CONSENSUS_ROUND is reset to 0 on every call and every
# disagreement below round 6 returns 2, so as written a single call never gets
# past round 1 and the block after the loop is not reached from here — confirm
# whether the round counter was meant to persist across calls.
run_consensus_verification() {
  local iter="$1"
  local claude_verdict_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verify-verdict-codex.json"
  local _claude_t0 _codex_t0 _combined_action fix_contract merged_issues

  CONSENSUS_ROUND=0
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""

  while (( CONSENSUS_ROUND < 6 )); do
    (( CONSENSUS_ROUND++ ))
    log " Consensus round $CONSENSUS_ROUND/6..."

    # Run claude verifier first
    _claude_t0=$(date +%s)
    if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
      log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    # Duration covers the first claude run only, not the retry below.
    ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
    CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
    # A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
    if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
      log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
      log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
      rm -f "$claude_verdict_file" 2>/dev/null
      if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
        log_error "Claude verifier retry also failed"
        return 1
      fi
      CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
      if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
        log_error "Claude verdict still null after retry — consensus cannot proceed"
        return 1
      fi
    fi
    log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"

    # consensus-fail-fast removed (complexity vs value too low)

    # Run codex verifier second
    _codex_t0=$(date +%s)
    if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
      log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
    CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
    log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"

    log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
    _combined_action="retry"
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      _combined_action="pass"
    elif (( CONSENSUS_ROUND >= 6 )); then
      _combined_action="blocked"
    fi
    log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"

    # Both pass → success
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      # Create merged verdict with per-engine details. jq does the quoting,
      # so verdict file paths with special characters stay valid JSON.
      jq -n \
        --arg verified_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        --arg claude_file "$claude_verdict_file" \
        --arg codex_file "$codex_verdict_file" \
        --argjson round "$CONSENSUS_ROUND" \
        '{
          verdict: "pass",
          verified_at_utc: $verified_at,
          summary: "Consensus PASS: both claude and codex verified independently",
          recommended_state_transition: "complete",
          consensus: {
            claude: {verdict: "pass", file: $claude_file},
            codex: {verdict: "pass", file: $codex_file},
            round: $round
          }
        }' | atomic_write "$VERDICT_FILE"
      return 0
    fi

    # Consensus disagreement
    log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"

    # NOTE: pre_existing_failure heuristic was removed (v0.3.5).
    # It used unreliable grep-in-description string matching to classify
    # consensus failures as "pre-existing", bypassing the consensus rule.
    # Consensus disagreement now ALWAYS flows to fix contract.
    # Codex CLI crash (no verdict file) is handled upstream via run_single_verifier return 1 → BLOCKED.

    # --- Consensus disagreement: build fix contract ---
    fix_contract="$LOGS_DIR/iter-$(printf '%03d' $iter).fix-contract.md"
    {
      printf '%s\n' "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
      echo ""
      _consensus_contract_section "Claude" "$CLAUDE_VERDICT" "$claude_verdict_file"
      echo ""
      _consensus_contract_section "Codex" "$CODEX_VERDICT" "$codex_verdict_file"
      echo ""
      echo "## Traceability"
      echo "Only changes that resolve a listed issue are allowed."
    } | atomic_write "$fix_contract"

    log " Combined fix contract: $fix_contract"

    # If this is not the last round, the caller will dispatch the Worker with the fix contract
    # For now, write a fail verdict so the main loop can handle the fix loop
    if (( CONSENSUS_ROUND < 6 )); then
      # Create a merged fail verdict for the main loop — include issues from BOTH verdicts
      merged_issues=$(_consensus_merged_issues "$claude_verdict_file" "$codex_verdict_file")
      _consensus_write_fail_verdict \
        "Consensus disagreement (round $CONSENSUS_ROUND/6): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
        "continue" \
        "$CONSENSUS_ROUND" \
        "${merged_issues:-[]}"
      return 2  # special return: consensus disagreement, needs retry
    fi
  done

  # Max consensus rounds exceeded — include issues from both verdicts
  log_error "Consensus failed after 6 rounds"
  merged_issues=$(_consensus_merged_issues "$claude_verdict_file" "$codex_verdict_file")
  _consensus_write_fail_verdict \
    "Consensus failed after 6 rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
    "blocked" \
    6 \
    "${merged_issues:-[]}"
  return 1
}
2018
-
2019
- # =============================================================================
2020
- # Main Leader Loop
2021
- # =============================================================================
2022
-
2023
- main() {
2024
- # --- Lockfile: prevent duplicate execution ---
2025
- local lockfile="$LOCKFILE_PATH"
2026
- mkdir -p "$(dirname "$lockfile")" 2>/dev/null
2027
- if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
2028
- local lock_pid
2029
- lock_pid=$(cat "$lockfile" 2>/dev/null)
2030
- if kill -0 "$lock_pid" 2>/dev/null; then
2031
- log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
2032
- exit 1
2033
- fi
2034
- # Stale lock — overwrite
2035
- log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
2036
- echo $$ > "$lockfile"
2037
- LOCKFILE_ACQUIRED=1
2038
- else
2039
- LOCKFILE_ACQUIRED=1
2040
- fi
2041
- trap cleanup EXIT INT TERM
2042
- mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
2043
-
2044
- # --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
2045
- mkdir -p "$ANALYTICS_DIR" 2>/dev/null
2046
-
2047
- # --- debug.log versioning (in analytics dir, --debug only) ---
2048
- if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
2049
- local dbg_n=1
2050
- while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
2051
- (( dbg_n++ ))
2052
- done
2053
- mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
2054
- fi
2055
-
2056
- # --- campaign.jsonl versioning (always-on) ---
2057
- if [[ -f "$CAMPAIGN_JSONL" ]]; then
2058
- local cj_n=1
2059
- while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
2060
- (( cj_n++ ))
2061
- done
2062
- mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
2063
- fi
2064
-
2065
- # --- metadata.json: always write at campaign start (cross-project identification) ---
2066
- jq -n \
2067
- --arg slug "$SLUG" \
2068
- --arg project_root "$ROOT" \
2069
- --arg project_name "$(basename "$ROOT")" \
2070
- --arg campaign_status "running" \
2071
- --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
2072
- --arg end_time "" \
2073
- --arg worker_model "$WORKER_MODEL" \
2074
- --arg verifier_model "$VERIFIER_MODEL" \
2075
- --argjson debug "$DEBUG" \
2076
- --argjson with_sv "$WITH_SELF_VERIFICATION" \
2077
- --argjson consensus "${VERIFY_CONSENSUS:-0}" \
2078
- '{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
2079
- > "$METADATA_FILE"
2080
-
2081
- # --- Startup ---
2082
- log "Ralph Desk Tmux Runner starting..."
2083
- log " Slug: $SLUG"
2084
- log " Root: $ROOT"
2085
- log " Max iterations: $MAX_ITER"
2086
- log " Worker model: $WORKER_MODEL"
2087
- log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
2088
- log " Verify mode: $VERIFY_MODE"
2089
- log " Consensus mode: $CONSENSUS_MODE"
2090
- log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
2091
- log " Poll interval: ${POLL_INTERVAL}s"
2092
- log " Iter timeout: ${ITER_TIMEOUT}s"
2093
- # --- Debug: Log execution plan ---
2094
- if (( DEBUG )); then
2095
- # Extract US IDs from PRD
2096
- local prd_file="$DESK/plans/prd-$SLUG.md"
2097
- local us_list=""
2098
- if [[ -f "$prd_file" ]]; then
2099
- us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
2100
- fi
2101
- local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
2102
-
2103
- log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
2104
- log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
2105
- log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
2106
- log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
2107
- log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
2108
-
2109
- if [[ "$VERIFY_MODE" = "per-us" ]]; then
2110
- # Build expected flow
2111
- local expected_flow=""
2112
- for us in $(echo "$us_list" | tr ',' ' '); do
2113
- expected_flow="${expected_flow}worker->verify($us)->"
2114
- done
2115
- expected_flow="${expected_flow}verify(ALL)->COMPLETE"
2116
- log_debug "[OPTION] expected_flow=$expected_flow"
2117
- else
2118
- log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
2119
- fi
2120
-
2121
- if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
2122
- log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
2123
- fi
2124
- fi
2125
-
2126
- # Extract US list for per-US sequencing
2127
- if [[ "$VERIFY_MODE" = "per-us" ]]; then
2128
- local prd_file="$DESK/plans/prd-$SLUG.md"
2129
- if [[ -f "$prd_file" ]]; then
2130
- US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
2131
- fi
2132
-
2133
- # Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
2134
- local memory_file="$DESK/memos/${SLUG}-memory.md"
2135
- if [[ -f "$memory_file" ]]; then
2136
- local completed_us
2137
- completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
2138
- if [[ -n "$completed_us" ]]; then
2139
- VERIFIED_US="$completed_us"
2140
- log " Loaded completed stories from memory: $VERIFIED_US"
2141
- log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
2142
- fi
2143
- fi
2144
-
2145
- # D1: Fallback — restore verified_us from status.json if memory had none
2146
- if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
2147
- local status_verified
2148
- status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
2149
- if [[ -n "$status_verified" ]]; then
2150
- VERIFIED_US="$status_verified"
2151
- log " Restored verified_us from status.json: $VERIFIED_US"
2152
- log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
2153
- fi
2154
- fi
2155
- fi
2156
-
2157
- # Initialize PRD snapshot state for live update detection
2158
- PREV_PRD_HASH=$(compute_prd_hash)
2159
- PREV_PRD_US_LIST=$(count_prd_us)
2160
-
2161
- # Dependency checks
2162
- check_dependencies
2163
-
2164
- # Print security warning (governance.md s7: --dangerously-skip-permissions)
2165
- print_security_warning
2166
-
2167
- # Validate scaffold
2168
- validate_scaffold
2169
-
2170
- # Check for existing sessions
2171
- check_existing_sessions
2172
-
2173
- # Create tmux session with pane IDs (governance.md s7 step 1)
2174
- create_session
2175
-
2176
- # Set trap for cleanup on exit/error
2177
- trap cleanup EXIT
2178
-
2179
- # Initialize context hash for stale detection
2180
- PREV_CONTEXT_HASH=$(compute_context_hash)
2181
-
2182
- # --- governance.md s7: Leader Loop ---
2183
- local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
2184
-
2185
- for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
2186
- log ""
2187
- log "========== Iteration $ITERATION / $MAX_ITER =========="
2188
- local ITER_START_TIME
2189
- ITER_START_TIME=$(date +%s)
2190
- local _iter_contract=""
2191
- _iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
2192
- log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
2193
-
2194
- # --- governance.md s7 step 1: Check sentinels ---
2195
- if [[ -f "$COMPLETE_SENTINEL" ]]; then
2196
- log "COMPLETE sentinel found. Campaign succeeded."
2197
- update_status "complete" "complete"
2198
- return 0
2199
- fi
2200
- if [[ -f "$BLOCKED_SENTINEL" ]]; then
2201
- log "BLOCKED sentinel found. Campaign blocked."
2202
- update_status "blocked" "blocked"
2203
- return 1
2204
- fi
2205
-
2206
- # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
2207
- rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
2208
- rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
2209
-
2210
- # --- Clean previous claude session in panes (one-shot lifecycle) ---
2211
- # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
2212
- if (( ITERATION > 1 )); then
2213
- # Send C-c first (in case claude is mid-task), then /exit
2214
- tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
2215
- sleep 1
2216
- tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
2217
- sleep 2
2218
- # Wait for shell prompt before proceeding
2219
- wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
2220
- fi
2221
-
2222
- # Reset per-iteration state
2223
- local worker_nudge_count=0
2224
- local verifier_nudge_count=0
2225
- ITER_VERIFIER_START=""
2226
- ITER_VERIFIER_END=""
2227
-
2228
- # --- US-004: detect PRD changes for live update + re-split ---
2229
- check_prd_update
2230
-
2231
- # --- governance.md s7 step 4: Build worker prompt + trigger ---
2232
- write_worker_trigger "$ITERATION"
2233
- local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
2234
-
2235
- # AC1: capture worker start timestamp
2236
- ITER_WORKER_START=$(date +%s)
2237
-
2238
- update_status "worker" "running"
2239
-
2240
- # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
2241
- log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
2242
-
2243
- local worker_launch
2244
- if [[ "$WORKER_ENGINE" = "codex" ]]; then
2245
- worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
2246
- if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
2247
- write_blocked_sentinel "Worker codex failed to start in pane"
2248
- update_status "blocked" "worker_start_failed"
2249
- return 1
2250
- fi
2251
- else
2252
- worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
2253
- if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
2254
- write_blocked_sentinel "Worker claude failed to start in pane"
2255
- update_status "blocked" "worker_start_failed"
2256
- return 1
2257
- fi
2258
- fi
2259
-
2260
- # --- governance.md s7 step 5+6: Poll for Worker completion ---
2261
- log " Polling for iter-signal.json..."
2262
- local worker_poll_done=0
2263
- while (( ! worker_poll_done )); do
2264
- local worker_poll_rc=0
2265
- if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
2266
- worker_poll_done=1
2267
- log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
2268
- else
2269
- worker_poll_rc=$?
2270
- if (( worker_poll_rc == 2 )); then
2271
- return 1
2272
- fi
2273
- # Check if Worker is still actively running (not stuck)
2274
- local worker_cmd
2275
- worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
2276
- if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
2277
- # Process alive — extend indefinitely (no hard ceiling kill)
2278
- # Stale-context breaker and nudge system handle truly stuck workers
2279
- local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
2280
- local ceiling_exceeded=""
2281
- if (( iter_elapsed >= HARD_CEILING )); then
2282
- ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
2283
- log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
2284
- log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
2285
- fi
2286
- log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
2287
- log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
2288
- log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
2289
- update_status "worker" "slow"
2290
- # Loop continues — re-poll same iteration
2291
- else
2292
- # Worker is truly dead/stuck
2293
- (( MONITOR_FAILURE_COUNT++ ))
2294
- log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
2295
- if (( MONITOR_FAILURE_COUNT >= 3 )); then
2296
- log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
2297
- write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
2298
- update_status "blocked" "monitor_failures"
2299
- return 1
2300
- fi
2301
- log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
2302
- update_status "worker" "poll_failed"
2303
- log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
2304
- # Worker is truly dead/stuck — BLOCK and let user decide
2305
- write_blocked_sentinel "Worker process dead/stuck (poll failed). Pane preserved for inspection."
2306
- update_status "blocked" "worker_dead"
2307
- return 1
2308
- fi
2309
- fi
2310
- done
2311
-
2312
- if [[ ! -f "$SIGNAL_FILE" ]]; then
2313
- log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
2314
- # No signal — monitor failure, go to next iteration
2315
- continue
2316
- fi
2317
-
2318
- # Reset monitor failure count on success
2319
- MONITOR_FAILURE_COUNT=0
2320
-
2321
- # AC1: capture worker end timestamp; reset consensus timing
2322
- ITER_WORKER_END=$(date +%s)
2323
- ITER_VERIFIER_CLAUDE_DURATION_S=""
2324
- ITER_VERIFIER_CODEX_DURATION_S=""
2325
-
2326
- # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
2327
- local signal_status
2328
- signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
2329
- local signal_summary
2330
- signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)
2331
-
2332
- log " Worker signal: status=$signal_status summary=\"$signal_summary\""
2333
-
2334
- # Read us_id early for EXEC logging (also used later in verify branch)
2335
- local signal_us_id_early=""
2336
- signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
2337
- log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
2338
-
2339
- case "$signal_status" in
2340
- continue)
2341
- # --- governance.md s7 step 6: continue -> go to step 8 ---
2342
- log " Worker requests continue. Moving to next iteration."
2343
- update_status "worker" "continue"
2344
- ;;
2345
- verify)
2346
- # --- governance.md s7 step 7: Execute Verifier ---
2347
- # Read us_id from signal for per-US scoping
2348
- local signal_us_id=""
2349
- signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
2350
- log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
2351
-
2352
- # AC1: capture verifier start timestamp
2353
- ITER_VERIFIER_START=$(date +%s)
2354
-
2355
- update_status "verifier" "running"
2356
-
2357
- # --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
2358
- if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
2359
- log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
2360
- local seq_rc=0
2361
- run_sequential_final_verify "$ITERATION" || seq_rc=$?
2362
- if (( seq_rc == 0 )); then
2363
- write_complete_sentinel "Sequential final verify passed (all US verified individually)"
2364
- update_status "complete" "pass"
2365
- write_campaign_jsonl "$ITERATION" "ALL" "pass"
2366
- return 0
2367
- else
2368
- # Sequential verify failed — fall through to fix loop with failed US
2369
- log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
2370
- signal_us_id="${FAILED_US:-ALL}"
2371
- # Synthesize a fail verdict for the fix loop
2372
- echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
2373
- fi
2374
- fi
2375
-
2376
- # --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
2377
- local use_consensus=0
2378
- _should_use_consensus "$signal_us_id" && use_consensus=1
2379
-
2380
- # --- Consensus vs single verification ---
2381
- if (( use_consensus )); then
2382
- # US-004: Run consensus verification (claude + codex sequentially)
2383
- local consensus_rc=0
2384
- run_consensus_verification "$ITERATION" || consensus_rc=$?
2385
-
2386
- if (( consensus_rc == 2 )); then
2387
- # Consensus disagreement — treat as fail, fix loop will handle
2388
- log " Consensus disagreement, treating as fail."
2389
- elif (( consensus_rc != 0 )); then
2390
- # Consensus verification failed entirely
2391
- log_error "Consensus verification failed"
2392
- write_blocked_sentinel "Consensus verification failed after max rounds"
2393
- update_status "blocked" "consensus_failed"
2394
- return 1
2395
- fi
2396
- else
2397
- # Standard single-engine verification
2398
- write_verifier_trigger "$ITERATION"
2399
- local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
2400
-
2401
- # Step 7a: Clean previous Verifier session (with dead pane detection)
2402
- local verifier_cmd
2403
- verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
2404
- if [[ -z "$verifier_cmd" ]]; then
2405
- log " Verifier pane $VERIFIER_PANE is gone — replacing..."
2406
- log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
2407
- replace_worker_pane "$VERIFIER_PANE" "verifier"
2408
- VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
2409
- log " New verifier pane: $VERIFIER_PANE"
2410
- elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
2411
- log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
2412
- log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
2413
- tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
2414
- sleep 0.2
2415
- tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
2416
- sleep 0.3
2417
- elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
2418
- tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
2419
- sleep 0.5
2420
- tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
2421
- sleep 2
2422
- fi
2423
- wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
2424
-
2425
- local verifier_launch
2426
- if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2427
- verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
2428
- else
2429
- verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
2430
- fi
2431
- log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
2432
-
2433
- if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2434
- launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
2435
- else
2436
- if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
2437
- update_status "verifier" "start_failed"
2438
- continue
2439
- fi
2440
- fi
2441
-
2442
- # Poll for verify-verdict.json
2443
- log " Polling for verify-verdict.json..."
2444
- if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
2445
- local verifier_poll_rc=$?
2446
- if (( verifier_poll_rc == 2 )); then
2447
- return 1
2448
- fi
2449
- log_error "Verifier poll failed"
2450
- # Verifier is dead/stuck — BLOCK and let user decide
2451
- write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection."
2452
- update_status "blocked" "verifier_dead"
2453
- return 1
2454
- fi
2455
- fi
2456
-
2457
- # AC1: capture verifier end timestamp
2458
- ITER_VERIFIER_END=$(date +%s)
2459
-
2460
- # --- governance.md s7 step 7: Read verdict via jq ---
2461
- local verdict
2462
- verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
2463
- local recommended
2464
- recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
2465
- local verdict_summary
2466
- verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
2467
-
2468
- log " Verifier: verdict=$verdict recommended=$recommended"
2469
- log " Verifier summary: \"$verdict_summary\""
2470
- local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
2471
- log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
2472
-
2473
- case "$verdict" in
2474
- pass)
2475
- CONSECUTIVE_FAILURES=0
2476
- CONSENSUS_ROUND=0
2477
- _SAME_US_FAIL_COUNT=0
2478
- _LAST_FAILED_US=""
2479
- if (( _MODEL_UPGRADED )); then
2480
- log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
2481
- log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
2482
- WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
2483
- if [[ "$WORKER_ENGINE" = "codex" ]]; then
2484
- WORKER_CODEX_MODEL="$WORKER_MODEL"
2485
- WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
2486
- fi
2487
- _MODEL_UPGRADED=0
2488
- fi
2489
-
2490
- # --- Verified US tracking (both per-us and batch modes) ---
2491
- if [[ -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
2492
- # Add this US to verified list
2493
- if [[ -n "$VERIFIED_US" ]]; then
2494
- VERIFIED_US="${VERIFIED_US},${signal_us_id}"
2495
- else
2496
- VERIFIED_US="$signal_us_id"
2497
- fi
2498
- log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
2499
- log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
2500
- update_status "verifier" "pass_us"
2501
- # Worker will do next US on next iteration
2502
- elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
2503
- # Final full verify passed or complete recommended
2504
- write_complete_sentinel "$verdict_summary"
2505
- update_status "complete" "pass"
2506
- write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
2507
- return 0
2508
- else
2509
- log " Verifier passed but did not recommend complete. Continuing."
2510
- update_status "verifier" "pass_continue"
2511
- fi
2512
- ;;
2513
- fail)
2514
- # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
2515
-
2516
- # Parse per_us_results from verdict to track partial progress (batch + per-us)
2517
- local _prev_verified="$VERIFIED_US"
2518
- if jq -e '.per_us_results' "$VERDICT_FILE" &>/dev/null; then
2519
- local _newly_passed
2520
- _newly_passed=$(jq -r '.per_us_results | to_entries[] | select(.value == "pass") | .key' "$VERDICT_FILE" 2>/dev/null)
2521
- for _pus in $(echo "$_newly_passed"); do
2522
- if ! echo ",$VERIFIED_US," | grep -q ",$_pus,"; then
2523
- if [[ -n "$VERIFIED_US" ]]; then
2524
- VERIFIED_US="${VERIFIED_US},${_pus}"
2525
- else
2526
- VERIFIED_US="$_pus"
2527
- fi
2528
- log " Partial progress: $_pus passed (overall FAIL). Verified so far: $VERIFIED_US"
2529
- fi
2530
- done
2531
- log_debug "[FLOW] iter=$ITERATION partial_progress prev=$_prev_verified now=$VERIFIED_US"
2532
- fi
2533
-
2534
- # Partial progress resets consecutive failures (progress was made)
2535
- if [[ "$VERIFIED_US" != "$_prev_verified" ]]; then
2536
- CONSECUTIVE_FAILURES=0
2537
- log " Progress detected — consecutive_failures reset to 0"
2538
- log_debug "[GOV] iter=$ITERATION consecutive_failures_reset=partial_progress"
2539
- fi
2540
-
2541
- (( CONSECUTIVE_FAILURES++ ))
2542
- record_us_failure "${signal_us_id:-unknown}"
2543
- check_model_upgrade "${signal_us_id:-unknown}"
2544
-
2545
- # Mid-CB warning: alert at halfway point (governance §8 early warning)
2546
- if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
2547
- log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
2548
- log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
2549
- fi
2550
- local verdict_summary_fail
2551
- verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
2552
- log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
2553
-
2554
- # Extract issues from verdict for next Worker's fix contract
2555
- local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).fix-contract.md"
2556
- {
2557
- echo "# Fix Contract (from Verifier iteration $ITERATION)"
2558
- echo ""
2559
- if [[ -n "$VERIFIED_US" ]]; then
2560
- echo "## Verified US (do NOT re-implement these)"
2561
- echo "$VERIFIED_US" | tr ',' '\n' | sed 's/^/- /'
2562
- echo ""
2563
- echo "**Focus ONLY on unverified user stories. The above are already verified.**"
2564
- echo ""
2565
- fi
2566
- echo "## Summary"
2567
- echo "$verdict_summary_fail"
2568
- echo ""
2569
- echo "## Issues (from verify-verdict.json)"
2570
- jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null || echo "- (no structured issues available)"
2571
- echo ""
2572
- echo "## Next Iteration Contract"
2573
- jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
2574
- } | atomic_write "$fix_contract"
2575
- log " Fix contract: $fix_contract"
2576
- log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
2577
-
2578
- # Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
2579
- if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
2580
- # For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
2581
- _ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
2582
- if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
2583
- log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
2584
- log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
2585
- write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2586
- else
2587
- log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
2588
- log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2589
- write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2590
- fi
2591
- update_status "blocked" "consecutive_failures"
2592
- return 1
2593
- fi
2594
-
2595
- update_status "verifier" "fail"
2596
- ;;
2597
- request_info)
2598
- # --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
2599
- local verdict_summary_ri
2600
- verdict_summary_ri=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
2601
- log " Verifier requests info (degraded in tmux lean mode)."
2602
- log " Questions: \"$verdict_summary_ri\""
2603
- log " Treating as soft fail — Worker will see verdict in next iteration."
2604
- update_status "verifier" "request_info"
2605
- ;;
2606
- blocked)
2607
- write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary"
2608
- update_status "blocked" "verifier_blocked"
2609
- return 1
2610
- ;;
2611
- *)
2612
- log_error "Unknown verdict: $verdict"
2613
- update_status "verifier" "unknown_verdict"
2614
- ;;
2615
- esac
2616
- ;;
2617
- blocked)
2618
- # --- governance.md s7 step 6: blocked -> write sentinel ---
2619
- write_blocked_sentinel "Worker reported blocked: $signal_summary"
2620
- update_status "blocked" "worker_blocked"
2621
- return 1
2622
- ;;
2623
- *)
2624
- log_error "Unknown signal status: $signal_status"
2625
- update_status "worker" "unknown_status"
2626
- ;;
2627
- esac
2628
-
2629
- # --- step 7d: Archive iteration artifacts before cleanup ---
2630
- archive_iter_artifacts "$ITERATION"
2631
-
2632
- # --- AC5: Write per-iteration cost estimate ---
2633
- write_cost_log "$ITERATION"
2634
- write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
2635
-
2636
- # --- governance.md s7 step 8: Write result log ---
2637
- write_result_log "$ITERATION" "$signal_status"
2638
-
2639
- # --- governance.md s7 step 8: Circuit breaker - stale context check ---
2640
- if ! check_stale_context; then
2641
- log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
2642
- write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
2643
- update_status "blocked" "stale_context"
2644
- return 1
2645
- fi
2646
-
2647
- # --- governance.md s7 step 8: Update status ---
2648
- update_status "idle" "${signal_status:-unknown}"
2649
- done
2650
-
2651
- # Max iterations reached
2652
- log "Max iterations ($MAX_ITER) reached."
2653
- update_status "timeout" "max_iter"
2654
- return 1
2655
- }
2656
-
2657
- # =============================================================================
2658
- # Entry Point
2659
- # =============================================================================
2660
-
2661
- # --- CLI: parse model-selection and consensus flags (--worker-model, --verifier-model, --final-verifier-model, --consensus, ...) ---
2662
- # These flags override env-var defaults (WORKER_ENGINE, WORKER_MODEL, etc.); arguments are read by index, so "$@" is never shifted
2663
- # Format: "model:reasoning" → codex engine; "model-name" → claude engine (parse_model_flag output is consumed below as space-separated fields: engine, model, optional last field)
2664
- _cli_i=1 # 1-based index into the positional parameters
2665
- while (( _cli_i <= $# )); do
2666
- case "${@[$_cli_i]}" in # dispatch on the current argument
2667
- --worker-model)
2668
- (( _cli_i++ )) # step onto this flag's value
2669
- _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "worker") || exit 1 # a missing value is passed as ""; abort the script if the helper fails
2670
- WORKER_ENGINE="${_cli_parsed%% *}" # text before the first space
2671
- _cli_rest="${_cli_parsed#* }" # text after the first space
2672
- WORKER_MODEL="${_cli_rest%% *}" # first field of the remainder
2673
- if [[ "$WORKER_ENGINE" = "codex" ]]; then
2674
- WORKER_CODEX_MODEL="$WORKER_MODEL"
2675
- WORKER_CODEX_REASONING="${_cli_rest##* }" # text after the last space (the whole remainder if it contains no space)
2676
- elif [[ "$_cli_rest" == *" "* ]]; then # non-codex engine and the remainder has an extra field
2677
- WORKER_EFFORT="${_cli_rest##* }" # last field becomes the effort setting
2678
- fi
2679
- ;;
2680
- --verifier-model)
2681
- (( _cli_i++ )) # step onto this flag's value
2682
- _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "verifier") || exit 1 # same handling as --worker-model
2683
- VERIFIER_ENGINE="${_cli_parsed%% *}" # text before the first space
2684
- _cli_rest="${_cli_parsed#* }" # text after the first space
2685
- VERIFIER_MODEL="${_cli_rest%% *}" # first field of the remainder
2686
- if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2687
- VERIFIER_CODEX_MODEL="$VERIFIER_MODEL"
2688
- VERIFIER_CODEX_REASONING="${_cli_rest##* }" # text after the last space
2689
- elif [[ "$_cli_rest" == *" "* ]]; then # non-codex engine and the remainder has an extra field
2690
- VERIFIER_EFFORT="${_cli_rest##* }" # last field becomes the effort setting
2691
- fi
2692
- ;;
2693
- --lock-worker-model)
2694
- LOCK_WORKER_MODEL=1 # switch-style flag: no value is consumed
2695
- ;;
2696
- --autonomous)
2697
- AUTONOMOUS_MODE=1 # switch-style flag: no value is consumed
2698
- ;;
2699
- --final-verifier-model)
2700
- (( _cli_i++ )) # step onto this flag's value
2701
- _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "final-verifier") || exit 1 # same handling as --worker-model
2702
- FINAL_VERIFIER_ENGINE="${_cli_parsed%% *}" # text before the first space
2703
- _cli_rest="${_cli_parsed#* }" # text after the first space
2704
- FINAL_VERIFIER_MODEL="${_cli_rest%% *}" # first field of the remainder
2705
- if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
2706
- FINAL_VERIFIER_CODEX_MODEL="$FINAL_VERIFIER_MODEL"
2707
- FINAL_VERIFIER_CODEX_REASONING="${_cli_rest##* }" # text after the last space
2708
- elif [[ "$_cli_rest" == *" "* ]]; then # non-codex engine and the remainder has an extra field
2709
- FINAL_VERIFIER_EFFORT="${_cli_rest##* }" # last field becomes the effort setting
2710
- fi
2711
- ;;
2712
- --consensus)
2713
- (( _cli_i++ )) # step onto this flag's value
2714
- CONSENSUS_MODE="${@[$_cli_i]:-off}" # "off" is used when the value is missing or empty
2715
- ;;
2716
- --consensus-model)
2717
- (( _cli_i++ )) # step onto this flag's value
2718
- CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:medium}" # fallback applies when the value is missing or empty
2719
- ;;
2720
- --final-consensus-model)
2721
- (( _cli_i++ )) # step onto this flag's value
2722
- FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:high}" # fallback applies when the value is missing or empty
2723
- ;;
2724
- --final-consensus)
2725
- # Legacy switch (takes no value): equivalent to the newer --consensus final-only
2726
- CONSENSUS_MODE="final-only"
2727
- ;;
2728
- --verify-consensus)
2729
- # Legacy switch (takes no value): equivalent to the newer --consensus all
2730
- CONSENSUS_MODE="all"
2731
- ;;
2732
- esac # no default arm: arguments that match none of the patterns are skipped
2733
- (( _cli_i++ )) # move on to the next argument
2734
- done
2735
- unset _cli_i _cli_parsed _cli_rest # discard the parser's scratch variables
2736
-
2737
- # Require tmux — tmux mode only works inside an active tmux session (detected here via a non-empty $TMUX)
2738
- if [[ -z "${TMUX:-}" ]]; then # ":-" supplies an empty default so set -u does not abort when TMUX is unset
2739
- echo "ERROR: tmux mode requires running inside a tmux session."
2740
- echo ""
2741
- echo " Start tmux first, then retry:"
2742
- echo " tmux"
2743
- echo " LOOP_NAME=$SLUG $0" # suggested re-run command; SLUG is assigned outside this block
2744
- echo ""
2745
- echo " Or use Agent() mode instead (no tmux needed):"
2746
- echo " /rlp-desk run $SLUG"
2747
- exit 1 # stop before main is invoked
2748
- fi
2749
-
2750
- main "$@" # invoke main with the original command-line arguments, each passed as a separate word