@ai-dev-methodologies/rlp-desk 0.14.3 → 0.14.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3522 @@
1
+ #!/bin/zsh
2
+ set -uo pipefail
3
+ # NOTE: We use set -u (undefined var check) and pipefail, but NOT set -e
4
+ # because the main loop uses explicit error checks throughout.
5
+
6
+ # =============================================================================
7
+ # Ralph Desk Tmux Runner
8
+ #
9
+ # Implements the Leader loop from governance.md section 7 as a shell script.
10
+ # Uses tmux proven patterns: write-then-notify, pane IDs (%N),
11
+ # copy-mode guards, verification-based retry, heartbeat monitoring,
12
+ # idle pane nudging, exponential backoff restarts, atomic file writes.
13
+ #
14
+ # Usage:
15
+ # LOOP_NAME=<slug> ./run_ralph_desk.zsh
16
+ #
17
+ # Required env:
18
+ # LOOP_NAME - slug identifier for the campaign
19
+ #
20
+ # Optional env:
21
+ # ROOT - project root (default: $PWD)
22
+ # MAX_ITER - max iterations (default: 20)
23
+ # WORKER_MODEL - claude model for Worker (default: haiku)
24
+ # VERIFIER_MODEL - claude model for Verifier (default: sonnet)
25
+ # POLL_INTERVAL - seconds between signal checks (default: 5)
26
+ # ITER_TIMEOUT - per-iteration timeout in seconds (default: 600)
27
+ # HEARTBEAT_STALE_THRESHOLD - seconds before heartbeat is stale (default: 120)
28
+ # MAX_RESTARTS - max restart attempts per worker (default: 3)
29
+ # IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
30
+ # MAX_NUDGES - max nudges per pane per iteration (default: 3)
31
+ #
32
+ # Per-role codex config:
33
+ # WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.5)
34
+ # WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
35
+ # VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.5)
36
+ # VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
37
+ #
38
+ # Consensus scope:
39
+ # CONSENSUS_SCOPE - when consensus applies (default: all)
40
+ # all=every verify, final-only=final ALL only
41
+ #
42
+ # Dependencies: tmux, claude CLI, jq
43
+ # Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
44
+ # =============================================================================
45
+
46
+ # --- Environment Variables ---
47
+ SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
48
# Tunables: each keeps the caller's environment value when present and
# non-empty, otherwise falls back to the default shown.
: "${ROOT:=$PWD}"
: "${MAX_ITER:=20}"
: "${WORKER_MODEL:=haiku}"
: "${VERIFIER_MODEL:=sonnet}"
: "${FINAL_VERIFIER_MODEL:=opus}"
: "${POLL_INTERVAL:=5}"
: "${ITER_TIMEOUT:=600}"
: "${HEARTBEAT_STALE_THRESHOLD:=120}"
: "${MAX_RESTARTS:=3}"
: "${IDLE_NUDGE_THRESHOLD:=30}"
: "${MAX_NUDGES:=3}"
: "${WITH_SELF_VERIFICATION:=0}"
# Snapshot of the user's original request, kept for traceability
# (governance §1f) even if self-verification is later switched off.
WITH_SELF_VERIFICATION_REQUESTED="$WITH_SELF_VERIFICATION"
# Filled in when SV is disabled despite the user asking for it.
SV_SKIPPED_REASON=""

# v0.14.0: the zsh runner is again the primary tmux-mode path. The v5.7 §4.2
# deprecation gate (which rejected --flywheel / --flywheel-guard /
# --with-self-verification) is gone because the Node port shipped without the
# zsh safety nets (heartbeat, copy-mode guard, prompt-stall, no-progress,
# stale-context, claude model upgrade chain, ...). The Node leader is reserved
# for `--mode agent` (LLM-driven); `--mode tmux` invocations from
# src/node/run.mjs delegate here as a subprocess via env vars. FLYWHEEL,
# FLYWHEEL_GUARD and WITH_SELF_VERIFICATION are still honored.

# 1 = don't stop on ambiguity; the PRD is authoritative.
: "${AUTONOMOUS_MODE:=0}"

# P1-E lane enforcement (governance §7¾): "warn" by default; --lane-strict
# selects "strict", which trips BLOCKED with reason_category=infra_failure and
# recoverable=true (downgraded from terminal_alert) so an inaccurate mtime
# audit cannot terminally kill a campaign.
: "${LANE_MODE:=warn}"

# US-018 R6 P1-F test density (governance §7f): "warn" by default;
# --test-density-strict makes init exit non-zero when any AC has < 3 tests.
: "${TEST_DENSITY_MODE:=warn}"

# US-021 R9 P2-I consecutive_blocks circuit breaker (governance §8): when the
# same canonical block reason fires this many times in a row, the runner
# writes .sisyphus/mission-abort.json and exits non-zero so contract defects
# don't silently loop. infra_failure and the very first iteration are exempt.
: "${BLOCK_CB_THRESHOLD:=3}"
CONSECUTIVE_BLOCKS=0
LAST_BLOCK_REASON=""
89
+
90
+ # US-021 R9 P2-I: _check_consecutive_blocks (defined further below) tracks
91
+ # repeated same-reason blocks. The infra_failure category and the very first
92
+ # iteration are exempt (mission setup blocks shouldn't trip the abort). It
93
+ # returns 0 if the loop should continue, or 1 (after writing mission-abort.json) once the threshold is reached.
94
# US-023 R11 P2-K: every campaign gets at least one cost-log.jsonl entry.
# An empty cost log is ambiguous ("no usage recorded" vs. "logging broken"),
# so a final entry is always attempted on exit. COST_LOG_FINAL_WRITTEN makes
# the call idempotent; the comment in the original notes it is wired into the
# existing cleanup trap.
COST_LOG_FINAL_WRITTEN=0

# _emit_final_cost_log — write the closing cost-log entry at most once.
# Globals: COST_LOG_FINAL_WRITTEN (read/written), ITERATION, LOGS_DIR (read)
# Returns: always 0; write_cost_log failures are deliberately swallowed.
_emit_final_cost_log() {
  # Already emitted during this run: nothing to do.
  (( ${COST_LOG_FINAL_WRITTEN:-0} == 0 )) || return 0
  COST_LOG_FINAL_WRITTEN=1

  # Without an iteration counter or a logs directory there is nothing to log.
  if [[ -z "${ITERATION:-}" || -z "${LOGS_DIR:-}" ]]; then
    return 0
  fi

  # Best effort only: this runs on the exit path and must never fail it.
  write_cost_log "${ITERATION:-0}" 2>/dev/null || true
  return 0
}
108
+
109
# US-024 R12 P0: tmux pane/session lifecycle monitor.
# Polls until the session and all three panes (leader, worker, verifier) are
# alive. After the 5th consecutive failed poll (1s sleep between polls) it
# logs the failure, appends a pane listing to the debug log, writes an
# infra_failure BLOCKED sentinel and terminates the runner.
# Per the original note it is invoked at 3 sites: create_session post-finish,
# main loop iteration entry, and after every send-keys/paste before the
# wait-loop.
# Args:    $1=site label used in log/sentinel text (default: unknown)
# Returns: 0 when everything is alive; otherwise exits the process with 1.
_r12_check_lifecycle() {
  local site="${1:-unknown}"
  local failed_polls=0

  while true; do
    # Same check order as before; the first dead component short-circuits.
    if _verify_session_alive "$SESSION_NAME" \
      && _verify_pane_alive "$LEADER_PANE" \
      && _verify_pane_alive "$WORKER_PANE" \
      && _verify_pane_alive "$VERIFIER_PANE"; then
      return 0
    fi

    failed_polls=$(( failed_polls + 1 ))
    if (( failed_polls >= 5 )); then
      log_error "[r12:$site] tmux session/pane dead after 5x1s polling (5s authoritative budget). session=$SESSION_NAME panes leader=$LEADER_PANE worker=$WORKER_PANE verifier=$VERIFIER_PANE"
      # Snapshot of what tmux still knows about, for post-mortem debugging.
      tmux list-panes -a -F '#{session_name}:#{pane_id} dead=#{pane_dead}' 2>&1 | head -20 >> "${DEBUG_LOG:-/dev/null}"
      write_blocked_sentinel "tmux session/pane dead during $site" "${CURRENT_US:-ALL}" "infra_failure"
      exit 1
    fi
    sleep 1
  done
}
132
+
133
# _check_consecutive_blocks — circuit breaker for repeated identical blocks
# (US-021 R9 P2-I, governance §8).
# Counts how many times in a row the same canonical block reason was seen.
# When the count reaches BLOCK_CB_THRESHOLD it writes
# $DESK/.sisyphus/mission-abort.json and tells the caller to abort.
# Args:    $1=raw block reason
#          $2=reason category (default: metric_failure)
#          $3=iteration number (default: $ITERATION, else 0)
# Globals: CONSECUTIVE_BLOCKS, LAST_BLOCK_REASON (read/written);
#          BLOCK_CB_THRESHOLD, DESK (read)
# Returns: 0 if the loop should continue, 1 once the threshold is reached.
_check_consecutive_blocks() {
  local reason="$1"
  local category="${2:-metric_failure}"
  local iter="${3:-${ITERATION:-0}}"

  # A malformed iteration value is treated as 0 (i.e. exempt) instead of
  # being fed into the arithmetic comparison below.
  case "$iter" in
    ''|*[!0-9]*) iter=0 ;;
  esac

  # Infra failures and the first iteration never count towards the breaker,
  # and they reset any streak in progress.
  if [[ "$category" == "infra_failure" ]] || (( iter <= 1 )); then
    LAST_BLOCK_REASON=""
    CONSECUTIVE_BLOCKS=0
    return 0
  fi

  local canonical
  canonical=$(_canonical_block_reason "$reason" 2>/dev/null)
  if [[ -n "$canonical" && "$canonical" == "$LAST_BLOCK_REASON" ]]; then
    CONSECUTIVE_BLOCKS=$(( CONSECUTIVE_BLOCKS + 1 ))
  else
    CONSECUTIVE_BLOCKS=1
    LAST_BLOCK_REASON="$canonical"
  fi

  if (( CONSECUTIVE_BLOCKS < BLOCK_CB_THRESHOLD )); then
    return 0
  fi

  # Make the reason safe to embed in a JSON string: control characters become
  # spaces, then backslashes and double quotes are escaped.
  local reason_json
  reason_json=$(printf '%s' "$canonical" \
    | LC_ALL=C tr '\001-\037' ' ' \
    | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')

  local abort_dir="$DESK/.sisyphus"
  local abort_file="$abort_dir/mission-abort.json"
  if ! mkdir -p "$abort_dir" 2>/dev/null; then
    log_error "Mission abort: cannot create $abort_dir"
  fi
  # The abort decision stands even if the marker file cannot be written, but
  # the failure is reported rather than silently ignored.
  if ! { printf '{"reason":"consecutive_blocks","count":%s,"last_reason":"%s","threshold":%s,"timestamp":"%s"}\n' \
      "$CONSECUTIVE_BLOCKS" "$reason_json" "$BLOCK_CB_THRESHOLD" \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$abort_file"; } 2>/dev/null; then
    log_error "Mission abort: cannot write $abort_file"
  fi
  log_error "Mission abort: same canonical block reason '$canonical' repeated $CONSECUTIVE_BLOCKS times (>= $BLOCK_CB_THRESHOLD)"
  return 1
}
162
+
163
# --- Engine Selection (auto-detect from model format) ---
# claude models (haiku/sonnet/opus) with :effort → claude engine + effort
# codex models (gpt-*/spark) with :reasoning → codex engine + reasoning
# plain name → left untouched (engine stays as configured)
#
# _auto_detect_engine — split a "model:level" spec held in a named variable.
# All arguments are variable NAMES, not values; results are written back into
# those variables in the caller's scope.
# Args:    $1=name of the model variable (read and rewritten)
#          $2=name of the engine variable (set to claude|codex)
#          $3=name of the codex model variable, or "" to skip
#          $4=name of the codex reasoning variable, or "" to skip
#          $5=name of the claude effort variable, or "" to skip
# Returns: always 0.
# Safety:  only the variable names (supplied by this script's own call sites)
#          are spliced into eval text. Values are referenced through \$var so
#          a hostile spec such as "gpt:high; rm -rf x" is stored verbatim and
#          never executed.
_auto_detect_engine() {
  local model_var="$1" engine_var="$2"
  local codex_model_var="${3:-}" codex_reasoning_var="${4:-}" effort_var="${5:-}"
  local model_val=""
  eval "model_val=\"\${${model_var}-}\""

  # No ":level" suffix: nothing to detect.
  [[ "$model_val" == *:* ]] || return 0

  local model_part="${model_val%%:*}"   # text before the first colon
  local level_part="${model_val##*:}"   # text after the last colon

  case "$model_part" in
    haiku|sonnet|opus)
      # Claude model with effort — engine is claude, level is the effort.
      eval "${engine_var}=claude"
      eval "${model_var}=\$model_part"
      if [[ -n "$effort_var" ]]; then
        eval "${effort_var}=\$level_part"
      fi
      ;;
    *)
      # Anything else is treated as a codex model with a reasoning level.
      # "spark" is shorthand for the full codex spark model id.
      if [[ "$model_part" == "spark" ]]; then
        model_part="gpt-5.3-codex-spark"
      fi
      eval "${engine_var}=codex"
      eval "${model_var}=\$model_part"
      if [[ -n "$codex_model_var" ]]; then
        eval "${codex_model_var}=\$model_part"
      fi
      if [[ -n "$codex_reasoning_var" ]]; then
        eval "${codex_reasoning_var}=\$level_part"
      fi
      ;;
  esac
  return 0
}
191
+
192
# Engines default to claude; _auto_detect_engine below may switch them.
: "${WORKER_ENGINE:=claude}"
: "${VERIFIER_ENGINE:=claude}"
: "${FINAL_VERIFIER_ENGINE:=claude}"

# Effort levels for Claude models (set by _auto_detect_engine or by the CLI,
# e.g. --worker-model opus:max). Empty means "no effort level".
WORKER_EFFORT="${WORKER_EFFORT:-}"
VERIFIER_EFFORT="${VERIFIER_EFFORT:-}"
FINAL_VERIFIER_EFFORT="${FINAL_VERIFIER_EFFORT:-}"

# Env-var path: derive the engine from the "model:level" format
# (the CLI path uses parse_model_flag instead).
_auto_detect_engine WORKER_MODEL WORKER_ENGINE WORKER_CODEX_MODEL WORKER_CODEX_REASONING WORKER_EFFORT
_auto_detect_engine VERIFIER_MODEL VERIFIER_ENGINE VERIFIER_CODEX_MODEL VERIFIER_CODEX_REASONING VERIFIER_EFFORT
_auto_detect_engine FINAL_VERIFIER_MODEL FINAL_VERIFIER_ENGINE "" "" FINAL_VERIFIER_EFFORT

# Codex defaults apply only where auto-detection (or the caller) left a gap.
: "${WORKER_CODEX_MODEL:=gpt-5.5}"
: "${WORKER_CODEX_REASONING:=high}"      # low|medium|high
: "${VERIFIER_CODEX_MODEL:=gpt-5.5}"
: "${VERIFIER_CODEX_REASONING:=high}"    # low|medium|high
CODEX_BIN=""                             # resolved by check_dependencies when engine=codex

# --- Verify Mode ---
: "${VERIFY_MODE:=per-us}"               # per-us|batch
# Consensus: off|all|final-only
# (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
: "${CONSENSUS_MODE:=off}"
: "${CONSENSUS_MODEL:=gpt-5.5:medium}"         # per-US cross-verifier (lighter)
: "${FINAL_CONSENSUS_MODEL:=gpt-5.5:high}"     # final cross-verifier (stricter)

# Legacy compat: the old flags override CONSENSUS_MODE. VERIFY_CONSENSUS wins
# over FINAL_CONSENSUS when both are set.
if [[ "${VERIFY_CONSENSUS:-0}" == "1" ]]; then
  CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
elif [[ "${FINAL_CONSENSUS:-0}" == "1" ]]; then
  CONSENSUS_MODE="final-only"
fi
CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-${CONSENSUS_MODE}}"

# Consecutive failures before BLOCKED (default: 6); the effective threshold
# is doubled whenever a consensus mode is active.
: "${CB_THRESHOLD:=6}"
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
if [[ "$CONSENSUS_MODE" != "off" ]]; then
  EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
fi

: "${_API_MAX_RETRIES:=5}"
: "${_API_RETRY_INTERVAL_S:=30}"
233
+
234
# --- Derived Paths ---
DESK="${ROOT}/${RLP_DESK_RUNTIME_DIR:-.rlp-desk}"

# v0.13.0 legacy guard: refuse to run while .claude/ralph-desk/ still exists.
# init mode auto-migrates; run mode does not, to protect in-flight campaigns.
if [[ -d "${ROOT}/.claude/ralph-desk" ]]; then
  print -u2 -l \
    "ERROR: Legacy .claude/ralph-desk/ detected at $ROOT/.claude/ralph-desk." \
    "Run mode does not auto-migrate to protect in-flight campaigns." \
    "Run: mv .claude/ralph-desk ${RLP_DESK_RUNTIME_DIR:-.rlp-desk} then re-run."
  exit 1
fi

# US-026 R14 P0: a lockfile keyed on a hash of the project root blocks a
# second runner on the same root while still allowing parallel runs in other
# projects. shasum is the macOS default, sha1sum the Linux one, and cksum the
# POSIX fallback of last resort. Only the first 8 characters are kept.
ROOT_HASH=$(
  printf '%s' "$ROOT" \
    | { shasum 2>/dev/null || sha1sum 2>/dev/null || cksum; } \
    | awk '{print substr($1,1,8)}'
)
RUNNER_LOCKFILE_PATH="${DESK}/logs/.rlp-desk-runner-${ROOT_HASH}.lock"
RUNNER_LOCKDIR="${RUNNER_LOCKFILE_PATH}.d"

# Top-level working directories and plan documents.
PROMPTS_DIR="${DESK}/prompts"
CONTEXT_DIR="${DESK}/context"
MEMOS_DIR="${DESK}/memos"
LOGS_DIR="${DESK}/logs/${SLUG}"
RUNTIME_DIR="${LOGS_DIR}/runtime"
PRD_FILE="${DESK}/plans/prd-${SLUG}.md"
TEST_SPEC_FILE="${DESK}/plans/test-spec-${SLUG}.md"

# --- Analytics Directory (v5.7 §4.11.b: project-local) ---
# Formerly $HOME/.claude/ralph-desk/analytics/<slug>--<hash> (cross-project
# rollup). Since v0.12.0 the canonical location is project-local; the
# cross-project rollup is the Leader's job via
# ~/.claude/ralph-desk/registry.jsonl (Worker/Verifier prompts never reference
# the registry path — see §4.11.c).
ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
ANALYTICS_DIR="${DESK}/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
CAMPAIGN_JSONL="${ANALYTICS_DIR}/campaign.jsonl"
METADATA_FILE="${ANALYTICS_DIR}/metadata.json"

# Per-campaign prompt, context and memo files.
WORKER_PROMPT_BASE="${PROMPTS_DIR}/${SLUG}.worker.prompt.md"
VERIFIER_PROMPT_BASE="${PROMPTS_DIR}/${SLUG}.verifier.prompt.md"
CONTEXT_FILE="${CONTEXT_DIR}/${SLUG}-latest.md"
MEMORY_FILE="${MEMOS_DIR}/${SLUG}-memory.md"
SIGNAL_FILE="${MEMOS_DIR}/${SLUG}-iter-signal.json"
DONE_CLAIM_FILE="${MEMOS_DIR}/${SLUG}-done-claim.json"
VERDICT_FILE="${MEMOS_DIR}/${SLUG}-verify-verdict.json"

# v0.14.2 Bug Report #4: codex sometimes writes the verdict to the
# pre-v0.13.0 legacy path even though the prompt says otherwise (CWD
# heuristics inside the codex CLI). The legacy path is tracked so the
# no-progress watcher and the harvest step can fall back to it before
# BLOCKing the campaign. Auto-migration lives in _migrate_legacy_verdict().
LEGACY_VERDICT_FILE="${ROOT}/.claude/ralph-desk/memos/${SLUG}-verify-verdict.json"

# Sentinels, per-campaign lock and runtime state files.
COMPLETE_SENTINEL="${MEMOS_DIR}/${SLUG}-complete.md"
BLOCKED_SENTINEL="${MEMOS_DIR}/${SLUG}-blocked.md"
LOCKFILE_PATH="${DESK}/logs/.rlp-desk-${SLUG}.lock"
STATUS_FILE="${RUNTIME_DIR}/status.json"
SESSION_CONFIG="${RUNTIME_DIR}/session-config.json"
WORKER_HEARTBEAT="${RUNTIME_DIR}/worker-heartbeat.json"
VERIFIER_HEARTBEAT="${RUNTIME_DIR}/verifier-heartbeat.json"
COST_LOG="${LOGS_DIR}/cost-log.jsonl"
287
+
288
# --- Session Naming ---
# One tmux session per run; the timestamp keeps names unique across runs.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"

# --- State Tracking ---
typeset -A LAST_PANE_CONTENT
typeset -A PANE_IDLE_SINCE
typeset -A WORKER_RESTARTS
typeset -A US_FAIL_HISTORY
STALE_CONTEXT_COUNT=0
HEARTBEAT_STALE_COUNT=0
MONITOR_FAILURE_COUNT=0
CONSECUTIVE_FAILURES=0
PREV_CONTEXT_HASH=""
PREV_PRD_HASH=""
PREV_PRD_US_LIST=""
_PRD_CHANGED=0
ITERATION=0
START_TIME=$(date +%s)
BASELINE_COMMIT=""                  # git HEAD at campaign start (captured before loop)
CAMPAIGN_REPORT_GENERATED=0         # guard against double-generation in cleanup trap
SV_REPORT_GENERATED=0               # guard against double-generation in generate_sv_report
VERIFIED_US=""                      # comma-separated list of verified US IDs (per-us mode)
CONSENSUS_ROUND=0                   # current consensus round for current US
US_LIST=""                          # comma-separated US IDs from PRD (per-us mode)
LOCKFILE_ACQUIRED=0
LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}"  # 0|1 — set by --lock-worker-model; disables progressive upgrade
_SAME_US_FAIL_COUNT=0               # consecutive same-US fail counter (upgrade trigger at >= 2)
_LAST_FAILED_US=""                  # last failed US ID (same-US tracking for upgrade logic)
_MODEL_UPGRADED=0                   # 1 if Worker model was auto-upgraded during campaign
_ORIGINAL_WORKER_MODEL=""           # WORKER_MODEL saved before first upgrade (for restore on pass)
_ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade

# =============================================================================
# Utility Functions
# =============================================================================

DEBUG="${DEBUG:-0}"
DEBUG_LOG="$ANALYTICS_DIR/debug.log"

# Source shared business logic from the directory this script lives in.
# This script runs without `set -e`, so a missing library would otherwise be
# skipped and every later helper call would fail as "command not found"
# in the middle of a campaign. Stop immediately instead.
LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
if [[ ! -r "$LIB_DIR/lib_ralph_desk.zsh" ]]; then
  echo "ERROR: cannot read shared library $LIB_DIR/lib_ralph_desk.zsh" >&2
  exit 1
fi
source "$LIB_DIR/lib_ralph_desk.zsh"

# A16: Warn if running in foreground (may conflict with Claude Code pane)
if [[ -z "${RLP_BACKGROUND:-}" ]]; then
  echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
  echo " Recommended: launch via Bash tool with run_in_background: true" >&2
  echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
fi
338
+
339
# check_dead_pane() — decide whether a pane's foreground command means the
# agent process has exited.
# Engine-aware: a bare bash is normal for codex (its trigger runs in bash)
# but means the agent is gone for claude.
# Args:    $1=pane_current_command
#          $2=engine (claude|codex, default: claude)
#          $3=role (worker|verifier) — accepted for call-site symmetry; the
#             decision does not depend on it
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local pane_cmd="$1"
  local engine="${2:-claude}"

  case "$pane_cmd" in
    '' | zsh)
      # Nothing reported, or only the bare shell is left.
      return 0
      ;;
    bash)
      # bash counts as dead unless the engine is codex.
      [[ "$engine" == "codex" ]] || return 0
      ;;
  esac
  return 1
}
358
+
359
+ # launch_worker_codex() — launch codex Worker TUI, send instruction, verify submission
360
+ # Matches launch_worker_claude() pattern for consistent tmux-visible execution.
361
+ # Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
362
+ # Returns: 0 on success, 1 on fatal failure
363
+ launch_worker_codex() {
364
+ local pane_id="$1"
365
+ local prompt_file="$2"
366
+ local iter="$3"
367
+ local worker_launch="$4"
368
+
369
+ log " Launching Worker codex TUI in pane $pane_id..."
370
+ # Clean pane before launch: kill any lingering process, ensure fresh shell
371
+ local _pre_cmd
372
+ _pre_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null || echo "")
373
+ if [[ "$_pre_cmd" != "zsh" && "$_pre_cmd" != "bash" && -n "$_pre_cmd" ]]; then
374
+ log_debug "Worker pane has lingering process ($_pre_cmd), cleaning..."
375
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
376
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 1
377
+ fi
378
+ paste_to_pane "$pane_id" "$worker_launch"
379
+ tmux send-keys -t "$pane_id" C-m
380
+
381
+ # Wait for codex TUI prompt (›) instead of shell prompt
382
+ local _codex_ready=0
383
+ local _codex_wait=0
384
+ while (( _codex_wait < 30 )); do
385
+ sleep 1
386
+ local _pane_text
387
+ _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
388
+ if echo "$_pane_text" | grep -q '›' 2>/dev/null; then
389
+ _codex_ready=1
390
+ log_debug "Worker codex TUI ready after ${_codex_wait}s"
391
+ break
392
+ fi
393
+ (( _codex_wait++ ))
394
+ done
395
+ if (( ! _codex_ready )); then
396
+ log_error "Worker codex TUI not ready after 30s"
397
+ return 1
398
+ fi
399
+
400
+ # Send instruction to codex TUI
401
+ sleep 1
402
+ local worker_instruction="Read and execute the instructions in $prompt_file"
403
+ paste_to_pane "$pane_id" "$worker_instruction"
404
+ tmux send-keys -t "$pane_id" C-m
405
+ log_debug "Worker codex instruction sent (${#worker_instruction} chars)"
406
+
407
+ # Submit loop — verify codex started working
408
+ local submit_attempts=0
409
+ while (( submit_attempts < 15 )); do
410
+ sleep 2
411
+ local pane_check
412
+ pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
413
+ if echo "$pane_check" | grep -qi "working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing" 2>/dev/null; then
414
+ log_debug "Worker codex started working after $((submit_attempts + 1)) checks"
415
+ break
416
+ fi
417
+ if (( submit_attempts == 8 )); then
418
+ log_debug "Adaptive instruction retry: clearing line and re-typing"
419
+ tmux send-keys -t "$pane_id" C-u 2>/dev/null
420
+ sleep 0.1
421
+ paste_to_pane "$pane_id" "$worker_instruction"
422
+ tmux send-keys -t "$pane_id" C-m
423
+ fi
424
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
425
+ sleep 0.3
426
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
427
+ (( submit_attempts++ ))
428
+ done
429
+ return 0
430
+ }
431
+
432
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
# Handles: TUI startup, wait_for_pane_ready, instruction send, a 15-pass
# submit loop (re-sending the instruction every 3rd pass), and a one-time
# restart of claude when the instruction was never picked up.
# Args:    $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 on success, 1 if the claude TUI did not become ready on the
#          first launch (caller writes BLOCKED)
# NOTE(review): a failed restart recovery is only logged and still returns 0,
# as before — confirm whether callers expect 1 in that case.
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  local worker_instruction="Read and execute the instructions in $prompt_file"
  # Declared once, outside the loop: under zsh (TYPESET_SILENT unset)
  # re-running `local name` for an existing local prints "name=value", which
  # would dump the captured pane text to stdout on every later pass.
  local pane_text=""
  local attempt=0

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  while (( attempt < 15 )); do
    sleep 2
    pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo | grep`: zsh's echo would interpret
    # backslash escapes in the pane text, and there is no pipeline for
    # `pipefail` to trip over when grep -q exits early.
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob" <<< "$pane_text" 2>/dev/null; then
      log_debug "Worker started working after $((attempt + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((attempt + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( attempt > 0 && attempt % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $attempt)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    # Nudge with Enter twice in case the paste is still sitting in the input.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    attempt=$(( attempt + 1 ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( attempt >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
    sleep 2
    # Result of this first readiness wait is intentionally ignored.
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" C-m
    if wait_for_pane_ready "$pane_id" 30; then
      sleep 3
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
      log " Worker restarted and instruction re-sent"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
    else
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
    fi
  fi

  return 0
}
512
+
513
+ # launch_verifier_codex() — launch codex Verifier TUI, send instruction, verify submission
514
+ # Matches launch_verifier_claude() pattern for consistent tmux-visible execution.
515
+ # Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
516
+ # Returns: 0 on success
517
+ launch_verifier_codex() {
518
+ local pane_id="$1"
519
+ local prompt_file="$2"
520
+ local iter="$3"
521
+ local verifier_launch="$4"
522
+
523
+ log " Launching Verifier codex TUI in pane $pane_id..."
524
+ # Clean pane before launch: kill any lingering process, ensure fresh shell
525
+ local _pre_cmd
526
+ _pre_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null || echo "")
527
+ if [[ "$_pre_cmd" != "zsh" && "$_pre_cmd" != "bash" && -n "$_pre_cmd" ]]; then
528
+ log_debug "Verifier pane has lingering process ($_pre_cmd), cleaning..."
529
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
530
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 1
531
+ fi
532
+ paste_to_pane "$pane_id" "$verifier_launch"
533
+ tmux send-keys -t "$pane_id" C-m
534
+
535
+ # Wait for codex TUI prompt (›) instead of shell prompt
536
+ local _codex_ready=0
537
+ local _codex_wait=0
538
+ while (( _codex_wait < 30 )); do
539
+ sleep 1
540
+ local _pane_text
541
+ _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
542
+ if echo "$_pane_text" | grep -q '›' 2>/dev/null; then
543
+ _codex_ready=1
544
+ log_debug "Verifier codex TUI ready after ${_codex_wait}s"
545
+ break
546
+ fi
547
+ (( _codex_wait++ ))
548
+ done
549
+ if (( ! _codex_ready )); then
550
+ log_error "Verifier codex TUI not ready after 30s"
551
+ return 1
552
+ fi
553
+
554
+ sleep 1
555
+ local verifier_instruction="Read and execute the instructions in $prompt_file"
556
+ paste_to_pane "$pane_id" "$verifier_instruction"
557
+ tmux send-keys -t "$pane_id" C-m
558
+ log_debug "Verifier codex instruction sent"
559
+
560
+ # Submit loop — verify codex started working
561
+ local submit_attempts=0
562
+ while (( submit_attempts < 15 )); do
563
+ sleep 2
564
+ local vs_check
565
+ vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
566
+ if echo "$vs_check" | grep -qi "working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing" 2>/dev/null; then
567
+ log_debug "Verifier codex started working after $((submit_attempts + 1)) checks"
568
+ break
569
+ fi
570
+ if (( submit_attempts == 8 )); then
571
+ log_debug "Adaptive instruction retry: clearing line and re-typing"
572
+ tmux send-keys -t "$pane_id" C-u 2>/dev/null
573
+ sleep 0.1
574
+ paste_to_pane "$pane_id" "$verifier_instruction"
575
+ tmux send-keys -t "$pane_id" C-m
576
+ fi
577
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
578
+ sleep 0.3
579
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
580
+ (( submit_attempts++ ))
581
+ done
582
+ return 0
583
+ }
584
+
585
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
# Pastes the launch command, waits for the TUI via wait_for_pane_ready, sends
# the instruction, then polls up to 15 times for signs that claude started
# working (with one clear-and-retype retry on the 9th pass).
# Args:    $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction was sent (even if the start of work could
#          not be confirmed), 1 if the claude TUI did not become ready
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  # Declared once, outside the loop: under zsh (TYPESET_SILENT unset)
  # re-running `local name` for an existing local prints "name=value", which
  # would dump the captured pane text to stdout on every later pass.
  local pane_text=""
  local attempt=0

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working
  while (( attempt < 15 )); do
    sleep 2
    pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo | grep`: zsh's echo would interpret
    # backslash escapes in the pane text, and there is no pipeline for
    # `pipefail` to trip over when grep -q exits early.
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" <<< "$pane_text" 2>/dev/null; then
      log_debug "Verifier started working after $((attempt + 1)) checks"
      break
    fi
    if (( attempt == 8 )); then
      # One-off recovery: wipe the input line and type the instruction again.
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge with Enter twice in case the paste is still sitting in the input.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    attempt=$(( attempt + 1 ))
  done
  if (( attempt >= 15 )); then
    log_debug "Verifier submission not confirmed after 15 checks"
  fi
  return 0
}
633
+
634
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
# On exit a "verify" iter-signal is synthesized so the loop can proceed:
#   - with a done-claim file: us_id comes from the claim ("unknown" if the
#     claim has none or cannot be parsed);
#   - without one: us_id comes from the previous iter-signal (default
#     US-001), overridden by the first "Next ... US-<n>" line in the memory
#     file when there is one.
# The signal is written to a temp file and renamed into place so a reader
# never sees a half-written file, and us_id is JSON-escaped first.
# Args:    $1=iteration $2=signal_file
# Globals: DONE_CLAIM_FILE, DESK, SLUG (read)
# Returns: 0 (signal generated), 1 (signal file could not be written)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"
  local us_id="" mem_us="" summary="" audit_reason=""

  # The iteration is emitted as a bare JSON number; anything else becomes 0.
  case "$iter" in
    ''|*[!0-9]*) iter=0 ;;
  esac

  log " Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # jq prints nothing when the claim is empty or malformed.
    [[ -n "$us_id" ]] || us_id="unknown"
    log " Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
    summary="auto-generated after codex exit"
    audit_reason="codex_exit_with_done_claim"
  else
    log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
    [[ -n "$us_id" ]] || us_id="US-001"
    # The memory file's "Next ... US-<n>" hint, when present, wins.
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    if [[ -n "$mem_us" ]]; then
      us_id="$mem_us"
    fi
    summary="auto-generated after codex exit (no done-claim)"
    audit_reason="codex_exit_no_done_claim"
  fi

  # Escape for embedding in a JSON string: control characters become spaces,
  # then backslashes and double quotes are escaped.
  local us_json
  us_json=$(printf '%s' "$us_id" \
    | LC_ALL=C tr '\001-\037' ' ' \
    | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')

  local tmp_file="${signal_file}.tmp.$$"
  if ! { printf '{"iteration":%s,"status":"verify","us_id":"%s","summary":"%s","timestamp":"%s"}\n' \
      "$iter" "$us_json" "$summary" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$tmp_file" \
      && mv -f "$tmp_file" "$signal_file"; } 2>/dev/null; then
    rm -f "$tmp_file" 2>/dev/null
    log_error "Failed to write auto-generated signal file $signal_file"
    return 1
  fi

  _emit_a4_fallback_audit "$us_id" "$iter" "$audit_reason"
  return 0
}
661
+
662
# handle_worker_exit_claude() — react to a claude worker that exited without
# leaving a signal file: log the event and hand the pane to restart_worker
# (the original notes that restart applies backoff).
# Args:    $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 (restarted), 1 (restart_worker reported failure, e.g. max
#          restarts exceeded)
handle_worker_exit_claude() {
  local pane_id="$1" iter="$2" trigger_file="$3"

  log_error "Worker exited without writing signal file"
  # Collapse whatever restart_worker returns into a plain 0/1.
  restart_worker "$pane_id" "$iter" "$trigger_file" || return 1
  return 0
}
677
+
678
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
# replace_worker_pane() — kill a pane and open a fresh one in the same layout slot.
# Args:    $1=old pane id  $2=role ("worker" or "verifier")
# Globals: reads WORKER_PANE, VERIFIER_PANE, LEADER_PANE, ROOT, SESSION_CONFIG, ITERATION
# Outputs: the new pane id on stdout (nothing when creation failed)
# Returns: 0 on success, 1 when tmux could not create the replacement pane
replace_worker_pane() {
  local old_pane="$1"
  local role="$2" # "worker" or "verifier"

  log " Replacing dead $role pane $old_pane..."
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Pick the split that keeps the original layout:
  # worker (top-right) / verifier (bottom-right).
  local -a split_args
  if [[ "$role" == "verifier" ]]; then
    if tmux display-message -t "$WORKER_PANE" -p '#{pane_id}' &>/dev/null; then
      # Verifier goes below worker: split vertically from worker pane
      split_args=(-v -d -t "$WORKER_PANE")
    else
      # Fallback: worker pane also dead, split horizontally from leader
      split_args=(-h -d -t "$LEADER_PANE")
    fi
  else
    if tmux display-message -t "$VERIFIER_PANE" -p '#{pane_id}' &>/dev/null; then
      # Worker goes above verifier: split vertically before verifier pane
      split_args=(-v -b -d -t "$VERIFIER_PANE")
    else
      # Fallback: verifier pane also dead, split horizontally from leader
      split_args=(-h -d -t "$LEADER_PANE")
    fi
  fi

  local new_pane
  new_pane=$(tmux split-window "${split_args[@]}" -P -F '#{pane_id}' -c "$ROOT")
  if [[ -z "$new_pane" ]]; then
    # Do not record an empty pane id in session-config or hand it to the caller
    # as if it were usable.
    log_error "Failed to create replacement $role pane (old pane: $old_pane)"
    return 1
  fi

  log " New $role pane: $new_pane (replaced $old_pane)"
  log_debug "[FLOW] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"

  # Update session-config.json with new pane ID. Render first, write second, so
  # a jq failure can never feed empty input to atomic_write.
  if [[ -f "$SESSION_CONFIG" ]]; then
    local updated_config
    if updated_config=$(jq --arg role "$role" --arg pane "$new_pane" \
        '.panes[$role] = $pane' "$SESSION_CONFIG") && [[ -n "$updated_config" ]]; then
      printf '%s\n' "$updated_config" | atomic_write "$SESSION_CONFIG"
      log_debug "Updated session-config.json: $role pane → $new_pane"
    else
      log_error "Could not update $SESSION_CONFIG with new $role pane $new_pane; file left untouched"
    fi
  fi

  echo "$new_pane"
}
718
+
719
+ # =============================================================================
720
+ # Dependency Checks
721
+ # =============================================================================
722
+
723
# --- governance.md s7 step 1: Validate prerequisites before starting ---
# check_dependencies() — verify the external CLIs this run needs are on PATH.
# Globals: reads WORKER_ENGINE, VERIFIER_ENGINE, CONSENSUS_MODE;
#          sets CLAUDE_BIN and/or CODEX_BIN for the engines in use.
# Exits 1 (after logging every missing tool) when any prerequisite is absent.
check_dependencies() {
  local needs_claude=0 needs_codex=0 missing=0

  # claude is needed unless BOTH roles run on codex; codex-only campaigns can
  # run without claude — generate_sv_report degrades gracefully.
  if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
    needs_claude=1
  fi
  # codex is needed when either role uses it or consensus verification is on.
  if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
    needs_codex=1
  fi

  command -v tmux >/dev/null 2>&1 || {
    log_error "tmux is required but not found. Install with: brew install tmux"
    missing=1
  }

  if (( needs_claude )) && ! command -v claude >/dev/null 2>&1; then
    log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
    missing=1
  fi

  command -v jq >/dev/null 2>&1 || {
    log_error "jq is required but not found. Install with: brew install jq"
    missing=1
  }

  if (( needs_codex )) && ! command -v codex >/dev/null 2>&1; then
    log_error "codex CLI not found. Install: npm install -g @openai/codex"
    missing=1
  fi

  # Report everything that is missing before giving up.
  if (( missing )); then
    exit 1
  fi

  # Record what `command -v` resolved for each engine binary in use.
  if (( needs_claude )); then
    CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
    log " Claude binary: $CLAUDE_BIN"
  fi

  if (( needs_codex )); then
    CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
    log " Codex binary: $CODEX_BIN"
  fi
}
770
+
771
+ # =============================================================================
772
+ # Session Management (tmux pattern: pane IDs)
773
+ # =============================================================================
774
+
775
# --- governance.md s7 step 1: Check for existing sessions ---
# check_existing_sessions() — refuse to start when another tmux session for the
# same slug (name prefix "rlp-desk-<slug>-") already exists.
# Globals: reads SLUG.
# The session this script is running inside (if any) is not counted.
# Exits 1 after listing the conflicting sessions; returns 0 when there are none.
check_existing_sessions() {
  local current_session
  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")

  # Compare names literally (quoted operands): the slug and the session name
  # used to be pasted into grep regexes, so "." or "[" in them matched — or
  # hid — unrelated sessions.
  local prefix="rlp-desk-${SLUG}-"
  local name
  local -a existing
  existing=()
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    [[ "$name" == "$prefix"* ]] || continue
    [[ "$name" == "$current_session" ]] && continue
    existing+=("$name")
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null)

  if (( ${#existing[@]} > 0 )); then
    log_error "Existing tmux session(s) found for slug '$SLUG':"
    for name in "${existing[@]}"; do
      echo " - $name"
    done
    echo ""
    echo "Kill existing session first:"
    echo " tmux kill-session -t <session-name>"
    exit 1
  fi
}
792
+
793
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
# create_session() — build the leader/worker/verifier pane layout, label it,
# and persist the run's configuration.
# Globals set: LEADER_PANE, WORKER_PANE, VERIFIER_PANE, BASELINE_COMMIT, and
#              SESSION_NAME (overwritten when running inside tmux or when a
#              background run has to rename on collision).
# Side effects: truncates $COST_LOG; writes $SESSION_CONFIG via atomic_write.
# Exits 1 when the tmux session cannot be created.
create_session() {
  log "Creating tmux session: $SESSION_NAME"

  # tmux split-pane pattern
  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: split CURRENT pane in place
    # Current pane stays as-is (leader/user stays here)
    # Worker/Verifier appear on the RIGHT, user sees them immediately
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"

    # -h off current pane → right column (worker)
    WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    # -v off worker → stacked below on right (verifier)
    VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  else
    # Outside tmux: create a new detached session for this run.
    # US-025 R13 P0: verify tmux new-session exit code; if collision + RLP_BACKGROUND,
    # disambiguate with -bg-<epoch>-<pid> suffix and a residual has-session loop.
    if ! tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT" 2>/dev/null; then
      if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
        if [[ "${RLP_BACKGROUND:-0}" == "1" ]]; then
          SESSION_NAME="${SESSION_NAME}-bg-$(date +%s)-$$"
          while tmux has-session -t "$SESSION_NAME" 2>/dev/null; do
            SESSION_NAME="${SESSION_NAME}-$(awk 'BEGIN{srand();print int(1000+rand()*9000)}')"
          done
          tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT" || {
            log_error "tmux new-session retry failed for $SESSION_NAME"
            exit 1
          }
        else
          log_error "tmux new-session failed: session $SESSION_NAME already exists (set RLP_BACKGROUND=1 to auto-rename)"
          exit 1
        fi
      else
        log_error "tmux new-session failed and session does not exist: $SESSION_NAME"
        exit 1
      fi
    fi
    # destroy-unattached off keeps the session alive when no tmux client is attached.
    # Best-effort only: it does NOT survive manual `tmux kill-session` or tmux server restart.
    # If either happens, R12 (lifecycle monitor) detects it and writes infra_failure BLOCKED.
    if [[ "${RLP_BACKGROUND:-0}" == "1" ]]; then
      tmux set-option -t "$SESSION_NAME" destroy-unattached off 2>/dev/null
    fi
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
    WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  fi

  # Set pane titles and enable border labels for visual distinction
  local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
  local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
  [[ "$CONSENSUS_MODE" != "off" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
  tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
  tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
  tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
  # Color-coded pane borders: green=leader, blue=worker, yellow=verifier
  tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
  tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
  tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
  # Show pane titles in border
  tmux set-option pane-border-status top 2>/dev/null
  tmux set-option pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # US-024 R12 P0: lifecycle check site #1 — verify all panes/session alive after creation.
  _r12_check_lifecycle "create_session"

  # AC12: Capture baseline commit before writing session config
  BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")

  # Truncate cost-log for fresh run (previous data in versioned campaign reports).
  # The explicit `:` matters in zsh: a redirection with no command runs $NULLCMD
  # (cat by default), which would sit reading stdin instead of just truncating.
  : > "$COST_LOG"

  # v5.7 §4.2: WITH_SELF_VERIFICATION=1 is hard-rejected at script entry now,
  # so by the time we reach create_session() the flag is guaranteed to be 0.
  # The legacy "NOTE: Agent-mode only; disabling" log line was removed because
  # the deprecation banner at startup is more honest (we exit 2, we don't
  # silently disable).

  # Write session config (atomic write). printf '%s' emits the text verbatim;
  # zsh's echo would expand backslash sequences found in interpolated values.
  # NOTE(review): values are spliced in without JSON escaping — a quote or
  # backslash in ROOT or a model name would still yield invalid JSON.
  printf '%s\n' '{
  "session_name": "'"$SESSION_NAME"'",
  "slug": "'"$SLUG"'",
  "created_at": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",
  "baseline_commit": "'"$BASELINE_COMMIT"'",
  "panes": {
    "leader": "'"$LEADER_PANE"'",
    "worker": "'"$WORKER_PANE"'",
    "verifier": "'"$VERIFIER_PANE"'"
  },
  "pid": '$$',
  "root": "'"$ROOT"'",
  "models": {
    "worker": "'"$WORKER_MODEL"'",
    "verifier": "'"$VERIFIER_MODEL"'"
  },
  "engines": {
    "worker": "'"$WORKER_ENGINE"'",
    "verifier": "'"$VERIFIER_ENGINE"'",
    "worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
    "worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
    "verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
    "verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'"
  },
  "verification": {
    "verify_mode": "'"$VERIFY_MODE"'",
    "consensus_mode": "'"$CONSENSUS_MODE"'"
  },
  "config": {
    "max_iter": '"$MAX_ITER"',
    "poll_interval": '"$POLL_INTERVAL"',
    "iter_timeout": '"$ITER_TIMEOUT"',
    "heartbeat_stale_threshold": '"$HEARTBEAT_STALE_THRESHOLD"',
    "max_restarts": '"$MAX_RESTARTS"',
    "idle_nudge_threshold": '"$IDLE_NUDGE_THRESHOLD"',
    "max_nudges": '"$MAX_NUDGES"',
    "cb_threshold": '"$CB_THRESHOLD"',
    "effective_cb_threshold": '"$EFFECTIVE_CB_THRESHOLD"',
    "with_self_verification": '"$WITH_SELF_VERIFICATION"',
    "with_self_verification_requested": '"$WITH_SELF_VERIFICATION_REQUESTED"',
    "sv_skipped_reason": "'"$SV_SKIPPED_REASON"'",
    "lane_mode": "'"$LANE_MODE"'",
    "autonomous_mode": '"$AUTONOMOUS_MODE"'
  }
}' | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
930
+
931
+ # =============================================================================
932
+ # Copy-Mode Guard (tmux pattern)
933
+ # =============================================================================
934
+
935
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
# check_copy_mode() — is it safe to send keys to this pane?
# Args: $1=pane_id
# Returns: 0 when the pane is not in a tmux mode; 1 when it is (keys would be
#          captured by the mode) or when tmux cannot be queried for the pane.
check_copy_mode() {
  local pane_id="$1"
  local mode_flag

  if ! mode_flag=$(tmux display-message -p -t "$pane_id" '#{pane_in_mode}' 2>/dev/null); then
    return 1
  fi

  # pane_in_mode is 1 while the pane is in copy mode, cannot send keys
  [[ "$mode_flag" -eq 1 ]] && return 1
  return 0
}
945
+
946
+ # =============================================================================
947
+ # Verification-Based Send Retry (tmux pattern)
948
+ # =============================================================================
949
+
950
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
# paste_to_pane() — load text into the named tmux buffer "rlp-paste" and paste
# it into a pane (the buffer is deleted by paste-buffer -d).
# Args: $1=pane_id $2=text
# Returns: 1 when the buffer could not be loaded (nothing is pasted then),
#          otherwise the status of `tmux paste-buffer`.
paste_to_pane() {
  local pane_id="$1"
  local text="$2"

  # Feed the buffer over stdin ("-" as the path) instead of a fixed-name file
  # in /tmp, and use printf '%s' so the text arrives byte-for-byte: zsh's
  # builtin echo expands backslash escapes such as \n and \t.
  printf '%s' "$text" | tmux load-buffer -b rlp-paste - 2>/dev/null || return 1
  tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null
}
960
+
961
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
# safe_send_keys() — paste text into a pane and press Enter until the text no
# longer shows in the last lines of the pane (i.e. the program consumed it).
# Args: $1=pane_id $2=text
# Returns: 1 when the pane is (or becomes) in copy mode; 0 when the text was
#          consumed, and also 0 after the final fail-open nudge.
safe_send_keys() {
  local pane_id="$1"
  local text="$2"
  # Declared once, outside the loops: re-running a bare `local name` on a later
  # loop iteration makes zsh print the variable's current value (TYPESET_SILENT
  # is off by default).
  local initial_capture pane_tail
  local pane_busy=0 round=0 retry_round=0

  # --- Exact tmux sendToWorker pattern (tmux-session.js:527-626) ---

  # Guard: copy-mode captures keys; skip entirely
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
  if grep -q "esc to interrupt" <<<"$initial_capture" 2>/dev/null; then
    pane_busy=1
  fi
  # Check for trust prompt and auto-dismiss
  if grep -q "Do you trust" <<<"$initial_capture" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
  fi
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
  if grep -q "Do you want to" <<<"$initial_capture" 2>/dev/null; then
    log_debug " Permission prompt detected, auto-approving"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.3
  fi
  # Auto-dismiss codex update prompt (select Skip)
  if grep -qi "new version\|update.*codex\|codex.*update" <<<"$initial_capture" 2>/dev/null; then
    log_debug " Codex update prompt detected, selecting Skip"
    tmux send-keys -t "$pane_id" "2" C-m
    sleep 0.2
  fi

  # Send text via buffer paste (reliable for long strings)
  log_debug " Pasting text to pane $pane_id (${#text} chars)"
  paste_to_pane "$pane_id" "$text"

  # Allow input buffer to settle (tmux: 150ms)
  sleep 0.15

  # Submit: up to 6 rounds of C-m double-press
  while (( round < 6 )); do
    sleep 0.1
    if (( round == 0 && pane_busy )); then
      # Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
      tmux send-keys -t "$pane_id" C-m
    else
      tmux send-keys -t "$pane_id" C-m
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # Check if text was consumed. `--` keeps text that starts with "-" from
    # being parsed as grep options (which made grep fail and the text look
    # consumed).
    pane_tail=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<<"$pane_tail" 2>/dev/null; then
      log_debug " Text consumed after round $((round + 1))"
      return 0
    fi
    sleep 0.14
    (( round++ ))
  done

  # Safety gate: copy-mode check
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: C-u clear line, resend (tmux pattern)
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  paste_to_pane "$pane_id" "$text"
  sleep 0.12
  while (( retry_round < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    pane_tail=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<<"$pane_tail" 2>/dev/null; then
      log_debug " Text consumed after adaptive retry round $((retry_round + 1))"
      return 0
    fi
    (( retry_round++ ))
  done

  # Fail-open: one last nudge
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
1070
+
1071
+ # =============================================================================
1072
+ # Wait for Pane Ready (tmux pattern: paneLooksReady)
1073
+ # =============================================================================
1074
+
1075
# wait_for_pane_ready() — poll a pane until it shows an input prompt and no
# running task, dismissing known startup prompts along the way.
# Args: $1=pane_id  $2=timeout in seconds (default 10)
# Returns: always 0 — on timeout it proceeds anyway (fail-open) and lets
#          safe_send_keys handle the pane.
wait_for_pane_ready() {
  local pane_id="$1"
  local timeout="${2:-10}" # tmux default: 10s
  # Declared once, outside the loop: a bare `local captured` executed again on
  # a later iteration makes zsh print the variable (TYPESET_SILENT is off by
  # default), dumping the pane contents to stdout on every poll.
  local start captured line trimmed ready
  start=$(date +%s)
  log " Waiting for pane $pane_id ready..."

  while (( $(date +%s) - start < timeout )); do
    captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
    if grep -q "Do you trust" <<<"$captured" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.12
      tmux send-keys -t "$pane_id" C-m
      sleep 2
      continue
    fi

    # Auto-approve permission prompts ("Do you want to create/overwrite X?")
    if grep -q "Do you want to" <<<"$captured" 2>/dev/null; then
      log " Permission prompt detected, auto-approving..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
      continue
    fi

    # Auto-dismiss codex update prompt (select Skip = option 2)
    if grep -qi "new version\|update.*codex\|codex.*update" <<<"$captured" 2>/dev/null; then
      log " Codex update prompt detected, selecting Skip..."
      tmux send-keys -t "$pane_id" "2" C-m
      sleep 0.5
      continue
    fi

    # tmux paneLooksReady: check each line for prompt char at line start.
    # Reading from a here-string keeps the loop in the current shell, so the
    # flag survives without relying on how the shell runs pipeline stages.
    ready=0
    while IFS= read -r line; do
      trimmed="${line## }"
      if [[ "$trimmed" == ❯* || "$trimmed" == \>* || "$trimmed" == ›* || "$trimmed" == »* ]]; then
        ready=1
        break
      fi
    done <<<"$captured"

    # Also check via grep as fallback (POSIX [[:space:]] instead of \s)
    if (( ! ready )) && tail -5 <<<"$captured" | grep -qE '^[[:space:]]*[❯›]' 2>/dev/null; then
      ready=1
    fi
    if (( ! ready )) && tail -3 <<<"$captured" | grep -qE '^[[:space:]]*[❯›>]' 2>/dev/null; then
      ready=1
    fi

    # Ready only when a prompt is visible and no active task is running
    if (( ready )) && ! grep -q "esc to interrupt" <<<"$captured" 2>/dev/null; then
      log " Pane $pane_id is ready."
      return 0
    fi
    sleep 0.25
  done

  # Timeout — return success anyway (fail-open, let safe_send_keys handle it)
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
1138
+
1139
+ # =============================================================================
1140
+ # Heartbeat Monitoring (tmux pattern)
1141
+ # =============================================================================
1142
+
1143
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
# check_heartbeat() — is the heartbeat file fresh?
# Args: $1=heartbeat JSON file; its `.epoch` field is read as Unix seconds
# Globals: reads HEARTBEAT_STALE_THRESHOLD (seconds)
# Returns: 0 when now - epoch < threshold; 1 when stale, or when the file is
#          missing, unreadable, or carries no usable epoch.
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"
  local hb_epoch now_epoch

  [[ -f "$hb_file" ]] || return 1

  # Read epoch seconds directly (avoids timezone parsing bugs)
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1

  # Accept a fractional epoch by dropping the fraction, then insist on plain
  # digits: the value comes from a file another process wrote, and arbitrary
  # text must never reach arithmetic evaluation.
  hb_epoch="${hb_epoch%%.*}"
  [[ "$hb_epoch" =~ ^[0-9]+$ ]] || return 1

  now_epoch=$(date +%s)
  (( now_epoch - hb_epoch < threshold ))
}
1163
+
1164
# Check if heartbeat indicates process has exited
# Args: $1=heartbeat JSON file
# Returns: 0 only when the file exists and its `.status` field is "exited";
#          1 otherwise (missing file, unreadable JSON, any other status).
check_heartbeat_exited() {
  local hb_file="$1"
  local reported_state

  [[ -f "$hb_file" ]] || return 1

  reported_state=$(jq -r '.status // empty' "$hb_file" 2>/dev/null)
  case "$reported_state" in
    exited) return 0 ;;
    *) return 1 ;;
  esac
}
1174
+
1175
+ # =============================================================================
1176
+ # Idle Pane Nudging (tmux pattern)
1177
+ # =============================================================================
1178
+
1179
# --- v5.7 §4.13.a: Mid-execution permission-prompt auto-dismiss (Bug 4 fix) ---
# claude CLI v2.1.114+ surfaces TUI-layer prompts ("Do you want to create...")
# even with --dangerously-skip-permissions on certain Write paths. Without this
# helper, Workers/Verifiers hang until IDLE_NUDGE_THRESHOLD timeout.
#
# Window-bounded match (codex Critic v5.7): require both a prompt phrase AND a
# TUI affordance marker on the SAME, PREVIOUS, or NEXT line. Whole-capture dual
# grep would let unrelated text trigger Enter (R-V5-9 false-positive).
# Per-pane 3-second debounce prevents rapid double-Enter.

# zsh/datetime supplies $EPOCHSECONDS; when the module cannot be loaded the
# helper below falls back to spawning `date`.
zmodload zsh/datetime 2>/dev/null || true
# _now_s — print the current Unix time in whole seconds.
_now_s() { print -- "${EPOCHSECONDS:-$(date +%s)}"; }

# pane id → epoch of the last auto-Enter sent to that pane (debounce state).
typeset -gA LAST_AUTO_APPROVE_TS
# v5.7 §4.16: track when each pane FIRST entered a prompt-stuck state.
# Cleared on first capture without prompt visible. Used for bounded
# prompt-stall escalation (BLOCKED `prompt_stall`) so alive-but-stuck
# Workers can't infinite-wait (codex Critic HIGH finding).
typeset -gA PANE_PROMPT_STUCK_SINCE
# pane id → number of auto-Enter presses sent while a prompt stayed visible;
# cleared together with PANE_PROMPT_STUCK_SINCE.
typeset -gA PANE_DISMISS_FAILED_COUNT
PROMPT_STALL_TIMEOUT="${PROMPT_STALL_TIMEOUT:-300}" # 5 min default
PROMPT_DISMISS_FAIL_LIMIT="${PROMPT_DISMISS_FAIL_LIMIT:-20}" # ~100s of fruitless dismiss attempts

# v5.7 §4.17: generic no-progress timeout (codex Critic HIGH — closes the gap
# where an undetected prompt or alive-but-frozen Worker bypasses Layer 4).
# Independent of prompt detection: if pane content stops changing for this many
# seconds AND signal file still missing, write BLOCKED `infra_failure` reason
# `worker_no_progress` so silent infinite-wait is impossible.
PROGRESS_NO_CHANGE_TIMEOUT="${PROGRESS_NO_CHANGE_TIMEOUT:-600}" # 10 min default
typeset -gA PANE_LAST_CHANGE_TS # epoch when content last changed
typeset -gA PANE_LAST_CONTENT_FOR_PROGRESS # captured content for diff

# v0.14.1: codex post-work idle UI grace. When a verifier pane shows codex's
# "Worked for Xm Ys" idle line at byte-stasis time, grant one extra
# CODEX_IDLE_GRACE_S (default 120s) before BLOCK. Per-pane bookkeeping to
# avoid granting it repeatedly. Bug Report #3 (BOS 2026-05-04).
CODEX_IDLE_GRACE_S="${CODEX_IDLE_GRACE_S:-120}"
typeset -gA PANE_CODEX_IDLE_GRACED
# v0.14.2: per-verifier-pane trace flag — log the verdict-lookup outcome
# exactly once per byte-stasis transition. Bug Report #4 (BOS 2026-05-05).
typeset -gA PANE_VERIFIER_TRACE_LOGGED

# v5.7 §4.17: default-No prompt detection. Pressing Enter on these means
# CANCEL/REJECT, not approve — so we BLOCK with traceability instead of
# silently auto-dismissing the wrong way.
typeset -g _DEFAULT_NO_RE='\[y/N\]|\(yes/no, default no\)|default[: ]+no|^[[:space:]]*N\)'

# v5.7 §4.16: broadened prompt detection (codex Critic MEDIUM).
# v5.7 §4.20 (E2E real-claude-CLI finding): claude v2.1.114+ uses new trust
# prompt format ("Quick safety check: Is this a project you ... trust?")
# and a numbered picker with `❯` cursor adjacent to the digit ("❯1.Yes").
# Old patterns ("Do you trust") missed it entirely → Worker hung 5min until
# iter-timeout. Adds: Quick safety check|trust this (folder|directory) for
# PROMPT_RE; ❯\s*\d+\. (zero-or-more space) and `Enter to confirm` / `1\.
# (Yes|No)` for AFFORDANCE_RE.
# NOTE(review): the note above mentions `1\. (Yes|No)`, but the pattern below
# contains `1\) (Yes|No)` — confirm which form is intended.
typeset -g _PROMPT_RE='Do you (want to|trust)|Confirm execution|Are you sure|Continue\?|Proceed\?|Allow this|Approve this|Press y to|Choose an option|Select \[|Quick safety check|trust this (folder|directory)|Is this a project you'
typeset -g _AFFORDANCE_RE='\(y/n\)|\[Y/n\]|\[y/N\]|\(yes/no|❯[[:space:]]*[0-9]+\.|(^|[[:space:]])1\) (Yes|No)|(^|[[:space:]])[YyNn]\)|press (y|enter) to|Enter to confirm'

# v5.7 §4.18 (E2E real-tmux + omc benchmarking): "active task" markers used
# to distinguish a Worker that is busy producing output (and may legitimately
# print "(y/n)" inside its body text) from a Worker that is *idle at an
# unrecognized prompt*. Mirrors omc-team's `paneHasActiveTask` heuristic
# (src/team/tmux-session.ts:659). When ANY of these markers is in the recent
# pane tail, the Worker is alive — auto_dismiss must NOT fast-fail on a
# suspected-unknown prompt because the affordance text is just transcript.
typeset -g _ACTIVE_TASK_RE='esc to interrupt|background terminal running|^[[:space:]]*[·✻][[:space:]]+[A-Za-z]+(\.{3}|…)'
1244
+
1245
# auto_dismiss_prompts() — inspect the last 50 lines of a pane and react to
# interactive prompts that would otherwise stall the pane.
# Args: $1=pane_id
# Behaviour, in order:
#   1. claude trust prompts (v2.x and legacy wording)  → press Enter
#   2. any default-No marker anywhere in the capture   → write BLOCKED sentinel
#   3. recognised prompt phrase + affordance in tail   → press Enter
#   4. affordance in the tail without a known phrase and no active task
#                                                      → write BLOCKED sentinel
#   5. nothing prompt-like                             → clear stall tracking
# Enter presses are debounced to one per 3 seconds per pane.
# Globals: reads _PROMPT_RE, _AFFORDANCE_RE, _DEFAULT_NO_RE, _ACTIVE_TASK_RE,
#          CURRENT_US; updates LAST_AUTO_APPROVE_TS, PANE_PROMPT_STUCK_SINCE,
#          PANE_DISMISS_FAILED_COUNT.
# Returns: always 0.
auto_dismiss_prompts() {
  local pane_id="$1"
  local now
  now=$(_now_s)
  local last=${LAST_AUTO_APPROVE_TS[$pane_id]:-0}
  # Loop variables are declared local up front; `line` was previously never
  # declared and leaked into the global scope.
  local i k line

  local capture
  # v5.7 §4.21 (E2E real-claude-CLI finding): claude v2.x trust prompt wraps
  # to ~30 lines on narrow panes. -S -10 missed the question header. -50
  # covers the full prompt.
  capture=$(tmux capture-pane -t "$pane_id" -p -S -50 2>/dev/null) || return 0

  # v5.7 §4.21: the claude v2.x trust prompt is multi-line and wraps narrowly,
  # so per-line PROMPT_RE+AFFORDANCE adjacency misses it. Special-case the
  # signature ("Quick safety check ... Enter to confirm" with `❯N.Yes` cursor
  # on option 1). This is default-Yes — Enter approves trust.
  # §4.21.b: narrow-pane wrap breaks the question phrase across lines, so
  # normalize all whitespace to single spaces before substring matching.
  local _norm_capture="${capture//[$'\n\r\t']/ }"
  # Collapse runs of spaces: replace two spaces with one until none remain.
  while [[ "$_norm_capture" == *"  "* ]]; do _norm_capture="${_norm_capture//  / }"; done
  if { [[ "$_norm_capture" == *"Quick safety check"* ]] || [[ "$_norm_capture" == *"trust this folder"* ]] || [[ "$_norm_capture" == *"trust this directory"* ]]; } \
    && [[ "$_norm_capture" == *"Enter to confirm"* ]] \
    && [[ "$_norm_capture" =~ '❯ ?[0-9]+\. ?Yes' ]]; then
    if (( now - last >= 3 )); then
      log " Claude v2.x trust prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] claude_trust_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
    fi
    return 0
  fi
  # Older claude trust prompt format (omc-team parity).
  if [[ "$_norm_capture" == *"Do you trust the contents of this directory"* ]] \
    && { [[ "$_norm_capture" =~ 'Yes,[[:space:]]*continue' ]] || [[ "$_norm_capture" == *"Press enter to continue"* ]]; }; then
    if (( now - last >= 3 )); then
      log " Claude (legacy) trust prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] claude_trust_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
    fi
    return 0
  fi

  local -a lines
  lines=("${(@f)capture}")
  local n=${#lines[@]} prompt_visible=0
  # v5.7 §4.23 (E2E real-claude-CLI finding): narrow-pane wrap breaks
  # multi-line prompts, so PROMPT+AFFORDANCE ±1 line-adjacency misses them.
  # Match against the LAST 15 lines joined into one whitespace-collapsed
  # string — where the active prompt sits. PROMPT_RE + AFFORDANCE_RE both
  # present → auto-Enter unless DEFAULT_NO_RE is present (BLOCK). §4.17.b is
  # preserved: the default-No scan covers the full capture.
  local _tail_start=$((n > 15 ? n - 14 : 1))
  local _tail_normalized=""
  for ((i=_tail_start; i <= n; i++)); do
    _tail_normalized+="${lines[i]} "
  done
  while [[ "$_tail_normalized" == *"  "* ]]; do _tail_normalized="${_tail_normalized//  / }"; done
  local default_no_seen=0
  local sample_pattern="${_tail_normalized:0:120}"
  if [[ "$_tail_normalized" =~ $_PROMPT_RE ]] && [[ "$_tail_normalized" =~ $_AFFORDANCE_RE ]]; then
    prompt_visible=1
  fi
  # Default-No scan: full capture, not just tail (scrollback contamination guard).
  if [[ "$capture" =~ $_DEFAULT_NO_RE ]]; then
    default_no_seen=1
  fi

  if (( default_no_seen )); then
    # v5.7 §4.17 + §4.17.b: default-No prompts ([y/N], "default: no") cannot
    # be auto-Enter'd safely — pressing Enter would CANCEL the operation.
    # If the pane has ANY default-No prompt visible (even alongside older
    # default-Yes prompts in scrollback), BLOCK with traceability.
    log_error "Default-No prompt detected in pane $pane_id — cannot safely auto-dismiss"
    log_debug "[GOV] default_no_prompt_detected=true pane=$pane_id action=block"
    write_blocked_sentinel \
      "Pane shows a default-No / explicit-No-default permission prompt. Auto-Enter would CANCEL the operation rather than approve it. Operator must manually respond with 'y' or extend prompt-handling logic. Pattern: $sample_pattern" \
      "${CURRENT_US:-ALL}" \
      "infra_failure"
    return 0
  fi

  if (( prompt_visible )); then
    # All visible prompts are default-Yes-equivalent — safe to auto-Enter.
    if [[ -z "${PANE_PROMPT_STUCK_SINCE[$pane_id]:-}" ]]; then
      PANE_PROMPT_STUCK_SINCE[$pane_id]=$now
    fi
    if (( now - last >= 3 )); then
      log " Permission prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] permission_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
      PANE_DISMISS_FAILED_COUNT[$pane_id]=$((${PANE_DISMISS_FAILED_COUNT[$pane_id]:-0} + 1))
    fi
    return 0
  fi

  # v5.7 §4.18: unknown-prompt fast-fail (E2E + omc benchmarking finding).
  # If the pane has an affordance marker (y/n bracket etc.) but NO recognized
  # PROMPT_RE phrasing, the Worker is likely awaiting an unknown variant of
  # a yes/no prompt. omc-team's principle (tmux-session.ts:639): never
  # auto-Enter on unknown prompts — pressing Enter could approve OR cancel
  # depending on default. BLOCK immediately so the operator can extend the
  # PROMPT_RE catalog, instead of waiting 10 min for the freeze timeout.
  #
  # False-positive guard: skip if any "active task" marker is present
  # (esc to interrupt / background terminal / spinner) — that means the
  # Worker is producing output and the affordance text is just transcript.
  local active=0
  local affordance_seen=0
  local sample=""
  local -a tail_lines
  tail_lines=()
  for ((i=1; i <= n; i++)); do
    if [[ "${lines[i]}" =~ $_ACTIVE_TASK_RE ]]; then
      active=1
      break
    fi
  done
  if (( ! active )); then
    # Only check the last 5 non-empty lines (where an idle prompt would sit).
    for ((k=n; k >= 1 && ${#tail_lines[@]} < 5; k--)); do
      [[ -z "${lines[k]}" ]] && continue
      tail_lines=("${lines[k]}" "${tail_lines[@]}")
    done
    for line in "${tail_lines[@]}"; do
      if [[ "$line" =~ $_AFFORDANCE_RE ]]; then
        affordance_seen=1
        sample="${line:0:120}"
        break
      fi
    done
  fi
  if (( affordance_seen )); then
    # Re-check default-No (could be the active prompt's bracket — must BLOCK).
    local default_no_in_tail=0
    for line in "${tail_lines[@]}"; do
      if [[ "$line" =~ $_DEFAULT_NO_RE ]]; then
        default_no_in_tail=1
        break
      fi
    done
    local reason
    if (( default_no_in_tail )); then
      reason="Pane shows a default-No affordance ([y/N], 'default: no') but the surrounding prompt phrasing is not in PROMPT_RE. Auto-Enter would CANCEL. Operator must respond manually or extend PROMPT_RE. Sample: $sample"
    else
      reason="Pane shows a y/n affordance marker without a recognized prompt phrasing — likely an unknown CLI prompt variant. Refusing to guess auto-Enter (which could be the wrong default). Operator must respond manually or extend PROMPT_RE. Sample: $sample"
    fi
    log_error "Unknown-prompt affordance detected in pane $pane_id — fast-fail BLOCK"
    log_debug "[GOV] unknown_prompt_detected=true pane=$pane_id action=block default_no=$default_no_in_tail"
    write_blocked_sentinel "$reason" "${CURRENT_US:-ALL}" "infra_failure"
    return 0
  fi

  # No prompt visible — clear stall tracking so re-entry is fresh.
  if [[ -n "${PANE_PROMPT_STUCK_SINCE[$pane_id]:-}" ]]; then
    log_debug "[FLOW] prompt_cleared=true pane=$pane_id"
    # zsh: unset assoc-array member via reset to empty + delete key.
    PANE_PROMPT_STUCK_SINCE[$pane_id]=""
    PANE_DISMISS_FAILED_COUNT[$pane_id]=""
    unset "PANE_PROMPT_STUCK_SINCE[$pane_id]"
    unset "PANE_DISMISS_FAILED_COUNT[$pane_id]"
  fi
  return 0
}
1413
+
1414
# v5.7 §4.16: bounded prompt-stall escalation (codex Critic HIGH finding).
#
# Closes the "alive process → extend indefinitely" gap. A pane that has been
# in prompt-visible state for PROMPT_STALL_TIMEOUT seconds, OR on which
# auto-dismiss has already been attempted PROMPT_DISMISS_FAIL_LIMIT times, is
# escalated to a BLOCKED sentinel so the campaign exits with traceability
# instead of waiting forever.
#
# Arguments:
#   $1 - tmux pane id being watched
#   $2 - user-story id for the sentinel (default: $CURRENT_US, else "ALL")
# Globals read:
#   PANE_PROMPT_STUCK_SINCE, PANE_DISMISS_FAILED_COUNT,
#   PROMPT_STALL_TIMEOUT, PROMPT_DISMISS_FAIL_LIMIT, ITERATION
# Returns:
#   0 - pane is fine (no tracked prompt, or still within both budgets)
#   1 - a budget was exceeded; a BLOCKED sentinel has been written and the
#       caller should propagate the failure
check_prompt_stall() {
  local pane_id="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"
  local first_seen=${PANE_PROMPT_STUCK_SINCE[$pane_id]:-0}

  # No prompt is currently being tracked for this pane.
  if (( first_seen == 0 )); then
    return 0
  fi

  local current_ts
  current_ts=$(_now_s)
  local stuck_for=$(( current_ts - first_seen ))
  local fail_count=${PANE_DISMISS_FAILED_COUNT[$pane_id]:-0}

  # Both budgets (elapsed time and dismiss attempts) still have headroom.
  if (( stuck_for < PROMPT_STALL_TIMEOUT && fail_count < PROMPT_DISMISS_FAIL_LIMIT )); then
    return 0
  fi

  log_error "Pane $pane_id stuck on prompt for ${stuck_for}s ($fail_count dismiss attempts) — escalating to BLOCKED"
  log_debug "[GOV] iter=${ITERATION:-0} prompt_stall_escalated=true pane=$pane_id stuck_for=${stuck_for}s dismiss_attempts=$fail_count threshold=${PROMPT_STALL_TIMEOUT}s"

  local reason="Pane stuck on TUI prompt for ${stuck_for}s after ${fail_count} dismiss attempts. Auto-dismiss patterns may need to be widened (see ~/.claude/ralph-desk/known-prompts.txt convention) or the underlying claude CLI prompt is genuinely unsupported. No documentation produced for this iteration."
  write_blocked_sentinel "$reason" "$us_id" "infra_failure"
  return 1
}
1444
+
1445
# v0.14.1 / v0.14.2: codex post-work idle UI detector.
#
# After codex finishes the verifier task it waits for the next user input and
# shows a status line like "─ Worked for 5m 36s ──", a "› " prompt,
# "Context X% left", a model line and a suggestion such as
# "Improve documentation in @filename". This is NOT a permission prompt — it
# is a successful idle state. The byte-stasis check mistook it for "frozen"
# and BLOCKED a verifier whose verdict file was already on disk.
#
# v0.14.2 Bug Report #4: the v0.14.1 patterns were too narrow (extra
# horizontal-rule wrapping broke the strict dash-bracket regex), so detection
# uses several independent markers; ANY one of them means "idle":
#   1. "Worked for Xm Ys"             - most reliable codex idle marker.
#   2. "Context X% left"              - status bar; still matches when the
#                                       horizontal rules above were cut off
#                                       by tmux capture truncation.
#   3. "gpt-N[.M] <effort> ·"         - model + branch line printed alongside
#                                       the idle prompt.
#   4. default suggestion phrases     - accepted verbatim because the leading
#                                       "›" can be wrapped away on narrow panes.
#
# The text is fed to grep through a here-string instead of `print --`: zsh's
# `print` interprets backslash escapes unless -r is given, so a transcript
# containing e.g. "\c" would be truncated before the markers were examined.
# A single grep with several -e patterns also avoids forking four pipelines.
#
# Arguments:
#   $1 - captured pane text
# Returns:
#   0 if any idle marker is present, 1 otherwise
is_codex_idle_ui() {
  local pane_text="$1"
  if grep -qE \
    -e 'Worked for [0-9]+m [0-9]+s' \
    -e 'Context [0-9]+%[[:space:]]*left' \
    -e 'gpt-[0-9]+(\.[0-9]+)? (low|medium|high|xhigh) ·' \
    -e 'Improve documentation in @|Summarize recent commits|Explain (this )?code' \
    <<< "$pane_text"; then
    return 0
  fi
  return 1
}
1472
+
1473
# v0.14.2 Bug Report #4 H1: legacy verdict migration.
#
# codex sometimes lands the verdict at the pre-v0.13.0 legacy path
# (`<root>/.claude/ralph-desk/memos/...`) instead of `.rlp-desk/memos/`, even
# when the prompt instructs otherwise. When the legacy file exists and holds
# valid JSON, rename it into the canonical location so the rest of the
# pipeline (harvest + analytics + sentinels) sees a single path.
#
# Best-effort: any failure leaves the legacy file untouched and the campaign
# keeps polling.
#
# Globals read:
#   LEGACY_VERDICT_FILE - legacy verdict path (may be unset/empty)
#   VERDICT_FILE        - canonical verdict path (may be unset/empty)
#   ITERATION           - used for debug logging only
# Returns:
#   0 if the legacy file was moved to $VERDICT_FILE, 1 otherwise
_migrate_legacy_verdict() {
  [[ -n "${LEGACY_VERDICT_FILE:-}" && -f "$LEGACY_VERDICT_FILE" ]] || return 1
  # Without a canonical destination there is nothing to migrate to. Checking
  # it here avoids tripping `set -u` on an unset VERDICT_FILE and avoids
  # logging a misleading "moving to <empty>" line on every poll.
  [[ -n "${VERDICT_FILE:-}" ]] || return 1
  # Only migrate complete, parseable JSON (a partially written file is left
  # alone and re-examined on the next poll).
  jq -e . "$LEGACY_VERDICT_FILE" >/dev/null 2>&1 || return 1

  log "Verdict file found at legacy path ${LEGACY_VERDICT_FILE} — moving to ${VERDICT_FILE}"
  log_debug "[GOV] iter=${ITERATION:-0} legacy_verdict_migrated=true from=${LEGACY_VERDICT_FILE} to=${VERDICT_FILE}"

  mkdir -p "$(dirname "$VERDICT_FILE")" 2>/dev/null
  if mv -f "$LEGACY_VERDICT_FILE" "$VERDICT_FILE" 2>/dev/null; then
    return 0
  fi
  return 1
}
1489
+
1490
# v0.14.1 / v0.14.2: verdict-aware short-circuit.
#
# When the pane being polled is a verifier pane AND a valid verdict file
# already exists on disk (canonical path, or legacy path that is then
# auto-migrated), the verifier has finished its work. The harvest step
# (run_single_verifier / consensus loop) is the one that should observe the
# verdict, not the generic no-progress watcher; returning 0 here lets the
# outer loop keep polling instead of escalating BLOCKED.
# Bug Reports #3 (BOS 2026-05-04) + #4 (BOS 2026-05-05).
#
# Arguments:
#   $1 - tmux pane id being polled
# Globals read:
#   VERIFIER_PANE, FINAL_VERIFIER_PANE, VERDICT_FILE
# Returns:
#   0 if $1 is a verifier pane and a JSON-valid verdict is (now) at
#     $VERDICT_FILE, 1 otherwise
_verifier_pane_has_verdict() {
  local pane_id="${1:-}"
  # An empty pane id would compare equal to an unset/empty VERIFIER_PANE or
  # FINAL_VERIFIER_PANE and be mistaken for a verifier pane; reject it up
  # front (same guard style as _worker_pane_has_signal uses for WORKER_PANE).
  [[ -n "$pane_id" ]] || return 1
  [[ "$pane_id" == "${VERIFIER_PANE:-}" || "$pane_id" == "${FINAL_VERIFIER_PANE:-}" ]] || return 1

  # Canonical path first.
  if [[ -n "${VERDICT_FILE:-}" && -f "$VERDICT_FILE" ]]; then
    jq -e . "$VERDICT_FILE" >/dev/null 2>&1 && return 0
  fi

  # v0.14.2 Fix-D: codex may have written to the legacy path. Try to migrate;
  # success means the canonical file is now in place.
  _migrate_legacy_verdict && return 0
  return 1
}
1510
+
1511
# v0.14.5 Bug Report #6 Fix-M (worker mirror of Fix-A/Fix-D).
#
# The Worker (claude sonnet 1m) writes its commit plus the iter-signal.json
# verify signal, then the claude CLI parks at its idle prompt.
# check_no_progress observes byte-stasis on the worker pane and would BLOCK
# after 600s even though the signal is on disk. When the pane is the worker
# pane AND a valid iter-signal is on disk, defer to the harvest step
# (poll_for_signal in run_single_worker) instead of escalating BLOCKED.
#
# A signal counts as valid when the file is non-empty JSON whose `.iteration`
# is all digits, whose `.us_id` is non-empty, and whose `.status` is
# "verify" or "verify_partial".
#
# Arguments:
#   $1 - tmux pane id being polled
# Globals read:
#   WORKER_PANE, SIGNAL_FILE
# Returns:
#   0 if $1 is the worker pane and a valid verify signal is on disk,
#   1 otherwise
_worker_pane_has_signal() {
  local pane_id="$1"
  local signal_iter signal_us signal_status

  # Must be the (configured) worker pane.
  if [[ -z "${WORKER_PANE:-}" || "$pane_id" != "${WORKER_PANE}" ]]; then
    return 1
  fi
  # Signal file must exist and be non-empty.
  if [[ -z "${SIGNAL_FILE:-}" || ! -s "$SIGNAL_FILE" ]]; then
    return 1
  fi
  # ... and must parse as JSON.
  if ! jq -e . "$SIGNAL_FILE" >/dev/null 2>&1; then
    return 1
  fi

  signal_iter=$(jq -r '.iteration // empty' "$SIGNAL_FILE" 2>/dev/null)
  signal_us=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
  signal_status=$(jq -r '.status // empty' "$SIGNAL_FILE" 2>/dev/null)

  if [[ ! "$signal_iter" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  if [[ -z "$signal_us" ]]; then
    return 1
  fi
  case "$signal_status" in
    verify|verify_partial) return 0 ;;
  esac
  return 1
}
1532
+
1533
# v5.7 §4.17 (codex Critic HIGH): generic no-progress timeout — independent
# of prompt detection. Closes the gap where an undetected prompt or an
# alive-but-frozen Worker can bypass Layer 4 and wait forever.
#
# Strategy: capture the last 20 lines of pane content on every call and
# compare with the previous capture. If it has been unchanged for
# PROGRESS_NO_CHANGE_TIMEOUT seconds (default 10min), write BLOCKED.
#
# Exceptions that count as "progress" even though the pane is static:
#   - verifier pane with a valid verdict on disk (v0.14.1 Fix-A / v0.14.2
#     Fix-D; the legacy verdict path is auto-migrated),
#   - worker pane with a valid verify signal on disk (v0.14.5 Fix-M),
#   - a pane parked at the codex idle UI gets a single extra grace period of
#     CODEX_IDLE_GRACE_S seconds before escalation (v0.14.1 Fix-B).
#
# Arguments:
#   $1 - tmux pane id to watch
#   $2 - user-story id for the sentinel (default: $CURRENT_US, else "ALL")
# Globals read:
#   PROGRESS_NO_CHANGE_TIMEOUT, CODEX_IDLE_GRACE_S, VERIFIER_PANE,
#   FINAL_VERIFIER_PANE, VERDICT_FILE, LEGACY_VERDICT_FILE, SIGNAL_FILE,
#   ITERATION
# Globals written:
#   PANE_LAST_CONTENT_FOR_PROGRESS, PANE_LAST_CHANGE_TS,
#   PANE_VERIFIER_TRACE_LOGGED, PANE_CODEX_IDLE_GRACED
# Returns:
#   0 - pane is making progress, first observation, or capture failed
#   1 - no-progress threshold exceeded; BLOCKED sentinel has been written
check_no_progress() {
  local pane_id="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"
  local now
  now=$(_now_s)
  local capture
  # A failed capture is not treated as a freeze; just try again next poll.
  capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null) || return 0

  # v0.14.1 Fix-A / v0.14.2 Fix-D: codex verifier writes its verdict, then
  # sits at the "Worked for Xm Ys" idle UI. Byte-stasis would BLOCK even
  # though the verdict is on disk, so defer to the harvest step.
  if _verifier_pane_has_verdict "$pane_id"; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  # v0.14.5 Bug Report #6 Fix-M: worker mirror of the verifier branch above —
  # defer to the poll_for_signal harvest when SIGNAL_FILE is valid.
  if _worker_pane_has_signal "$pane_id"; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    log_debug "[GOV] iter=${ITERATION:-0} worker_progress_check=signal_present pane=$pane_id signal=${SIGNAL_FILE}"
    return 0
  fi

  # v0.14.2: root-cause tracing for Bug Report #4. For a verifier pane that
  # does NOT have a verdict yet, log the verdict-path state so a post-mortem
  # can tell "verdict missing entirely" from "idle-UI grace was the gating
  # factor". PANE_VERIFIER_TRACE_LOGGED keeps this to one line per pane for
  # as long as the flag stays set (it is not cleared in this function).
  if [[ "$pane_id" == "${VERIFIER_PANE:-}" || "$pane_id" == "${FINAL_VERIFIER_PANE:-}" ]]; then
    if [[ -z "${PANE_VERIFIER_TRACE_LOGGED[$pane_id]:-}" ]]; then
      PANE_VERIFIER_TRACE_LOGGED[$pane_id]=1
      # Both paths are expanded with defaults so an unset variable cannot
      # abort the watcher under `set -u`.
      local canonical_exists=false
      local legacy_exists=false
      if [[ -n "${VERDICT_FILE:-}" && -f "$VERDICT_FILE" ]]; then
        canonical_exists=true
      fi
      if [[ -n "${LEGACY_VERDICT_FILE:-}" && -f "$LEGACY_VERDICT_FILE" ]]; then
        legacy_exists=true
      fi
      log_debug "[GOV] iter=${ITERATION:-0} verifier_progress_check=miss pane=$pane_id verdict_canonical=${VERDICT_FILE:-unset} verdict_canonical_exists=$canonical_exists verdict_legacy=${LEGACY_VERDICT_FILE:-unset} verdict_legacy_exists=$legacy_exists"
    fi
  fi

  # Content changed since the previous call: record it and restart the clock.
  local last_content="${PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]:-}"
  if [[ "$capture" != "$last_content" ]]; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  # Unchanged content but no timestamp yet: start the clock now.
  local last_change=${PANE_LAST_CHANGE_TS[$pane_id]:-0}
  if (( last_change == 0 )); then
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  local frozen_for=$(( now - last_change ))
  if (( frozen_for < PROGRESS_NO_CHANGE_TIMEOUT )); then
    return 0
  fi

  # v0.14.1 Fix-B: even without a verdict file, codex sometimes parks at its
  # idle UI mid-run (e.g. partial-write window before the atomic mv). Grant a
  # one-time +CODEX_IDLE_GRACE_S grace before escalating so we do not BLOCK
  # at the exact second the verdict is being mv'd into place.
  if is_codex_idle_ui "$capture"; then
    local already_graced="${PANE_CODEX_IDLE_GRACED[$pane_id]:-0}"
    if (( already_graced == 0 )); then
      PANE_CODEX_IDLE_GRACED[$pane_id]=1
      # Back-date the change timestamp so that exactly CODEX_IDLE_GRACE_S
      # seconds remain before the threshold is hit again. (Resetting it to
      # "now" would silently grant a further full PROGRESS_NO_CHANGE_TIMEOUT,
      # contradicting the grace period announced in the log line below.)
      PANE_LAST_CHANGE_TS[$pane_id]=$(( now - PROGRESS_NO_CHANGE_TIMEOUT + CODEX_IDLE_GRACE_S ))
      log "Pane $pane_id at codex idle UI for ${frozen_for}s — granting +${CODEX_IDLE_GRACE_S}s grace before BLOCK escalation"
      log_debug "[GOV] iter=${ITERATION:-0} codex_idle_grace=true pane=$pane_id grace_s=${CODEX_IDLE_GRACE_S}"
      return 0
    fi
  fi

  log_error "Pane $pane_id has not changed for ${frozen_for}s — alive but frozen. Escalating to BLOCKED."
  log_debug "[GOV] iter=${ITERATION:-0} no_progress_escalated=true pane=$pane_id frozen_for=${frozen_for}s threshold=${PROGRESS_NO_CHANGE_TIMEOUT}s"
  write_blocked_sentinel \
    "Pane content has been unchanged for ${frozen_for}s (>= ${PROGRESS_NO_CHANGE_TIMEOUT}s threshold). Worker process may be alive but stuck on an undetected prompt, hung network call, or genuine deadlock. No documentation produced; manual inspection required." \
    "$us_id" \
    "infra_failure"
  return 1
}
1620
+
1621
# --- governance.md s7 step 5+6: Nudge idle panes ---
#
# Compares the last three lines of the pane with the previous observation.
# If they have been identical for more than IDLE_NUDGE_THRESHOLD seconds and
# the pane does not look busy, an empty line is sent to the pane (a "nudge"),
# at most MAX_NUDGES times per counter.
#
# Arguments:
#   $1 - tmux pane id
#   $2 - NAME of the caller's nudge counter variable (read and updated
#        indirectly)
# Globals read:
#   IDLE_NUDGE_THRESHOLD, MAX_NUDGES
# Globals written:
#   LAST_PANE_CONTENT, PANE_IDLE_SINCE
# Returns:
#   0 always
check_and_nudge_idle_pane() {
  local pane_id="$1"
  local nudge_count_var="$2"

  # v5.7 §4.13.a: auto-dismiss permission prompts before the idle check.
  # Otherwise the Worker hangs at "Do you want to create..." until the nudge
  # timeout.
  auto_dismiss_prompts "$pane_id"

  local current_content
  current_content=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -n 3)

  local now
  now=$(date +%s)

  # Content moved: remember it and restart the idle clock.
  if [[ "$current_content" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    LAST_PANE_CONTENT[$pane_id]="$current_content"
    PANE_IDLE_SINCE[$pane_id]=$now
    return 0
  fi

  # Content is unchanged. If no idle start was ever recorded (the very first
  # capture already equals the unset/empty baseline, e.g. an empty pane),
  # record it now. Falling back to "now" without storing it would restart the
  # clock on every call and the threshold could never be reached.
  if [[ -z "${PANE_IDLE_SINCE[$pane_id]:-}" ]]; then
    PANE_IDLE_SINCE[$pane_id]=$now
  fi
  local idle_since=${PANE_IDLE_SINCE[$pane_id]}
  if (( now - idle_since <= IDLE_NUDGE_THRESHOLD )); then
    return 0
  fi

  # A12 fix: NEVER nudge if pane is busy (thinking/working) — a nudge
  # interrupts claude. The capture is fed via a here-string (zsh `echo`
  # would interpret backslash escapes in the pane text) and matched with a
  # POSIX ERE instead of the non-portable BRE `\|` alternation.
  local recent_output
  recent_output=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
  if grep -qiE 'esc to interrupt|thinking|working|kneading|crunching|clauding|billowing|brewing|tinkering|burrowing|saut|razzle|bunning|zesting|fermenting|actualizing|composing|evaporating|churning' <<< "$recent_output" 2>/dev/null; then
    log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
    return 0
  fi

  # zsh indirect read of the caller-supplied counter variable.
  local count=${(P)nudge_count_var}
  if (( count < MAX_NUDGES )); then
    log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
    safe_send_keys "$pane_id" ""
    (( count++ ))
    # Write the counter back through its name. The name comes from internal
    # call sites and the value is an integer, so eval is safe here.
    eval "$nudge_count_var=$count"
  fi
  return 0
}
1658
+
1659
+ # =============================================================================
1660
+ # Exponential Backoff Restart (tmux pattern)
1661
+ # =============================================================================
1662
+
1663
# --- governance.md s7 step 5: Restart dead workers with backoff ---
#
# Stops whatever is running in the worker pane and re-launches the claude TUI
# after an exponential backoff delay (5s, 10s, 20s, then 60s).
#
# Arguments:
#   $1 - tmux pane id of the worker
#   $2 - iteration number (key into WORKER_RESTARTS)
#   $3 - trigger file path; accepted for call-site compatibility, not used
# Globals read:
#   WORKER_ENGINE, MAX_RESTARTS, WORKER_MODEL, WORKER_EFFORT
# Globals written:
#   WORKER_RESTARTS[$2] (incremented on a successful relaunch)
# Returns:
#   0 - relaunch command was sent
#   1 - engine is codex (1-shot exec, restart not applicable) or the restart
#       budget for this iteration is exhausted (caller writes BLOCKED)
restart_worker() {
  local pane_id="$1"
  local iter="$2"

  # Codex workers are 1-shot exec; restart is not applicable.
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
    return 1
  fi

  local restart_count="${WORKER_RESTARTS[$iter]:-0}"
  if (( restart_count >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1 # caller writes BLOCKED
  fi

  # Exponential backoff: 5s, 10s, 20s, 60s (cap). A case statement keeps the
  # schedule independent of the shell's array index origin.
  local delay
  case "$restart_count" in
    0) delay=5 ;;
    1) delay=10 ;;
    2) delay=20 ;;
    *) delay=60 ;;
  esac
  log " Restarting worker (attempt $((restart_count + 1))/$MAX_RESTARTS) after ${delay}s backoff..."
  sleep "$delay"

  # Kill existing claude, wait for shell prompt.
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
  sleep 2

  # Re-launch worker (tmux interactive pattern). Only the claude engine can
  # reach this point; codex returned above.
  safe_send_keys "$pane_id" "$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
  return 0
}
1702
+
1703
+ # =============================================================================
1704
+ # Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
1705
+ # =============================================================================
1706
+
1707
# Per-US PRD injection helper.
#
# Writes the Worker prompt base to stdout with every occurrence of the full
# PRD path replaced by the per-US split path. Falls back to the unmodified
# prompt base (with a stderr warning) if the split file is missing.
#
# The replacement is a LITERAL string substitution done in awk. Interpolating
# the paths into a sed `s|||` expression would treat "." in the path as a
# wildcard and would misbehave on "&", "\" or "|" in either path.
#
# Arguments:
#   $1 - prompt base file
#   $2 - full PRD path (text to search for)
#   $3 - per-US PRD path (empty = no substitution)
# Outputs:
#   prompt text on stdout; warning on stderr when $3 is given but missing
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # Paths travel through the environment (not -v) so awk does not apply
    # backslash-escape processing to them.
    _RLP_PRD_FROM="$full_prd" _RLP_PRD_TO="$per_us_prd" awk '
      BEGIN {
        from = ENVIRON["_RLP_PRD_FROM"]
        to = ENVIRON["_RLP_PRD_TO"]
        from_len = length(from)
      }
      {
        if (from_len == 0) { print; next }
        rest = $0
        out = ""
        while ((pos = index(rest, from)) > 0) {
          out = out substr(rest, 1, pos - 1) to
          rest = substr(rest, pos + from_len)
        }
        print out rest
      }
    ' "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
1725
+
1726
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
# NEVER send prompt content through tmux send-keys.
# Write payloads to files, send only short trigger commands (<200 chars).
#
# Builds two files for the given iteration under $LOGS_DIR:
#   iter-NNN.worker-prompt.md  - base prompt (with per-US PRD path injected
#                                when applicable) + iteration context, optional
#                                fix contract, per-US / batch scope section and
#                                optional autonomous-mode section
#   iter-NNN.worker-trigger.sh - executable script that runs a background
#                                heartbeat writer around the engine command
#
# Arguments:
#   $1 - iteration number
# Globals read:
#   LOGS_DIR, MEMORY_FILE, DESK, SLUG, WORKER_PROMPT_BASE, VERIFY_MODE,
#   US_LIST, VERIFIED_US, _PRD_CHANGED, AUTONOMOUS_MODE, WORKER_ENGINE,
#   CODEX_BIN, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING, WORKER_MODEL,
#   WORKER_EFFORT, WORKER_HEARTBEAT
#
# Prompt lines are emitted with printf '%s\n' rather than echo: zsh's echo
# interprets backslash escapes, which would mangle memory-derived text or
# paths that contain a backslash.
write_worker_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"

  # Build the worker prompt: base prompt + iteration context.
  # First (up to) five lines of the "Next Iteration Contract" memory section.
  local contract
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -n 5)
  # First non-empty line of the "Stop Status" memory section.
  local stop_status
  stop_status=$(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -n 1)

  # Check for fix contract from previous verifier failure.
  local prev_iter=$((iter - 1))
  local fix_contract_file
  fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' "$prev_iter").fix-contract.md"

  # Compute next unverified US before prompt assembly (required for per-US
  # PRD injection). The loop variable is local so it does not leak into the
  # global scope, and membership is tested literally (no regex) against the
  # comma-separated VERIFIED_US list.
  local next_us=""
  local candidate
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
    for candidate in $(printf '%s' "$US_LIST" | tr ',' ' '); do
      case ",$VERIFIED_US," in
        *",$candidate,"*) ;;
        *)
          next_us="$candidate"
          break
          ;;
      esac
    done
  fi

  # Per-US PRD injection: substitute full PRD path with per-US split path
  # when available.
  local per_us_prd=""
  [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"

  {
    inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
    printf '%s\n' \
      "" \
      "---" \
      "## Iteration Context" \
      "- **Iteration**: $iter" \
      "- **Memory Stop Status**: $stop_status" \
      "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
    if (( _PRD_CHANGED )); then
      printf '%s\n' "NOTE: PRD was updated since last iteration. New/changed US may exist."
    fi

    # Include fix contract if previous verifier failed.
    if [[ -f "$fix_contract_file" ]]; then
      printf '%s\n' \
        "" \
        "---" \
        "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)" \
        "The Verifier REJECTED your previous work. You MUST fix the issues below." \
        "Do NOT just resubmit — actually change the code to address each issue." \
        ""
      cat "$fix_contract_file"
    fi

    # Per-US mode: tell Worker exactly which US to work on.
    if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
      if [[ -n "$next_us" ]]; then
        printf '%s\n' \
          "" \
          "---" \
          "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract" \
          "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**" \
          "The Leader has determined that **${next_us}** is the next unverified story." \
          "You MUST implement ONLY **${next_us}** in this iteration." \
          "Do NOT implement any other user stories."
        # Per-US test-spec injection: point Worker to scoped test-spec if
        # available.
        local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
        if [[ -f "$per_us_test_spec" ]]; then
          printf '%s\n' "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
        else
          printf '%s\n' "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
        fi
        printf '%s\n' \
          "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")." \
          "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}" \
          "" \
          "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
      elif [[ -n "$VERIFIED_US" ]]; then
        # All individual US verified — this is the final full verify
        # iteration.
        printf '%s\n' \
          "" \
          "---" \
          "## FINAL VERIFICATION ITERATION" \
          "All individual US have been verified: $VERIFIED_US" \
          "Run all tests and verification commands to confirm everything works together." \
          "Signal verify with us_id=\"ALL\" for the final full verification."
      fi
    elif [[ "$VERIFY_MODE" = "batch" ]]; then
      printf '%s\n' "" "---"
      if [[ -n "$VERIFIED_US" ]]; then
        printf '%s\n' \
          "## BATCH MODE — CONTINUE FROM PARTIAL PROGRESS" \
          "The following US have already been verified: **$VERIFIED_US**" \
          "- Do NOT re-implement these — they are done." \
          "- Focus ONLY on the remaining unverified user stories." \
          '- Signal verify with us_id="ALL" when the remaining stories are complete.'
      else
        printf '%s\n' \
          "## BATCH MODE OVERRIDE" \
          "Ignore any per-US signal instructions above. In batch mode:" \
          "- Implement ALL user stories in this iteration" \
          '- Signal verify with us_id="ALL" only when ALL stories are complete' \
          "- Do NOT signal verify after individual stories"
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative.
    if (( AUTONOMOUS_MODE )); then
      printf '%s\n' \
        "" \
        "---" \
        "## AUTONOMOUS MODE" \
        "Do NOT stop or ask questions when encountering ambiguity or document conflicts." \
        "**Resolution priority**: PRD > test-spec > context > memory" \
        "If documents disagree, follow PRD and proceed. Log any conflict you find by" \
        "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:" \
        ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}' \
        "Do NOT wait for human input. Keep working."
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # Engine-specific launch command (expanded at write time).
  local engine_cmd engine_comment
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    # Shell-quote the prompt path so the generated script still works when
    # LOGS_DIR contains spaces or other shell metacharacters.
    local prompt_file_q
    prompt_file_q=$(printf '%q' "$prompt_file")
    engine_cmd="${CODEX_BIN:-codex} \\
  -m $WORKER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $prompt_file_q)\""
    engine_comment="# Run codex with fresh context (fallback trigger — TUI primary launch via launch_worker_codex)"
  else
    engine_cmd=$(build_claude_cmd print "$WORKER_MODEL" "$prompt_file" "$output_log" "$WORKER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 5)"
  fi

  # In the heredoc below, unescaped $vars are expanded NOW (write time);
  # \$-escaped ones are left for the trigger script to expand at run time.
  cat <<TRIGGER_EOF | atomic_write "$trigger_file"
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$WORKER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  chmod +x "$trigger_file"

  log " Worker prompt: $prompt_file"
  log " Worker trigger: $trigger_file"
}
1890
+
1891
# Write the Verifier prompt and trigger script for one iteration.
#
# Builds two files under $LOGS_DIR (SUFFIX is the optional 4th argument):
#   iter-NNN.verifierSUFFIX-prompt.md  - verifier base prompt + verification
#                                        context (scope derived from the
#                                        us_id in $SIGNAL_FILE) and optional
#                                        autonomous-mode section
#   iter-NNN.verifierSUFFIX-trigger.sh - executable script that runs a
#                                        background heartbeat writer around
#                                        the engine command
#
# Arguments:
#   $1 - iteration number
#   $2 - verifier engine (default: $VERIFIER_ENGINE; override for consensus)
#   $3 - claude verifier model (default: $VERIFIER_MODEL); the codex branch
#        uses $VERIFIER_CODEX_MODEL instead
#   $4 - optional file-name suffix for consensus (e.g. "-claude", "-codex")
# Globals read:
#   LOGS_DIR, SIGNAL_FILE, VERIFIER_PROMPT_BASE, DONE_CLAIM_FILE, VERIFY_MODE,
#   VERIFIED_US, AUTONOMOUS_MODE, CODEX_BIN, VERIFIER_CODEX_MODEL,
#   VERIFIER_CODEX_REASONING, VERIFIER_EFFORT, VERIFIER_HEARTBEAT
#
# Prompt lines are emitted with printf '%s\n' rather than echo: zsh's echo
# interprets backslash escapes, which would mangle paths or ids that contain
# a backslash.
write_verifier_trigger() {
  local iter="$1"
  local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
  local verifier_model="${3:-$VERIFIER_MODEL}"
  local suffix="${4:-}" # optional suffix for consensus (e.g., "-claude", "-codex")
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-output.log"

  # Read us_id from iter-signal.json for per-US scoping.
  local us_id=""
  if [[ -f "$SIGNAL_FILE" ]]; then
    us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
  fi

  # Build verifier prompt from base with US scope.
  {
    cat "$VERIFIER_PROMPT_BASE"
    printf '%s\n' \
      "" \
      "---" \
      "## Verification Context" \
      "- **Iteration**: $iter" \
      "- **Done Claim**: $DONE_CLAIM_FILE" \
      "- **Verify Mode**: $VERIFY_MODE"
    if [[ -n "$us_id" ]]; then
      if [[ "$us_id" = "ALL" ]]; then
        printf '%s\n' "- **Scope**: FULL VERIFY — check ALL acceptance criteria from the PRD"
      else
        printf '%s\n' "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
      fi
      if [[ -n "$VERIFIED_US" ]]; then
        printf '%s\n' \
          "- **Previously verified US**: $VERIFIED_US" \
          "- **Note**: Skip re-verifying the above US. Focus on unverified stories."
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative.
    if (( AUTONOMOUS_MODE )); then
      printf '%s\n' \
        "" \
        "---" \
        "## AUTONOMOUS MODE" \
        "Do NOT stop or ask questions when encountering ambiguity or document conflicts." \
        "**Resolution priority**: PRD > test-spec > context > memory" \
        "If documents disagree, follow PRD and proceed. Log any conflict by" \
        "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:" \
        ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}' \
        "Do NOT wait for human input. Keep verifying."
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # Engine-specific launch command (expanded at write time).
  local engine_cmd engine_comment
  if [[ "$verifier_engine" = "codex" ]]; then
    # Shell-quote both paths so the generated script still works when
    # LOGS_DIR contains spaces or other shell metacharacters.
    local prompt_file_q output_log_q
    prompt_file_q=$(printf '%q' "$prompt_file")
    output_log_q=$(printf '%q' "$output_log")
    engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat $prompt_file_q)\" \\
  > >(tee $output_log_q) 2>&1"
    engine_comment="# Run codex with fresh context (governance.md s7 step 7) — process substitution preserves tty"
  else
    engine_cmd=$(build_claude_cmd print "$verifier_model" "$prompt_file" "$output_log" "$VERIFIER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 7)"
  fi

  # In the heredoc below, unescaped $vars are expanded NOW (write time);
  # \$-escaped ones are left for the trigger script to expand at run time.
  cat <<TRIGGER_EOF | atomic_write "$trigger_file"
#!/bin/zsh
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  chmod +x "$trigger_file"

  log " Verifier prompt: $prompt_file"
  log " Verifier trigger: $trigger_file"
}
1989
+
1990
+ # =============================================================================
1991
+ # Cleanup (trap handler)
1992
+ # =============================================================================
1993
+
1994
# cleanup — trap handler run when the runner terminates.
#
# Steps, in order:
#   1. release the campaign lockfile (if acquired) and the project-scoped
#      runner lock (if it records this slug);
#   2. ask the Worker/Verifier panes to exit, then kill them;
#   3. remove leftover *.tmp.* files from the logs and memos directories;
#   4. generate the campaign report and the SV report;
#   5. record the final status in metadata.json (when it exists);
#   6. when DEBUG is on, log an execution-validation summary;
#   7. print the session summary banner.
#
# Final status is derived from the sentinels: COMPLETE if the complete sentinel
# exists, BLOCKED if the blocked sentinel exists, TIMEOUT otherwise.
cleanup() {
  log "Cleaning up..."

  # Remove lockfile (only when this process acquired it)
  if (( LOCKFILE_ACQUIRED )); then
    rm -f "$LOCKFILE_PATH" 2>/dev/null
  else
    log_debug "cleanup: lockfile not owned by this process, skipping removal"
  fi

  # US-026 R14 P0: remove project-scoped runner lockfile if owned by this slug
  if [[ -f "$RUNNER_LOCKFILE_PATH" ]]; then
    local own_slug
    own_slug=$(jq -r '.slug' "$RUNNER_LOCKFILE_PATH" 2>/dev/null)
    if [[ "$own_slug" == "$SLUG" ]]; then
      rm -rf "$RUNNER_LOCKDIR" "$RUNNER_LOCKFILE_PATH" 2>/dev/null
    fi
  fi

  # Kill claude processes then kill panes
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
  local pane
  for pane in "${WORKER_PANE:-}" "${VERIFIER_PANE:-}"; do
    [[ -n "$pane" ]] || continue
    tmux send-keys -t "$pane" C-c 2>/dev/null
    tmux send-keys -t "$pane" "/exit" C-m 2>/dev/null
  done
  sleep 2
  # Kill panes on completion
  for pane in "${WORKER_PANE:-}" "${VERIFIER_PANE:-}"; do
    [[ -n "$pane" ]] || continue
    tmux kill-pane -t "$pane" 2>/dev/null
  done
  log "  Panes cleaned up."

  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
  setopt local_options nonomatch 2>/dev/null
  rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null

  # AC4: Generate campaign report on all terminal states (always-on)
  generate_campaign_report

  # US-001: Generate SV report after campaign report (tmux mode)
  generate_sv_report

  # Elapsed wall-clock time for the summary
  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  local final_status="UNKNOWN"
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    final_status="COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    final_status="BLOCKED"
  else
    final_status="TIMEOUT"
  fi

  # --- Update metadata.json with final status ---
  if [[ -f "$METADATA_FILE" ]]; then
    if jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
         '.campaign_status = $status | .end_time = $end_time' \
         "$METADATA_FILE" > "${METADATA_FILE}.tmp"; then
      mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
    else
      # jq failed: keep the original metadata, do not leave a partial tmp file behind
      rm -f "${METADATA_FILE}.tmp" 2>/dev/null
    fi
  fi

  if (( DEBUG )); then
    log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"

    # --- Validation ---
    log_debug "[FLOW] === Execution Validation ==="

    # 1. Did the correct verify mode run?
    log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"

    # NOTE on counting: `grep -c` prints "0" AND exits 1 when nothing matches,
    # so `$(grep -c ... || echo 0)` would capture "0<newline>0". Every counter
    # below therefore ignores the exit status and only defaults when grep
    # printed nothing at all (e.g. the file is missing).

    # 2. Per-US: were all US individually verified?
    if [[ "$VERIFY_MODE" = "per-us" ]]; then
      local prd_file="$DESK/plans/prd-$SLUG.md"
      local expected_us=""
      if [[ -f "$prd_file" ]]; then
        expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
      fi
      local verified_count expected_count
      verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null) || true
      expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null) || true
      verified_count=${verified_count:-0}
      expected_count=${expected_count:-0}

      if [[ "$final_status" = "COMPLETE" ]]; then
        if (( verified_count >= expected_count )); then
          log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
        else
          log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
        fi
      else
        log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
      fi
    fi

    # 3. Consensus: were both engines used?
    if [[ "$CONSENSUS_MODE" != "off" ]]; then
      if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
        log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
      else
        log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
      fi
    fi

    # 4. Engine match: did the configured engines actually run?
    local worker_dispatches verifier_dispatches
    worker_dispatches=$(grep -c '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null) || true
    verifier_dispatches=$(grep -c '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null) || true
    log_debug "[FLOW] dispatches worker=${worker_dispatches:-0} verifier=${verifier_dispatches:-0}"

    # 5. Fix loops: how many fix contracts were generated?
    local fix_count
    fix_count=$(grep -c '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null) || true
    log_debug "[FLOW] fix_loops=${fix_count:-0} consecutive_failures=$CONSECUTIVE_FAILURES"

    # 6. Circuit breakers: any triggered?
    local cb_count
    cb_count=$(grep -c '\[GOV\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null) || true
    log_debug "[FLOW] circuit_breakers_triggered=${cb_count:-0}"

    # 7. Overall result
    log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
  fi

  echo ""
  echo "============================================================"
  echo "  Ralph Desk Tmux Runner - Session Complete"
  echo "============================================================"
  echo "  Session:    $SESSION_NAME"
  echo "  Slug:       $SLUG"
  echo "  Iterations: $ITERATION / $MAX_ITER"
  echo "  Elapsed:    ${minutes}m ${seconds}s"
  echo ""

  if [[ -f "$COMPLETE_SENTINEL" ]]; then
    echo "  Final State: COMPLETE"
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then
    echo "  Final State: BLOCKED"
  else
    echo "  Final State: STOPPED (interrupted or timeout)"
  fi

  echo ""
  echo "  Tmux session left alive for inspection:"
  echo "    tmux attach -t $SESSION_NAME"
  echo "    tmux kill-session -t $SESSION_NAME"
  echo "============================================================"
}
2145
+
2146
+ # =============================================================================
2147
+ # Poll Loop (used for both Worker and Verifier)
2148
+ # =============================================================================
2149
+
2150
+ # --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
2151
# poll_for_signal — wait for a signal/verdict file while supervising the pane.
#
# Arguments:
#   $1 signal_file     file whose appearance ends the poll successfully
#   $2 heartbeat_file  heartbeat JSON written by the trigger script
#   $3 pane_id         tmux pane being supervised
#   $4 trigger_file    passed through to the restart/exit handlers
#   $5 role            "worker" or a verifier role (matched with *erifier*)
#
# Returns:
#   0  signal file present (detected, auto-generated by the A4 fallback, or
#      after the codex worker exit handler ran)
#   1  timeout, restart budget exhausted, heartbeat circuit breaker, or the
#      pane's process died
#   2  hard failure that must not be retried (API unavailable, prompt stall,
#      no-progress timeout)
#
# All locals are declared once up front: in zsh a bare `local name` that is
# re-executed inside the loop prints `name=value` on every later pass.
poll_for_signal() {
  local signal_file="$1"
  local heartbeat_file="$2"
  local pane_id="$3"
  local trigger_file="$4"
  local role="$5"  # "worker" or "verifier"
  local nudge_count=0
  local api_retry_count=0
  local poll_start now elapsed
  local _a4_capture _a4_i _a4_n _a4_blocked _a4_prev _a4_cur _a4_next dc_us_id
  local -a _a4_lines
  local pane_output_for_retry is_api_text_retry poll_cmd
  poll_start=$(date +%s)

  # Initialize idle tracking for this pane
  LAST_PANE_CONTENT[$pane_id]=""
  PANE_IDLE_SINCE[$pane_id]=$(date +%s)

  while true; do
    now=$(date +%s)
    elapsed=$(( now - poll_start ))

    # Per-iteration timeout check
    if (( elapsed >= ITER_TIMEOUT )); then
      log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
      return 1  # timeout
    fi

    # Check if signal file appeared
    if [[ -f "$signal_file" ]]; then
      log "  Signal file detected: $signal_file"
      return 0  # success
    fi

    # A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
    # ONLY for Worker polling — Verifier waits for verdict file, not done-claim
    #
    # v5.7 §4.14 (Bug 5 fix, CRITICAL): if Worker pane shows a pending TUI
    # permission prompt (`Do you want to ...` with `(y/n)` / `❯ 1.` affordance),
    # Worker is NOT done — it's stuck mid-write after the first done-claim pass.
    # Suspending A4 fallback in this case prevents premature Verifier dispatch
    # against partial Worker output. auto_dismiss_prompts() will already have
    # tried to clear the prompt; if it's still visible the worker is in a
    # multi-prompt sequence and needs more time, not an A4 short-circuit.
    if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
      _a4_capture=$(tmux capture-pane -t "$pane_id" -p -S -50 2>/dev/null || true)
      _a4_lines=("${(@f)_a4_capture}")
      _a4_n=${#_a4_lines[@]}
      _a4_blocked=0
      # A prompt line only counts when an affordance appears on the same line
      # or on the line directly before/after it.
      for ((_a4_i=1; _a4_i <= _a4_n; _a4_i++)); do
        if [[ "${_a4_lines[_a4_i]}" =~ $_PROMPT_RE ]]; then
          _a4_prev="${_a4_lines[_a4_i-1]:-}"
          _a4_cur="${_a4_lines[_a4_i]}"
          _a4_next="${_a4_lines[_a4_i+1]:-}"
          if [[ "$_a4_prev" =~ $_AFFORDANCE_RE || "$_a4_cur" =~ $_AFFORDANCE_RE || "$_a4_next" =~ $_AFFORDANCE_RE ]]; then
            _a4_blocked=1
            break
          fi
        fi
      done
      if (( _a4_blocked )); then
        log "  Worker pane has pending permission prompt — A4 fallback suspended (Bug 5 guard)"
        log_debug "[GOV] iter=$ITERATION a4_fallback_suspended=true reason=worker_prompt_pending pane=$pane_id"
        # Continue polling; do NOT auto-generate signal. auto_dismiss_prompts will
        # try to dismiss on the next loop iteration.
      else
        dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
        if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
          log "  WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
          log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
          echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim without signal)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
          _emit_a4_fallback_audit "$dc_us_id" "$ITERATION" "inline_polling_a4"
          return 0
        fi
      fi
    fi

    # API transient-error recovery with bounded backoff.
    # One case-insensitive scan covers: a standalone 500 or 529, "overloaded",
    # "too many requests", "service unavailable". A here-string is used so the
    # early exit of `grep -q` cannot fail the check via SIGPIPE under pipefail.
    pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    is_api_text_retry=0
    if [[ -n "$pane_output_for_retry" ]] &&
       grep -qiE '(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable' <<< "$pane_output_for_retry"; then
      is_api_text_retry=1
    fi

    if (( is_api_text_retry )) || is_api_error "$pane_id"; then
      (( api_retry_count++ ))
      log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
      if (( api_retry_count >= _API_MAX_RETRIES )); then
        log_error "API unavailable after ${_API_MAX_RETRIES} retries"
        write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries" "" "infra_failure"
        return 2
      fi
      # A5: If pane shows "queued messages" or rate-limit corruption, restart pane
      if grep -qi 'queued messages' <<< "$pane_output_for_retry"; then
        log "  A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
        log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
        tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
        tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
        wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
      fi
      sleep "$_API_RETRY_INTERVAL_S"
      continue
    else
      # Only consecutive API errors count toward the retry limit
      api_retry_count=0
    fi

    # Check heartbeat freshness (tmux pattern)
    if [[ -f "$heartbeat_file" ]]; then
      if check_heartbeat_exited "$heartbeat_file"; then
        # Process exited but no signal file -- give a brief grace period
        sleep 3
        if [[ -f "$signal_file" ]]; then
          log "  Signal file detected after process exit: $signal_file"
          return 0
        fi
        # Dispatch to engine-specific exit handler
        if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
          handle_worker_exit_codex "$ITERATION" "$signal_file"
          return 0
        fi
        # Claude path (or verifier of any engine)
        if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
          # Reset poll timer for the restart
          poll_start=$(date +%s)
          nudge_count=0
          LAST_PANE_CONTENT[$pane_id]=""
          PANE_IDLE_SINCE[$pane_id]=$(date +%s)
          sleep "$POLL_INTERVAL"
          continue
        else
          return 1  # max restarts exceeded
        fi
      fi

      if ! check_heartbeat "$heartbeat_file"; then
        log "  WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
        (( HEARTBEAT_STALE_COUNT++ ))
        # Circuit breaker: 3 consecutive heartbeat stale events
        if (( HEARTBEAT_STALE_COUNT >= 3 )); then
          log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
          log_error "Circuit breaker: 3 consecutive heartbeat stale events"
          return 1
        fi
        # Attempt restart
        if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          poll_start=$(date +%s)
          nudge_count=0
          continue
        else
          return 1
        fi
      else
        # Heartbeat is fresh, reset stale counter
        HEARTBEAT_STALE_COUNT=0
      fi
    fi

    # Dead pane detection during poll: check if claude/codex process died.
    # Delegates to check_dead_pane() for engine-aware logic.
    poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
    if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
      log "  WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
      log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
      # Return failure so caller can handle recovery
      return 1
    fi

    # v5.7 §4.13.a: window-bounded prompt auto-dismiss (replaces broad inline grep).
    # check_and_nudge_idle_pane also calls auto_dismiss_prompts internally, but
    # we keep this explicit call so dismiss happens BEFORE the idle/nudge check
    # and is logged with iter context.
    auto_dismiss_prompts "$pane_id"

    # v5.7 §4.16: bounded prompt-stall escalation. If pane has been prompt-stuck
    # for PROMPT_STALL_TIMEOUT (5min default) or dismiss attempts exceed
    # PROMPT_DISMISS_FAIL_LIMIT, write BLOCKED `infra_failure` and exit the poll.
    # Closes the "alive process = infinite extend" gap (codex Critic HIGH).
    if ! check_prompt_stall "$pane_id"; then
      return 2  # signal: hard-failed, do not retry
    fi

    # v5.7 §4.17 (codex Critic HIGH): generic no-progress timeout. Catches
    # undetected prompts, hung network calls, or any other alive-but-frozen
    # state. PROGRESS_NO_CHANGE_TIMEOUT defaults to 10 minutes. Independent
    # of regex prompt detection — fires whenever pane content is byte-equal
    # for too long even when Worker process is "alive".
    if ! check_no_progress "$pane_id"; then
      return 2  # hard-failed, infra_failure recorded
    fi

    # Idle pane nudging (tmux pattern); the counter is passed by NAME
    check_and_nudge_idle_pane "$pane_id" "nudge_count"

    sleep "$POLL_INTERVAL"
  done
}
2354
+
2355
+ # =============================================================================
2356
+ # Consensus Verification (run two verifiers sequentially in same pane)
2357
+ # =============================================================================
2358
+
2359
+ # --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
2360
# run_single_verifier — run one verifier engine in the Verifier pane and wait
# for its verdict.
#
# Arguments:
#   $1 iter          iteration number
#   $2 engine        claude|codex
#   $3 model         model for this verifier
#   $4 suffix        "-claude" or "-codex" (used in log/prompt file names)
#   $5 verdict_dest  where to copy the resulting verdict file
#
# Returns 0 when the verdict was produced and copied, 1 on any failure
# (launch failure, poll failure/timeout, copy failure).
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file
  local iter_tag prompt_file verifier_cmd verifier_launch
  # Declared once here: a bare `local` re-run inside the codex poll loop would
  # make zsh print `name=value` on every pass.
  local codex_poll_start _verdict_detected_at _grace_elapsed _pane_cmd codex_elapsed
  local verifier_poll_rc=0

  iter_tag=$(printf '%03d' "$iter")

  # Write trigger (and prompt) for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"

  # Clean previous Verifier session (with dead pane detection)
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ -z "$verifier_cmd" ]]; then
    log "  Verifier pane $VERIFIER_PANE is gone — replacing..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
    replace_worker_pane "$VERIFIER_PANE" "verifier"
    VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
    log "  New verifier pane: $VERIFIER_PANE"
  elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
    log "  Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
    tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
    sleep 0.2
    tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
    sleep 0.3
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file BEFORE launching, so a fresh verdict is never deleted
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier — dispatch to engine-specific function
  if [[ "$engine" = "codex" ]]; then
    verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
    launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
    log_debug "Verifier$suffix codex TUI dispatched"
  else
    verifier_launch="$(build_claude_cmd tui "$model" "" "" "$VERIFIER_EFFORT")"
    if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi
    log_debug "Verifier$suffix claude dispatched"
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex: file poll + short grace period after verdict detected
    log "  Polling for verify-verdict.json ($suffix, codex TUI)..."
    codex_poll_start=$(date +%s)
    _verdict_detected_at=0
    while true; do
      # Wait for verdict file with valid JSON
      if [[ -f "$VERDICT_FILE" ]] && jq . "$VERDICT_FILE" >/dev/null 2>&1; then
        if (( _verdict_detected_at == 0 )); then
          _verdict_detected_at=$(date +%s)
          log "  Verdict file detected. Grace period (30s) for codex to finalize..."
        fi
        # Grace period: 30s after verdict detection, proceed regardless of pane state
        _grace_elapsed=$(( $(date +%s) - _verdict_detected_at ))
        if (( _grace_elapsed >= 30 )); then
          log "  Grace period complete. Proceeding."
          break
        fi
        # Early exit: if pane returned to shell, no need to wait
        _pane_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null || echo "")
        if [[ "$_pane_cmd" = "zsh" || "$_pane_cmd" = "bash" || -z "$_pane_cmd" ]]; then
          log "  Codex verifier$suffix process exited. Proceeding."
          break
        fi
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        if (( _verdict_detected_at > 0 )); then
          log "  Codex verifier$suffix timed out waiting, but verdict exists. Proceeding."
          break
        fi
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log "  Polling for verify-verdict.json ($suffix)..."
    # Capture the status with `|| rc=$?`: inside `if ! cmd; then`, $? is the
    # status of the negation (always 0), not the status of cmd.
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix" || verifier_poll_rc=$?
    if (( verifier_poll_rc != 0 )); then
      # rc=2 is a hard failure that the poll itself has already reported;
      # only the remaining failures get the generic error line.
      if (( verifier_poll_rc != 2 )); then
        log_error "Verifier$suffix poll failed"
      fi
      return 1
    fi
  fi

  # Copy verdict to destination
  if ! cp "$VERDICT_FILE" "$verdict_dest"; then
    log_error "Verifier$suffix verdict could not be copied to $verdict_dest"
    return 1
  fi
  log "  Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
2475
+
2476
+ # --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
2477
+ # Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
2478
+ # Sets FAILED_US global on failure.
2479
# run_sequential_final_verify — verify every US in US_LIST one at a time, then
# run the optional integration command.
#
# Arguments:
#   $1 iter  iteration number (used for log/prompt/verdict file names)
#
# Returns 0 if all US pass and the integration check passes (or is not
# configured), 1 if any US fails to launch/poll/pass, 2 if the integration
# check fails. Sets the FAILED_US global on failure ("integration" for rc 2).
run_sequential_final_verify() {
  local iter="$1"
  # Declared once: a bare `local` re-run inside the for loop would make zsh
  # print `name=value` from the second US onward.
  local iter_tag us orig_signal verifier_prompt verifier_cmd verifier_launch verdict
  local poll_rc=0
  iter_tag=$(printf '%03d' "$iter")
  FAILED_US=""

  log "  Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  for us in $(echo "$US_LIST" | tr ',' ' '); do
    log "  Final verify: checking $us..."

    # Override signal file to scope verifier to this US.
    # NOTE(review): the previous signal content is captured here but is never
    # written back, so the signal file keeps the last scoped value after this
    # function returns — confirm whether callers rely on that before restoring.
    orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
    echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"

    # Write scoped verifier trigger
    write_verifier_trigger "$iter"
    verifier_prompt="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"

    # Clean verifier pane
    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
    fi
    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

    # Drop the previous US's verdict BEFORE launching: removing it after the
    # launch could delete a verdict the new verifier has already written and
    # leave the poll waiting until timeout.
    rm -f "$VERDICT_FILE"

    # Launch verifier
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
    else
      verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
      if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"; then
        log_error "Failed to launch verifier for $us"
        FAILED_US="$us"
        return 1
      fi
    fi

    # Poll for verdict
    poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
    if (( poll_rc != 0 )); then
      log_error "Verifier poll failed for $us (rc=$poll_rc)"
      FAILED_US="$us"
      return 1
    fi

    # Check verdict: anything other than "pass" stops the sequence
    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
    if [[ "$verdict" != "pass" ]]; then
      FAILED_US="$us"
      log "  Sequential final verify FAILED at $us"
      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
      return 1
    fi
    log "  Sequential final verify: $us PASSED"

    # Archive per-US final verdict
    cp "$VERDICT_FILE" "$LOGS_DIR/iter-${iter_tag}.final-verdict-${us}.json" 2>/dev/null
  done

  # Integration check: run tests if VERIFICATION_CMD is set
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log "  Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log "  Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log "  Integration test suite PASSED"
  fi

  log "  Sequential final verify: ALL PASSED"
  return 0
}
2561
+
2562
+ # --- US-005: Determine whether consensus verification should run for this signal ---
2563
+ # Returns 0 (use consensus) or 1 (single engine).
2564
+ # Uses unified CONSENSUS_MODE: off|all|final-only
2565
# Decide whether this signal should go through consensus verification.
#   $1  us_id from the signal (optional)
# CONSENSUS_MODE semantics:
#   all         -> always consensus
#   final-only  -> consensus only when the signal's us_id is "ALL"
#   anything else (including off) -> single engine
# Returns 0 for consensus, 1 for single engine.
_should_use_consensus() {
  local signal_us_id="${1:-}"

  if [[ "$CONSENSUS_MODE" == "all" ]]; then
    return 0
  fi

  if [[ "$CONSENSUS_MODE" == "final-only" && "$signal_us_id" == "ALL" ]]; then
    return 0
  fi

  return 1
}
2573
+
2574
+ # --- US-004: Run consensus verification (claude + codex sequentially) ---
2575
# Helper: merge the issues of both engine verdicts (tagging each issue with its
# source) and write a failing consensus verdict to VERDICT_FILE.
#   $1 claude verdict file   $2 codex verdict file   $3 summary text
#   $4 recommended_state_transition   $5 round number
# JSON is produced by jq so verdict strings are always escaped correctly.
_consensus_write_fail_verdict() {
  local claude_file="$1" codex_file="$2" summary="$3" transition="$4" round="$5"
  local claude_issues codex_issues merged_issues
  claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_file" 2>/dev/null || echo '[]')
  codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_file" 2>/dev/null || echo '[]')
  merged_issues=$(echo "$claude_issues $codex_issues" | jq -s 'add // []' 2>/dev/null)
  # Never hand jq an empty --argjson value
  [[ -n "$merged_issues" ]] || merged_issues='[]'
  jq -n \
    --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg summary "$summary" \
    --argjson issues "$merged_issues" \
    --arg transition "$transition" \
    --arg claude "$CLAUDE_VERDICT" \
    --arg codex "$CODEX_VERDICT" \
    --argjson round "$round" \
    '{
      verdict: "fail",
      verified_at_utc: $ts,
      summary: $summary,
      issues: $issues,
      recommended_state_transition: $transition,
      consensus: { claude: $claude, codex: $codex, round: $round }
    }' | atomic_write "$VERDICT_FILE"
}

# run_consensus_verification — run the claude verifier, then the codex
# verifier, and combine their verdicts.
#
# Arguments:
#   $1 iter  iteration number
#
# Returns:
#   0  both engines returned "pass" (merged pass verdict written)
#   2  disagreement/failure in a round below 6 (fix contract + merged fail
#      verdict written; caller is expected to run the fix loop)
#   1  a verifier run failed, or the round cap was reached
#
# Sets globals CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT and the
# ITER_VERIFIER_*_DURATION_S timings.
#
# NOTE(review): CONSENSUS_ROUND is reset to 0 on every call and every round
# below 6 returns, so within a single call the loop body runs once and the
# "after 6 rounds" path is not reached — confirm whether the round counter is
# meant to persist across calls.
run_consensus_verification() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local claude_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-codex.json"
  local fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"
  local _claude_t0 _codex_t0 _combined_action

  CONSENSUS_ROUND=0
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""

  while (( CONSENSUS_ROUND < 6 )); do
    (( CONSENSUS_ROUND++ ))
    log "  Consensus round $CONSENSUS_ROUND/6..."

    # Run claude verifier first
    _claude_t0=$(date +%s)
    if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
      log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
    CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
    # A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
    if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
      log "  WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
      log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
      rm -f "$claude_verdict_file" 2>/dev/null
      if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
        log_error "Claude verifier retry also failed"
        return 1
      fi
      CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
      if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
        log_error "Claude verdict still null after retry — consensus cannot proceed"
        return 1
      fi
    fi
    log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"

    # consensus-fail-fast removed (complexity vs value too low)

    # Run codex verifier second
    _codex_t0=$(date +%s)
    if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
      log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
    CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
    log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"

    log "  Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
    _combined_action="retry"
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      _combined_action="pass"
    elif (( CONSENSUS_ROUND >= 6 )); then
      _combined_action="blocked"
    fi
    log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"

    # Both pass → success: write merged verdict with per-engine details
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      jq -n \
        --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        --arg claude_file "$claude_verdict_file" \
        --arg codex_file "$codex_verdict_file" \
        --argjson round "$CONSENSUS_ROUND" \
        '{
          verdict: "pass",
          verified_at_utc: $ts,
          summary: "Consensus PASS: both claude and codex verified independently",
          recommended_state_transition: "complete",
          consensus: {
            claude: { verdict: "pass", file: $claude_file },
            codex: { verdict: "pass", file: $codex_file },
            round: $round
          }
        }' | atomic_write "$VERDICT_FILE"
      return 0
    fi

    # Consensus disagreement
    log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"

    # NOTE: pre_existing_failure heuristic was removed (v0.3.5).
    # It used unreliable grep-in-description string matching to classify
    # consensus failures as "pre-existing", bypassing the consensus rule.
    # Consensus disagreement now ALWAYS flows to fix contract.
    # Codex CLI crash (no verdict file) is handled upstream via run_single_verifier return 1 → BLOCKED.

    # --- Consensus disagreement: build fix contract ---
    {
      echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
      echo ""
      echo "## Claude Verdict: $CLAUDE_VERDICT"
      if [[ "$CLAUDE_VERDICT" = "fail" ]]; then
        echo "### Claude Issues"
        jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$claude_verdict_file" 2>/dev/null || echo "- (no structured issues)"
      fi
      echo ""
      echo "## Codex Verdict: $CODEX_VERDICT"
      if [[ "$CODEX_VERDICT" = "fail" ]]; then
        echo "### Codex Issues"
        jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$codex_verdict_file" 2>/dev/null || echo "- (no structured issues)"
      fi
      echo ""
      echo "## Traceability"
      echo "Only changes that resolve a listed issue are allowed."
    } | atomic_write "$fix_contract"

    log "  Combined fix contract: $fix_contract"

    # If this is not the last round, the caller will dispatch the Worker with the fix contract.
    # Write a merged fail verdict (issues from BOTH verdicts) so the main loop can handle the fix loop.
    if (( CONSENSUS_ROUND < 6 )); then
      _consensus_write_fail_verdict "$claude_verdict_file" "$codex_verdict_file" \
        "Consensus disagreement (round $CONSENSUS_ROUND/6): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
        "continue" "$CONSENSUS_ROUND"
      return 2  # special return: consensus disagreement, needs retry
    fi
  done

  # Max consensus rounds exceeded — include issues from both verdicts
  log_error "Consensus failed after 6 rounds"
  _consensus_write_fail_verdict "$claude_verdict_file" "$codex_verdict_file" \
    "Consensus failed after 6 rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
    "blocked" 6
  return 1
}
2724
+
2725
+ # =============================================================================
2726
+ # Main Leader Loop
2727
+ # =============================================================================
2728
+
2729
+ main() {
2730
+ # --- US-026 R14 P0: project-scoped runner lockfile (mkdir atomic) ---
2731
+ # Prevents duplicate runners on the same project root regardless of slug.
2732
+ # Different ROOT_HASH allows independent parallel runners across projects.
2733
+ mkdir -p "$(dirname "$RUNNER_LOCKFILE_PATH")" 2>/dev/null
2734
+ if ! mkdir "$RUNNER_LOCKDIR" 2>/dev/null; then
2735
+ local existing existing_slug
2736
+ existing=$(jq -r '.pid' "$RUNNER_LOCKFILE_PATH" 2>/dev/null || echo 0)
2737
+ existing_slug=$(jq -r '.slug // "unknown"' "$RUNNER_LOCKFILE_PATH" 2>/dev/null || echo unknown)
2738
+ if [[ "$existing" -gt 0 ]] && kill -0 "$existing" 2>/dev/null; then
2739
+ echo "duplicate rlp-desk runner detected on this project root. existing pid=$existing slug=$existing_slug, this attempt slug=$SLUG. exiting." >&2
2740
+ echo " Recover with: rm -rf '$RUNNER_LOCKDIR' '$RUNNER_LOCKFILE_PATH' (only if pid $existing is confirmed dead)" >&2
2741
+ exit 1
2742
+ fi
2743
+ rm -rf "$RUNNER_LOCKDIR"
2744
+ mkdir "$RUNNER_LOCKDIR" 2>/dev/null || {
2745
+ echo "failed to acquire runner lock after stale cleanup; another wrapper raced ahead. exit 1" >&2
2746
+ exit 1
2747
+ }
2748
+ echo "stale runner lockfile cleaned (pid $existing dead) — acquired" >&2
2749
+ fi
2750
+ printf '{"pid":%s,"slug":"%s","root":"%s","started_at":"%s"}\n' \
2751
+ "$$" "$SLUG" "$ROOT" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$RUNNER_LOCKFILE_PATH"
2752
+
2753
+ # --- Lockfile: prevent duplicate execution ---
2754
+ local lockfile="$LOCKFILE_PATH"
2755
+ mkdir -p "$(dirname "$lockfile")" 2>/dev/null
2756
+ if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
2757
+ local lock_pid
2758
+ lock_pid=$(cat "$lockfile" 2>/dev/null)
2759
+ if kill -0 "$lock_pid" 2>/dev/null; then
2760
+ log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
2761
+ exit 1
2762
+ fi
2763
+ # Stale lock — overwrite
2764
+ log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
2765
+ echo $$ > "$lockfile"
2766
+ LOCKFILE_ACQUIRED=1
2767
+ else
2768
+ LOCKFILE_ACQUIRED=1
2769
+ fi
2770
+ # US-023 R11 P2-K: chain `_emit_final_cost_log` so cost-log.jsonl is never silently empty on exit.
2771
+ trap '_emit_final_cost_log; cleanup' EXIT INT TERM
2772
+ mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
2773
+
2774
+ # --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
2775
+ mkdir -p "$ANALYTICS_DIR" 2>/dev/null
2776
+
2777
+ # --- debug.log versioning (in analytics dir, --debug only) ---
2778
+ if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
2779
+ local dbg_n=1
2780
+ while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
2781
+ (( dbg_n++ ))
2782
+ done
2783
+ mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
2784
+ fi
2785
+
2786
+ # --- campaign.jsonl versioning (always-on) ---
2787
+ if [[ -f "$CAMPAIGN_JSONL" ]]; then
2788
+ local cj_n=1
2789
+ while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
2790
+ (( cj_n++ ))
2791
+ done
2792
+ mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
2793
+ fi
2794
+
2795
+ # --- metadata.json: always write at campaign start (cross-project identification) ---
2796
+ jq -n \
2797
+ --arg slug "$SLUG" \
2798
+ --arg project_root "$ROOT" \
2799
+ --arg project_name "$(basename "$ROOT")" \
2800
+ --arg campaign_status "running" \
2801
+ --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
2802
+ --arg end_time "" \
2803
+ --arg worker_model "$WORKER_MODEL" \
2804
+ --arg verifier_model "$VERIFIER_MODEL" \
2805
+ --argjson debug "$DEBUG" \
2806
+ --argjson with_sv "$WITH_SELF_VERIFICATION" \
2807
+ --argjson with_sv_requested "$WITH_SELF_VERIFICATION_REQUESTED" \
2808
+ --arg sv_skipped_reason "$SV_SKIPPED_REASON" \
2809
+ --arg lane_mode "$LANE_MODE" \
2810
+ --argjson consensus "${VERIFY_CONSENSUS:-0}" \
2811
+ '{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, with_self_verification_requested: $with_sv_requested, sv_skipped_reason: $sv_skipped_reason, lane_mode: $lane_mode, consensus: $consensus}' \
2812
+ > "$METADATA_FILE"
2813
+
2814
+ # --- Startup ---
2815
+ log "Ralph Desk Tmux Runner starting..."
2816
+ log " Slug: $SLUG"
2817
+ log " Root: $ROOT"
2818
+ log " Max iterations: $MAX_ITER"
2819
+ log " Worker model: $WORKER_MODEL"
2820
+ log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
2821
+ log " Verify mode: $VERIFY_MODE"
2822
+ log " Consensus mode: $CONSENSUS_MODE"
2823
+ log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
2824
+ log " Poll interval: ${POLL_INTERVAL}s"
2825
+ log " Iter timeout: ${ITER_TIMEOUT}s"
2826
+ # --- Debug: Log execution plan ---
2827
+ if (( DEBUG )); then
2828
+ # Extract US IDs from PRD
2829
+ local prd_file="$DESK/plans/prd-$SLUG.md"
2830
+ local us_list=""
2831
+ if [[ -f "$prd_file" ]]; then
2832
+ us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
2833
+ fi
2834
+ local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
2835
+
2836
+ log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
2837
+ log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
2838
+ log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
2839
+ log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
2840
+ log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION (requested=$WITH_SELF_VERIFICATION_REQUESTED skipped=${SV_SKIPPED_REASON:-none}) debug=$DEBUG"
2841
+
2842
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
2843
+ # Build expected flow
2844
+ local expected_flow=""
2845
+ for us in $(echo "$us_list" | tr ',' ' '); do
2846
+ expected_flow="${expected_flow}worker->verify($us)->"
2847
+ done
2848
+ expected_flow="${expected_flow}verify(ALL)->COMPLETE"
2849
+ log_debug "[OPTION] expected_flow=$expected_flow"
2850
+ else
2851
+ log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
2852
+ fi
2853
+
2854
+ if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
2855
+ log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
2856
+ fi
2857
+ fi
2858
+
2859
+ # Extract US list for per-US sequencing
2860
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
2861
+ local prd_file="$DESK/plans/prd-$SLUG.md"
2862
+ if [[ -f "$prd_file" ]]; then
2863
+ US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
2864
+ fi
2865
+
2866
+ # Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
2867
+ local memory_file="$DESK/memos/${SLUG}-memory.md"
2868
+ if [[ -f "$memory_file" ]]; then
2869
+ local completed_us
2870
+ completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
2871
+ if [[ -n "$completed_us" ]]; then
2872
+ VERIFIED_US="$completed_us"
2873
+ log " Loaded completed stories from memory: $VERIFIED_US"
2874
+ log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
2875
+ fi
2876
+ fi
2877
+
2878
+ # D1: Fallback — restore verified_us from status.json if memory had none
2879
+ if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
2880
+ local status_verified
2881
+ status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
2882
+ if [[ -n "$status_verified" ]]; then
2883
+ VERIFIED_US="$status_verified"
2884
+ log " Restored verified_us from status.json: $VERIFIED_US"
2885
+ log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
2886
+ fi
2887
+ fi
2888
+ fi
2889
+
2890
+ # Initialize PRD snapshot state for live update detection
2891
+ PREV_PRD_HASH=$(compute_prd_hash)
2892
+ PREV_PRD_US_LIST=$(count_prd_us)
2893
+
2894
+ # Dependency checks
2895
+ check_dependencies
2896
+
2897
+ # Print security warning (governance.md s7: --dangerously-skip-permissions)
2898
+ print_security_warning
2899
+
2900
+ # Validate scaffold
2901
+ validate_scaffold
2902
+
2903
+ # Check for existing sessions
2904
+ check_existing_sessions
2905
+
2906
+ # Create tmux session with pane IDs (governance.md s7 step 1)
2907
+ create_session
2908
+
2909
+ # Set trap for cleanup on exit/error
2910
+ # US-023 R11 P2-K: chain `_emit_final_cost_log` so cost-log.jsonl is never silently empty.
2911
+ trap '_emit_final_cost_log; cleanup' EXIT
2912
+
2913
+ # Initialize context hash for stale detection
2914
+ PREV_CONTEXT_HASH=$(compute_context_hash)
2915
+
2916
+ # --- governance.md s7: Leader Loop ---
2917
+ local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
2918
+
2919
+ for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
2920
+ # US-024 R12 P0: lifecycle check site #2 — verify session/panes alive at iter entry.
2921
+ _r12_check_lifecycle "iter_start"
2922
+ log ""
2923
+ log "========== Iteration $ITERATION / $MAX_ITER =========="
2924
+ local ITER_START_TIME
2925
+ ITER_START_TIME=$(date +%s)
2926
+ local _iter_contract=""
2927
+ _iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
2928
+ log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
2929
+
2930
+ # --- governance.md s7 step 1: Check sentinels ---
2931
+ if [[ -f "$COMPLETE_SENTINEL" ]]; then
2932
+ log "COMPLETE sentinel found. Campaign succeeded."
2933
+ update_status "complete" "complete"
2934
+ return 0
2935
+ fi
2936
+ if [[ -f "$BLOCKED_SENTINEL" ]]; then
2937
+ log "BLOCKED sentinel found. Campaign blocked."
2938
+ update_status "blocked" "blocked"
2939
+ return 1
2940
+ fi
2941
+
2942
+ # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
2943
+ rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
2944
+ rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null
2945
+
2946
+ # --- Clean previous claude session in panes (one-shot lifecycle) ---
2947
+ # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
2948
+ if (( ITERATION > 1 )); then
2949
+ # Send C-c first (in case claude is mid-task), then /exit
2950
+ tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
2951
+ sleep 1
2952
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
2953
+ sleep 2
2954
+ # Wait for shell prompt before proceeding
2955
+ wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
2956
+ fi
2957
+
2958
+ # Reset per-iteration state
2959
+ local worker_nudge_count=0
2960
+ local verifier_nudge_count=0
2961
+ ITER_VERIFIER_START=""
2962
+ ITER_VERIFIER_END=""
2963
+
2964
+ # --- US-004: detect PRD changes for live update + re-split ---
2965
+ check_prd_update
2966
+
2967
+ # --- governance.md s7 step 4: Build worker prompt + trigger ---
2968
+ write_worker_trigger "$ITERATION"
2969
+ local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
2970
+
2971
+ # AC1: capture worker start timestamp
2972
+ ITER_WORKER_START=$(date +%s)
2973
+
2974
+ update_status "worker" "running"
2975
+
2976
+ # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
2977
+ log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
2978
+
2979
+ local worker_launch
2980
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
2981
+ worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
2982
+ if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
2983
+ write_blocked_sentinel "Worker codex failed to start in pane" "" "infra_failure"
2984
+ update_status "blocked" "worker_start_failed"
2985
+ return 1
2986
+ fi
2987
+ else
2988
+ worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
2989
+ if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
2990
+ write_blocked_sentinel "Worker claude failed to start in pane" "" "infra_failure"
2991
+ update_status "blocked" "worker_start_failed"
2992
+ return 1
2993
+ fi
2994
+ fi
2995
+
2996
+ # --- governance.md s7 step 5+6: Poll for Worker completion ---
2997
+ # US-024 R12 P0: lifecycle check site #3 — verify panes alive after worker dispatch, before wait-loop.
2998
+ _r12_check_lifecycle "post_send"
2999
+ log " Polling for iter-signal.json..."
3000
+ local worker_poll_done=0
3001
+ while (( ! worker_poll_done )); do
3002
+ local worker_poll_rc=0
3003
+ if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
3004
+ worker_poll_done=1
3005
+ log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
3006
+ else
3007
+ worker_poll_rc=$?
3008
+ if (( worker_poll_rc == 2 )); then
3009
+ return 1
3010
+ fi
3011
+ # Check if Worker is still actively running (not stuck)
3012
+ local worker_cmd
3013
+ worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
3014
+ if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
3015
+ # Process alive — extend indefinitely (no hard ceiling kill)
3016
+ # Stale-context breaker and nudge system handle truly stuck workers
3017
+ local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
3018
+ local ceiling_exceeded=""
3019
+ if (( iter_elapsed >= HARD_CEILING )); then
3020
+ ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
3021
+ log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
3022
+ log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
3023
+ fi
3024
+ log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
3025
+ log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
3026
+ log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
3027
+ update_status "worker" "slow"
3028
+ # Loop continues — re-poll same iteration
3029
+ else
3030
+ # Worker is truly dead/stuck
3031
+ (( MONITOR_FAILURE_COUNT++ ))
3032
+ log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
3033
+ if (( MONITOR_FAILURE_COUNT >= 3 )); then
3034
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
3035
+ write_blocked_sentinel "3 consecutive monitor failures (worker not active)" "" "infra_failure"
3036
+ update_status "blocked" "monitor_failures"
3037
+ return 1
3038
+ fi
3039
+ log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3) — will retry"
3040
+ update_status "worker" "poll_failed"
3041
+ log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd retry=true"
3042
+ # v0.14.3 P0-5 (Bug Report #5): previously this branch wrote BLOCKED
3043
+ # unconditionally even at counter 1/3, so a single transient
3044
+ # worker-dead detection halted the campaign in 5s instead of
3045
+ # honoring the 3-strike circuit breaker above (the MONITOR_FAILURE_COUNT >= 3 check). Removed
3046
+ # the unconditional sentinel write; the loop now continues so the
3047
+ # next polling tick can either confirm the dead state (counter
3048
+ # eventually reaches 3 → BLOCKED) or recover (worker resumes →
3049
+ # MONITOR_FAILURE_COUNT is reset to 0 after this poll loop once the signal file exists).
3050
+ fi
3051
+ fi
3052
+ done
3053
+
3054
+ if [[ ! -f "$SIGNAL_FILE" ]]; then
3055
+ log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
3056
+ # No signal — monitor failure, go to next iteration
3057
+ continue
3058
+ fi
3059
+
3060
+ # Reset monitor failure count on success
3061
+ MONITOR_FAILURE_COUNT=0
3062
+
3063
+ # AC1: capture worker end timestamp; reset consensus timing
3064
+ ITER_WORKER_END=$(date +%s)
3065
+ ITER_VERIFIER_CLAUDE_DURATION_S=""
3066
+ ITER_VERIFIER_CODEX_DURATION_S=""
3067
+
3068
+ # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
3069
+ local signal_status
3070
+ signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
3071
+ local signal_summary
3072
+ signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)
3073
+
3074
+ log " Worker signal: status=$signal_status summary=\"$signal_summary\""
3075
+
3076
+ # Read us_id early for EXEC logging (also used later in verify branch)
3077
+ local signal_us_id_early=""
3078
+ signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
3079
+ log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
3080
+
3081
+ case "$signal_status" in
3082
+ continue)
3083
+ # --- governance.md s7 step 6: continue -> go to step 8 ---
3084
+ log " Worker requests continue. Moving to next iteration."
3085
+ update_status "worker" "continue"
3086
+ ;;
3087
+ verify_partial)
3088
+ # US-019 R7 P1-G: Worker explicitly verified a subset of ACs and deferred the rest.
3089
+ # Verifier evaluates only verified_acs. Malformed (empty verified_acs) downgrades to blocked.
3090
+ local vp_count
3091
+ vp_count=$(jq -r '.verified_acs // [] | length' "$SIGNAL_FILE" 2>/dev/null || echo 0)
3092
+ if [[ "$vp_count" -eq 0 ]]; then
3093
+ log " Worker signal verify_partial but verified_acs is empty — downgrading to blocked (verify_partial_malformed)."
3094
+ local vp_us_id
3095
+ vp_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
3096
+ write_blocked_sentinel "verify_partial_malformed: empty verified_acs" "${vp_us_id:-${CURRENT_US:-ALL}}" "mission_abort"
3097
+ update_status "blocked" "verify_partial_malformed"
3098
+ break
3099
+ fi
3100
+ log " Worker signal verify_partial (verified_acs count=$vp_count). Routing to verify path."
3101
+ signal_status="verify"
3102
+ ;&
3103
+ verify)
3104
+ # --- governance.md s7 step 7: Execute Verifier ---
3105
+ # Read us_id from signal for per-US scoping
3106
+ local signal_us_id=""
3107
+ signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
3108
+ log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
3109
+
3110
+ # AC1: capture verifier start timestamp
3111
+ ITER_VERIFIER_START=$(date +%s)
3112
+
3113
+ update_status "verifier" "running"
3114
+
3115
+ # --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
3116
+ if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
3117
+ log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
3118
+ local seq_rc=0
3119
+ run_sequential_final_verify "$ITERATION" || seq_rc=$?
3120
+ if (( seq_rc == 0 )); then
3121
+ write_complete_sentinel "Sequential final verify passed (all US verified individually)"
3122
+ update_status "complete" "pass"
3123
+ write_campaign_jsonl "$ITERATION" "ALL" "pass"
3124
+ return 0
3125
+ else
3126
+ # Sequential verify failed — fall through to fix loop with failed US
3127
+ log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
3128
+ signal_us_id="${FAILED_US:-ALL}"
3129
+ # Synthesize a fail verdict for the fix loop
3130
+ echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
3131
+ fi
3132
+ fi
3133
+
3134
+ # --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
3135
+ local use_consensus=0
3136
+ _should_use_consensus "$signal_us_id" && use_consensus=1
3137
+
3138
+ # --- Consensus vs single verification ---
3139
+ if (( use_consensus )); then
3140
+ # US-004: Run consensus verification (claude + codex sequentially)
3141
+ local consensus_rc=0
3142
+ run_consensus_verification "$ITERATION" || consensus_rc=$?
3143
+
3144
+ if (( consensus_rc == 2 )); then
3145
+ # Consensus disagreement — treat as fail, fix loop will handle
3146
+ log " Consensus disagreement, treating as fail."
3147
+ elif (( consensus_rc != 0 )); then
3148
+ # Consensus verification failed entirely
3149
+ log_error "Consensus verification failed"
3150
+ write_blocked_sentinel "Consensus verification failed after max rounds" "" "repeat_axis"
3151
+ update_status "blocked" "consensus_failed"
3152
+ return 1
3153
+ fi
3154
+ else
3155
+ # Standard single-engine verification
3156
+ write_verifier_trigger "$ITERATION"
3157
+ local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
3158
+
3159
+ # Step 7a: Clean previous Verifier session (with dead pane detection)
3160
+ local verifier_cmd
3161
+ verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
3162
+ if [[ -z "$verifier_cmd" ]]; then
3163
+ log " Verifier pane $VERIFIER_PANE is gone — replacing..."
3164
+ log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
3165
+ replace_worker_pane "$VERIFIER_PANE" "verifier"
3166
+ VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
3167
+ log " New verifier pane: $VERIFIER_PANE"
3168
+ elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
3169
+ log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
3170
+ log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
3171
+ tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
3172
+ sleep 0.2
3173
+ tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
3174
+ sleep 0.3
3175
+ elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
3176
+ tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
3177
+ sleep 0.5
3178
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
3179
+ sleep 2
3180
+ fi
3181
+ wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
3182
+
3183
+ local verifier_launch
3184
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
3185
+ verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
3186
+ else
3187
+ verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
3188
+ fi
3189
+ log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
3190
+
3191
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
3192
+ launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
3193
+ else
3194
+ if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
3195
+ update_status "verifier" "start_failed"
3196
+ continue
3197
+ fi
3198
+ fi
3199
+
3200
+ # Poll for verify-verdict.json
3201
+ log " Polling for verify-verdict.json..."
3202
+ if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
3203
+ local verifier_poll_rc=$?
3204
+ if (( verifier_poll_rc == 2 )); then
3205
+ return 1
3206
+ fi
3207
+ log_error "Verifier poll failed"
3208
+ # Verifier is dead/stuck — BLOCK and let user decide
3209
+ write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection." "" "infra_failure"
3210
+ update_status "blocked" "verifier_dead"
3211
+ return 1
3212
+ fi
3213
+ fi
3214
+
3215
+ # AC1: capture verifier end timestamp
3216
+ ITER_VERIFIER_END=$(date +%s)
3217
+
3218
+ # --- governance.md s7 step 7: Read verdict via jq ---
3219
+ local verdict
3220
+ verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
3221
+ local recommended
3222
+ recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
3223
+ local verdict_summary
3224
+ verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
3225
+
3226
+ log " Verifier: verdict=$verdict recommended=$recommended"
3227
+ log " Verifier summary: \"$verdict_summary\""
3228
+ local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
3229
+ log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
3230
+
3231
+ case "$verdict" in
3232
+ pass)
3233
+ CONSECUTIVE_FAILURES=0
3234
+ CONSENSUS_ROUND=0
3235
+ _SAME_US_FAIL_COUNT=0
3236
+ _LAST_FAILED_US=""
3237
+ if (( _MODEL_UPGRADED )); then
3238
+ log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
3239
+ log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
3240
+ WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
3241
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
3242
+ WORKER_CODEX_MODEL="$WORKER_MODEL"
3243
+ WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
3244
+ fi
3245
+ _MODEL_UPGRADED=0
3246
+ fi
3247
+
3248
+ # --- Verified US tracking (both per-us and batch modes) ---
3249
+ if [[ -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
3250
+ # Add this US to verified list
3251
+ if [[ -n "$VERIFIED_US" ]]; then
3252
+ VERIFIED_US="${VERIFIED_US},${signal_us_id}"
3253
+ else
3254
+ VERIFIED_US="$signal_us_id"
3255
+ fi
3256
+ log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
3257
+ log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
3258
+ update_status "verifier" "pass_us"
3259
+ # Worker will do next US on next iteration
3260
+ elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
3261
+ # Final full verify passed or complete recommended
3262
+ write_complete_sentinel "$verdict_summary"
3263
+ update_status "complete" "pass"
3264
+ write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
3265
+ return 0
3266
+ else
3267
+ log " Verifier passed but did not recommend complete. Continuing."
3268
+ update_status "verifier" "pass_continue"
3269
+ fi
3270
+ ;;
3271
+ fail)
3272
+ # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
3273
+
3274
+ # Parse per_us_results from verdict to track partial progress (batch + per-us)
3275
+ local _prev_verified="$VERIFIED_US"
3276
+ if jq -e '.per_us_results' "$VERDICT_FILE" &>/dev/null; then
3277
+ local _newly_passed
3278
+ _newly_passed=$(jq -r '.per_us_results | to_entries[] | select(.value == "pass") | .key' "$VERDICT_FILE" 2>/dev/null)
3279
+ for _pus in $(echo "$_newly_passed"); do
3280
+ if ! echo ",$VERIFIED_US," | grep -q ",$_pus,"; then
3281
+ if [[ -n "$VERIFIED_US" ]]; then
3282
+ VERIFIED_US="${VERIFIED_US},${_pus}"
3283
+ else
3284
+ VERIFIED_US="$_pus"
3285
+ fi
3286
+ log " Partial progress: $_pus passed (overall FAIL). Verified so far: $VERIFIED_US"
3287
+ fi
3288
+ done
3289
+ log_debug "[FLOW] iter=$ITERATION partial_progress prev=$_prev_verified now=$VERIFIED_US"
3290
+ fi
3291
+
3292
+ # Partial progress resets consecutive failures (progress was made)
3293
+ if [[ "$VERIFIED_US" != "$_prev_verified" ]]; then
3294
+ CONSECUTIVE_FAILURES=0
3295
+ log " Progress detected — consecutive_failures reset to 0"
3296
+ log_debug "[GOV] iter=$ITERATION consecutive_failures_reset=partial_progress"
3297
+ fi
3298
+
3299
+ (( CONSECUTIVE_FAILURES++ ))
3300
+ record_us_failure "${signal_us_id:-unknown}"
3301
+ check_model_upgrade "${signal_us_id:-unknown}"
3302
+
3303
+ # Mid-CB warning: alert at halfway point (governance §8 early warning)
3304
+ if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
3305
+ log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
3306
+ log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
3307
+ fi
3308
+ local verdict_summary_fail
3309
+ verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
3310
+ log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
3311
+
3312
+ # Extract issues from verdict for next Worker's fix contract
3313
+ local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).fix-contract.md"
3314
+ {
3315
+ echo "# Fix Contract (from Verifier iteration $ITERATION)"
3316
+ echo ""
3317
+ if [[ -n "$VERIFIED_US" ]]; then
3318
+ echo "## Verified US (do NOT re-implement these)"
3319
+ echo "$VERIFIED_US" | tr ',' '\n' | sed 's/^/- /'
3320
+ echo ""
3321
+ echo "**Focus ONLY on unverified user stories. The above are already verified.**"
3322
+ echo ""
3323
+ fi
3324
+ echo "## Summary"
3325
+ echo "$verdict_summary_fail"
3326
+ echo ""
3327
+ echo "## Issues (from verify-verdict.json)"
3328
+ jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null || echo "- (no structured issues available)"
3329
+ echo ""
3330
+ echo "## Next Iteration Contract"
3331
+ jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
3332
+ } | atomic_write "$fix_contract"
3333
+ log " Fix contract: $fix_contract"
3334
+ log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
3335
+
3336
+ # Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
3337
+ if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
3338
+ # For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
3339
+ _ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
3340
+ if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
3341
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
3342
+ log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
3343
+ write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures" "" "repeat_axis"
3344
+ else
3345
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
3346
+ log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
3347
+ write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures" "" "repeat_axis"
3348
+ fi
3349
+ update_status "blocked" "consecutive_failures"
3350
+ return 1
3351
+ fi
3352
+
3353
+ update_status "verifier" "fail"
3354
+ ;;
3355
+ request_info)
3356
+ # --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
3357
+ local verdict_summary_ri
3358
+ verdict_summary_ri=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
3359
+ log " Verifier requests info (degraded in tmux lean mode)."
3360
+ log " Questions: \"$verdict_summary_ri\""
3361
+ log " Treating as soft fail — Worker will see verdict in next iteration."
3362
+ update_status "verifier" "request_info"
3363
+ ;;
3364
+ blocked)
3365
+ local _verdict_cat
3366
+ _verdict_cat=$(_classify_cross_us_or_metric "$verdict_summary")
3367
+ write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary" "" "$_verdict_cat"
3368
+ update_status "blocked" "verifier_blocked"
3369
+ return 1
3370
+ ;;
3371
+ *)
3372
+ log_error "Unknown verdict: $verdict"
3373
+ update_status "verifier" "unknown_verdict"
3374
+ ;;
3375
+ esac
3376
+ ;;
3377
+ blocked)
3378
+ # --- governance.md s7 step 6: blocked -> write sentinel ---
3379
+ local _signal_cat
3380
+ _signal_cat=$(_classify_cross_us_or_metric "$signal_summary")
3381
+ write_blocked_sentinel "Worker reported blocked: $signal_summary" "" "$_signal_cat"
3382
+ update_status "blocked" "worker_blocked"
3383
+ return 1
3384
+ ;;
3385
+ *)
3386
+ log_error "Unknown signal status: $signal_status"
3387
+ update_status "worker" "unknown_status"
3388
+ ;;
3389
+ esac
3390
+
3391
+ # --- step 7d: Archive iteration artifacts before cleanup ---
3392
+ archive_iter_artifacts "$ITERATION"
3393
+
3394
+ # --- AC5: Write per-iteration cost estimate ---
3395
+ write_cost_log "$ITERATION"
3396
+ write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
3397
+
3398
+ # --- governance.md s7 step 8: Write result log ---
3399
+ write_result_log "$ITERATION" "$signal_status"
3400
+
3401
+ # --- governance.md s7 step 8: Circuit breaker - stale context check ---
3402
+ if ! check_stale_context; then
3403
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
3404
+ write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)" "" "context_limit"
3405
+ update_status "blocked" "stale_context"
3406
+ return 1
3407
+ fi
3408
+
3409
+ # --- governance.md s7 step 8: Update status ---
3410
+ update_status "idle" "${signal_status:-unknown}"
3411
+ done
3412
+
3413
+ # Max iterations reached
3414
+ log "Max iterations ($MAX_ITER) reached."
3415
+ update_status "timeout" "max_iter"
3416
+ return 1
3417
+ }
3418
+
3419
+ # =============================================================================
3420
+ # Entry Point
3421
+ # =============================================================================
3422
+
3423
# --- CLI: parse --worker-model / --verifier-model and the other run flags ---
# Values given on the command line override the env-var defaults
# (WORKER_ENGINE, WORKER_MODEL, etc.).
# Model flag format: "model:reasoning" → codex engine; "model-name" → claude engine
# The positional parameters are read by index only; "$@" itself is left untouched.

# Parse the value of one "--<role>-model" flag and store it in that role's globals.
#   $1 - role label handed to parse_model_flag (worker | verifier | final-verifier)
#   $2 - variable-name prefix (WORKER | VERIFIER | FINAL_VERIFIER)
#   $3 - raw flag value; empty when the flag was the last argument
# The output of parse_model_flag is consumed as space-separated fields:
# engine, model, then an optional trailing effort/reasoning word.
# A non-zero status from parse_model_flag terminates the script with exit 1.
# _cli_parsed / _cli_rest are scratch globals; they are unset after the loop.
_cli_set_role_model() {
  _cli_parsed=$(parse_model_flag "$3" "$1") || exit 1
  _cli_rest="${_cli_parsed#* }"

  # typeset -g assigns to the global named by the computed string.
  typeset -g "${2}_ENGINE=${_cli_parsed%% *}"
  typeset -g "${2}_MODEL=${_cli_rest%% *}"

  if [[ "${_cli_parsed%% *}" = "codex" ]]; then
    # codex: mirror the model and take the last field as the reasoning level
    typeset -g "${2}_CODEX_MODEL=${_cli_rest%% *}"
    typeset -g "${2}_CODEX_REASONING=${_cli_rest##* }"
  elif [[ "$_cli_rest" == *" "* ]]; then
    # non-codex engine with a trailing field: record it as the effort
    typeset -g "${2}_EFFORT=${_cli_rest##* }"
  fi
}

_cli_argc=$#
for (( _cli_i = 1; _cli_i <= _cli_argc; _cli_i++ )); do
  case "${@[$_cli_i]}" in
    # --- flags that consume the following argument as a model spec ---
    --worker-model)
      (( _cli_i++ ))
      _cli_set_role_model "worker" "WORKER" "${@[$_cli_i]:-}"
      ;;
    --verifier-model)
      (( _cli_i++ ))
      _cli_set_role_model "verifier" "VERIFIER" "${@[$_cli_i]:-}"
      ;;
    --final-verifier-model)
      (( _cli_i++ ))
      _cli_set_role_model "final-verifier" "FINAL_VERIFIER" "${@[$_cli_i]:-}"
      ;;

    # --- consensus flags that consume the following argument verbatim ---
    # A missing or empty value falls back to the default shown in each expansion.
    --consensus)
      (( _cli_i++ ))
      CONSENSUS_MODE="${@[$_cli_i]:-off}"
      ;;
    --consensus-model)
      (( _cli_i++ ))
      CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.5:medium}"
      ;;
    --final-consensus-model)
      (( _cli_i++ ))
      FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.5:high}"
      ;;

    # --- legacy consensus switches, mapped onto CONSENSUS_MODE ---
    --final-consensus)
      # Legacy: same as --consensus final-only
      CONSENSUS_MODE="final-only"
      ;;
    --verify-consensus)
      # Legacy: same as --consensus all
      CONSENSUS_MODE="all"
      ;;

    # --- plain switches ---
    --lock-worker-model)
      LOCK_WORKER_MODEL=1
      ;;
    --autonomous)
      AUTONOMOUS_MODE=1
      ;;
    --lane-strict)
      # P1-E opt-in: lane mtime audit escalates to BLOCKED instead of WARN.
      # See governance §7¾.
      LANE_MODE="strict"
      ;;
    --test-density-strict)
      # US-018 R6 P1-F opt-in: AC with < 3 tests fails init (exit 1) instead of WARN.
      # See governance §7f.
      TEST_DENSITY_MODE="strict"
      ;;
    # Any other argument matches no arm and is skipped.
  esac
done

# Drop the parser's scratch state so nothing leaks into the rest of the script.
unset _cli_i _cli_argc _cli_parsed _cli_rest
unset -f _cli_set_role_model
3508
+
3509
# Require tmux — this runner only works inside an active tmux session.
# When TMUX is unset or empty, print usage hints to stdout and exit 1.
# NOTE(review): SLUG is not assigned in this section; presumably it is set
# earlier in the file — confirm.
[[ -n "${TMUX:-}" ]] || {
  for _tmux_hint in \
    "ERROR: tmux mode requires running inside a tmux session." \
    "" \
    " Start tmux first, then retry:" \
    " tmux" \
    " LOOP_NAME=$SLUG $0" \
    "" \
    " Or use Agent() mode instead (no tmux needed):" \
    " /rlp-desk run $SLUG"
  do
    echo "$_tmux_hint"
  done
  exit 1
}

# Inside tmux: hand every original argument to the leader loop.
main "$@"