@ai-dev-methodologies/rlp-desk 0.14.3 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3490 @@
1
+ #!/bin/zsh
2
+ set -uo pipefail
3
+ # NOTE: We use set -u (undefined var check) and pipefail, but NOT set -e
4
+ # because the main loop uses explicit error checks throughout.
5
+
6
+ # =============================================================================
7
+ # Ralph Desk Tmux Runner
8
+ #
9
+ # Implements the Leader loop from governance.md section 7 as a shell script.
10
+ # Uses tmux proven patterns: write-then-notify, pane IDs (%N),
11
+ # copy-mode guards, verification-based retry, heartbeat monitoring,
12
+ # idle pane nudging, exponential backoff restarts, atomic file writes.
13
+ #
14
+ # Usage:
15
+ # LOOP_NAME=<slug> ./run_ralph_desk.zsh
16
+ #
17
+ # Required env:
18
+ # LOOP_NAME - slug identifier for the campaign
19
+ #
20
+ # Optional env:
21
+ # ROOT - project root (default: $PWD)
22
+ # MAX_ITER - max iterations (default: 20)
23
+ # WORKER_MODEL - claude model for Worker (default: haiku)
24
+ # VERIFIER_MODEL - claude model for Verifier (default: sonnet)
25
+ # POLL_INTERVAL - seconds between signal checks (default: 5)
26
+ # ITER_TIMEOUT - per-iteration timeout in seconds (default: 600)
27
+ # HEARTBEAT_STALE_THRESHOLD - seconds before heartbeat is stale (default: 120)
28
+ # MAX_RESTARTS - max restart attempts per worker (default: 3)
29
+ # IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
30
+ # MAX_NUDGES - max nudges per pane per iteration (default: 3)
31
+ #
32
+ # Per-role codex config:
33
+ # WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.5)
34
+ # WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
35
+ # VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.5)
36
+ # VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
37
+ #
38
+ # Consensus scope:
39
+ # CONSENSUS_SCOPE - when consensus applies (default: all)
40
+ # all=every verify, final-only=final ALL only
41
+ #
42
+ # Dependencies: tmux, claude CLI, jq
43
+ # Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
44
+ # =============================================================================
45
+
46
+ # --- Environment Variables ---
47
+ SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
48
+ ROOT="${ROOT:-$PWD}"
49
+ MAX_ITER="${MAX_ITER:-20}"
50
+ WORKER_MODEL="${WORKER_MODEL:-haiku}"
51
+ VERIFIER_MODEL="${VERIFIER_MODEL:-sonnet}"
52
+ FINAL_VERIFIER_MODEL="${FINAL_VERIFIER_MODEL:-opus}"
53
+ POLL_INTERVAL="${POLL_INTERVAL:-5}"
54
+ ITER_TIMEOUT="${ITER_TIMEOUT:-600}"
55
+ HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
56
+ MAX_RESTARTS="${MAX_RESTARTS:-3}"
57
+ IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
58
+ MAX_NUDGES="${MAX_NUDGES:-3}"
59
+ WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
60
+ WITH_SELF_VERIFICATION_REQUESTED="$WITH_SELF_VERIFICATION" # preserves original user intent for traceability (governance §1f)
61
+ SV_SKIPPED_REASON="" # set when SV is disabled despite user request
62
+
63
+ # v0.14.0 — zsh runner restored as primary tmux mode path.
64
+ # v5.7 §4.2's deprecation gate (rejected --flywheel/--flywheel-guard/
65
+ # --with-self-verification) is removed: the Node port shipped without
66
+ # zsh-equivalent safety nets (heartbeat, copy-mode guard, prompt-stall,
67
+ # no-progress, stale-context, claude model upgrade chain, etc.), so the
68
+ # Node leader is now reserved for `--mode agent` (LLM-driven) only.
69
+ # `--mode tmux` invocations from src/node/run.mjs delegate here as a
70
+ # subprocess via env vars. zsh continues to honor FLYWHEEL,
71
+ # FLYWHEEL_GUARD, WITH_SELF_VERIFICATION.
72
+ AUTONOMOUS_MODE="${AUTONOMOUS_MODE:-0}" # 1=don't stop on ambiguity, PRD is authoritative
73
+ # P1-E Lane enforcement: WARN-only by default; --lane-strict opts into BLOCKED
74
+ # escalation. governance §7¾. The opt-in defaults to "warn"; "strict" trips
75
+ # BLOCKED with reason_category=infra_failure + recoverable=true (downgrade
76
+ # from terminal_alert) so an inaccurate mtime audit cannot terminally kill a
77
+ # campaign.
78
+ LANE_MODE="${LANE_MODE:-warn}"
79
+ # US-018 R6 P1-F Test density: WARN by default; --test-density-strict turns
80
+ # init exit non-zero when any AC has < 3 tests (governance §7f).
81
+ TEST_DENSITY_MODE="${TEST_DENSITY_MODE:-warn}"
82
+ # US-021 R9 P2-I consecutive_blocks circuit breaker (governance §8). When the
83
+ # same canonical block reason fires N times in a row the runner writes
84
+ # .sisyphus/mission-abort.json and exits non-zero so contract defects don't
85
+ # silently loop. infra_failure category and the very first iteration are exempt.
86
+ BLOCK_CB_THRESHOLD="${BLOCK_CB_THRESHOLD:-3}"
87
+ CONSECUTIVE_BLOCKS=0
88
+ LAST_BLOCK_REASON=""
89
+
90
+ # US-021 R9 P2-I (_check_consecutive_blocks, defined further below): track repeated same-reason blocks. infra_failure category and
91
+ # the very first iteration are exempt (mission setup blocks shouldn't trip
92
+ # the abort). Returns 0 if loop should continue, 1 (after writing
93
+ # mission-abort.json) if the threshold is reached.
94
# US-023 R11 P2-K: every campaign leaves at least one cost-log.jsonl entry.
# An empty cost-log is ambiguous ("no usage recorded" vs. "logging broken"),
# so a final entry is always emitted on exit. COST_LOG_FINAL_WRITTEN makes the
# call idempotent; the function is meant to be wired into the cleanup trap.
COST_LOG_FINAL_WRITTEN=0

# _emit_final_cost_log() — write the final cost-log entry at most once.
# Globals: COST_LOG_FINAL_WRITTEN (read/written), ITERATION, LOGS_DIR (read)
# Returns: always 0 (a failing write_cost_log is deliberately ignored)
_emit_final_cost_log() {
  # Second and later calls are no-ops.
  [[ "${COST_LOG_FINAL_WRITTEN:-0}" -ne 0 ]] && return 0
  COST_LOG_FINAL_WRITTEN=1

  # Nothing to record until both the iteration counter and log dir exist.
  if [[ -z "${ITERATION:-}" || -z "${LOGS_DIR:-}" ]]; then
    return 0
  fi

  # Best effort: cleanup must never fail because of cost logging.
  write_cost_log "${ITERATION:-0}" 2>/dev/null || true
}
108
+
109
# US-024 R12 P0: tmux pane/session lifecycle monitor.
# Probes the session and the leader/worker/verifier panes. While any probe
# fails it retries, sleeping 1s between probes; the fifth consecutive failed
# probe is terminal. Per the original notes it is invoked from three sites:
# create_session post-finish, main-loop iteration entry, and after every
# send-keys/paste before the wait-loop.
# Args:    $1=site label used in log/sentinel text (default: unknown)
# Globals: SESSION_NAME, LEADER_PANE, WORKER_PANE, VERIFIER_PANE, DEBUG_LOG,
#          CURRENT_US (all read)
# Returns: 0 when everything is alive; otherwise writes an infra_failure
#          BLOCKED sentinel and exits the runner with status 1.
_r12_check_lifecycle() {
  local site="${1:-unknown}"
  local _failed_probes=0

  # Same short-circuit order as probing each item and stopping at the first
  # dead one: session, leader, worker, verifier.
  until _verify_session_alive "$SESSION_NAME" \
      && _verify_pane_alive "$LEADER_PANE" \
      && _verify_pane_alive "$WORKER_PANE" \
      && _verify_pane_alive "$VERIFIER_PANE"; do
    _failed_probes=$(( _failed_probes + 1 ))

    if (( _failed_probes < 5 )); then
      sleep 1
      continue
    fi

    # Budget exhausted: record diagnostics, block the campaign, stop.
    log_error "[r12:$site] tmux session/pane dead after 5x1s polling (5s authoritative budget). session=$SESSION_NAME panes leader=$LEADER_PANE worker=$WORKER_PANE verifier=$VERIFIER_PANE"
    tmux list-panes -a -F '#{session_name}:#{pane_id} dead=#{pane_dead}' 2>&1 | head -20 >> "${DEBUG_LOG:-/dev/null}"
    write_blocked_sentinel "tmux session/pane dead during $site" "${CURRENT_US:-ALL}" "infra_failure"
    exit 1
  done

  return 0
}
132
+
133
# _check_consecutive_blocks() — circuit breaker for repeated identical blocks.
# Counts how many times in a row the same canonical block reason fires. When
# the count reaches BLOCK_CB_THRESHOLD it writes $DESK/.sisyphus/mission-abort.json
# and tells the caller to stop. infra_failure blocks and iterations <= 1 reset
# the counter instead of counting.
# Args:    $1=raw block reason
#          $2=reason category (default: metric_failure)
#          $3=iteration number (default: $ITERATION, else 0)
# Globals: CONSECUTIVE_BLOCKS, LAST_BLOCK_REASON (read/written),
#          BLOCK_CB_THRESHOLD, DESK (read)
# Returns: 0 if the loop should continue, 1 once the threshold is reached.
_check_consecutive_blocks() {
  local reason="$1"
  local category="${2:-metric_failure}"
  local iter="${3:-${ITERATION:-0}}"

  # Exempt cases: infrastructure trouble and mission-setup (first iteration).
  if [[ "$category" == "infra_failure" ]] || (( iter <= 1 )); then
    LAST_BLOCK_REASON=""
    CONSECUTIVE_BLOCKS=0
    return 0
  fi

  local canonical
  canonical=$(_canonical_block_reason "$reason" 2>/dev/null)

  # An empty canonical reason never extends a streak.
  if [[ -n "$canonical" && "$canonical" == "$LAST_BLOCK_REASON" ]]; then
    CONSECUTIVE_BLOCKS=$(( CONSECUTIVE_BLOCKS + 1 ))
  else
    CONSECUTIVE_BLOCKS=1
    LAST_BLOCK_REASON="$canonical"
  fi

  if (( CONSECUTIVE_BLOCKS < BLOCK_CB_THRESHOLD )); then
    return 0
  fi

  local abort_dir="$DESK/.sisyphus"
  local abort_file="$abort_dir/mission-abort.json"

  # Escape the reason for use inside a JSON string literal. Backslashes go
  # first so the escapes added afterwards are not doubled.
  local reason_json="$canonical"
  reason_json=${reason_json//\\/\\\\}
  reason_json=${reason_json//\"/\\\"}
  reason_json=${reason_json//$'\n'/\\n}
  reason_json=${reason_json//$'\r'/\\r}
  reason_json=${reason_json//$'\t'/\\t}

  if ! mkdir -p "$abort_dir" 2>/dev/null; then
    log_error "Mission abort: cannot create $abort_dir"
  fi
  if ! printf '{"reason":"consecutive_blocks","count":%s,"last_reason":"%s","threshold":%s,"timestamp":"%s"}\n' \
      "$CONSECUTIVE_BLOCKS" "$reason_json" "$BLOCK_CB_THRESHOLD" \
      "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$abort_file"; then
    log_error "Mission abort: failed to write $abort_file"
  fi

  log_error "Mission abort: same canonical block reason '$canonical' repeated $CONSECUTIVE_BLOCKS times (>= $BLOCK_CB_THRESHOLD)"
  return 1
}
162
+
163
# --- Engine Selection (auto-detect from model format) ---
# claude models (haiku/sonnet/opus) with :effort → claude engine + effort
# codex models (gpt-*/spark) with :reasoning → codex engine + reasoning
# plain name (no colon) → nothing is changed
#
# _auto_detect_engine() — split a "model:level" spec held in a named variable.
# All arguments are NAMES of global variables, not values.
# Args: $1=model var (read; rewritten to the bare model name)
#       $2=engine var (set to claude|codex)
#       $3=codex model var (optional, may be "")
#       $4=codex reasoning var (optional, may be "")
#       $5=claude effort var (optional)
# Returns: 0 on success (including "no colon, nothing to do"),
#          1 if a variable name is missing or not a valid identifier.
#
# The values come from user-controlled environment variables, so they are
# assigned with `printf -v` and never passed through eval: a spec such as
# 'opus:$(cmd)' is stored literally instead of being executed.
_auto_detect_engine() {
  local model_var="${1:-}" engine_var="${2:-}" codex_model_var="${3:-}" codex_reasoning_var="${4:-}" effort_var="${5:-}"

  # Only plain identifiers may be used as assignment targets.
  local _ade_name
  for _ade_name in "$model_var" "$engine_var" "$codex_model_var" "$codex_reasoning_var" "$effort_var"; do
    case "$_ade_name" in
      '') ;;  # optional slot left empty
      [0-9]*|*[!A-Za-z0-9_]*)
        printf '%s\n' "_auto_detect_engine: invalid variable name '$_ade_name'" >&2
        return 1
        ;;
    esac
  done
  if [[ -z "$model_var" || -z "$engine_var" ]]; then
    printf '%s\n' "_auto_detect_engine: model and engine variable names are required" >&2
    return 1
  fi

  # Indirect read. The eval text contains only the validated identifier,
  # never the variable's value.
  local _ade_spec
  eval "_ade_spec=\${${model_var}-}"

  # No colon: leave everything as configured.
  [[ "$_ade_spec" == *:* ]] || return 0

  local _ade_model="${_ade_spec%%:*}"
  local _ade_level="${_ade_spec##*:}"

  case "$_ade_model" in
    haiku|sonnet|opus)
      # Claude model with effort — engine is claude, level is the effort.
      printf -v "$engine_var" '%s' "claude"
      printf -v "$model_var" '%s' "$_ade_model"
      if [[ -n "$effort_var" ]]; then
        printf -v "$effort_var" '%s' "$_ade_level"
      fi
      ;;
    *)
      # Anything else is treated as a codex model with a reasoning level.
      [[ "$_ade_model" == "spark" ]] && _ade_model="gpt-5.3-codex-spark"
      printf -v "$engine_var" '%s' "codex"
      printf -v "$model_var" '%s' "$_ade_model"
      if [[ -n "$codex_model_var" ]]; then
        printf -v "$codex_model_var" '%s' "$_ade_model"
      fi
      if [[ -n "$codex_reasoning_var" ]]; then
        printf -v "$codex_reasoning_var" '%s' "$_ade_level"
      fi
      ;;
  esac
  return 0
}
191
+
192
+ WORKER_ENGINE="${WORKER_ENGINE:-claude}"
193
+ VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}"
194
+ FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}"
195
+
196
+ # Effort levels for Claude models (set by _auto_detect_engine or CLI --worker-model opus:max)
197
+ WORKER_EFFORT="${WORKER_EFFORT:-}"
198
+ VERIFIER_EFFORT="${VERIFIER_EFFORT:-}"
199
+ FINAL_VERIFIER_EFFORT="${FINAL_VERIFIER_EFFORT:-}"
200
+
201
+ # Auto-detect engine from model format for env var path (CLI path uses parse_model_flag)
202
+ _auto_detect_engine WORKER_MODEL WORKER_ENGINE WORKER_CODEX_MODEL WORKER_CODEX_REASONING WORKER_EFFORT
203
+ _auto_detect_engine VERIFIER_MODEL VERIFIER_ENGINE VERIFIER_CODEX_MODEL VERIFIER_CODEX_REASONING VERIFIER_EFFORT
204
+ _auto_detect_engine FINAL_VERIFIER_MODEL FINAL_VERIFIER_ENGINE "" "" FINAL_VERIFIER_EFFORT
205
+ WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.5}"
206
+ WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
207
+ VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.5}"
208
+ VERIFIER_CODEX_REASONING="${VERIFIER_CODEX_REASONING:-high}" # low|medium|high
209
+ CODEX_BIN="" # resolved by check_dependencies when engine=codex
210
+
211
+ # --- Verify Mode ---
212
+ VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
213
+ # Consensus: off|all|final-only (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
214
+ CONSENSUS_MODE="${CONSENSUS_MODE:-off}" # off|all|final-only
215
+ CONSENSUS_MODEL="${CONSENSUS_MODEL:-gpt-5.5:medium}" # per-US cross-verifier (lighter)
216
+ FINAL_CONSENSUS_MODEL="${FINAL_CONSENSUS_MODEL:-gpt-5.5:high}" # final cross-verifier (stricter)
217
+ # Legacy compat: map old flags to CONSENSUS_MODE
218
+ if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
219
+ CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
220
+ elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
221
+ CONSENSUS_MODE="final-only"
222
+ fi
223
+ CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-${CONSENSUS_MODE}}"
224
+ CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
225
+ # Effective CB threshold: doubled when consensus mode active
226
+ if [[ "$CONSENSUS_MODE" != "off" ]]; then
227
+ EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
228
+ else
229
+ EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
230
+ fi
231
+ _API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
232
+ _API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
233
+
234
+ # --- Derived Paths ---
235
+ DESK="$ROOT/${RLP_DESK_RUNTIME_DIR:-.rlp-desk}"
236
+ # v0.13.0: legacy detection — refuse to run when .claude/ralph-desk/ is still
237
+ # present. init mode auto-migrates; run mode protects in-flight campaigns.
238
+ if [[ -d "$ROOT/.claude/ralph-desk" ]]; then
239
+ print -u2 "ERROR: Legacy .claude/ralph-desk/ detected at $ROOT/.claude/ralph-desk."
240
+ print -u2 "Run mode does not auto-migrate to protect in-flight campaigns."
241
+ print -u2 "Run: mv .claude/ralph-desk ${RLP_DESK_RUNTIME_DIR:-.rlp-desk} then re-run."
242
+ exit 1
243
+ fi
244
+ # US-026 R14 P0: project-root-hashed runner lockfile prevents duplicate runner spawns
245
+ # on the same project root while allowing parallel runs across different projects.
246
+ # shasum is mac-default; sha1sum on Linux; cksum is POSIX-final fallback.
247
+ ROOT_HASH=$(printf '%s' "$ROOT" | { shasum 2>/dev/null || sha1sum 2>/dev/null || cksum; } | awk '{print substr($1,1,8)}')
248
+ RUNNER_LOCKFILE_PATH="$DESK/logs/.rlp-desk-runner-$ROOT_HASH.lock"
249
+ RUNNER_LOCKDIR="${RUNNER_LOCKFILE_PATH}.d"
250
+ PROMPTS_DIR="$DESK/prompts"
251
+ CONTEXT_DIR="$DESK/context"
252
+ MEMOS_DIR="$DESK/memos"
253
+ LOGS_DIR="$DESK/logs/$SLUG"
254
+ RUNTIME_DIR="$LOGS_DIR/runtime"
255
+ PRD_FILE="$DESK/plans/prd-$SLUG.md"
256
+ TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
257
+ # --- Analytics Directory (v5.7 §4.11.b: project-local) ---
258
+ # Was previously $HOME/.claude/ralph-desk/analytics/<slug>--<hash> (cross-project
259
+ # rollup). With v0.12.0 the canonical location is project-local; cross-project
260
+ # rollup is the Leader's responsibility via ~/.claude/ralph-desk/registry.jsonl
261
+ # (Worker/Verifier prompts never reference the registry path — see §4.11.c).
262
+ ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
263
+ ANALYTICS_DIR="$DESK/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
264
+ CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
265
+ METADATA_FILE="$ANALYTICS_DIR/metadata.json"
266
+ WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
267
+ VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
268
+ CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
269
+ MEMORY_FILE="$MEMOS_DIR/${SLUG}-memory.md"
270
+ SIGNAL_FILE="$MEMOS_DIR/${SLUG}-iter-signal.json"
271
+ DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
272
+ VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
273
+ # v0.14.2 Bug Report #4: codex sometimes writes the verdict file to the
274
+ # pre-v0.13.0 legacy path despite the prompt instructing otherwise (CWD
275
+ # heuristics inside the codex CLI). Track the legacy path so the no-progress
276
+ # watcher and the harvest step can both fall back to it before BLOCKing the
277
+ # campaign. Auto-migration logic lives in _migrate_legacy_verdict().
278
+ LEGACY_VERDICT_FILE="$ROOT/.claude/ralph-desk/memos/${SLUG}-verify-verdict.json"
279
+ COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
280
+ BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
281
+ LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
282
+ STATUS_FILE="$RUNTIME_DIR/status.json"
283
+ SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
284
+ WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
285
+ VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
286
+ COST_LOG="$LOGS_DIR/cost-log.jsonl"
287
+
288
+ # --- Session Naming ---
289
+ TIMESTAMP=$(date +%Y%m%d-%H%M%S)
290
+ SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"
291
+
292
+ # --- State Tracking ---
293
+ typeset -A LAST_PANE_CONTENT
294
+ typeset -A PANE_IDLE_SINCE
295
+ typeset -A WORKER_RESTARTS
296
+ typeset -A US_FAIL_HISTORY
297
+ STALE_CONTEXT_COUNT=0
298
+ HEARTBEAT_STALE_COUNT=0
299
+ MONITOR_FAILURE_COUNT=0
300
+ CONSECUTIVE_FAILURES=0
301
+ PREV_CONTEXT_HASH=""
302
+ PREV_PRD_HASH=""
303
+ PREV_PRD_US_LIST=""
304
+ _PRD_CHANGED=0
305
+ ITERATION=0
306
+ START_TIME=$(date +%s)
307
+ BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
308
+ CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
309
+ SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
310
+ VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
311
+ CONSENSUS_ROUND=0 # current consensus round for current US
312
+ US_LIST="" # comma-separated US IDs from PRD (per-us mode)
313
+ LOCKFILE_ACQUIRED=0
314
+ LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
315
+ _SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
316
+ _LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
317
+ _MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
318
+ _ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
319
+ _ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
320
+
321
+ # =============================================================================
322
+ # Utility Functions
323
+ # =============================================================================
324
+
325
+ DEBUG="${DEBUG:-0}"
326
+ DEBUG_LOG="$ANALYTICS_DIR/debug.log"
327
+
328
+ # Source shared business logic
329
+ LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
330
+ source "$LIB_DIR/lib_ralph_desk.zsh"
331
+
332
+ # A16: Warn if running in foreground (may conflict with Claude Code pane)
333
+ if [[ -z "${RLP_BACKGROUND:-}" ]]; then
334
+ echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
335
+ echo " Recommended: launch via Bash tool with run_in_background: true" >&2
336
+ echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
337
+ fi
338
+
339
# check_dead_pane() — decide whether a pane's foreground command means the
# agent process has exited and only a bare shell is left.
# Engine-aware: bash is normal for codex workers (trigger runs in bash),
# but indicates a dead pane for claude workers.
# Args: $1=pane_current_command
#       $2=engine (claude|codex, default: claude)
#       $3=role (worker|verifier) — accepted for call-site compatibility,
#          not used by the decision
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local poll_cmd="${1:-}"
  local engine="${2:-claude}"

  case "$poll_cmd" in
    ''|zsh)
      return 0  # no command reported, or bare zsh = dead
      ;;
    bash)
      # bash = dead for claude (codex uses a bash trigger)
      [[ "$engine" != "codex" ]] && return 0
      ;;
  esac
  return 1  # alive
}
358
+
359
# launch_worker_codex() — launch codex Worker TUI, send instruction, verify submission
# Matches launch_worker_claude() pattern for consistent tmux-visible execution.
# Steps: interrupt any lingering foreground process, start codex, wait up to
# 30 one-second polls for the TUI prompt (›), paste the instruction, then poll
# up to 15 times for activity keywords while re-pressing Enter.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 once the instruction has been sent (even if activity could not
#          be confirmed — a warning is logged in that case),
#          1 if the codex TUI never became ready.
launch_worker_codex() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  # Declared once, outside the loops: in zsh, re-running a bare `local name`
  # on an already-set local prints "name=value", which would dump the pane
  # capture to stdout on every pass after the first.
  local _pre_cmd="" _pane_text=""

  log "  Launching Worker codex TUI in pane $pane_id..."
  # Clean pane before launch: kill any lingering process, ensure fresh shell
  _pre_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null || echo "")
  if [[ -n "$_pre_cmd" && "$_pre_cmd" != "zsh" && "$_pre_cmd" != "bash" ]]; then
    log_debug "Worker pane has lingering process ($_pre_cmd), cleaning..."
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 1
  fi
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for codex TUI prompt (›) instead of shell prompt
  local _codex_ready=0
  local _codex_wait=0
  while (( _codex_wait < 30 )); do
    sleep 1
    _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    # Here-string instead of `echo |`: zsh's echo would interpret backslash
    # sequences that happen to appear in the captured screen text.
    if grep -q '›' <<< "$_pane_text" 2>/dev/null; then
      _codex_ready=1
      log_debug "Worker codex TUI ready after ${_codex_wait}s"
      break
    fi
    _codex_wait=$(( _codex_wait + 1 ))
  done
  if (( ! _codex_ready )); then
    log_error "Worker codex TUI not ready after 30s"
    return 1
  fi

  # Send instruction to codex TUI
  sleep 1
  local worker_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker codex instruction sent (${#worker_instruction} chars)"

  # Submit loop — verify codex started working
  local submit_attempts=0
  local _started=0
  while (( submit_attempts < 15 )); do
    sleep 2
    _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if grep -qi "working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing" <<< "$_pane_text" 2>/dev/null; then
      _started=1
      log_debug "Worker codex started working after $((submit_attempts + 1)) checks"
      break
    fi
    # One full re-type midway through, in case the first paste was lost.
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge: press Enter twice in case the input is still sitting unsent.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  if (( ! _started )); then
    # Previously silent; make the unconfirmed submission visible in the logs.
    log "  WARNING: Worker codex showed no activity after 15 submit checks"
    log_debug "[FLOW] iter=$iter worker_codex_submit_check=UNCONFIRMED attempts=15"
  fi
  return 0
}
431
+
432
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
# Handles: TUI startup, wait_for_pane_ready, instruction send, 15-iteration submit loop,
# restart recovery on submit failure.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 on success, 1 on fatal failure (caller writes BLOCKED). Fatal
#          means claude did not start, or the restart recovery could not get
#          the pane ready again.
# NOTE(review): the activity pattern contains "exec", which also matches the
# word "execute" in the instruction text itself if that text is visible in
# the captured pane — confirm the check is not trivially satisfied.
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"
  # Declared once, outside the loop: in zsh, re-running a bare `local name`
  # on an already-set local prints "name=value" to stdout.
  local pane_check=""

  log "  Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  local worker_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  local submit_attempts=0
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo |`: zsh's echo would interpret backslash
    # sequences that happen to appear in the captured screen text.
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob" <<< "$pane_check" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    # Nudge: press Enter twice in case the input is still sitting unsent.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # Loop ended by `break`: the worker is running.
  if (( submit_attempts < 15 )); then
    return 0
  fi

  # If 15 attempts failed, restart claude and retry
  log "  WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
  log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  sleep 0.5
  tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
  sleep 2
  # Best effort: give the pane a moment to settle; failure here is tolerated.
  wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker restart failed — pane not ready"
    log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
    # Nothing is running in the pane: report it instead of claiming success.
    return 1
  fi

  sleep 3
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log "  Worker restarted and instruction re-sent"
  log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
  return 0
}
512
+
513
# launch_verifier_codex() — launch codex Verifier TUI, send instruction, verify submission
# Matches launch_verifier_claude() pattern for consistent tmux-visible execution.
# Steps: interrupt any lingering foreground process, start codex, wait up to
# 30 one-second polls for the TUI prompt (›), paste the instruction, then poll
# up to 15 times for activity keywords while re-pressing Enter.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction has been sent (even if activity could not
#          be confirmed — a warning is logged in that case),
#          1 if the codex TUI never became ready.
launch_verifier_codex() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  # Declared once, outside the loops: in zsh, re-running a bare `local name`
  # on an already-set local prints "name=value", which would dump the pane
  # capture to stdout on every pass after the first.
  local _pre_cmd="" _pane_text=""

  log "  Launching Verifier codex TUI in pane $pane_id..."
  # Clean pane before launch: kill any lingering process, ensure fresh shell
  _pre_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null || echo "")
  if [[ -n "$_pre_cmd" && "$_pre_cmd" != "zsh" && "$_pre_cmd" != "bash" ]]; then
    log_debug "Verifier pane has lingering process ($_pre_cmd), cleaning..."
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
    tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 1
  fi
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for codex TUI prompt (›) instead of shell prompt
  local _codex_ready=0
  local _codex_wait=0
  while (( _codex_wait < 30 )); do
    sleep 1
    _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    # Here-string instead of `echo |`: zsh's echo would interpret backslash
    # sequences that happen to appear in the captured screen text.
    if grep -q '›' <<< "$_pane_text" 2>/dev/null; then
      _codex_ready=1
      log_debug "Verifier codex TUI ready after ${_codex_wait}s"
      break
    fi
    _codex_wait=$(( _codex_wait + 1 ))
  done
  if (( ! _codex_ready )); then
    log_error "Verifier codex TUI not ready after 30s"
    return 1
  fi

  sleep 1
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier codex instruction sent"

  # Submit loop — verify codex started working
  local submit_attempts=0
  local _started=0
  while (( submit_attempts < 15 )); do
    sleep 2
    _pane_text=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    if grep -qi "working\|thinking\|Exploring\|Running\|reading\|searching\|editing\|writing" <<< "$_pane_text" 2>/dev/null; then
      _started=1
      log_debug "Verifier codex started working after $((submit_attempts + 1)) checks"
      break
    fi
    # One full re-type midway through, in case the first paste was lost.
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge: press Enter twice in case the input is still sitting unsent.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  if (( ! _started )); then
    # Previously silent; make the unconfirmed submission visible in the logs.
    log "  WARNING: Verifier codex showed no activity after 15 submit checks"
    log_debug "[FLOW] iter=$iter verifier_codex_submit_check=UNCONFIRMED attempts=15"
  fi
  return 0
}
584
+
585
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
# Returns: 0 once the instruction has been sent (even if activity could not
#          be confirmed — a warning is logged in that case),
#          1 if the claude TUI did not become ready.
# NOTE(review): the activity pattern contains "exec", which also matches the
# word "execute" in the instruction text itself if that text is visible in
# the captured pane — confirm the check is not trivially satisfied.
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local verifier_launch="$4"
  # Declared once, outside the loop: in zsh, re-running a bare `local name`
  # on an already-set local prints "name=value" to stdout.
  local vs_check=""

  log "  Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working
  local submit_attempts=0
  local _started=0
  while (( submit_attempts < 15 )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Here-string instead of `echo |`: zsh's echo would interpret backslash
    # sequences that happen to appear in the captured screen text.
    if grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" <<< "$vs_check" 2>/dev/null; then
      _started=1
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      break
    fi
    # One full re-type midway through, in case the first paste was lost.
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Nudge: press Enter twice in case the input is still sitting unsent.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  if (( ! _started )); then
    # Previously silent; make the unconfirmed submission visible in the logs.
    log "  WARNING: Verifier claude showed no activity after 15 submit checks"
    log_debug "[FLOW] iter=$iter verifier_submit_check=UNCONFIRMED attempts=15"
  fi
  return 0
}
633
+
634
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
# On exit: check done-claim, auto-generate iter-signal.
# With a done-claim, the US id comes from its .us_id field ("unknown" when the
# field is missing or the file cannot be parsed). Without one, the id comes
# from the existing iter-signal file (default US-001), overridden by the first
# "Next ... US-<n>" line found in the campaign memory file.
# Args: $1=iteration $2=signal_file
# Globals: DONE_CLAIM_FILE, DESK, SLUG (read)
# Returns: 0 (signal generated), 1 (signal file could not be written)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"
  local us_id="" summary="" audit_reason="" mem_us=""

  log "  Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # jq prints nothing when the done-claim is empty or not valid JSON.
    [[ -n "$us_id" ]] || us_id="unknown"
    log "  Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
    summary="auto-generated after codex exit"
    audit_reason="codex_exit_with_done_claim"
  else
    log "  WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    if [[ -n "$mem_us" ]]; then
      us_id="$mem_us"
    fi
    summary="auto-generated after codex exit (no done-claim)"
    audit_reason="codex_exit_no_done_claim"
  fi

  # Escape the id for use inside a JSON string literal. Backslashes go first
  # so the escapes added afterwards are not doubled.
  local us_json="$us_id"
  us_json=${us_json//\\/\\\\}
  us_json=${us_json//\"/\\\"}
  us_json=${us_json//$'\n'/\\n}
  us_json=${us_json//$'\r'/\\r}
  us_json=${us_json//$'\t'/\\t}

  # Write to a sibling temp file and rename, so a reader polling the signal
  # path never observes a partially written file.
  local tmp_signal="${signal_file}.tmp.$$"
  if ! printf '{"iteration":%s,"status":"verify","us_id":"%s","summary":"%s","timestamp":"%s"}\n' \
      "$iter" "$us_json" "$summary" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$tmp_signal" \
      || ! mv -f "$tmp_signal" "$signal_file"; then
    log_error "Failed to write codex exit signal to $signal_file"
    rm -f "$tmp_signal" 2>/dev/null
    return 1
  fi

  _emit_a4_fallback_audit "$us_id" "$iter" "$audit_reason"
  return 0
}
661
+
662
# handle_worker_exit_claude() — react to a claude worker that exited before
# writing its signal file by delegating to restart_worker.
# Args: $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 when restart_worker succeeded, 1 for any restart_worker failure
#          (its exact status is collapsed to 1)
handle_worker_exit_claude() {
  local pane_id="$1" iter="$2" trigger_file="$3"

  log_error "Worker exited without writing signal file"
  restart_worker "$pane_id" "$iter" "$trigger_file" && return 0
  return 1
}
677
+
678
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
# Kills the given pane, creates a fresh one in the same layout slot and
# records the new pane id in session-config.json.
# Args:    $1=old pane id   $2=role ("worker" or "verifier")
# Outputs: the new pane id on stdout (nothing when no pane could be created)
# Returns: 0 on success, 1 when tmux did not produce a replacement pane
replace_worker_pane() {
  local old_pane="$1"
  local role="$2" # "worker" or "verifier"
  local sibling_pane new_pane updated_config
  local -a split_flags

  log " Replacing dead $role pane $old_pane..."
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Keep the original layout: worker(top-right) / verifier(bottom-right).
  # A verifier is split below the worker; a worker is split above (-b) the verifier.
  if [[ "$role" == "verifier" ]]; then
    sibling_pane="$WORKER_PANE"
    split_flags=(-v -d)
  else
    sibling_pane="$VERIFIER_PANE"
    split_flags=(-v -b -d)
  fi

  if tmux display-message -t "$sibling_pane" -p '#{pane_id}' &>/dev/null; then
    new_pane=$(tmux split-window "${split_flags[@]}" -t "$sibling_pane" -P -F '#{pane_id}' -c "$ROOT")
  else
    # Fallback: the sibling pane is dead too, split horizontally from leader.
    new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  fi

  # Without a pane id there is nothing to record; writing "" into the session
  # config would make every later lookup of this role point at no pane.
  if [[ -z "$new_pane" ]]; then
    log_error "Failed to create replacement $role pane (old pane $old_pane)"
    return 1
  fi

  log " New $role pane: $new_pane (replaced $old_pane)"
  log_debug "[FLOW] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"

  # Update session-config.json with new pane ID. The new document is built
  # first so that a jq failure cannot replace the config with empty content.
  if [[ -f "$SESSION_CONFIG" ]]; then
    if updated_config=$(jq --arg role "$role" --arg pane "$new_pane" \
        '.panes[$role] = $pane' "$SESSION_CONFIG") && [[ -n "$updated_config" ]]; then
      printf '%s\n' "$updated_config" | atomic_write "$SESSION_CONFIG"
      log_debug "Updated session-config.json: $role pane → $new_pane"
    else
      log_error "Could not update $SESSION_CONFIG with new $role pane $new_pane; file left untouched"
    fi
  fi

  echo "$new_pane"
}
718
+
719
+ # =============================================================================
720
+ # Dependency Checks
721
+ # =============================================================================
722
+
723
# --- governance.md s7 step 1: Validate prerequisites before starting ---
# Verifies that every CLI this run needs is on PATH. Logs one error per
# missing tool and exits 1 if any is absent; otherwise records the resolved
# claude/codex binaries in CLAUDE_BIN / CODEX_BIN (only for engines in use).
check_dependencies() {
  local missing=0
  local need_claude=0
  local need_codex=0

  # claude is needed unless BOTH roles run on codex (codex-only campaigns can
  # run without claude — generate_sv_report degrades gracefully).
  if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
    need_claude=1
  fi
  # codex is needed when either role uses it or consensus verification is on.
  if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
    need_codex=1
  fi

  command -v tmux >/dev/null 2>&1 || {
    log_error "tmux is required but not found. Install with: brew install tmux"
    missing=1
  }

  if (( need_claude )) && ! command -v claude >/dev/null 2>&1; then
    log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
    missing=1
  fi

  command -v jq >/dev/null 2>&1 || {
    log_error "jq is required but not found. Install with: brew install jq"
    missing=1
  }

  if (( need_codex )) && ! command -v codex >/dev/null 2>&1; then
    log_error "codex CLI not found. Install: npm install -g @openai/codex"
    missing=1
  fi

  if (( missing )); then
    exit 1
  fi

  # Remember the full path of each engine binary that will actually be used.
  if (( need_claude )); then
    CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
    log " Claude binary: $CLAUDE_BIN"
  fi

  if (( need_codex )); then
    CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
    log " Codex binary: $CODEX_BIN"
  fi
}
770
+
771
+ # =============================================================================
772
+ # Session Management (tmux pattern: pane IDs)
773
+ # =============================================================================
774
+
775
# --- governance.md s7 step 1: Check for existing sessions ---
# Refuses to start when another tmux session named "rlp-desk-<slug>-*" exists
# (the session we are currently running in is ignored). Prints the offending
# sessions plus a kill hint and exits 1 in that case; returns normally otherwise.
check_existing_sessions() {
  local current_session name
  local prefix="rlp-desk-${SLUG}-"
  local -a existing
  existing=()

  current_session=$(tmux display-message -p '#{session_name}' 2>/dev/null || echo "")

  # Compare names literally. Feeding $SLUG / the session name to grep as a
  # regex would let metacharacters (a "." in the slug, for instance) match
  # unrelated sessions or hide a real collision.
  while IFS= read -r name; do
    [[ "$name" == "$prefix"* ]] || continue
    [[ "$name" == "$current_session" ]] && continue
    existing+=("$name")
  done < <(tmux list-sessions -F '#{session_name}' 2>/dev/null)

  if (( ${#existing[@]} > 0 )); then
    log_error "Existing tmux session(s) found for slug '$SLUG':"
    for name in "${existing[@]}"; do
      echo " - $name"
    done
    echo ""
    echo "Kill existing session first:"
    echo " tmux kill-session -t <session-name>"
    exit 1
  fi
}
792
+
793
# --- governance.md s7 step 1: Create tmux session with pane IDs (%N) ---
# Sets up the Leader/Worker/Verifier panes and writes $SESSION_CONFIG.
#   Inside tmux : splits the current pane (current pane becomes the Leader).
#   Outside tmux: creates a detached session named $SESSION_NAME first.
# Globals written: SESSION_NAME, LEADER_PANE, WORKER_PANE, VERIFIER_PANE,
#                  BASELINE_COMMIT
# Returns: 0 on success, 1 when the session config JSON could not be built.
# Exits 1 when the tmux session cannot be created.
create_session() {
  log "Creating tmux session: $SESSION_NAME"

  # tmux split-pane pattern
  if [[ -n "${TMUX:-}" ]]; then
    # Inside tmux: split CURRENT pane in place. The current pane stays as-is
    # (leader/user stays here); Worker/Verifier appear on the RIGHT.
    LEADER_PANE=$(tmux display-message -p '#{pane_id}')
    SESSION_NAME=$(tmux display-message -p '#{session_name}')
    log " Splitting current pane in session: $SESSION_NAME"

    # -h off current pane → right column (worker)
    WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    # -v off worker → stacked below on right (verifier)
    VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  else
    # Outside tmux: create a detached session.
    # US-025 R13 P0: verify tmux new-session exit code; if collision + RLP_BACKGROUND,
    # disambiguate with -bg-<epoch>-<pid> suffix and a residual has-session loop.
    if ! tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT" 2>/dev/null; then
      if ! tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
        log_error "tmux new-session failed and session does not exist: $SESSION_NAME"
        exit 1
      fi
      if [[ "${RLP_BACKGROUND:-0}" != "1" ]]; then
        log_error "tmux new-session failed: session $SESSION_NAME already exists (set RLP_BACKGROUND=1 to auto-rename)"
        exit 1
      fi
      SESSION_NAME="${SESSION_NAME}-bg-$(date +%s)-$$"
      while tmux has-session -t "$SESSION_NAME" 2>/dev/null; do
        SESSION_NAME="${SESSION_NAME}-$(awk 'BEGIN{srand();print int(1000+rand()*9000)}')"
      done
      tmux new-session -d -s "$SESSION_NAME" -x 200 -y 50 -c "$ROOT" || {
        log_error "tmux new-session retry failed for $SESSION_NAME"
        exit 1
      }
    fi
    # destroy-unattached off keeps the session alive when no tmux client is attached.
    # Best-effort only: it does NOT survive manual `tmux kill-session` or tmux server restart.
    # If either happens, R12 (lifecycle monitor) detects it and writes infra_failure BLOCKED.
    if [[ "${RLP_BACKGROUND:-0}" == "1" ]]; then
      tmux set-option -t "$SESSION_NAME" destroy-unattached off 2>/dev/null
    fi
    LEADER_PANE=$(tmux display-message -p -t "$SESSION_NAME" '#{pane_id}')
    WORKER_PANE=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
    VERIFIER_PANE=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  fi

  # Set pane titles and enable border labels for visual distinction
  local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
  local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
  [[ "$CONSENSUS_MODE" != "off" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
  tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
  tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
  tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
  # Color-coded pane borders: green=leader, blue=worker, yellow=verifier
  tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
  tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
  tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
  # Show pane titles in border. Targeted at the leader pane's window: when the
  # script was started outside tmux there is no "current" session, and an
  # untargeted set-option could land on whichever session tmux picks.
  tmux set-option -t "$LEADER_PANE" pane-border-status top 2>/dev/null
  tmux set-option -t "$LEADER_PANE" pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null

  log " Leader pane: $LEADER_PANE"
  log " Worker pane: $WORKER_PANE"
  log " Verifier pane: $VERIFIER_PANE"

  # US-024 R12 P0: lifecycle check site #1 — verify all panes/session alive after creation.
  _r12_check_lifecycle "create_session"

  # AC12: Capture baseline commit before writing session config
  BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")

  # Truncate cost-log for fresh run (previous data in versioned campaign reports).
  # The explicit ":" matters in zsh: a redirection with no command runs
  # $NULLCMD (cat by default), which would sit reading stdin.
  : > "$COST_LOG"

  # v5.7 §4.2: WITH_SELF_VERIFICATION=1 is hard-rejected at script entry now,
  # so by the time we reach create_session() the flag is guaranteed to be 0.

  # Build the session config with jq so every string value (paths, model
  # names, reasons) is JSON-escaped; numeric/boolean settings go through
  # --argjson and are emitted unquoted, as before.
  local session_json
  if ! session_json=$(jq -n \
      --arg session_name "$SESSION_NAME" \
      --arg slug "$SLUG" \
      --arg created_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      --arg baseline_commit "$BASELINE_COMMIT" \
      --arg leader "$LEADER_PANE" \
      --arg worker "$WORKER_PANE" \
      --arg verifier "$VERIFIER_PANE" \
      --argjson pid "$$" \
      --arg root "$ROOT" \
      --arg worker_model "$WORKER_MODEL" \
      --arg verifier_model "$VERIFIER_MODEL" \
      --arg worker_engine "$WORKER_ENGINE" \
      --arg verifier_engine "$VERIFIER_ENGINE" \
      --arg worker_codex_model "$WORKER_CODEX_MODEL" \
      --arg worker_codex_reasoning "$WORKER_CODEX_REASONING" \
      --arg verifier_codex_model "$VERIFIER_CODEX_MODEL" \
      --arg verifier_codex_reasoning "$VERIFIER_CODEX_REASONING" \
      --arg verify_mode "$VERIFY_MODE" \
      --arg consensus_mode "$CONSENSUS_MODE" \
      --argjson max_iter "$MAX_ITER" \
      --argjson poll_interval "$POLL_INTERVAL" \
      --argjson iter_timeout "$ITER_TIMEOUT" \
      --argjson heartbeat_stale_threshold "$HEARTBEAT_STALE_THRESHOLD" \
      --argjson max_restarts "$MAX_RESTARTS" \
      --argjson idle_nudge_threshold "$IDLE_NUDGE_THRESHOLD" \
      --argjson max_nudges "$MAX_NUDGES" \
      --argjson cb_threshold "$CB_THRESHOLD" \
      --argjson effective_cb_threshold "$EFFECTIVE_CB_THRESHOLD" \
      --argjson with_self_verification "$WITH_SELF_VERIFICATION" \
      --argjson with_self_verification_requested "$WITH_SELF_VERIFICATION_REQUESTED" \
      --arg sv_skipped_reason "$SV_SKIPPED_REASON" \
      --arg lane_mode "$LANE_MODE" \
      --argjson autonomous_mode "$AUTONOMOUS_MODE" \
      '{
        session_name: $session_name,
        slug: $slug,
        created_at: $created_at,
        baseline_commit: $baseline_commit,
        panes: {
          leader: $leader,
          worker: $worker,
          verifier: $verifier
        },
        pid: $pid,
        root: $root,
        models: {
          worker: $worker_model,
          verifier: $verifier_model
        },
        engines: {
          worker: $worker_engine,
          verifier: $verifier_engine,
          worker_codex_model: $worker_codex_model,
          worker_codex_reasoning: $worker_codex_reasoning,
          verifier_codex_model: $verifier_codex_model,
          verifier_codex_reasoning: $verifier_codex_reasoning
        },
        verification: {
          verify_mode: $verify_mode,
          consensus_mode: $consensus_mode
        },
        config: {
          max_iter: $max_iter,
          poll_interval: $poll_interval,
          iter_timeout: $iter_timeout,
          heartbeat_stale_threshold: $heartbeat_stale_threshold,
          max_restarts: $max_restarts,
          idle_nudge_threshold: $idle_nudge_threshold,
          max_nudges: $max_nudges,
          cb_threshold: $cb_threshold,
          effective_cb_threshold: $effective_cb_threshold,
          with_self_verification: $with_self_verification,
          with_self_verification_requested: $with_self_verification_requested,
          sv_skipped_reason: $sv_skipped_reason,
          lane_mode: $lane_mode,
          autonomous_mode: $autonomous_mode
        }
      }'); then
    log_error "Failed to build session config JSON (is a numeric setting empty or non-numeric?)"
    return 1
  fi

  # Write session config (atomic write)
  printf '%s\n' "$session_json" | atomic_write "$SESSION_CONFIG"

  log " Session config: $SESSION_CONFIG"
}
930
+
931
+ # =============================================================================
932
+ # Copy-Mode Guard (tmux pattern)
933
+ # =============================================================================
934
+
935
# --- governance.md s7 step 5: Check pane_in_mode before every send-keys ---
# Args:    $1=pane_id
# Returns: 0 when keys may be sent; 1 when the pane reports pane_in_mode=1
#          (copy mode) or when tmux cannot be queried for that pane.
check_copy_mode() {
  local target_pane="$1"
  local mode_flag

  if ! mode_flag=$(tmux display-message -p -t "$target_pane" '#{pane_in_mode}' 2>/dev/null); then
    return 1
  fi

  # pane is in copy mode, cannot send keys
  [[ "$mode_flag" -eq 1 ]] && return 1
  return 0
}
945
+
946
+ # =============================================================================
947
+ # Verification-Based Send Retry (tmux pattern)
948
+ # =============================================================================
949
+
950
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
# Writes the text to a private temp file, loads it into the named tmux buffer
# "rlp-paste" and pastes that buffer (deleting it afterwards) into the pane.
# Args:    $1=pane_id  $2=text (pasted verbatim, no trailing newline added)
# Returns: 0 when the buffer was loaded and pasted, non-zero otherwise
paste_to_pane() {
  local pane_id="$1"
  local text="$2"
  local tmpbuf
  local rc=0

  # mktemp instead of a fixed /tmp name: unpredictable path, created
  # exclusively, so nothing pre-planted at that path can be followed.
  tmpbuf=$(mktemp "${TMPDIR:-/tmp}/rlp-desk-paste.XXXXXX") || return 1

  # printf '%s' writes the text byte-for-byte. `echo -n` would swallow text
  # that looks like an echo option ("-n", "-e") and, in zsh, expand backslash
  # escapes contained in the text.
  if printf '%s' "$text" > "$tmpbuf"; then
    # Only paste when the load succeeded, so a stale buffer is never pasted.
    tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null \
      && tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null
    rc=$?
  else
    rc=1
  fi

  rm -f -- "$tmpbuf"
  return $rc
}
960
+
961
# --- governance.md s7 step 5: Send with copy-mode guard and retry ---
# Pastes text into a pane and presses Enter (C-m) until the text no longer
# shows in the last lines of the pane, i.e. the CLI consumed it.
# Args:    $1=pane_id  $2=text
# Returns: 1 when the pane is (or enters) copy mode, so keys were withheld;
#          0 when the text was consumed, and also after all retries are
#          exhausted (fail-open: submission is then not guaranteed).
safe_send_keys() {
  local pane_id="$1"
  local text="$2"
  # All locals are declared once, up front. In zsh, re-executing `local name`
  # inside a loop for an already-set variable prints "name=value" to stdout.
  local initial_capture check_capture retry_capture
  local pane_busy=0
  local round=0
  local retry_round=0

  # --- Exact tmux sendToWorker pattern (tmux-session.js:527-626) ---

  # Guard: copy-mode captures keys; skip entirely
  if ! check_copy_mode "$pane_id"; then
    log_debug " Pane $pane_id in copy mode, skipping send"
    return 1
  fi

  # Inspect the recent pane content for a running task and known prompts.
  initial_capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)
  if grep -q "esc to interrupt" <<< "$initial_capture" 2>/dev/null; then
    pane_busy=1
  fi
  if grep -q "Do you trust" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Trust prompt detected, dismissing"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.12
  fi
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
  if grep -q "Do you want to" <<< "$initial_capture" 2>/dev/null; then
    log_debug " Permission prompt detected, auto-approving"
    tmux send-keys -t "$pane_id" C-m
    sleep 0.3
  fi
  # Auto-dismiss codex update prompt (select Skip). ERE alternation is used
  # because "\|" inside a basic regex is not defined by POSIX.
  if grep -qiE 'new version|update.*codex|codex.*update' <<< "$initial_capture" 2>/dev/null; then
    log_debug " Codex update prompt detected, selecting Skip"
    tmux send-keys -t "$pane_id" "2" C-m
    sleep 0.2
  fi

  # Send text via buffer paste (reliable for long strings)
  log_debug " Pasting text to pane $pane_id (${#text} chars)"
  paste_to_pane "$pane_id" "$text"

  # Allow input buffer to settle (tmux: 150ms)
  sleep 0.15

  # Submit: up to 6 rounds of C-m double-press
  while (( round < 6 )); do
    sleep 0.1
    if (( round == 0 && pane_busy )); then
      # Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
      tmux send-keys -t "$pane_id" C-m
    else
      tmux send-keys -t "$pane_id" C-m
      sleep 0.2
      tmux send-keys -t "$pane_id" C-m
    fi
    sleep 0.14

    # Text gone from the last 5 lines → consumed. "--" keeps text that starts
    # with "-" from being parsed as a grep option (which made grep fail and
    # the text look consumed).
    check_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$check_capture" 2>/dev/null; then
      log_debug " Text consumed after round $((round + 1))"
      return 0
    fi
    sleep 0.14
    round=$(( round + 1 ))
  done

  # Safety gate: copy-mode check
  if ! check_copy_mode "$pane_id"; then
    log_debug " Copy mode activated during send, aborting"
    return 1
  fi

  # Adaptive fallback: C-u clear line, resend (tmux pattern)
  log_debug " Adaptive retry — clearing line and resending"
  tmux send-keys -t "$pane_id" C-u
  sleep 0.08
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  paste_to_pane "$pane_id" "$text"
  sleep 0.12
  while (( retry_round < 4 )); do
    tmux send-keys -t "$pane_id" C-m
    sleep 0.18
    tmux send-keys -t "$pane_id" C-m
    sleep 0.14
    retry_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -5)
    if ! grep -qF -- "$text" <<< "$retry_capture" 2>/dev/null; then
      log_debug " Text consumed after adaptive retry round $((retry_round + 1))"
      return 0
    fi
    retry_round=$(( retry_round + 1 ))
  done

  # Fail-open: one last nudge
  if ! check_copy_mode "$pane_id"; then
    return 1
  fi
  tmux send-keys -t "$pane_id" C-m
  sleep 0.12
  tmux send-keys -t "$pane_id" C-m
  log_debug " Fail-open — text may or may not have been submitted"
  return 0
}
1070
+
1071
+ # =============================================================================
1072
+ # Wait for Pane Ready (tmux pattern: paneLooksReady)
1073
+ # =============================================================================
1074
+
1075
wait_for_pane_ready() {
  # Polls a pane until it shows an input prompt and no running task,
  # dismissing trust / permission / codex-update prompts along the way.
  # Args:    $1=pane_id  $2=timeout in seconds (default 10)
  # Returns: always 0 — on timeout it proceeds anyway (fail-open) and leaves
  #          the rest to safe_send_keys.
  local pane_id="$1"
  local timeout="${2:-10}" # tmux default: 10s
  # Declared once here rather than inside the loop: in zsh a repeated
  # `local captured` for an already-set variable prints its value to stdout.
  local start captured line trimmed ready
  start=$(date +%s)
  log " Waiting for pane $pane_id ready..."

  while (( $(date +%s) - start < timeout )); do
    captured=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null)

    # Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
    if grep -q "Do you trust" <<< "$captured" 2>/dev/null; then
      log " Trust prompt detected, auto-dismissing..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.12
      tmux send-keys -t "$pane_id" C-m
      sleep 2
      continue
    fi

    # Auto-approve permission prompts ("Do you want to create/overwrite X?")
    if grep -q "Do you want to" <<< "$captured" 2>/dev/null; then
      log " Permission prompt detected, auto-approving..."
      tmux send-keys -t "$pane_id" C-m
      sleep 0.5
      continue
    fi

    # Auto-dismiss codex update prompt (select Skip = option 2).
    # ERE alternation: "\|" in a basic regex is a non-POSIX extension.
    if grep -qiE 'new version|update.*codex|codex.*update' <<< "$captured" 2>/dev/null; then
      log " Codex update prompt detected, selecting Skip..."
      tmux send-keys -t "$pane_id" "2" C-m
      sleep 0.5
      continue
    fi

    # tmux paneLooksReady: check each line for prompt char at line start.
    # The loop reads from a here-string so `ready` is set in this shell.
    ready=0
    while IFS= read -r line; do
      trimmed="${line## }"
      if [[ "$trimmed" == ❯* || "$trimmed" == \>* || "$trimmed" == ›* || "$trimmed" == »* ]]; then
        ready=1
        break
      fi
    done <<< "$captured"

    # Also check via grep as fallback ([[:space:]] instead of the GNU-only \s)
    if tail -5 <<< "$captured" | grep -qE '^[[:space:]]*[❯›]' 2>/dev/null; then
      ready=1
    fi

    if (( ready )) || tail -3 <<< "$captured" | grep -qE '^[[:space:]]*[❯›>]' 2>/dev/null; then
      # Check no active task running
      if ! grep -q "esc to interrupt" <<< "$captured" 2>/dev/null; then
        log " Pane $pane_id is ready."
        return 0
      fi
    fi
    sleep 0.25
  done

  # Timeout — return success anyway (fail-open, let safe_send_keys handle it)
  log " Pane $pane_id ready timeout after ${timeout}s (proceeding anyway)"
  return 0
}
1138
+
1139
+ # =============================================================================
1140
+ # Heartbeat Monitoring (tmux pattern)
1141
+ # =============================================================================
1142
+
1143
# --- governance.md s7 step 5+6: Check heartbeat freshness ---
# Args:    $1=heartbeat file (JSON with an `epoch` field in seconds)
# Returns: 0 when the heartbeat is younger than HEARTBEAT_STALE_THRESHOLD
#          seconds; 1 when it is stale, or the file is missing, unreadable,
#          or carries no numeric epoch.
check_heartbeat() {
  local hb_file="$1"
  local threshold="$HEARTBEAT_STALE_THRESHOLD"
  local hb_epoch now_epoch
  local epoch_re='^[0-9]+([.][0-9]+)?$'

  [[ -f "$hb_file" ]] || return 1

  # Read epoch seconds directly (avoids timezone parsing bugs)
  hb_epoch=$(jq -r '.epoch // empty' "$hb_file" 2>/dev/null) || return 1

  # Only a plain number may reach the arithmetic below. Shell arithmetic
  # evaluates identifiers and expressions, so an epoch such as "now_epoch"
  # would otherwise be resolved as a variable and make any heartbeat look fresh.
  [[ "$hb_epoch" =~ $epoch_re ]] || return 1
  # Whole seconds are enough for a staleness check; drop any fraction.
  hb_epoch="${hb_epoch%%.*}"

  now_epoch=$(date +%s)
  (( now_epoch - hb_epoch < threshold ))
}
1163
+
1164
# Check if heartbeat indicates process has exited
# Args:    $1=heartbeat file (JSON)
# Returns: 0 only when the file exists and its `status` field is "exited"
check_heartbeat_exited() {
  local hb_file="$1"
  local reported_state

  [[ -f "$hb_file" ]] || return 1

  reported_state=$(jq -r '.status // empty' "$hb_file" 2>/dev/null)
  if [[ "$reported_state" == "exited" ]]; then
    return 0
  fi
  return 1
}
1174
+
1175
+ # =============================================================================
1176
+ # Idle Pane Nudging (tmux pattern)
1177
+ # =============================================================================
1178
+
1179
+ # --- v5.7 §4.13.a: Mid-execution permission-prompt auto-dismiss (Bug 4 fix) ---
1180
+ # claude CLI v2.1.114+ surfaces TUI-layer prompts ("Do you want to create...")
1181
+ # even with --dangerously-skip-permissions on certain Write paths. Without this
1182
+ # helper, Workers/Verifiers hang until IDLE_NUDGE_THRESHOLD timeout.
1183
+ #
1184
+ # Window-bounded match (codex Critic v5.7): require both a prompt phrase AND a
1185
+ # TUI affordance marker on the SAME, PREVIOUS, or NEXT line. Whole-capture dual
1186
+ # grep would let unrelated text trigger Enter (R-V5-9 false-positive).
1187
+ # Per-pane 3-second debounce prevents rapid double-Enter.
1188
# zsh/datetime supplies $EPOCHSECONDS; loading it is best-effort.
zmodload zsh/datetime 2>/dev/null || true

# _now_s — print the current epoch in seconds ($EPOCHSECONDS when set,
# otherwise whatever `date +%s` reports).
_now_s() {
  print -- "${EPOCHSECONDS:-$(date +%s)}"
}

# --- Per-pane bookkeeping (associative arrays keyed by pane id) --------------
# Epoch of the last auto-approve keypress sent to each pane (debounce).
typeset -gA LAST_AUTO_APPROVE_TS
# v5.7 §4.16: when each pane FIRST entered a prompt-stuck state; cleared on
# the first capture without a visible prompt. Feeds the bounded prompt-stall
# escalation (BLOCKED `prompt_stall`) so alive-but-stuck Workers can't
# infinite-wait (codex Critic HIGH finding).
typeset -gA PANE_PROMPT_STUCK_SINCE
typeset -gA PANE_DISMISS_FAILED_COUNT
# v5.7 §4.17: progress tracking for the generic no-progress timeout.
typeset -gA PANE_LAST_CHANGE_TS            # epoch when content last changed
typeset -gA PANE_LAST_CONTENT_FOR_PROGRESS # captured content for diff
# v0.14.1: whether the codex idle grace was already granted to a pane.
typeset -gA PANE_CODEX_IDLE_GRACED
# v0.14.2: per-verifier-pane trace flag — log the verdict-lookup outcome
# exactly once per byte-stasis transition. Bug Report #4 (BOS 2026-05-05).
typeset -gA PANE_VERIFIER_TRACE_LOGGED

# --- Tunables (environment overrides win; defaults applied otherwise) --------
: "${PROMPT_STALL_TIMEOUT:=300}"      # 5 min default
: "${PROMPT_DISMISS_FAIL_LIMIT:=20}"  # ~100s of fruitless dismiss attempts

# v5.7 §4.17: generic no-progress timeout (codex Critic HIGH — closes the gap
# where an undetected prompt or alive-but-frozen Worker bypasses Layer 4).
# Independent of prompt detection: if pane content stops changing for this
# many seconds AND the signal file is still missing, BLOCKED `infra_failure`
# with reason `worker_no_progress` is written so silent infinite-wait is
# impossible.
: "${PROGRESS_NO_CHANGE_TIMEOUT:=600}" # 10 min default

# v0.14.1: codex post-work idle UI grace. When a verifier pane shows codex's
# "Worked for Xm Ys" idle line at byte-stasis time, one extra
# CODEX_IDLE_GRACE_S (default 120s) is granted before BLOCK.
# Bug Report #3 (BOS 2026-05-04).
: "${CODEX_IDLE_GRACE_S:=120}"

# --- Prompt regex catalog ----------------------------------------------------
# v5.7 §4.17: default-No prompt detection. Pressing Enter on these means
# CANCEL/REJECT, not approve — so we BLOCK with traceability instead of
# silently auto-dismissing the wrong way.
typeset -g _DEFAULT_NO_RE='\[y/N\]|\(yes/no, default no\)|default[: ]+no|^[[:space:]]*N\)'

# v5.7 §4.16: broadened prompt detection (codex Critic MEDIUM).
# v5.7 §4.20 (E2E real-claude-CLI finding): claude v2.1.114+ uses a new trust
# prompt format ("Quick safety check: Is this a project you ... trust?") and a
# numbered picker with the `❯` cursor adjacent to the digit ("❯1.Yes"). The
# old pattern ("Do you trust") missed it entirely → Worker hung until
# iter-timeout. Hence the "Quick safety check" / "trust this (folder|directory)"
# alternatives in PROMPT_RE and the `❯<spaces>N.` / "Enter to confirm" /
# "1) (Yes|No)" alternatives in AFFORDANCE_RE.
typeset -g _PROMPT_RE='Do you (want to|trust)|Confirm execution|Are you sure|Continue\?|Proceed\?|Allow this|Approve this|Press y to|Choose an option|Select \[|Quick safety check|trust this (folder|directory)|Is this a project you'
typeset -g _AFFORDANCE_RE='\(y/n\)|\[Y/n\]|\[y/N\]|\(yes/no|❯[[:space:]]*[0-9]+\.|(^|[[:space:]])1\) (Yes|No)|(^|[[:space:]])[YyNn]\)|press (y|enter) to|Enter to confirm'

# v5.7 §4.18 (E2E real-tmux + omc benchmarking): "active task" markers tell a
# Worker that is busy producing output (and may legitimately print "(y/n)" in
# its body text) apart from one that is *idle at an unrecognized prompt*.
# Mirrors omc-team's `paneHasActiveTask` heuristic
# (src/team/tmux-session.ts:659). When ANY marker is in the recent pane tail
# the Worker is alive — auto_dismiss must NOT fast-fail on a suspected-unknown
# prompt because the affordance text is just transcript.
typeset -g _ACTIVE_TASK_RE='esc to interrupt|background terminal running|^[[:space:]]*[·✻][[:space:]]+[A-Za-z]+(\.{3}|…)'
1244
+
1245
auto_dismiss_prompts() {
  # Inspects the last 50 lines of a pane for an interactive prompt and reacts:
  #   - known default-Yes prompt (trust / permission) → press Enter, at most
  #     once per 3 seconds per pane;
  #   - default-No prompt, or y/n affordance with unknown phrasing → write a
  #     BLOCKED sentinel (infra_failure) instead of guessing;
  #   - no prompt → clear the pane's prompt-stall bookkeeping.
  # Args:    $1=pane_id
  # Returns: 0 in every handled case (also when the pane cannot be captured).
  local pane_id="$1"
  local now
  now=$(_now_s)
  local last=${LAST_AUTO_APPROVE_TS[$pane_id]:-0}
  # Spelled with escapes so the two-space / one-space distinction cannot be
  # lost to whitespace normalization of this file.
  local two_spaces=$'\x20\x20'
  local one_space=$'\x20'
  # Loop variables are local so they do not leak into the global scope.
  local line k

  local capture
  # v5.7 §4.21 (E2E real-claude-CLI finding): claude v2.x trust prompt wraps
  # to ~30 lines on narrow panes. -S -10 missed the question header. -50
  # covers the full prompt.
  capture=$(tmux capture-pane -t "$pane_id" -p -S -50 2>/dev/null) || return 0

  # v5.7 §4.21: the claude v2.x trust prompt is multi-line and wraps narrowly,
  # so per-line PROMPT_RE+AFFORDANCE adjacency misses it. Special-case the
  # signature ("Quick safety check ... Enter to confirm" with `❯N.Yes` cursor
  # on option 1). This is default-Yes — Enter approves trust.
  # §4.21.b: narrow-pane wrap breaks the question phrase across lines, so all
  # whitespace is normalized to single spaces before substring matching.
  local _norm_capture="${capture//[$'\n\r\t']/ }"
  # Collapse runs of spaces: every pass replaces each pair with one space and
  # therefore shortens the string, so the loop terminates.
  while [[ "$_norm_capture" == *"$two_spaces"* ]]; do
    _norm_capture="${_norm_capture//$two_spaces/$one_space}"
  done
  if { [[ "$_norm_capture" == *"Quick safety check"* ]] || [[ "$_norm_capture" == *"trust this folder"* ]] || [[ "$_norm_capture" == *"trust this directory"* ]]; } \
    && [[ "$_norm_capture" == *"Enter to confirm"* ]] \
    && [[ "$_norm_capture" =~ '❯ ?[0-9]+\. ?Yes' ]]; then
    if (( now - last >= 3 )); then
      log " Claude v2.x trust prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] claude_trust_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
    fi
    return 0
  fi
  # Older claude trust prompt format (omc-team parity).
  if [[ "$_norm_capture" == *"Do you trust the contents of this directory"* ]] \
    && { [[ "$_norm_capture" =~ 'Yes,[[:space:]]*continue' ]] || [[ "$_norm_capture" == *"Press enter to continue"* ]]; }; then
    if (( now - last >= 3 )); then
      log " Claude (legacy) trust prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] claude_trust_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
    fi
    return 0
  fi

  local -a lines
  lines=("${(@f)capture}")
  local i n=${#lines[@]} prompt_visible=0
  # v5.7 §4.23 (E2E real-claude-CLI finding): narrow-pane wrap breaks
  # multi-line prompts so PROMPT+AFFORDANCE±1 line-adjacency misses them.
  # Fix: match against the LAST 15 lines — where the active prompt sits —
  # joined into one whitespace-collapsed string. PROMPT_RE + AFFORDANCE_RE
  # both present → auto-Enter unless DEFAULT_NO_RE present (BLOCK).
  local _tail_start=$((n > 15 ? n - 14 : 1))
  local _tail_normalized=""
  for ((i=_tail_start; i <= n; i++)); do
    _tail_normalized+="${lines[i]} "
  done
  while [[ "$_tail_normalized" == *"$two_spaces"* ]]; do
    _tail_normalized="${_tail_normalized//$two_spaces/$one_space}"
  done
  local default_no_seen=0
  local sample_pattern="${_tail_normalized:0:120}"
  if [[ "$_tail_normalized" =~ $_PROMPT_RE ]] && [[ "$_tail_normalized" =~ $_AFFORDANCE_RE ]]; then
    prompt_visible=1
  fi
  # Default-No scan: full capture, not just tail (scrollback contamination guard).
  if [[ "$capture" =~ $_DEFAULT_NO_RE ]]; then
    default_no_seen=1
  fi

  if (( default_no_seen )); then
    # v5.7 §4.17 + §4.17.b: default-No prompts ([y/N], "default: no") cannot
    # be auto-Enter'd safely — pressing Enter would CANCEL the operation.
    # If the pane has ANY default-No prompt visible (even alongside older
    # default-Yes prompts in scrollback), BLOCK with traceability.
    log_error "Default-No prompt detected in pane $pane_id — cannot safely auto-dismiss"
    log_debug "[GOV] default_no_prompt_detected=true pane=$pane_id action=block"
    write_blocked_sentinel \
      "Pane shows a default-No / explicit-No-default permission prompt. Auto-Enter would CANCEL the operation rather than approve it. Operator must manually respond with 'y' or extend prompt-handling logic. Pattern: $sample_pattern" \
      "${CURRENT_US:-ALL}" \
      "infra_failure"
    return 0
  fi

  if (( prompt_visible )); then
    # All visible prompts are default-Yes-equivalent — safe to auto-Enter.
    if [[ -z "${PANE_PROMPT_STUCK_SINCE[$pane_id]:-}" ]]; then
      PANE_PROMPT_STUCK_SINCE[$pane_id]=$now
    fi
    if (( now - last >= 3 )); then
      log " Permission prompt detected in pane $pane_id, auto-approving (Enter)"
      log_debug "[FLOW] permission_prompt_auto_approved=true pane=$pane_id"
      tmux send-keys -t "$pane_id" Enter 2>/dev/null
      LAST_AUTO_APPROVE_TS[$pane_id]=$now
      PANE_DISMISS_FAILED_COUNT[$pane_id]=$((${PANE_DISMISS_FAILED_COUNT[$pane_id]:-0} + 1))
    fi
    return 0
  fi

  # v5.7 §4.18: unknown-prompt fast-fail (E2E + omc benchmarking finding).
  # If the pane has an affordance marker (y/n bracket etc.) but NO recognized
  # PROMPT_RE phrasing, the Worker is likely awaiting an unknown variant of a
  # yes/no prompt. omc-team's principle (tmux-session.ts:639): never
  # auto-Enter on unknown prompts — Enter could approve OR cancel depending
  # on the default. BLOCK immediately so the operator can extend the
  # PROMPT_RE catalog, instead of waiting for the freeze timeout.
  #
  # False-positive guard: skip if any "active task" marker is present
  # (esc to interrupt / background terminal / spinner) — that means the
  # Worker is producing output and the affordance text is just transcript.
  local active=0
  local affordance_seen=0
  local sample=""
  local -a tail_lines
  tail_lines=()
  for ((i=1; i <= n; i++)); do
    if [[ "${lines[i]}" =~ $_ACTIVE_TASK_RE ]]; then
      active=1
      break
    fi
  done
  if (( ! active )); then
    # Only check the last 5 non-empty lines (where an idle prompt would sit).
    for ((k=n; k >= 1 && ${#tail_lines[@]} < 5; k--)); do
      [[ -z "${lines[k]}" ]] && continue
      tail_lines=("${lines[k]}" "${tail_lines[@]}")
    done
    for line in "${tail_lines[@]}"; do
      if [[ "$line" =~ $_AFFORDANCE_RE ]]; then
        affordance_seen=1
        sample="${line:0:120}"
        break
      fi
    done
  fi
  if (( affordance_seen )); then
    # Re-check default-No (could be the active prompt's bracket — must BLOCK).
    local default_no_in_tail=0
    for line in "${tail_lines[@]}"; do
      if [[ "$line" =~ $_DEFAULT_NO_RE ]]; then
        default_no_in_tail=1
        break
      fi
    done
    local reason
    if (( default_no_in_tail )); then
      reason="Pane shows a default-No affordance ([y/N], 'default: no') but the surrounding prompt phrasing is not in PROMPT_RE. Auto-Enter would CANCEL. Operator must respond manually or extend PROMPT_RE. Sample: $sample"
    else
      reason="Pane shows a y/n affordance marker without a recognized prompt phrasing — likely an unknown CLI prompt variant. Refusing to guess auto-Enter (which could be the wrong default). Operator must respond manually or extend PROMPT_RE. Sample: $sample"
    fi
    log_error "Unknown-prompt affordance detected in pane $pane_id — fast-fail BLOCK"
    log_debug "[GOV] unknown_prompt_detected=true pane=$pane_id action=block default_no=$default_no_in_tail"
    write_blocked_sentinel "$reason" "${CURRENT_US:-ALL}" "infra_failure"
    return 0
  fi
  # No prompt visible — clear stall tracking so re-entry is fresh.
  if [[ -n "${PANE_PROMPT_STUCK_SINCE[$pane_id]:-}" ]]; then
    log_debug "[FLOW] prompt_cleared=true pane=$pane_id"
    # zsh: unset assoc-array member via reset to empty + delete key.
    PANE_PROMPT_STUCK_SINCE[$pane_id]=""
    PANE_DISMISS_FAILED_COUNT[$pane_id]=""
    unset "PANE_PROMPT_STUCK_SINCE[$pane_id]"
    unset "PANE_DISMISS_FAILED_COUNT[$pane_id]"
  fi
}
1413
+
1414
# v5.7 §4.16: bounded prompt-stall escalation (codex Critic HIGH finding).
# Prevents a live-but-stuck pane from extending the wait indefinitely. The
# pane is treated as stalled when either
#   - the timestamp in PANE_PROMPT_STUCK_SINCE[pane] is at least
#     PROMPT_STALL_TIMEOUT seconds old, or
#   - PANE_DISMISS_FAILED_COUNT[pane] has reached PROMPT_DISMISS_FAIL_LIMIT.
# On a stall a BLOCKED sentinel (category "infra_failure") is written so the
# campaign exits with traceability instead of waiting forever.
#
# Args:
#   $1 - tmux pane id
#   $2 - user-story id recorded in the sentinel
#        (default: $CURRENT_US, falling back to "ALL")
# Returns:
#   0 - pane has no recorded prompt, or is still within both limits
#   1 - stall threshold exceeded; BLOCKED sentinel written, caller should
#       propagate the failure
check_prompt_stall() {
  local pane="$1"
  local story="${2:-${CURRENT_US:-ALL}}"

  # No recorded "stuck since" timestamp means no prompt is being tracked.
  local since=${PANE_PROMPT_STUCK_SINCE[$pane]:-0}
  if (( since == 0 )); then
    return 0
  fi

  local attempts=${PANE_DISMISS_FAILED_COUNT[$pane]:-0}
  local elapsed=$(( $(_now_s) - since ))

  # Still inside both budgets: nothing to escalate yet.
  if (( elapsed < PROMPT_STALL_TIMEOUT && attempts < PROMPT_DISMISS_FAIL_LIMIT )); then
    return 0
  fi

  log_error "Pane $pane stuck on prompt for ${elapsed}s ($attempts dismiss attempts) — escalating to BLOCKED"
  log_debug "[GOV] iter=${ITERATION:-0} prompt_stall_escalated=true pane=$pane stuck_for=${elapsed}s dismiss_attempts=$attempts threshold=${PROMPT_STALL_TIMEOUT}s"
  write_blocked_sentinel \
    "Pane stuck on TUI prompt for ${elapsed}s after ${attempts} dismiss attempts. Auto-dismiss patterns may need to be widened (see ~/.claude/ralph-desk/known-prompts.txt convention) or the underlying claude CLI prompt is genuinely unsupported. No documentation produced for this iteration." \
    "$story" \
    "infra_failure"
  return 1
}
1444
+
1445
# v0.14.1 / v0.14.2: codex post-work idle UI detector. After finishing a
# task the codex CLI shows a status line like "─ Worked for 5m 36s ──", a
# "› " prompt, "Context X% left", the model line and a suggestion such as
# "Improve documentation in @filename" while it waits for the next input.
# This is NOT a permission prompt — it is a successful idle state that the
# byte-stasis check used to mistake for "frozen". v0.14.2 Bug Report #4
# found the v0.14.1 patterns too narrow (extra horizontal-rule wrapping
# broke the strict dash-bracket regex), so detection uses several
# independent markers; ANY one of them counts as idle.
#
# Args:
#   $1 - captured pane text
# Returns:
#   0 if at least one idle marker is present, 1 otherwise
is_codex_idle_ui() {
  local pane_text="$1"

  # The text is fed through a here-string instead of `print -- ... | grep`:
  #  - `print` without -r interprets backslash escapes, so a "\c" in the
  #    pane transcript would truncate the text before a marker;
  #  - a single grep with no pipeline cannot be failed by `pipefail` when
  #    grep -q exits early.
  # Markers, in order:
  #  1. "Worked for Xm Ys"      — most reliable codex idle marker.
  #  2. "Context X% left"       — status bar; still present when the rules
  #                               above were cut off by capture truncation.
  #  3. "gpt-N[.M] <effort> ·"  — model + branch line shown with the idle
  #                               prompt.
  #  4. default suggestion phrases — accepted verbatim because the leading
  #                               "›" can be wrapped away in narrow panes.
  grep -qE \
    -e 'Worked for [0-9]+m [0-9]+s' \
    -e 'Context [0-9]+%[[:space:]]*left' \
    -e 'gpt-[0-9]+(\.[0-9]+)? (low|medium|high|xhigh) ·' \
    -e 'Improve documentation in @|Summarize recent commits|Explain (this )?code' \
    <<<"$pane_text" && return 0
  return 1
}
1472
+
1473
# v0.14.2 Bug Report #4 H1: codex sometimes lands the verdict at the
# pre-v0.13.0 legacy path (`<root>/.claude/ralph-desk/memos/...`) instead
# of `.rlp-desk/memos/`, even when the prompt instructs otherwise. When the
# legacy file exists and parses as JSON, move it to the canonical path so
# the rest of the pipeline (harvest + analytics + sentinels) sees a single
# location. Best-effort: any failure leaves the legacy file untouched and
# the campaign keeps polling.
#
# Globals read: LEGACY_VERDICT_FILE, VERDICT_FILE, ITERATION
# Returns:
#   0 - legacy verdict was moved to $VERDICT_FILE
#   1 - nothing to migrate, legacy file is not valid JSON, no canonical
#       path is configured, or the move failed
_migrate_legacy_verdict() {
  local legacy="${LEGACY_VERDICT_FILE:-}"
  local canonical="${VERDICT_FILE:-}"

  [[ -n "$legacy" && -f "$legacy" ]] || return 1
  # Without a destination there is nothing to migrate to. Guarding here
  # also avoids an unset-parameter abort under `set -u`.
  [[ -n "$canonical" ]] || return 1
  # `jq -e .` fails on unparsable input (and on a bare null/false value),
  # so a half-written file is left alone until it is complete.
  jq -e . "$legacy" >/dev/null 2>&1 || return 1

  log "Verdict file found at legacy path ${legacy} — moving to ${canonical}"
  if mkdir -p "$(dirname "$canonical")" 2>/dev/null \
     && mv -f "$legacy" "$canonical" 2>/dev/null; then
    # Report success only once the file really is in place.
    log_debug "[GOV] iter=${ITERATION:-0} legacy_verdict_migrated=true from=${legacy} to=${canonical}"
    return 0
  fi
  log_debug "[GOV] iter=${ITERATION:-0} legacy_verdict_migrated=false from=${legacy} to=${canonical}"
  return 1
}
1489
+
1490
# v0.14.1 / v0.14.2: verdict-aware short-circuit (Bug Reports #3, BOS
# 2026-05-04, and #4, BOS 2026-05-05). If the polled pane is a verifier
# pane and a valid verdict file is already on disk — at the canonical path,
# or at the legacy path which is then auto-migrated — the verifier has
# finished. The harvest step (run_single_verifier / consensus loop) should
# pick the verdict up, not the generic no-progress watcher, so callers use
# a 0 result to keep polling instead of escalating to BLOCKED.
#
# Args:
#   $1 - tmux pane id being polled
# Returns:
#   0 - pane is $VERIFIER_PANE or $FINAL_VERIFIER_PANE and a verdict that
#       parses as JSON is (now) available
#   1 - otherwise
_verifier_pane_has_verdict() {
  local pane="$1"

  # Only verifier panes can own a verdict.
  if [[ "$pane" != "${VERIFIER_PANE:-}" && "$pane" != "${FINAL_VERIFIER_PANE:-}" ]]; then
    return 1
  fi

  # Canonical location first.
  local canonical="${VERDICT_FILE:-}"
  if [[ -n "$canonical" && -f "$canonical" ]] \
     && jq -e . "$canonical" >/dev/null 2>&1; then
    return 0
  fi

  # v0.14.2 Fix-D: codex may have written to the legacy path; a successful
  # migration means the canonical file is now in place.
  if _migrate_legacy_verdict; then
    return 0
  fi
  return 1
}
1510
+
1511
# v5.7 §4.17 (codex Critic HIGH): generic no-progress timeout, independent
# of prompt detection. Closes the gap where an undetected prompt or an
# alive-but-frozen Worker could wait forever.
#
# Strategy: capture the last 20 lines of the pane on every call and compare
# them with the previous capture; when they stay identical for
# PROGRESS_NO_CHANGE_TIMEOUT seconds, write a BLOCKED sentinel.
#
# Args:
#   $1 - tmux pane id
#   $2 - user-story id recorded in the sentinel
#        (default: $CURRENT_US, falling back to "ALL")
# Globals written: PANE_LAST_CONTENT_FOR_PROGRESS, PANE_LAST_CHANGE_TS,
#   PANE_VERIFIER_TRACE_LOGGED, PANE_CODEX_IDLE_GRACED
# Returns:
#   0 - pane is progressing, this is the first observation, the capture
#       failed, a verifier verdict is already on disk, or a grace window
#       was just granted
#   1 - no-progress threshold exceeded; BLOCKED sentinel written
check_no_progress() {
  local pane_id="$1"
  local us_id="${2:-${CURRENT_US:-ALL}}"
  local now
  now=$(_now_s)
  local capture
  # A failed capture (e.g. pane gone) is not evidence of a freeze.
  capture=$(tmux capture-pane -t "$pane_id" -p -S -20 2>/dev/null) || return 0

  # v0.14.1 Fix-A / v0.14.2 Fix-D: a codex verifier writes its verdict and
  # then sits at the "Worked for Xm Ys" idle UI. Byte-stasis would BLOCK
  # even though the verdict is on disk, so when the pane is a verifier pane
  # with a verdict (canonical or auto-migrated legacy path) refresh the
  # bookkeeping and defer to the harvest step.
  if _verifier_pane_has_verdict "$pane_id"; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  # v0.14.2: root-cause tracing for Bug Report #4. For a verifier pane
  # without a verdict, record which verdict paths were checked. The entry
  # is written only while PANE_VERIFIER_TRACE_LOGGED[pane] is empty.
  if [[ "$pane_id" == "${VERIFIER_PANE:-}" || "$pane_id" == "${FINAL_VERIFIER_PANE:-}" ]]; then
    if [[ -z "${PANE_VERIFIER_TRACE_LOGGED[$pane_id]:-}" ]]; then
      PANE_VERIFIER_TRACE_LOGGED[$pane_id]=1
      # Both paths are read with defaults so an unset VERDICT_FILE cannot
      # abort the watcher under `set -u`.
      local canonical_exists=false legacy_exists=false
      if [[ -n "${VERDICT_FILE:-}" && -f "${VERDICT_FILE:-}" ]]; then
        canonical_exists=true
      fi
      if [[ -n "${LEGACY_VERDICT_FILE:-}" && -f "${LEGACY_VERDICT_FILE:-}" ]]; then
        legacy_exists=true
      fi
      log_debug "[GOV] iter=${ITERATION:-0} verifier_progress_check=miss pane=$pane_id verdict_canonical=${VERDICT_FILE:-unset} verdict_canonical_exists=$canonical_exists verdict_legacy=${LEGACY_VERDICT_FILE:-unset} verdict_legacy_exists=$legacy_exists"
    fi
  fi

  # Any difference from the previous capture counts as progress.
  local last_content="${PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]:-}"
  if [[ "$capture" != "$last_content" ]]; then
    PANE_LAST_CONTENT_FOR_PROGRESS[$pane_id]="$capture"
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  # Identical content but no timestamp yet (e.g. an empty pane on the very
  # first call): start the stasis clock now.
  local last_change=${PANE_LAST_CHANGE_TS[$pane_id]:-0}
  if (( last_change == 0 )); then
    PANE_LAST_CHANGE_TS[$pane_id]=$now
    return 0
  fi

  local frozen_for=$(( now - last_change ))
  if (( frozen_for < PROGRESS_NO_CHANGE_TIMEOUT )); then
    return 0
  fi

  # v0.14.1 Fix-B: even without a verdict file, codex sometimes parks at
  # its idle UI mid-run (e.g. partial-write window before the atomic mv).
  # Grant a one-time grace of CODEX_IDLE_GRACE_S seconds before escalating.
  if is_codex_idle_ui "$capture"; then
    local already_graced="${PANE_CODEX_IDLE_GRACED[$pane_id]:-0}"
    if (( already_graced == 0 )); then
      PANE_CODEX_IDLE_GRACED[$pane_id]=1
      # Back-date the stasis clock so the threshold is hit again exactly
      # CODEX_IDLE_GRACE_S seconds from now. Resetting it to $now would
      # silently extend the wait by a full PROGRESS_NO_CHANGE_TIMEOUT,
      # contradicting the grace announced in the log line below.
      PANE_LAST_CHANGE_TS[$pane_id]=$(( now - PROGRESS_NO_CHANGE_TIMEOUT + CODEX_IDLE_GRACE_S ))
      log "Pane $pane_id at codex idle UI for ${frozen_for}s — granting +${CODEX_IDLE_GRACE_S}s grace before BLOCK escalation"
      log_debug "[GOV] iter=${ITERATION:-0} codex_idle_grace=true pane=$pane_id grace_s=${CODEX_IDLE_GRACE_S}"
      return 0
    fi
  fi

  log_error "Pane $pane_id has not changed for ${frozen_for}s — alive but frozen. Escalating to BLOCKED."
  log_debug "[GOV] iter=${ITERATION:-0} no_progress_escalated=true pane=$pane_id frozen_for=${frozen_for}s threshold=${PROGRESS_NO_CHANGE_TIMEOUT}s"
  write_blocked_sentinel \
    "Pane content has been unchanged for ${frozen_for}s (>= ${PROGRESS_NO_CHANGE_TIMEOUT}s threshold). Worker process may be alive but stuck on an undetected prompt, hung network call, or genuine deadlock. No documentation produced; manual inspection required." \
    "$us_id" \
    "infra_failure"
  return 1
}
1588
+
1589
# --- governance.md s7 step 5+6: Nudge idle panes ---
# Compares the last three lines of the pane with the previous observation.
# When they have been identical for more than IDLE_NUDGE_THRESHOLD seconds
# and the pane does not look busy, sends an empty line via safe_send_keys,
# at most MAX_NUDGES times.
#
# Args:
#   $1 - tmux pane id
#   $2 - NAME of the variable holding this pane's nudge counter; it is read
#        and incremented indirectly
# Globals written: LAST_PANE_CONTENT, PANE_IDLE_SINCE, and the variable
#   named by $2
# Returns: 0 always
check_and_nudge_idle_pane() {
  local pane_id="$1"
  local nudge_count_var="$2"

  # v5.7 §4.13.a: auto-dismiss permission prompts before the idle check.
  # Otherwise the Worker hangs at "Do you want to create..." until the
  # nudge timeout.
  auto_dismiss_prompts "$pane_id"

  local current_content
  current_content=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null | tail -3)
  local now
  now=$(date +%s)

  # Content changed: remember it and restart the idle clock.
  if [[ "$current_content" != "${LAST_PANE_CONTENT[$pane_id]:-}" ]]; then
    LAST_PANE_CONTENT[$pane_id]="$current_content"
    PANE_IDLE_SINCE[$pane_id]=$now
    return 0
  fi

  # Content unchanged but no idle timestamp recorded yet. This happens when
  # the very first capture is empty and therefore equals the empty default.
  # Persist the start of the idle period; without this it would be
  # re-derived as "now" on every call and the pane could never become
  # idle enough to be nudged.
  if [[ -z "${PANE_IDLE_SINCE[$pane_id]:-}" ]]; then
    PANE_IDLE_SINCE[$pane_id]=$now
    return 0
  fi

  local idle_since="${PANE_IDLE_SINCE[$pane_id]}"
  if (( now - idle_since <= IDLE_NUDGE_THRESHOLD )); then
    return 0
  fi

  # A12 fix: NEVER nudge if the pane is busy (thinking/working) — a nudge
  # interrupts claude. The marker list is an ERE alternation (portable,
  # unlike `\|` in a basic regex) and the text is passed via a here-string
  # so backslashes in the capture are not interpreted by `echo`.
  local _nudge_capture
  _nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
  local busy_re='esc to interrupt|thinking|working|kneading|crunching|clauding|billowing|brewing|tinkering|burrowing|saut|razzle|bunning|zesting|fermenting|actualizing|composing|evaporating|churning'
  if grep -qiE -e "$busy_re" <<<"$_nudge_capture" 2>/dev/null; then
    log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
    return 0
  fi

  # Indirect read of the caller's counter (zsh (P) flag).
  local count=${(P)nudge_count_var}
  if (( count < MAX_NUDGES )); then
    log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
    safe_send_keys "$pane_id" ""
    (( count++ ))
    # Indirect write-back. The name comes from internal call sites only;
    # do not pass untrusted input here.
    eval "$nudge_count_var=$count"
  fi
  return 0
}
1626
+
1627
+ # =============================================================================
1628
+ # Exponential Backoff Restart (tmux pattern)
1629
+ # =============================================================================
1630
+
1631
# --- governance.md s7 step 5: Restart dead workers with backoff ---
# Restarts the claude Worker in its pane after an increasing delay and
# records the attempt in WORKER_RESTARTS[iter].
#
# Args:
#   $1 - tmux pane id of the Worker
#   $2 - iteration number (key into WORKER_RESTARTS)
#   $3 - trigger file path; accepted so existing call sites keep working,
#        but not used by this function
# Returns:
#   0 - restart command was sent
#   1 - nothing was restarted: the Worker engine is codex (1-shot exec, so
#       restart does not apply) or MAX_RESTARTS was already reached
#       (caller writes BLOCKED)
restart_worker() {
  local pane_id="$1"
  local iter="$2"

  # Codex workers are 1-shot exec; restart is not applicable. Because of
  # this early return everything below only ever runs for claude, so no
  # codex relaunch branch is needed further down.
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
    return 1
  fi

  local restart_count="${WORKER_RESTARTS[$iter]:-0}"

  if (( restart_count >= MAX_RESTARTS )); then
    log_error "Worker exceeded max restarts ($MAX_RESTARTS) for iteration $iter"
    return 1 # caller writes BLOCKED
  fi

  # Exponential backoff: 5s, 10s, 20s, 60s (cap). zsh arrays are 1-based,
  # so attempt N (restart_count + 1) reads delays[N]; anything past the end
  # of the table falls back to 60.
  local -a delays=(5 10 20 60)
  local delay=${delays[$((restart_count + 1))]:-60}
  log " Restarting worker (attempt $((restart_count + 1))/$MAX_RESTARTS) after ${delay}s backoff..."
  sleep "$delay"

  # Kill existing claude, wait for shell prompt.
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
  tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
  sleep 2

  # Re-launch worker (tmux interactive pattern).
  safe_send_keys "$pane_id" "$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
  return 0
}
1670
+
1671
+ # =============================================================================
1672
+ # Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
1673
+ # =============================================================================
1674
+
1675
# Per-US PRD injection helper.
# Prints the Worker prompt base to stdout with every occurrence of the full
# PRD path replaced by the per-US split path. Falls back to printing the
# prompt base unchanged (with a stderr warning) when a split path was given
# but the file is missing.
#
# Args:
#   $1 - prompt base file
#   $2 - full PRD path to look for in the prompt base
#   $3 - per-US PRD path (empty or omitted = no substitution)
# Outputs: the resulting prompt on stdout; warning on stderr
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" && -n "$full_prd" ]]; then
    # Both paths are substituted as LITERAL text. Escape regex
    # metacharacters in the search path (a "." would otherwise match any
    # character) and "\", "&" and the "|" delimiter in the replacement
    # ("&" would otherwise re-insert the matched text).
    local search_re replacement
    search_re=$(printf '%s' "$full_prd" | sed 's/[][\\.*^$|]/\\&/g')
    replacement=$(printf '%s' "$per_us_prd" | sed 's/[\\&|]/\\&/g')
    sed "s|$search_re|$replacement|g" "$prompt_base"
  else
    if [[ -n "$per_us_prd" && ! -f "$per_us_prd" ]]; then
      echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
1693
+
1694
# --- governance.md s7 step 4+5: Write prompt and trigger to files ---
# NEVER send prompt content through tmux send-keys.
# Write payloads to files, send only short trigger commands (<200 chars).
#
# Builds two files for the given iteration under $LOGS_DIR:
#   iter-NNN.worker-prompt.md  - base prompt + iteration context
#   iter-NNN.worker-trigger.sh - executable script that runs the engine
#                                while a background loop refreshes
#                                $WORKER_HEARTBEAT every 15 seconds
#
# Args:
#   $1 - iteration number
# Globals read: LOGS_DIR, MEMORY_FILE, VERIFY_MODE, US_LIST, VERIFIED_US,
#   DESK, SLUG, WORKER_PROMPT_BASE, _PRD_CHANGED, AUTONOMOUS_MODE,
#   WORKER_ENGINE, CODEX_BIN, WORKER_CODEX_MODEL, WORKER_CODEX_REASONING,
#   WORKER_MODEL, WORKER_EFFORT, WORKER_HEARTBEAT
write_worker_trigger() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.worker-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.worker-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.worker-output.log"

  # Build the worker prompt: base prompt + iteration context.
  # The contract is at most the first five lines of the memory file's
  # "## Next Iteration Contract" section.
  local contract
  contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -5)

  # Check for fix contract from previous verifier failure.
  local prev_iter=$((iter - 1))
  local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' "$prev_iter").fix-contract.md"

  # Compute next unverified US before prompt assembly (required for per-US
  # PRD injection). `us` is local so the loop variable does not leak into
  # the global scope, and membership in the comma-separated VERIFIED_US
  # list is a literal substring test rather than a regex grep.
  local next_us=""
  local us
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
    for us in $(echo "$US_LIST" | tr ',' ' '); do
      if [[ ",$VERIFIED_US," != *",$us,"* ]]; then
        next_us="$us"
        break
      fi
    done
  fi

  {
    # Per-US PRD injection: substitute full PRD path with per-US split path
    # when available.
    local per_us_prd=""
    [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
    inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
    echo ""
    echo "---"
    echo "## Iteration Context"
    echo "- **Iteration**: $iter"
    echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
    echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
    if (( _PRD_CHANGED )); then
      echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
    fi

    # Include fix contract if previous verifier failed.
    if [[ -f "$fix_contract_file" ]]; then
      echo ""
      echo "---"
      echo "## IMPORTANT: Fix Contract from Verifier (iteration $prev_iter)"
      echo "The Verifier REJECTED your previous work. You MUST fix the issues below."
      echo "Do NOT just resubmit — actually change the code to address each issue."
      echo ""
      cat "$fix_contract_file"
    fi

    # Per-US mode: tell Worker exactly which US to work on.
    if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
      if [[ -n "$next_us" ]]; then
        echo ""
        echo "---"
        echo "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract"
        echo "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**"
        echo "The Leader has determined that **${next_us}** is the next unverified story."
        echo "You MUST implement ONLY **${next_us}** in this iteration."
        echo "Do NOT implement any other user stories."
        # Per-US test-spec injection: point Worker to scoped test-spec if
        # available.
        local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
        if [[ -f "$per_us_test_spec" ]]; then
          echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
        else
          echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
        fi
        echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
        echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
        echo ""
        echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
      elif [[ -n "$VERIFIED_US" ]]; then
        # All individual US verified — this is the final full verify
        # iteration.
        echo ""
        echo "---"
        echo "## FINAL VERIFICATION ITERATION"
        echo "All individual US have been verified: $VERIFIED_US"
        echo "Run all tests and verification commands to confirm everything works together."
        echo "Signal verify with us_id=\"ALL\" for the final full verification."
      fi
    elif [[ "$VERIFY_MODE" = "batch" ]]; then
      echo ""
      echo "---"
      if [[ -n "$VERIFIED_US" ]]; then
        echo "## BATCH MODE — CONTINUE FROM PARTIAL PROGRESS"
        echo "The following US have already been verified: **$VERIFIED_US**"
        echo "- Do NOT re-implement these — they are done."
        echo "- Focus ONLY on the remaining unverified user stories."
        echo '- Signal verify with us_id="ALL" when the remaining stories are complete.'
      else
        echo "## BATCH MODE OVERRIDE"
        echo "Ignore any per-US signal instructions above. In batch mode:"
        echo "- Implement ALL user stories in this iteration"
        echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
        echo "- Do NOT signal verify after individual stories"
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative.
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict you find by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep working."
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # Engine-specific launch command (expanded at write time).
  local engine_cmd engine_comment
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
    # ${(q)...} shell-quotes the prompt path for the generated script. For
    # ordinary paths the output is unchanged; a path containing spaces or
    # other special characters no longer splits the `cat` argument.
    engine_cmd="${CODEX_BIN:-codex} \\
  -m $WORKER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat ${(q)prompt_file})\""
    engine_comment="# Run codex with fresh context (fallback trigger — TUI primary launch via launch_worker_codex)"
  else
    engine_cmd=$(build_claude_cmd print "$WORKER_MODEL" "$prompt_file" "$output_log" "$WORKER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 5)"
  fi

  # Unquoted heredoc: $iter, $WORKER_HEARTBEAT, $engine_comment and
  # $engine_cmd expand now; everything written as \$ stays literal and is
  # evaluated when the trigger script runs.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter worker - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$WORKER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Worker prompt: $prompt_file"
  log " Worker trigger: $trigger_file"
}
1858
+
1859
# Writes the Verifier prompt and trigger script for one iteration under
# $LOGS_DIR:
#   iter-NNN.verifier<suffix>-prompt.md  - base prompt + verification context
#   iter-NNN.verifier<suffix>-trigger.sh - executable script that runs the
#                                          engine while a background loop
#                                          refreshes $VERIFIER_HEARTBEAT
#                                          every 15 seconds
#
# Args:
#   $1 - iteration number
#   $2 - verifier engine (default: $VERIFIER_ENGINE); override for consensus
#   $3 - claude model (default: $VERIFIER_MODEL)
#   $4 - optional file-name suffix for consensus (e.g. "-claude", "-codex")
# Globals read: LOGS_DIR, SIGNAL_FILE, VERIFIER_PROMPT_BASE,
#   DONE_CLAIM_FILE, VERIFY_MODE, VERIFIED_US, AUTONOMOUS_MODE, CODEX_BIN,
#   VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING, VERIFIER_EFFORT,
#   VERIFIER_HEARTBEAT
write_verifier_trigger() {
  local iter="$1"
  local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
  local verifier_model="${3:-$VERIFIER_MODEL}"
  local suffix="${4:-}" # optional suffix for consensus (e.g., "-claude", "-codex")
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"
  local trigger_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-trigger.sh"
  local output_log="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-output.log"

  # Read us_id from iter-signal.json for per-US scoping.
  local us_id=""
  if [[ -f "$SIGNAL_FILE" ]]; then
    us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
  fi

  # Build verifier prompt from base with US scope.
  {
    cat "$VERIFIER_PROMPT_BASE"
    echo ""
    echo "---"
    echo "## Verification Context"
    echo "- **Iteration**: $iter"
    echo "- **Done Claim**: $DONE_CLAIM_FILE"
    echo "- **Verify Mode**: $VERIFY_MODE"
    if [[ -n "$us_id" ]]; then
      if [[ "$us_id" = "ALL" ]]; then
        echo "- **Scope**: FULL VERIFY — check ALL acceptance criteria from the PRD"
      else
        echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
      fi
      # NOTE(review): this note is also emitted when us_id is "ALL", where
      # it sits next to the FULL VERIFY scope line — confirm that "skip
      # re-verifying" is intended for the final full verification.
      if [[ -n "$VERIFIED_US" ]]; then
        echo "- **Previously verified US**: $VERIFIED_US"
        echo "- **Note**: Skip re-verifying the above US. Focus on unverified stories."
      fi
    fi

    # Autonomous mode: don't stop on ambiguity, PRD is authoritative.
    if (( AUTONOMOUS_MODE )); then
      echo ""
      echo "---"
      echo "## AUTONOMOUS MODE"
      echo "Do NOT stop or ask questions when encountering ambiguity or document conflicts."
      echo "**Resolution priority**: PRD > test-spec > context > memory"
      echo "If documents disagree, follow PRD and proceed. Log any conflict by"
      echo "appending to \`$LOGS_DIR/conflict-log.jsonl\` in format:"
      echo ' {"iteration":N,"us_id":"US-NNN","source_a":"prd","source_b":"test-spec","conflict":"description","resolution":"followed PRD"}'
      echo "Do NOT wait for human input. Keep verifying."
    fi
  } | atomic_write "$prompt_file"

  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup).
  # Engine-specific launch command (expanded at write time).
  local engine_cmd engine_comment
  if [[ "$verifier_engine" = "codex" ]]; then
    # The codex command always uses VERIFIER_CODEX_MODEL; the model passed
    # as $3 only affects the claude branch below.
    # ${(q)...} shell-quotes both paths for the generated script. For
    # ordinary paths the output is unchanged; paths containing spaces or
    # other special characters no longer split the `cat` / `tee` arguments.
    engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
  -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
  --disable plugins --dangerously-bypass-approvals-and-sandbox \\
  \"\$(cat ${(q)prompt_file})\" \\
  > >(tee ${(q)output_log}) 2>&1"
    engine_comment="# Run codex with fresh context (governance.md s7 step 7) — process substitution preserves tty"
  else
    engine_cmd=$(build_claude_cmd print "$verifier_model" "$prompt_file" "$output_log" "$VERIFIER_EFFORT")
    engine_comment="# Run claude with fresh context, no MCP/skills (governance.md s7 step 7)"
  fi

  # Unquoted heredoc: $iter, $suffix, $VERIFIER_HEARTBEAT, $engine_comment
  # and $engine_cmd expand now; everything written as \$ stays literal and
  # is evaluated when the trigger script runs.
  {
    cat <<TRIGGER_EOF
#!/bin/zsh
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
# DO NOT use exec here -- it breaks heartbeat cleanup

HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"

# Background heartbeat writer (tmux pattern)
(
  while true; do
    echo '{"epoch":'\$(date +%s)',"pid":'"\$\$"'}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
    mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
    sleep 15
  done
) &
HEARTBEAT_PID=\$!

$engine_comment
$engine_cmd

# Cleanup heartbeat writer
kill \$HEARTBEAT_PID 2>/dev/null
wait \$HEARTBEAT_PID 2>/dev/null
echo '{"epoch":'\$(date +%s)',"status":"exited"}' > "\${HEARTBEAT_FILE}.tmp.\$\$"
mv "\${HEARTBEAT_FILE}.tmp.\$\$" "\$HEARTBEAT_FILE"
TRIGGER_EOF
  } | atomic_write "$trigger_file"
  chmod +x "$trigger_file"

  log " Verifier prompt: $prompt_file"
  log " Verifier trigger: $trigger_file"
}
1957
+
1958
+ # =============================================================================
1959
+ # Cleanup (trap handler)
1960
+ # =============================================================================
1961
+
1962
+ cleanup() {
1963
+ log "Cleaning up..."
1964
+
1965
+ # Remove lockfile
1966
+ if (( LOCKFILE_ACQUIRED )); then
1967
+ rm -f "$LOCKFILE_PATH" 2>/dev/null
1968
+ else
1969
+ log_debug "cleanup: lockfile not owned by this process, skipping removal"
1970
+ fi
1971
+
1972
+ # US-026 R14 P0: remove project-scoped runner lockfile if owned by this slug
1973
+ if [[ -f "$RUNNER_LOCKFILE_PATH" ]]; then
1974
+ local own_slug
1975
+ own_slug=$(jq -r '.slug' "$RUNNER_LOCKFILE_PATH" 2>/dev/null)
1976
+ if [[ "$own_slug" == "$SLUG" ]]; then
1977
+ rm -rf "$RUNNER_LOCKDIR" "$RUNNER_LOCKFILE_PATH" 2>/dev/null
1978
+ fi
1979
+ fi
1980
+
1981
+ # Kill claude processes then kill panes
1982
+ log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
1983
+ if [[ -n "${WORKER_PANE:-}" ]]; then
1984
+ tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1985
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
1986
+ fi
1987
+ if [[ -n "${VERIFIER_PANE:-}" ]]; then
1988
+ tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1989
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
1990
+ fi
1991
+ sleep 2
1992
+ # Kill panes on completion
1993
+ if [[ -n "${WORKER_PANE:-}" ]]; then
1994
+ tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
1995
+ fi
1996
+ if [[ -n "${VERIFIER_PANE:-}" ]]; then
1997
+ tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
1998
+ fi
1999
+ log " Panes cleaned up."
2000
+
2001
+ # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
2002
+ setopt local_options nonomatch 2>/dev/null
2003
+ rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null
2004
+
2005
+ # AC4: Generate campaign report on all terminal states (always-on)
2006
+ generate_campaign_report
2007
+
2008
+ # US-001: Generate SV report after campaign report (tmux mode)
2009
+ generate_sv_report
2010
+
2011
+ # Print summary
2012
+ local end_time
2013
+ end_time=$(date +%s)
2014
+ local elapsed=$(( end_time - START_TIME ))
2015
+ local minutes=$(( elapsed / 60 ))
2016
+ local seconds=$(( elapsed % 60 ))
2017
+
2018
+ local final_status="UNKNOWN"
2019
+ if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
2020
+ elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
2021
+ else final_status="TIMEOUT"; fi
2022
+
2023
+ # --- Update metadata.json with final status ---
2024
+ if [[ -f "$METADATA_FILE" ]]; then
2025
+ jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
2026
+ '.campaign_status = $status | .end_time = $end_time' \
2027
+ "$METADATA_FILE" > "${METADATA_FILE}.tmp" && mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
2028
+ fi
2029
+
2030
+ if (( DEBUG )); then
2031
+ local end_ts=$(date +%s)
2032
+ local elapsed=$((end_ts - START_TIME))
2033
+
2034
+ log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
2035
+
2036
+ # --- Validation ---
2037
+ log_debug "[FLOW] === Execution Validation ==="
2038
+
2039
+ # 1. Did the correct verify mode run?
2040
+ log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"
2041
+
2042
+ # 2. Per-US: were all US individually verified?
2043
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
2044
+ local prd_file="$DESK/plans/prd-$SLUG.md"
2045
+ local expected_us=""
2046
+ if [[ -f "$prd_file" ]]; then
2047
+ expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
2048
+ fi
2049
+ local verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
2050
+ local expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
2051
+
2052
+ if [[ "$final_status" = "COMPLETE" ]]; then
2053
+ if (( verified_count >= expected_count )); then
2054
+ log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
2055
+ else
2056
+ log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
2057
+ fi
2058
+ else
2059
+ log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
2060
+ fi
2061
+ fi
2062
+
2063
+ # 3. Consensus: were both engines used?
2064
+ if [[ "$CONSENSUS_MODE" != "off" ]]; then
2065
+ if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
2066
+ log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
2067
+ else
2068
+ log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
2069
+ fi
2070
+ fi
2071
+
2072
+ # 4. Engine match: did the configured engines actually run?
2073
+ local worker_dispatches=$(grep -c '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
2074
+ local verifier_dispatches=$(grep -c '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
2075
+ log_debug "[FLOW] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
2076
+
2077
+ # 5. Fix loops: how many fix contracts were generated?
2078
+ local fix_count=$(grep -c '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
2079
+ log_debug "[FLOW] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
2080
+
2081
+ # 6. Circuit breakers: any triggered?
2082
+ local cb_count=$(grep -c '\[GOV\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
2083
+ log_debug "[FLOW] circuit_breakers_triggered=$cb_count"
2084
+
2085
+ # 7. Overall result
2086
+ log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
2087
+ fi
2088
+
2089
+ echo ""
2090
+ echo "============================================================"
2091
+ echo " Ralph Desk Tmux Runner - Session Complete"
2092
+ echo "============================================================"
2093
+ echo " Session: $SESSION_NAME"
2094
+ echo " Slug: $SLUG"
2095
+ echo " Iterations: $ITERATION / $MAX_ITER"
2096
+ echo " Elapsed: ${minutes}m ${seconds}s"
2097
+ echo ""
2098
+
2099
+ if [[ -f "$COMPLETE_SENTINEL" ]]; then
2100
+ echo " Final State: COMPLETE"
2101
+ elif [[ -f "$BLOCKED_SENTINEL" ]]; then
2102
+ echo " Final State: BLOCKED"
2103
+ else
2104
+ echo " Final State: STOPPED (interrupted or timeout)"
2105
+ fi
2106
+
2107
+ echo ""
2108
+ echo " Tmux session left alive for inspection:"
2109
+ echo " tmux attach -t $SESSION_NAME"
2110
+ echo " tmux kill-session -t $SESSION_NAME"
2111
+ echo "============================================================"
2112
+ }
2113
+
2114
+ # =============================================================================
2115
+ # Poll Loop (used for both Worker and Verifier)
2116
+ # =============================================================================
2117
+
2118
# --- governance.md s7 step 5+6: Poll for signal file with heartbeat monitoring ---
#
# Waits for `signal_file` to appear while supervising the tmux pane that is
# expected to produce it. Used for both Worker and Verifier polling.
#
# Arguments:
#   $1 signal_file    - file whose existence ends the poll successfully
#   $2 heartbeat_file - heartbeat file; exit/staleness checks only run if it exists
#   $3 pane_id        - tmux pane id being supervised
#   $4 trigger_file   - forwarded unchanged to the restart helpers
#   $5 role           - label used in log lines; any value matching *erifier*
#                       is treated as a Verifier (no A4 fallback, no codex
#                       worker-exit handler)
#
# Globals read:    ITERATION, ITER_TIMEOUT, POLL_INTERVAL, DONE_CLAIM_FILE,
#                  WORKER_ENGINE, HEARTBEAT_STALE_THRESHOLD, _PROMPT_RE,
#                  _AFFORDANCE_RE, _API_MAX_RETRIES, _API_RETRY_INTERVAL_S
# Globals written: LAST_PANE_CONTENT, PANE_IDLE_SINCE, HEARTBEAT_STALE_COUNT
#
# Returns:
#   0 - signal file exists (detected, auto-generated by the A4 fallback, or
#       after delegating to handle_worker_exit_codex)
#   1 - per-iteration timeout, dead pane, stale-heartbeat circuit breaker,
#       or a restart helper reported failure
#   2 - hard failure: API retries exhausted (BLOCKED sentinel written here),
#       or check_prompt_stall / check_no_progress reported failure
poll_for_signal() {
  local signal_file="$1"
  local heartbeat_file="$2"
  local pane_id="$3"
  local trigger_file="$4"
  local role="$5"  # "worker" or "verifier"
  local nudge_count=0   # passed BY NAME to check_and_nudge_idle_pane; do not rename
  local api_retry_count=0

  # All loop-scoped variables are declared once, up front. In zsh, re-running a
  # bare `local name` for an already-declared local prints `name=value` unless
  # TYPESET_SILENT is set, which would leak noise to stdout on every poll tick.
  local poll_start now elapsed
  local a4_capture a4_i a4_n a4_blocked a4_prev a4_cur a4_next a4_signal
  local -a a4_lines
  local dc_us_id pane_output_for_retry is_api_text_retry poll_cmd
  # Transient API failure markers: bare 500/529 status codes (not embedded in a
  # longer number) or well-known overload phrases. Matched case-insensitively.
  local api_text_re='(^|[^[:digit:]])(500|529)([^[:digit:]]|$)|overloaded|too many requests|service unavailable'

  poll_start=$(date +%s)

  # Initialize idle tracking for this pane
  LAST_PANE_CONTENT[$pane_id]=""
  PANE_IDLE_SINCE[$pane_id]=$(date +%s)

  while true; do
    now=$(date +%s)
    elapsed=$(( now - poll_start ))

    # Per-iteration timeout check (evaluated before the signal check)
    if (( elapsed >= ITER_TIMEOUT )); then
      log_error "$role timed out after ${ITER_TIMEOUT}s for iteration $ITERATION"
      return 1  # timeout
    fi

    # Check if signal file appeared
    if [[ -f "$signal_file" ]]; then
      log " Signal file detected: $signal_file"
      return 0  # success
    fi

    # A4 fallback: done-claim exists but no signal → Worker forgot iter-signal.
    # ONLY for Worker polling — Verifier waits for verdict file, not done-claim.
    #
    # v5.7 §4.14 (Bug 5 fix, CRITICAL): if the Worker pane shows a pending TUI
    # permission prompt (a prompt line with an affordance on the same or an
    # adjacent line), the Worker is NOT done — it is stuck mid-write. The
    # fallback is suspended so the Verifier is not dispatched against partial
    # Worker output.
    if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
      a4_capture=$(tmux capture-pane -t "$pane_id" -p -S -50 2>/dev/null || true)
      a4_lines=("${(@f)a4_capture}")
      a4_n=${#a4_lines[@]}
      a4_blocked=0
      for ((a4_i=1; a4_i <= a4_n; a4_i++)); do
        if [[ "${a4_lines[a4_i]}" =~ $_PROMPT_RE ]]; then
          a4_prev="${a4_lines[a4_i-1]:-}"
          a4_cur="${a4_lines[a4_i]}"
          a4_next="${a4_lines[a4_i+1]:-}"
          if [[ "$a4_prev" =~ $_AFFORDANCE_RE || "$a4_cur" =~ $_AFFORDANCE_RE || "$a4_next" =~ $_AFFORDANCE_RE ]]; then
            a4_blocked=1
            break
          fi
        fi
      done
      if (( a4_blocked )); then
        log " Worker pane has pending permission prompt — A4 fallback suspended (Bug 5 guard)"
        log_debug "[GOV] iter=$ITERATION a4_fallback_suspended=true reason=worker_prompt_pending pane=$pane_id"
        # Continue polling; do NOT auto-generate signal. auto_dismiss_prompts
        # runs further down in this same loop pass.
      else
        dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
        if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
          log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
          log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
          # Build the signal with jq so a us_id containing quotes/backslashes
          # cannot produce invalid JSON, and publish it atomically so a
          # concurrent reader never observes a half-written file.
          if a4_signal=$(jq -nc \
                --argjson iteration "$ITERATION" \
                --arg us_id "$dc_us_id" \
                --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
                '{iteration: $iteration, status: "verify", us_id: $us_id, summary: "auto-generated by A4 fallback (done-claim without signal)", timestamp: $ts}' \
                2>/dev/null) && [[ -n "$a4_signal" ]]; then
            printf '%s\n' "$a4_signal" | atomic_write "$signal_file"
            _emit_a4_fallback_audit "$dc_us_id" "$ITERATION" "inline_polling_a4"
            return 0
          fi
          # Could not build the signal; keep polling rather than emit garbage.
          log_error "A4 fallback: could not build iter-signal JSON for $dc_us_id (iteration $ITERATION)"
        fi
      fi
    fi

    # API transient-error recovery with bounded backoff
    pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
    is_api_text_retry=0
    # Here-string instead of `echo | grep -q`: no pipeline, so pipefail cannot
    # turn an early grep exit into a false negative.
    if [[ -n "$pane_output_for_retry" ]] &&
       grep -qiE "$api_text_re" <<< "$pane_output_for_retry"; then
      is_api_text_retry=1
    fi

    if (( is_api_text_retry )) || is_api_error "$pane_id"; then
      (( api_retry_count++ ))
      log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
      if (( api_retry_count >= _API_MAX_RETRIES )); then
        log_error "API unavailable after ${_API_MAX_RETRIES} retries"
        write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries" "" "infra_failure"
        return 2
      fi
      # A5: If pane shows "queued messages" or rate-limit corruption, restart pane
      if grep -qi 'queued messages' <<< "$pane_output_for_retry"; then
        log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
        log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
        tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
        tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
        wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
      fi
      sleep "$_API_RETRY_INTERVAL_S"
      continue
    else
      # Any clean poll resets the consecutive-retry budget.
      api_retry_count=0
    fi

    # Check heartbeat freshness (tmux pattern)
    if [[ -f "$heartbeat_file" ]]; then
      if check_heartbeat_exited "$heartbeat_file"; then
        # Process exited but no signal file -- give a brief grace period
        sleep 3
        if [[ -f "$signal_file" ]]; then
          log " Signal file detected after process exit: $signal_file"
          return 0
        fi
        # Dispatch to engine-specific exit handler
        if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
          handle_worker_exit_codex "$ITERATION" "$signal_file"
          return 0
        fi
        # Claude path (or verifier of any engine)
        if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
          # Reset poll timer and idle tracking for the restarted process
          poll_start=$(date +%s)
          nudge_count=0
          LAST_PANE_CONTENT[$pane_id]=""
          PANE_IDLE_SINCE[$pane_id]=$(date +%s)
          sleep "$POLL_INTERVAL"
          continue
        else
          return 1  # max restarts exceeded
        fi
      fi

      if ! check_heartbeat "$heartbeat_file"; then
        log " WARNING: $role heartbeat stale (>${HEARTBEAT_STALE_THRESHOLD}s)"
        (( HEARTBEAT_STALE_COUNT++ ))
        # Circuit breaker: 3 consecutive heartbeat stale events
        if (( HEARTBEAT_STALE_COUNT >= 3 )); then
          log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
          log_error "Circuit breaker: 3 consecutive heartbeat stale events"
          return 1
        fi
        # Attempt restart
        if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
          poll_start=$(date +%s)
          nudge_count=0
          continue
        else
          return 1
        fi
      else
        # Heartbeat is fresh, reset stale counter
        HEARTBEAT_STALE_COUNT=0
      fi
    fi

    # Dead pane detection during poll — delegates to check_dead_pane() for
    # engine-aware logic on the pane's current foreground command.
    poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
    if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
      log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
      log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
      # Return failure so caller can handle recovery
      return 1
    fi

    # v5.7 §4.13.a: window-bounded prompt auto-dismiss. Called explicitly here
    # so dismissal happens BEFORE the idle/nudge check below.
    auto_dismiss_prompts "$pane_id"

    # v5.7 §4.16: bounded prompt-stall escalation. A failing check means the
    # pane has been prompt-stuck too long; do not retry.
    if ! check_prompt_stall "$pane_id"; then
      return 2  # signal: hard-failed, do not retry
    fi

    # v5.7 §4.17: generic no-progress timeout for alive-but-frozen panes
    # (undetected prompts, hung network calls, ...).
    if ! check_no_progress "$pane_id"; then
      return 2  # hard-failed, infra_failure recorded
    fi

    # Idle pane nudging (tmux pattern)
    check_and_nudge_idle_pane "$pane_id" "nudge_count"

    sleep "$POLL_INTERVAL"
  done
}
2322
+
2323
+ # =============================================================================
2324
+ # Consensus Verification (run two verifiers sequentially in same pane)
2325
+ # =============================================================================
2326
+
2327
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
#
# Arguments:
#   $1 iter         - iteration number (zero-padded to 3 digits in file names)
#   $2 engine       - claude|codex
#   $3 model        - model for this verifier (used for the claude launch command)
#   $4 suffix       - "-claude" or "-codex"; selects the prompt file and tags log lines
#   $5 verdict_dest - where the produced verdict file is copied
#
# Globals read:    LOGS_DIR, VERDICT_FILE, VERIFIER_HEARTBEAT, SESSION_CONFIG,
#                  VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING, VERIFIER_EFFORT,
#                  CODEX_BIN (optional), ITER_TIMEOUT, POLL_INTERVAL
# Globals written: VERIFIER_PANE (re-read from SESSION_CONFIG if the pane was replaced)
#
# Returns: 0 when a verdict was produced and copied to verdict_dest, 1 otherwise.
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Declared once up front: a bare `local name` repeated inside a loop makes
  # zsh print `name=value` unless TYPESET_SILENT is set.
  local prompt_file verifier_cmd verifier_launch
  local codex_poll_start verdict_detected_at grace_elapsed pane_cmd codex_elapsed
  local verifier_poll_rc=0

  # Write trigger for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  prompt_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier${suffix}-prompt.md"

  # Clean previous Verifier session (with dead pane detection)
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ -z "$verifier_cmd" ]]; then
    # tmux reported no command for the pane → treat the pane as gone
    log " Verifier pane $VERIFIER_PANE is gone — replacing..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
    replace_worker_pane "$VERIFIER_PANE" "verifier"
    VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
    log " New verifier pane: $VERIFIER_PANE"
  elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
    log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
    log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
    tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
    sleep 0.2
    tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
    sleep 0.3
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    # A previous TUI is still running: interrupt it, then ask it to exit
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file BEFORE launching, so only a fresh verdict can
  # satisfy the poll below
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier — dispatch to engine-specific function
  if [[ "$engine" = "codex" ]]; then
    verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
    launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
    log_debug "Verifier$suffix codex TUI dispatched"
  else
    verifier_launch="$(build_claude_cmd tui "$model" "" "" "$VERIFIER_EFFORT")"
    if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi
    log_debug "Verifier$suffix claude dispatched"
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex: plain file poll + short grace period after the verdict is detected
    log " Polling for verify-verdict.json ($suffix, codex TUI)..."
    codex_poll_start=$(date +%s)
    verdict_detected_at=0
    while true; do
      # Wait for verdict file with valid JSON
      if [[ -f "$VERDICT_FILE" ]] && jq . "$VERDICT_FILE" >/dev/null 2>&1; then
        if (( verdict_detected_at == 0 )); then
          verdict_detected_at=$(date +%s)
          log " Verdict file detected. Grace period (30s) for codex to finalize..."
        fi
        # Grace period: 30s after verdict detection, proceed regardless of pane state
        grace_elapsed=$(( $(date +%s) - verdict_detected_at ))
        if (( grace_elapsed >= 30 )); then
          log " Grace period complete. Proceeding."
          break
        fi
        # Early exit: if pane returned to shell, no need to wait
        pane_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null || echo "")
        if [[ "$pane_cmd" = "zsh" || "$pane_cmd" = "bash" || -z "$pane_cmd" ]]; then
          log " Codex verifier$suffix process exited. Proceeding."
          break
        fi
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        if (( verdict_detected_at > 0 )); then
          log " Codex verifier$suffix timed out waiting, but verdict exists. Proceeding."
          break
        fi
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log " Polling for verify-verdict.json ($suffix)..."
    # Capture the real status with `|| rc=$?`. Reading $? inside
    # `if ! cmd; then` yields the NEGATED status (always 0), which made the
    # rc==2 branch unreachable.
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix" || verifier_poll_rc=$?
    if (( verifier_poll_rc == 2 )); then
      # Hard failure already reported by poll_for_signal; fail without the
      # generic "poll failed" message
      return 1
    elif (( verifier_poll_rc != 0 )); then
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination; do not claim success if the copy failed
  if ! cp "$VERDICT_FILE" "$verdict_dest"; then
    log_error "Verifier$suffix verdict could not be copied to $verdict_dest"
    return 1
  fi
  log " Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
2443
+
2444
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
#
# For every id in the comma-separated US_LIST, rewrites SIGNAL_FILE so the
# verifier is scoped to that id, launches the configured verifier engine in
# VERIFIER_PANE, and waits for a "pass" verdict. Stops at the first failure.
# If VERIFICATION_CMD is non-empty it is evaluated afterwards as an
# integration check.
#
# Arguments:
#   $1 iter - iteration number (zero-padded to 3 digits in file names)
#
# Returns 0 if all US pass + integration check pass, 1 if any US fails
# (launch, poll, or non-pass verdict), 2 if integration fails.
# Sets FAILED_US global on failure (the failing US id, or "integration").
#
# NOTE(review): SIGNAL_FILE is overwritten per US and is NOT restored here; on
# return it still holds the last scoped signal. The original captured the
# previous contents into an unused local — confirm whether callers expect the
# pre-call signal to be put back.
run_sequential_final_verify() {
  local iter="$1"
  # Declared once, outside the loop: a bare `local name` repeated per
  # iteration makes zsh print `name=value` unless TYPESET_SILENT is set.
  local us iter_tag verifier_prompt verifier_cmd verifier_launch verdict
  local poll_rc=0

  FAILED_US=""
  iter_tag=$(printf '%03d' "$iter")

  log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  # Command substitution is split on whitespace in both zsh and bash, so the
  # comma list is turned into a space list first.
  for us in $(printf '%s\n' "$US_LIST" | tr ',' ' '); do
    log " Final verify: checking $us..."

    # Override signal file to scope verifier to this US
    printf '%s\n' "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"

    # Write scoped verifier trigger
    write_verifier_trigger "$iter"
    verifier_prompt="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"

    # Clean verifier pane: interrupt and exit a TUI left over from the previous US
    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
      tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
    fi
    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

    # Drop the previous US's verdict BEFORE launching. Removing it after the
    # launch (as before) could delete a verdict the new verifier had already
    # written; this ordering also matches run_single_verifier.
    rm -f "$VERDICT_FILE"

    # Launch verifier
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
    else
      verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
      if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"; then
        log_error "Failed to launch verifier for $us"
        FAILED_US="$us"
        return 1
      fi
    fi

    # Poll for verdict
    poll_rc=0
    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
    if (( poll_rc != 0 )); then
      log_error "Verifier poll failed for $us (rc=$poll_rc)"
      FAILED_US="$us"
      return 1
    fi

    # Check verdict: anything other than the literal "pass" (including an
    # unreadable file, which yields an empty string) counts as failure
    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
    if [[ "$verdict" != "pass" ]]; then
      FAILED_US="$us"
      log " Sequential final verify FAILED at $us"
      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
      return 1
    fi
    log " Sequential final verify: $us PASSED"

    # Archive per-US final verdict (best effort)
    cp "$VERDICT_FILE" "$LOGS_DIR/iter-${iter_tag}.final-verdict-${us}.json" 2>/dev/null
  done

  # Integration check: run tests if VERIFICATION_CMD is set
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log " Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    # VERIFICATION_CMD is evaluated as shell code with its output discarded;
    # only the exit status matters.
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log " Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log " Integration test suite PASSED"
  fi

  log " Sequential final verify: ALL PASSED"
  return 0
}
2529
+
2530
# --- US-005: Determine whether consensus verification should run for this signal ---
# Arguments:
#   $1 signal_us_id - us_id from the signal (optional; defaults to empty)
# Reads the unified CONSENSUS_MODE global: off|all|final-only.
# Returns 0 (use consensus) or 1 (single engine):
#   all        -> always 0
#   final-only -> 0 only when signal_us_id is exactly "ALL"
#   off / any other value -> 1
_should_use_consensus() {
  local signal_us_id="${1:-}"

  if [[ "$CONSENSUS_MODE" == "all" ]]; then
    return 0
  fi

  if [[ "$CONSENSUS_MODE" == "final-only" && "$signal_us_id" == "ALL" ]]; then
    return 0
  fi

  return 1
}
2541
+
2542
# --- Consensus helper: merge the issues of both engine verdicts ---
# Arguments: $1 claude verdict file, $2 codex verdict file
# Prints one JSON array with every `.issues[]` entry of both files, each tagged
# with a "source" key. Falls back to [] for a file jq cannot read and for an
# empty merge result.
# printf is used instead of echo on purpose: zsh's builtin echo interprets
# backslash escapes, which would turn JSON escapes such as \n inside issue
# text into raw control characters and break the JSON.
_consensus_merged_issues() {
  local claude_file="$1" codex_file="$2"
  local claude_issues codex_issues merged
  claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_file" 2>/dev/null || echo '[]')
  codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_file" 2>/dev/null || echo '[]')
  merged=$(printf '%s %s\n' "$claude_issues" "$codex_issues" | jq -s 'add // []')
  [[ -n "$merged" ]] || merged='[]'
  printf '%s\n' "$merged"
}

# --- Consensus helper: write a merged "fail" verdict to VERDICT_FILE ---
# Arguments: $1 summary text, $2 issues (JSON array text),
#            $3 recommended_state_transition, $4 round number
# Reads CLAUDE_VERDICT / CODEX_VERDICT for the "consensus" object.
_consensus_write_fail_verdict() {
  local summary="$1" issues="$2" transition="$3" round="$4"
  printf '%s\n' \
    '{' \
    ' "verdict": "fail",' \
    " \"verified_at_utc\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"," \
    " \"summary\": \"$summary\"," \
    " \"issues\": $issues," \
    " \"recommended_state_transition\": \"$transition\"," \
    " \"consensus\": { \"claude\": \"$CLAUDE_VERDICT\", \"codex\": \"$CODEX_VERDICT\", \"round\": $round }" \
    '}' | atomic_write "$VERDICT_FILE"
}

# --- US-004: Run consensus verification (claude + codex sequentially) ---
#
# Runs the claude verifier, then the codex verifier, via run_single_verifier
# and combines their verdicts into VERDICT_FILE.
#
# Arguments:
#   $1 iter - iteration number (zero-padded to 3 digits in file names)
#
# Globals written: CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT,
#                  ITER_VERIFIER_CLAUDE_DURATION_S, ITER_VERIFIER_CODEX_DURATION_S
#
# Returns:
#   0 - both engines returned "pass"; merged pass verdict written
#   1 - a verifier run failed, the claude verdict stayed null/empty after one
#       retry, or the round limit was exhausted
#   2 - the engines did not both pass; fix contract + merged fail verdict
#       written, caller is expected to run the fix loop
#
# NOTE(review): CONSENSUS_ROUND is reset to 0 on entry and every path through
# the loop body returns while the round is < 6, so as written the body runs at
# most once per call and the "after 6 rounds" tail is not reached from here.
# Confirm whether the round counter was meant to persist across calls.
run_consensus_verification() {
  local iter="$1"
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local claude_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-codex.json"
  local fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"
  local claude_t0 codex_t0 combined_action merged_issues
  # jq filter rendering one markdown bullet per structured issue
  local issue_bullets='.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"'

  CONSENSUS_ROUND=0
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""

  while (( CONSENSUS_ROUND < 6 )); do
    (( CONSENSUS_ROUND++ ))
    log " Consensus round $CONSENSUS_ROUND/6..."

    # Run claude verifier first
    claude_t0=$(date +%s)
    if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
      log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    # Duration covers the first claude run only (a retry below is not added)
    ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - claude_t0 ))
    CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
    # A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
    if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
      log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
      log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
      rm -f "$claude_verdict_file" 2>/dev/null
      if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
        log_error "Claude verifier retry also failed"
        return 1
      fi
      CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
      if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
        log_error "Claude verdict still null after retry — consensus cannot proceed"
        return 1
      fi
    fi
    log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"

    # consensus-fail-fast removed (complexity vs value too low)

    # Run codex verifier second
    codex_t0=$(date +%s)
    if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
      log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - codex_t0 ))
    CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
    log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"

    log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
    combined_action="retry"
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      combined_action="pass"
    elif (( CONSENSUS_ROUND >= 6 )); then
      combined_action="blocked"
    fi
    log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$combined_action"

    # Both pass → success
    if [[ "$combined_action" = "pass" ]]; then
      # Create merged verdict with per-engine details
      printf '%s\n' \
        '{' \
        ' "verdict": "pass",' \
        " \"verified_at_utc\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"," \
        ' "summary": "Consensus PASS: both claude and codex verified independently",' \
        ' "recommended_state_transition": "complete",' \
        ' "consensus": {' \
        " \"claude\": { \"verdict\": \"pass\", \"file\": \"$claude_verdict_file\" }," \
        " \"codex\": { \"verdict\": \"pass\", \"file\": \"$codex_verdict_file\" }," \
        " \"round\": $CONSENSUS_ROUND" \
        ' }' \
        '}' | atomic_write "$VERDICT_FILE"
      return 0
    fi

    # Consensus disagreement
    log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"

    # NOTE: pre_existing_failure heuristic was removed (v0.3.5).
    # It used unreliable grep-in-description string matching to classify
    # consensus failures as "pre-existing", bypassing the consensus rule.
    # Consensus disagreement now ALWAYS flows to fix contract.
    # Codex CLI crash (no verdict file) is handled upstream via run_single_verifier return 1 → BLOCKED.

    # --- Consensus disagreement: build fix contract ---
    {
      printf '%s\n' "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
      printf '\n'
      printf '%s\n' "## Claude Verdict: $CLAUDE_VERDICT"
      if [[ "$CLAUDE_VERDICT" = "fail" ]]; then
        printf '%s\n' "### Claude Issues"
        jq -r "$issue_bullets" "$claude_verdict_file" 2>/dev/null || printf '%s\n' "- (no structured issues)"
      fi
      printf '\n'
      printf '%s\n' "## Codex Verdict: $CODEX_VERDICT"
      if [[ "$CODEX_VERDICT" = "fail" ]]; then
        printf '%s\n' "### Codex Issues"
        jq -r "$issue_bullets" "$codex_verdict_file" 2>/dev/null || printf '%s\n' "- (no structured issues)"
      fi
      printf '\n'
      printf '%s\n' "## Traceability"
      printf '%s\n' "Only changes that resolve a listed issue are allowed."
    } | atomic_write "$fix_contract"

    log " Combined fix contract: $fix_contract"

    # If this is not the last round, the caller will dispatch the Worker with the fix contract.
    # Write a fail verdict (issues from BOTH engines) so the main loop can handle the fix loop.
    if (( CONSENSUS_ROUND < 6 )); then
      merged_issues=$(_consensus_merged_issues "$claude_verdict_file" "$codex_verdict_file")
      _consensus_write_fail_verdict \
        "Consensus disagreement (round $CONSENSUS_ROUND/6): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
        "$merged_issues" "continue" "$CONSENSUS_ROUND"
      return 2  # special return: consensus disagreement, needs retry
    fi
  done

  # Max consensus rounds exceeded — include issues from both verdicts
  log_error "Consensus failed after 6 rounds"
  merged_issues=$(_consensus_merged_issues "$claude_verdict_file" "$codex_verdict_file")
  _consensus_write_fail_verdict \
    "Consensus failed after 6 rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
    "$merged_issues" "blocked" 6
  return 1
}
2692
+
2693
+ # =============================================================================
2694
+ # Main Leader Loop
2695
+ # =============================================================================
2696
+
2697
# -----------------------------------------------------------------------------
# main — Leader loop entry point.
#
# Sequence:
#   1. Acquire the project-scoped runner lock (mkdir) and the campaign lockfile.
#   2. Install the exit trap, create log/runtime/analytics directories, rotate
#      a previous debug.log / campaign.jsonl, and write metadata.json.
#   3. In per-us verify mode, load US_LIST from the PRD and restore VERIFIED_US
#      from the memory file (falling back to status.json).
#   4. Run dependency/scaffold/session checks and create the tmux session.
#   5. Iterate up to MAX_ITER times: dispatch the Worker, wait for
#      iter-signal.json, and (for verify signals) dispatch the Verifier and act
#      on its verdict (pass / fail / request_info / blocked).
#
# Globals: reads the configuration and path variables defined earlier in this
#   file (SLUG, ROOT, DESK, MAX_ITER, *_MODEL, *_ENGINE, *_FILE, *_PANE, ...);
#   writes LOCKFILE_ACQUIRED, US_LIST, VERIFIED_US, PREV_PRD_HASH,
#   PREV_PRD_US_LIST, PREV_CONTEXT_HASH, ITERATION, ITER_* timing fields,
#   MONITOR_FAILURE_COUNT, CONSECUTIVE_FAILURES, CONSENSUS_ROUND,
#   _SAME_US_FAIL_COUNT, _LAST_FAILED_US, _MODEL_UPGRADED, WORKER_MODEL,
#   WORKER_CODEX_MODEL, WORKER_CODEX_REASONING and VERIFIER_PANE.
# Arguments: none are read.
# Returns:
#   0  COMPLETE sentinel found, or a final verification passed.
#   1  blocked, infrastructure failure, circuit breaker, or MAX_ITER reached.
# Exits 1 directly when another live runner holds one of the locks.
# -----------------------------------------------------------------------------
main() {
  # --- US-026 R14 P0: project-scoped runner lockfile (mkdir atomic) ---
  # Prevents duplicate runners on the same project root regardless of slug.
  # Different ROOT_HASH allows independent parallel runners across projects.
  mkdir -p "$(dirname "$RUNNER_LOCKFILE_PATH")" 2>/dev/null
  if ! mkdir "$RUNNER_LOCKDIR" 2>/dev/null; then
    local existing existing_slug
    # `// 0` keeps a missing .pid from becoming the string "null".
    existing=$(jq -r '.pid // 0' "$RUNNER_LOCKFILE_PATH" 2>/dev/null || echo 0)
    existing_slug=$(jq -r '.slug // "unknown"' "$RUNNER_LOCKFILE_PATH" 2>/dev/null || echo unknown)
    # Anything that is not a plain non-negative integer is treated as "no live
    # owner" so the numeric comparison below never sees a non-number.
    case "$existing" in
      ''|*[!0-9]*) existing=0 ;;
    esac
    if [[ "$existing" -gt 0 ]] && kill -0 "$existing" 2>/dev/null; then
      echo "duplicate rlp-desk runner detected on this project root. existing pid=$existing slug=$existing_slug, this attempt slug=$SLUG. exiting." >&2
      echo " Recover with: rm -rf '$RUNNER_LOCKDIR' '$RUNNER_LOCKFILE_PATH' (only if pid $existing is confirmed dead)" >&2
      exit 1
    fi
    # Owner is not running: drop the stale lock directory and try once more.
    rm -rf "$RUNNER_LOCKDIR"
    mkdir "$RUNNER_LOCKDIR" 2>/dev/null || {
      echo "failed to acquire runner lock after stale cleanup; another wrapper raced ahead. exit 1" >&2
      exit 1
    }
    echo "stale runner lockfile cleaned (pid $existing dead) — acquired" >&2
  fi
  printf '{"pid":%s,"slug":"%s","root":"%s","started_at":"%s"}\n' \
    "$$" "$SLUG" "$ROOT" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$RUNNER_LOCKFILE_PATH"

  # --- Lockfile: prevent duplicate execution ---
  local lockfile="$LOCKFILE_PATH"
  mkdir -p "$(dirname "$lockfile")" 2>/dev/null
  # `set -C` (noclobber) in a subshell makes the redirect fail if the file exists.
  if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
    local lock_pid
    lock_pid=$(cat "$lockfile" 2>/dev/null)
    if kill -0 "$lock_pid" 2>/dev/null; then
      log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
      exit 1
    fi
    # Stale lock — overwrite
    log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
    echo $$ > "$lockfile"
    LOCKFILE_ACQUIRED=1
  else
    LOCKFILE_ACQUIRED=1
  fi
  # US-023 R11 P2-K: chain `_emit_final_cost_log` so cost-log.jsonl is never silently empty on exit.
  trap '_emit_final_cost_log; cleanup' EXIT INT TERM
  mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null

  # --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
  mkdir -p "$ANALYTICS_DIR" 2>/dev/null

  # --- debug.log versioning (in analytics dir, --debug only) ---
  # Move an existing log aside to the first unused "-vN" suffix.
  if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
    local dbg_n=1
    while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
      (( dbg_n++ ))
    done
    mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
  fi

  # --- campaign.jsonl versioning (always-on) ---
  if [[ -f "$CAMPAIGN_JSONL" ]]; then
    local cj_n=1
    while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
      (( cj_n++ ))
    done
    mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
  fi

  # --- metadata.json: always write at campaign start (cross-project identification) ---
  jq -n \
    --arg slug "$SLUG" \
    --arg project_root "$ROOT" \
    --arg project_name "$(basename "$ROOT")" \
    --arg campaign_status "running" \
    --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg end_time "" \
    --arg worker_model "$WORKER_MODEL" \
    --arg verifier_model "$VERIFIER_MODEL" \
    --argjson debug "$DEBUG" \
    --argjson with_sv "$WITH_SELF_VERIFICATION" \
    --argjson with_sv_requested "$WITH_SELF_VERIFICATION_REQUESTED" \
    --arg sv_skipped_reason "$SV_SKIPPED_REASON" \
    --arg lane_mode "$LANE_MODE" \
    --argjson consensus "${VERIFY_CONSENSUS:-0}" \
    '{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, with_self_verification_requested: $with_sv_requested, sv_skipped_reason: $sv_skipped_reason, lane_mode: $lane_mode, consensus: $consensus}' \
    > "$METADATA_FILE"

  # --- Startup ---
  log "Ralph Desk Tmux Runner starting..."
  log " Slug: $SLUG"
  log " Root: $ROOT"
  log " Max iterations: $MAX_ITER"
  log " Worker model: $WORKER_MODEL"
  log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
  log " Verify mode: $VERIFY_MODE"
  log " Consensus mode: $CONSENSUS_MODE"
  log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
  log " Poll interval: ${POLL_INTERVAL}s"
  log " Iter timeout: ${ITER_TIMEOUT}s"
  # --- Debug: Log execution plan ---
  if (( DEBUG )); then
    # Extract US IDs from PRD
    local prd_file="$DESK/plans/prd-$SLUG.md"
    local us_list=""
    if [[ -f "$prd_file" ]]; then
      us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
    fi
    local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')

    log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
    log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
    log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
    log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
    log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION (requested=$WITH_SELF_VERIFICATION_REQUESTED skipped=${SV_SKIPPED_REASON:-none}) debug=$DEBUG"

    if [[ "$VERIFY_MODE" = "per-us" ]]; then
      # Build expected flow
      local expected_flow=""
      local us=""   # loop variable kept function-local
      for us in $(echo "$us_list" | tr ',' ' '); do
        expected_flow="${expected_flow}worker->verify($us)->"
      done
      expected_flow="${expected_flow}verify(ALL)->COMPLETE"
      log_debug "[OPTION] expected_flow=$expected_flow"
    else
      log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
    fi

    if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
      log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
    fi
  fi

  # Extract US list for per-US sequencing
  if [[ "$VERIFY_MODE" = "per-us" ]]; then
    local prd_file="$DESK/plans/prd-$SLUG.md"
    if [[ -f "$prd_file" ]]; then
      US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
    fi

    # Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
    local memory_file="$DESK/memos/${SLUG}-memory.md"
    if [[ -f "$memory_file" ]]; then
      local completed_us
      completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
      if [[ -n "$completed_us" ]]; then
        VERIFIED_US="$completed_us"
        log " Loaded completed stories from memory: $VERIFIED_US"
        log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
      fi
    fi

    # D1: Fallback — restore verified_us from status.json if memory had none
    if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
      local status_verified
      status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
      if [[ -n "$status_verified" ]]; then
        VERIFIED_US="$status_verified"
        log " Restored verified_us from status.json: $VERIFIED_US"
        log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
      fi
    fi
  fi

  # Initialize PRD snapshot state for live update detection
  PREV_PRD_HASH=$(compute_prd_hash)
  PREV_PRD_US_LIST=$(count_prd_us)

  # Dependency checks
  check_dependencies

  # Print security warning (governance.md s7: --dangerously-skip-permissions)
  print_security_warning

  # Validate scaffold
  validate_scaffold

  # Check for existing sessions
  check_existing_sessions

  # Create tmux session with pane IDs (governance.md s7 step 1)
  create_session

  # Set trap for cleanup on exit/error
  # US-023 R11 P2-K: chain `_emit_final_cost_log` so cost-log.jsonl is never silently empty.
  trap '_emit_final_cost_log; cleanup' EXIT

  # Initialize context hash for stale detection
  PREV_CONTEXT_HASH=$(compute_context_hash)

  # --- governance.md s7: Leader Loop ---
  local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active

  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
    # US-024 R12 P0: lifecycle check site #2 — verify session/panes alive at iter entry.
    _r12_check_lifecycle "iter_start"
    log ""
    log "========== Iteration $ITERATION / $MAX_ITER =========="
    local ITER_START_TIME
    ITER_START_TIME=$(date +%s)
    # First line of the memory file's "Next Iteration Contract" section (debug log only).
    local _iter_contract=""
    _iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
    log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""

    # --- governance.md s7 step 1: Check sentinels ---
    if [[ -f "$COMPLETE_SENTINEL" ]]; then
      log "COMPLETE sentinel found. Campaign succeeded."
      update_status "complete" "complete"
      return 0
    fi
    if [[ -f "$BLOCKED_SENTINEL" ]]; then
      log "BLOCKED sentinel found. Campaign blocked."
      update_status "blocked" "blocked"
      return 1
    fi

    # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
    rm -f "$SIGNAL_FILE" "$DONE_CLAIM_FILE" "$VERDICT_FILE" 2>/dev/null
    rm -f "$WORKER_HEARTBEAT" "$VERIFIER_HEARTBEAT" 2>/dev/null

    # --- Clean previous claude session in panes (one-shot lifecycle) ---
    # Only needed from iteration 2 onwards (iteration 1 has fresh panes)
    if (( ITERATION > 1 )); then
      # Send C-c first (in case claude is mid-task), then /exit
      tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
      sleep 1
      tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
      sleep 2
      # Wait for shell prompt before proceeding
      wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
    fi

    # Reset per-iteration state
    local worker_nudge_count=0
    local verifier_nudge_count=0
    ITER_VERIFIER_START=""
    ITER_VERIFIER_END=""

    # --- US-004: detect PRD changes for live update + re-split ---
    check_prd_update

    # --- governance.md s7 step 4: Build worker prompt + trigger ---
    write_worker_trigger "$ITERATION"
    local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"

    # AC1: capture worker start timestamp
    ITER_WORKER_START=$(date +%s)

    update_status "worker" "running"

    # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
    log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"

    local worker_launch
    if [[ "$WORKER_ENGINE" = "codex" ]]; then
      worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
      if ! launch_worker_codex "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
        write_blocked_sentinel "Worker codex failed to start in pane" "" "infra_failure"
        update_status "blocked" "worker_start_failed"
        return 1
      fi
    else
      worker_launch="$(build_claude_cmd tui "$WORKER_MODEL" "" "" "$WORKER_EFFORT")"
      if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
        write_blocked_sentinel "Worker claude failed to start in pane" "" "infra_failure"
        update_status "blocked" "worker_start_failed"
        return 1
      fi
    fi

    # --- governance.md s7 step 5+6: Poll for Worker completion ---
    # US-024 R12 P0: lifecycle check site #3 — verify panes alive after worker dispatch, before wait-loop.
    _r12_check_lifecycle "post_send"
    log " Polling for iter-signal.json..."
    local worker_poll_done=0
    while (( ! worker_poll_done )); do
      local worker_poll_rc=0
      if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
        worker_poll_done=1
        log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
      else
        # In the else branch $? still holds poll_for_signal's own status.
        worker_poll_rc=$?
        # Status 2 ends the campaign here without writing a sentinel.
        # NOTE(review): presumably poll_for_signal already recorded the reason — confirm.
        if (( worker_poll_rc == 2 )); then
          return 1
        fi
        # Check if Worker is still actively running (not stuck)
        local worker_cmd
        worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
        if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
          # Process alive — extend indefinitely (no hard ceiling kill)
          # Stale-context breaker and nudge system handle truly stuck workers
          local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
          local ceiling_exceeded=""
          if (( iter_elapsed >= HARD_CEILING )); then
            ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s — not enforced, logged only]"
            log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
            log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
          fi
          log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
          log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
          log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
          update_status "worker" "slow"
          # Loop continues — re-poll same iteration
        else
          # Worker is truly dead/stuck
          (( MONITOR_FAILURE_COUNT++ ))
          log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
          if (( MONITOR_FAILURE_COUNT >= 3 )); then
            log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
            write_blocked_sentinel "3 consecutive monitor failures (worker not active)" "" "infra_failure"
            update_status "blocked" "monitor_failures"
            return 1
          fi
          log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3) — will retry"
          update_status "worker" "poll_failed"
          log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd retry=true"
          # v0.14.3 P0-5 (Bug Report #5): previously this branch wrote BLOCKED
          # unconditionally even at counter 1/3, so a single transient
          # worker-dead detection halted the campaign in 5s instead of
          # honoring the 3-strike circuit breaker just above. The
          # unconditional sentinel write was removed; the loop now continues
          # so the next polling tick can either confirm the dead state
          # (counter eventually reaches 3 -> BLOCKED) or recover (worker
          # resumes and MONITOR_FAILURE_COUNT is reset once a signal arrives).
        fi
      fi
    done

    if [[ ! -f "$SIGNAL_FILE" ]]; then
      log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
      # No signal — monitor failure, go to next iteration
      continue
    fi

    # Reset monitor failure count on success
    MONITOR_FAILURE_COUNT=0

    # AC1: capture worker end timestamp; reset consensus timing
    ITER_WORKER_END=$(date +%s)
    ITER_VERIFIER_CLAUDE_DURATION_S=""
    ITER_VERIFIER_CODEX_DURATION_S=""

    # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
    local signal_status
    signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
    local signal_summary
    signal_summary=$(jq -r '.summary // "no summary"' "$SIGNAL_FILE" 2>/dev/null)

    log " Worker signal: status=$signal_status summary=\"$signal_summary\""

    # Read us_id early for EXEC logging (also used later in verify branch)
    local signal_us_id_early=""
    signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
    log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""

    case "$signal_status" in
      continue)
        # --- governance.md s7 step 6: continue -> go to step 8 ---
        log " Worker requests continue. Moving to next iteration."
        update_status "worker" "continue"
        ;;
      verify_partial)
        # US-019 R7 P1-G: Worker explicitly verified a subset of ACs and deferred the rest.
        # Verifier evaluates only verified_acs. Malformed (empty verified_acs) downgrades to blocked.
        local vp_count
        vp_count=$(jq -r '.verified_acs // [] | length' "$SIGNAL_FILE" 2>/dev/null || echo 0)
        if [[ "$vp_count" -eq 0 ]]; then
          log " Worker signal verify_partial but verified_acs is empty — downgrading to blocked (verify_partial_malformed)."
          local vp_us_id
          vp_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
          write_blocked_sentinel "verify_partial_malformed: empty verified_acs" "${vp_us_id:-${CURRENT_US:-ALL}}" "mission_abort"
          update_status "blocked" "verify_partial_malformed"
          # Return instead of `break`: breaking out of the iteration loop would
          # run the "Max iterations reached" tail below and overwrite the
          # blocked status with timeout/max_iter.
          return 1
        fi
        log " Worker signal verify_partial (verified_acs count=$vp_count). Routing to verify path."
        signal_status="verify"
        ;&   # fall through into the verify branch
      verify)
        # --- governance.md s7 step 7: Execute Verifier ---
        # Read us_id from signal for per-US scoping
        local signal_us_id=""
        signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
        log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."

        # AC1: capture verifier start timestamp
        ITER_VERIFIER_START=$(date +%s)

        update_status "verifier" "running"

        # --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
        if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
          log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
          local seq_rc=0
          run_sequential_final_verify "$ITERATION" || seq_rc=$?
          if (( seq_rc == 0 )); then
            write_complete_sentinel "Sequential final verify passed (all US verified individually)"
            update_status "complete" "pass"
            write_campaign_jsonl "$ITERATION" "ALL" "pass"
            return 0
          else
            # Sequential verify failed — fall through to fix loop with failed US
            log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
            signal_us_id="${FAILED_US:-ALL}"
            # Synthesize a fail verdict for the fix loop
            echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
          fi
        fi

        # --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
        local use_consensus=0
        _should_use_consensus "$signal_us_id" && use_consensus=1

        # --- Consensus vs single verification ---
        if (( use_consensus )); then
          # US-004: Run consensus verification (claude + codex sequentially)
          local consensus_rc=0
          run_consensus_verification "$ITERATION" || consensus_rc=$?

          if (( consensus_rc == 2 )); then
            # Consensus disagreement — treat as fail, fix loop will handle
            log " Consensus disagreement, treating as fail."
          elif (( consensus_rc != 0 )); then
            # Consensus verification failed entirely
            log_error "Consensus verification failed"
            write_blocked_sentinel "Consensus verification failed after max rounds" "" "repeat_axis"
            update_status "blocked" "consensus_failed"
            return 1
          fi
        else
          # Standard single-engine verification
          write_verifier_trigger "$ITERATION"
          local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"

          # Step 7a: Clean previous Verifier session (with dead pane detection)
          local verifier_cmd
          verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
          if [[ -z "$verifier_cmd" ]]; then
            # tmux returned nothing for the pane: treat it as gone and replace it.
            log " Verifier pane $VERIFIER_PANE is gone — replacing..."
            log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
            replace_worker_pane "$VERIFIER_PANE" "verifier"
            VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
            log " New verifier pane: $VERIFIER_PANE"
          elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
            log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
            log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
            tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
            sleep 0.2
            tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
            sleep 0.3
          elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
            # An engine is still in the pane: interrupt it, then ask it to exit.
            tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
            sleep 0.5
            tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
            sleep 2
          fi
          wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

          local verifier_launch
          if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
            verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --disable plugins --dangerously-bypass-approvals-and-sandbox"
          else
            verifier_launch="$(build_claude_cmd tui "$VERIFIER_MODEL" "" "" "$VERIFIER_EFFORT")"
          fi
          log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"

          if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
            # NOTE(review): unlike the claude path below, the codex launch
            # status is not checked here; a failed start is only noticed by
            # the poll that follows — confirm this is intended.
            launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
          else
            if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
              update_status "verifier" "start_failed"
              continue
            fi
          fi

          # Poll for verify-verdict.json
          log " Polling for verify-verdict.json..."
          # Capture the real exit status. Reading $? inside `if ! cmd; then`
          # yields the negated status (always 0), which made the status-2
          # branch below unreachable.
          local verifier_poll_rc=0
          poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier" || verifier_poll_rc=$?
          if (( verifier_poll_rc != 0 )); then
            # Same handling as the worker poll: status 2 returns without a sentinel.
            if (( verifier_poll_rc == 2 )); then
              return 1
            fi
            log_error "Verifier poll failed"
            # Verifier is dead/stuck — BLOCK and let user decide
            write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection." "" "infra_failure"
            update_status "blocked" "verifier_dead"
            return 1
          fi
        fi

        # AC1: capture verifier end timestamp
        ITER_VERIFIER_END=$(date +%s)

        # --- governance.md s7 step 7: Read verdict via jq ---
        local verdict
        verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
        local recommended
        recommended=$(jq -r '.recommended_state_transition' "$VERDICT_FILE" 2>/dev/null)
        local verdict_summary
        verdict_summary=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)

        log " Verifier: verdict=$verdict recommended=$recommended"
        log " Verifier summary: \"$verdict_summary\""
        local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
        log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"

        case "$verdict" in
          pass)
            # A pass clears every failure counter and undoes a model upgrade.
            CONSECUTIVE_FAILURES=0
            CONSENSUS_ROUND=0
            _SAME_US_FAIL_COUNT=0
            _LAST_FAILED_US=""
            if (( _MODEL_UPGRADED )); then
              log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
              log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
              WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
              if [[ "$WORKER_ENGINE" = "codex" ]]; then
                WORKER_CODEX_MODEL="$WORKER_MODEL"
                WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
              fi
              _MODEL_UPGRADED=0
            fi

            # --- Verified US tracking (both per-us and batch modes) ---
            if [[ -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
              # Add this US to verified list
              if [[ -n "$VERIFIED_US" ]]; then
                VERIFIED_US="${VERIFIED_US},${signal_us_id}"
              else
                VERIFIED_US="$signal_us_id"
              fi
              log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
              log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
              update_status "verifier" "pass_us"
              # Worker will do next US on next iteration
            elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
              # Final full verify passed or complete recommended
              write_complete_sentinel "$verdict_summary"
              update_status "complete" "pass"
              write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
              return 0
            else
              log " Verifier passed but did not recommend complete. Continuing."
              update_status "verifier" "pass_continue"
            fi
            ;;
          fail)
            # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---

            # Parse per_us_results from verdict to track partial progress (batch + per-us)
            local _prev_verified="$VERIFIED_US"
            if jq -e '.per_us_results' "$VERDICT_FILE" &>/dev/null; then
              local _newly_passed
              _newly_passed=$(jq -r '.per_us_results | to_entries[] | select(.value == "pass") | .key' "$VERDICT_FILE" 2>/dev/null)
              local _pus=""   # loop variable kept function-local
              for _pus in $(echo "$_newly_passed"); do
                # Comma-wrapped match so one ID never matches inside a longer one.
                if ! echo ",$VERIFIED_US," | grep -q ",$_pus,"; then
                  if [[ -n "$VERIFIED_US" ]]; then
                    VERIFIED_US="${VERIFIED_US},${_pus}"
                  else
                    VERIFIED_US="$_pus"
                  fi
                  log " Partial progress: $_pus passed (overall FAIL). Verified so far: $VERIFIED_US"
                fi
              done
              log_debug "[FLOW] iter=$ITERATION partial_progress prev=$_prev_verified now=$VERIFIED_US"
            fi

            # Partial progress resets consecutive failures (progress was made)
            if [[ "$VERIFIED_US" != "$_prev_verified" ]]; then
              CONSECUTIVE_FAILURES=0
              log " Progress detected — consecutive_failures reset to 0"
              log_debug "[GOV] iter=$ITERATION consecutive_failures_reset=partial_progress"
            fi

            (( CONSECUTIVE_FAILURES++ ))
            record_us_failure "${signal_us_id:-unknown}"
            check_model_upgrade "${signal_us_id:-unknown}"

            # Mid-CB warning: alert at halfway point (governance §8 early warning)
            if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
              log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
              log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
            fi
            local verdict_summary_fail
            verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
            log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."

            # Extract issues from verdict for next Worker's fix contract
            local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).fix-contract.md"
            {
              echo "# Fix Contract (from Verifier iteration $ITERATION)"
              echo ""
              if [[ -n "$VERIFIED_US" ]]; then
                echo "## Verified US (do NOT re-implement these)"
                echo "$VERIFIED_US" | tr ',' '\n' | sed 's/^/- /'
                echo ""
                echo "**Focus ONLY on unverified user stories. The above are already verified.**"
                echo ""
              fi
              echo "## Summary"
              echo "$verdict_summary_fail"
              echo ""
              echo "## Issues (from verify-verdict.json)"
              jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$VERDICT_FILE" 2>/dev/null || echo "- (no structured issues available)"
              echo ""
              echo "## Next Iteration Contract"
              jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
            } | atomic_write "$fix_contract"
            log " Fix contract: $fix_contract"
            log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"

            # Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
            if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
              # For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
              local _ceiling_model_str=""
              if [[ "$WORKER_ENGINE" = "codex" ]]; then
                _ceiling_model_str="${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}"
              else
                _ceiling_model_str="$WORKER_MODEL"
              fi
              # An empty get_next_model result is taken to mean no further upgrade exists.
              if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
                log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
                log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
                write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures" "" "repeat_axis"
              else
                log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
                log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
                write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures" "" "repeat_axis"
              fi
              update_status "blocked" "consecutive_failures"
              return 1
            fi

            update_status "verifier" "fail"
            ;;
          request_info)
            # --- governance.md s7 step 7: request_info (degraded in tmux mode) ---
            local verdict_summary_ri
            verdict_summary_ri=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
            log " Verifier requests info (degraded in tmux lean mode)."
            log " Questions: \"$verdict_summary_ri\""
            log " Treating as soft fail — Worker will see verdict in next iteration."
            update_status "verifier" "request_info"
            ;;
          blocked)
            local _verdict_cat
            _verdict_cat=$(_classify_cross_us_or_metric "$verdict_summary")
            write_blocked_sentinel "Verifier verdict: blocked - $verdict_summary" "" "$_verdict_cat"
            update_status "blocked" "verifier_blocked"
            return 1
            ;;
          *)
            log_error "Unknown verdict: $verdict"
            update_status "verifier" "unknown_verdict"
            ;;
        esac
        ;;
      blocked)
        # --- governance.md s7 step 6: blocked -> write sentinel ---
        local _signal_cat
        _signal_cat=$(_classify_cross_us_or_metric "$signal_summary")
        write_blocked_sentinel "Worker reported blocked: $signal_summary" "" "$_signal_cat"
        update_status "blocked" "worker_blocked"
        return 1
        ;;
      *)
        log_error "Unknown signal status: $signal_status"
        update_status "worker" "unknown_status"
        ;;
    esac

    # --- step 7d: Archive iteration artifacts before cleanup ---
    archive_iter_artifacts "$ITERATION"

    # --- AC5: Write per-iteration cost estimate ---
    write_cost_log "$ITERATION"
    write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"

    # --- governance.md s7 step 8: Write result log ---
    write_result_log "$ITERATION" "$signal_status"

    # --- governance.md s7 step 8: Circuit breaker - stale context check ---
    if ! check_stale_context; then
      log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
      write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)" "" "context_limit"
      update_status "blocked" "stale_context"
      return 1
    fi

    # --- governance.md s7 step 8: Update status ---
    update_status "idle" "${signal_status:-unknown}"
  done

  # Max iterations reached
  log "Max iterations ($MAX_ITER) reached."
  update_status "timeout" "max_iter"
  return 1
}
3386
+
3387
+ # =============================================================================
3388
+ # Entry Point
3389
+ # =============================================================================
3390
+
3391
+ # --- CLI: parse model-selection, mode-switch, and consensus flags ---
3392
+ # These flags override env-var defaults (WORKER_ENGINE, WORKER_MODEL, etc.)
3393
+ # Format: "model:reasoning" → codex engine; "model-name" → claude engine
3394
+ _cli_i=1  # index into "$@"; zsh positional parameters are 1-indexed
3395
+ while (( _cli_i <= $# )); do
3396
+ case "${@[$_cli_i]}" in
3397
+ --worker-model)
3398
+ (( _cli_i++ ))  # step onto the flag's value argument
3399
+ _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "worker") || exit 1  # missing value is passed as ""; exit if the helper fails
3400
+ WORKER_ENGINE="${_cli_parsed%% *}"  # text before the first space
3401
+ _cli_rest="${_cli_parsed#* }"  # text after the first space
3402
+ WORKER_MODEL="${_cli_rest%% *}"  # first space-separated field of the remainder
3403
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
3404
+ WORKER_CODEX_MODEL="$WORKER_MODEL"
3405
+ WORKER_CODEX_REASONING="${_cli_rest##* }"  # text after the last space
3406
+ elif [[ "$_cli_rest" == *" "* ]]; then  # non-codex engine and the remainder has more than one field
3407
+ WORKER_EFFORT="${_cli_rest##* }"  # text after the last space
3408
+ fi
3409
+ ;;
3410
+ --verifier-model)
3411
+ (( _cli_i++ ))  # step onto the flag's value argument
3412
+ _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "verifier") || exit 1  # missing value is passed as ""; exit if the helper fails
3413
+ VERIFIER_ENGINE="${_cli_parsed%% *}"  # text before the first space
3414
+ _cli_rest="${_cli_parsed#* }"  # text after the first space
3415
+ VERIFIER_MODEL="${_cli_rest%% *}"  # first space-separated field of the remainder
3416
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
3417
+ VERIFIER_CODEX_MODEL="$VERIFIER_MODEL"
3418
+ VERIFIER_CODEX_REASONING="${_cli_rest##* }"  # text after the last space
3419
+ elif [[ "$_cli_rest" == *" "* ]]; then  # non-codex engine and the remainder has more than one field
3420
+ VERIFIER_EFFORT="${_cli_rest##* }"  # text after the last space
3421
+ fi
3422
+ ;;
3423
+ --lock-worker-model)
3424
+ LOCK_WORKER_MODEL=1
3425
+ ;;
3426
+ --autonomous)
3427
+ AUTONOMOUS_MODE=1
3428
+ ;;
3429
+ --lane-strict)
3430
+ # P1-E opt-in: lane mtime audit escalates to BLOCKED instead of WARN.
3431
+ # See governance §7¾.
3432
+ LANE_MODE="strict"
3433
+ ;;
3434
+ --test-density-strict)
3435
+ # US-018 R6 P1-F opt-in: AC with < 3 tests fails init (exit 1) instead of WARN.
3436
+ # See governance §7f.
3437
+ TEST_DENSITY_MODE="strict"
3438
+ ;;
3439
+ --final-verifier-model)
3440
+ (( _cli_i++ ))  # step onto the flag's value argument
3441
+ _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "final-verifier") || exit 1  # missing value is passed as ""; exit if the helper fails
3442
+ FINAL_VERIFIER_ENGINE="${_cli_parsed%% *}"  # text before the first space
3443
+ _cli_rest="${_cli_parsed#* }"  # text after the first space
3444
+ FINAL_VERIFIER_MODEL="${_cli_rest%% *}"  # first space-separated field of the remainder
3445
+ if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
3446
+ FINAL_VERIFIER_CODEX_MODEL="$FINAL_VERIFIER_MODEL"
3447
+ FINAL_VERIFIER_CODEX_REASONING="${_cli_rest##* }"  # text after the last space
3448
+ elif [[ "$_cli_rest" == *" "* ]]; then  # non-codex engine and the remainder has more than one field
3449
+ FINAL_VERIFIER_EFFORT="${_cli_rest##* }"  # text after the last space
3450
+ fi
3451
+ ;;
3452
+ --consensus)
3453
+ (( _cli_i++ ))  # step onto the flag's value argument
3454
+ CONSENSUS_MODE="${@[$_cli_i]:-off}"  # falls back to "off" when the value is missing or empty
3455
+ ;;
3456
+ --consensus-model)
3457
+ (( _cli_i++ ))  # step onto the flag's value argument
3458
+ CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.5:medium}"  # default applies when the value is missing or empty
3459
+ ;;
3460
+ --final-consensus-model)
3461
+ (( _cli_i++ ))  # step onto the flag's value argument
3462
+ FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.5:high}"  # default applies when the value is missing or empty
3463
+ ;;
3464
+ --final-consensus)
3465
+ # Legacy: map to new --consensus final-only
3466
+ CONSENSUS_MODE="final-only"
3467
+ ;;
3468
+ --verify-consensus)
3469
+ # Legacy: map to new --consensus all
3470
+ CONSENSUS_MODE="all"
3471
+ ;;
3472
+ esac  # no default arm: arguments that match none of the flags above are skipped
3473
+ (( _cli_i++ ))  # move to the next argument
3474
+ done
3475
+ unset _cli_i _cli_parsed _cli_rest  # discard the parser's temporaries
3476
+
3477
+ # Require tmux — tmux mode only works inside an active tmux session (checked via a non-empty $TMUX)
3478
+ if [[ -z "${TMUX:-}" ]]; then  # ":-" keeps set -u from aborting when TMUX is unset
3479
+ echo "ERROR: tmux mode requires running inside a tmux session."
3480
+ echo ""
3481
+ echo " Start tmux first, then retry:"
3482
+ echo " tmux"
3483
+ echo " LOOP_NAME=$SLUG $0"
3484
+ echo ""
3485
+ echo " Or use Agent() mode instead (no tmux needed):"
3486
+ echo " /rlp-desk run $SLUG"
3487
+ exit 1  # refuse to start outside tmux
3488
+ fi
3489
+
3490
+ main "$@"  # the flag parser above indexes "$@" without shifting, so main receives the original arguments