@ai-dev-methodologies/rlp-desk 0.3.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,6 +55,7 @@ HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
55
55
  MAX_RESTARTS="${MAX_RESTARTS:-3}"
56
56
  IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
57
57
  MAX_NUDGES="${MAX_NUDGES:-3}"
58
+ WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
58
59
 
59
60
  # --- Engine Selection ---
60
61
  WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
@@ -68,7 +69,18 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
68
69
  # --- Verify Mode ---
69
70
  VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
70
71
  VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
72
+ FINAL_CONSENSUS="${FINAL_CONSENSUS:-0}" # 0|1 — consensus for final ALL verify only (independent of VERIFY_CONSENSUS)
71
73
  CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
74
+ CONSENSUS_FAIL_FAST="${CONSENSUS_FAIL_FAST:-0}" # 0|1 — skip second verifier if first fails
75
+ CB_THRESHOLD="${CB_THRESHOLD:-3}" # consecutive failures before BLOCKED (default: 3)
76
+ # Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
77
+ if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
78
+ EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
79
+ else
80
+ EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
81
+ fi
82
+ _API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
83
+ _API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
72
84
 
73
85
  # --- Derived Paths ---
74
86
  DESK="$ROOT/.claude/ralph-desk"
@@ -76,6 +88,14 @@ PROMPTS_DIR="$DESK/prompts"
76
88
  CONTEXT_DIR="$DESK/context"
77
89
  MEMOS_DIR="$DESK/memos"
78
90
  LOGS_DIR="$DESK/logs/$SLUG"
91
+ RUNTIME_DIR="$LOGS_DIR/runtime"
92
+ PRD_FILE="$DESK/plans/prd-$SLUG.md"
93
+ TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
94
+ # --- Analytics Directory (user-level, cross-project) ---
95
+ ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
96
+ ANALYTICS_DIR="$HOME/.claude/ralph-desk/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
97
+ CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
98
+ METADATA_FILE="$ANALYTICS_DIR/metadata.json"
79
99
  WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
80
100
  VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
81
101
  CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
@@ -85,10 +105,12 @@ DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
85
105
  VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
86
106
  COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
87
107
  BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
88
- STATUS_FILE="$LOGS_DIR/status.json"
89
- SESSION_CONFIG="$LOGS_DIR/session-config.json"
90
- WORKER_HEARTBEAT="$LOGS_DIR/worker-heartbeat.json"
91
- VERIFIER_HEARTBEAT="$LOGS_DIR/verifier-heartbeat.json"
108
+ LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
109
+ STATUS_FILE="$RUNTIME_DIR/status.json"
110
+ SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
111
+ WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
112
+ VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
113
+ COST_LOG="$LOGS_DIR/cost-log.jsonl"
92
114
 
93
115
  # --- Session Naming ---
94
116
  TIMESTAMP=$(date +%Y%m%d-%H%M%S)
@@ -103,41 +125,265 @@ HEARTBEAT_STALE_COUNT=0
103
125
  MONITOR_FAILURE_COUNT=0
104
126
  CONSECUTIVE_FAILURES=0
105
127
  PREV_CONTEXT_HASH=""
128
+ PREV_PRD_HASH=""
129
+ PREV_PRD_US_LIST=""
130
+ _PRD_CHANGED=0
106
131
  ITERATION=0
107
132
  START_TIME=$(date +%s)
133
+ BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
134
+ CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
135
+ SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
108
136
  VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
109
137
  CONSENSUS_ROUND=0 # current consensus round for current US
110
138
  US_LIST="" # comma-separated US IDs from PRD (per-us mode)
139
+ LOCKFILE_ACQUIRED=0
140
+ LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
141
+ _SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
142
+ _LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
143
+ _MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
144
+ _ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
145
+ _ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
111
146
 
112
147
  # =============================================================================
113
148
  # Utility Functions
114
149
  # =============================================================================
115
150
 
116
151
  DEBUG="${DEBUG:-0}"
117
- DEBUG_LOG="$ROOT/.claude/ralph-desk/logs/${LOOP_NAME:-unknown}/debug.log"
152
+ DEBUG_LOG="$ANALYTICS_DIR/debug.log"
153
+
154
+ # Source shared business logic
155
+ LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
156
+ source "$LIB_DIR/lib_ralph_desk.zsh"
157
+
158
+ # A16: Warn if running in foreground (may conflict with Claude Code pane)
159
+ if [[ -z "${RLP_BACKGROUND:-}" ]]; then
160
+ echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
161
+ echo " Recommended: launch via Bash tool with run_in_background: true" >&2
162
+ echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
163
+ fi
118
164
 
119
- log() {
120
- echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
165
+ # check_dead_pane() — determine if pane command indicates a dead/exited process
166
+ # Engine-aware: bash is normal for codex workers (trigger runs in bash),
167
+ # but indicates dead pane for claude workers.
168
+ # Args: $1=pane_current_command $2=engine (claude|codex) $3=role (worker|verifier)
169
+ # Returns: 0 if dead, 1 if alive
170
+ check_dead_pane() {
171
+ local poll_cmd="$1"
172
+ local engine="${2:-claude}"
173
+ local role="${3:-worker}"
174
+
175
+ if [[ -z "$poll_cmd" ]]; then
176
+ return 0 # empty = dead
177
+ elif [[ "$poll_cmd" == "zsh" ]]; then
178
+ return 0 # bare zsh = dead
179
+ elif [[ "$poll_cmd" == "bash" && "$engine" != "codex" ]]; then
180
+ return 0 # bash = dead for claude (codex uses bash trigger)
181
+ fi
182
+ return 1 # alive
121
183
  }
122
184
 
123
- log_debug() {
124
- if (( DEBUG )); then
125
- mkdir -p "$(dirname "$DEBUG_LOG")" 2>/dev/null
126
- echo "[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG: $*" >> "$DEBUG_LOG"
185
+ # launch_worker_codex() — launch codex Worker via trigger script (non-interactive exec)
186
+ # Args: $1=pane_id $2=trigger_file $3=iteration
187
+ # Returns: 0 always (codex failures detected by poll_for_signal)
188
+ launch_worker_codex() {
189
+ local pane_id="$1"
190
+ local trigger_file="$2"
191
+ local iter="$3"
192
+
193
+ log " Launching Worker codex via trigger script in pane $pane_id..."
194
+ paste_to_pane "$pane_id" "bash $trigger_file"
195
+ tmux send-keys -t "$pane_id" Enter
196
+ log_debug "Worker codex trigger sent: $trigger_file"
197
+ sleep 3 # brief wait for codex to start
198
+ return 0
199
+ }
200
+
201
+ # launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
202
+ # Handles: TUI startup, wait_for_pane_ready, instruction send, 15-iteration submit loop,
203
+ # restart recovery on submit failure.
204
+ # Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
205
+ # Returns: 0 once the instruction has been sent (also when restart recovery fails — that is only logged), 1 if the initial TUI start fails (caller writes BLOCKED)
206
+ launch_worker_claude() {
207
+ local pane_id="$1"
208
+ local prompt_file="$2"
209
+ local iter="$3"
210
+ local worker_launch="$4"
211
+
212
+ log " Launching Worker claude in pane $pane_id..."
213
+ paste_to_pane "$pane_id" "$worker_launch"
214
+ tmux send-keys -t "$pane_id" Enter
215
+
216
+ # Wait for claude TUI to be ready
217
+ if ! wait_for_pane_ready "$pane_id" 30; then
218
+ log_error "Worker claude failed to start"
219
+ return 1
220
+ fi
221
+
222
+ # Send instruction to claude TUI
223
+ sleep 3
224
+ local worker_instruction="Read and execute the instructions in $prompt_file"
225
+ paste_to_pane "$pane_id" "$worker_instruction"
226
+ tmux send-keys -t "$pane_id" Enter
227
+ log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
228
+
229
+ # 15-iteration submit loop — verify claude started working
230
+ local submit_attempts=0
231
+ while (( submit_attempts < 15 )); do
232
+ sleep 2
233
+ local pane_check
234
+ pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
235
+ if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored\|Prestidigitating\|Undulating\|Reading\|Bash\|Edit\|Write\|Grep\|Glob" 2>/dev/null; then
236
+ log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
237
+ log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
238
+ break
239
+ fi
240
+ # Every 3 failed attempts, re-send full instruction
241
+ if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
242
+ log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
243
+ tmux send-keys -t "$pane_id" C-u 2>/dev/null
244
+ sleep 0.2
245
+ paste_to_pane "$pane_id" "$worker_instruction"
246
+ sleep 0.15
247
+ tmux send-keys -t "$pane_id" Enter
248
+ sleep 1
249
+ fi
250
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
251
+ sleep 0.3
252
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
253
+ (( submit_attempts++ ))
254
+ done
255
+
256
+ # If 15 attempts failed, restart claude and retry
257
+ if (( submit_attempts >= 15 )); then
258
+ log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
259
+ log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
260
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null
261
+ sleep 0.5
262
+ tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
263
+ sleep 2
264
+ wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
265
+ paste_to_pane "$pane_id" "$worker_launch"
266
+ tmux send-keys -t "$pane_id" Enter
267
+ if wait_for_pane_ready "$pane_id" 30; then
268
+ sleep 3
269
+ paste_to_pane "$pane_id" "$worker_instruction"
270
+ tmux send-keys -t "$pane_id" Enter
271
+ log " Worker restarted and instruction re-sent"
272
+ log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
273
+ else
274
+ log_error "Worker restart failed — pane not ready"
275
+ log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
276
+ fi
277
+ fi
278
+
279
+ return 0
280
+ }
281
+
282
+ # launch_verifier_codex() — launch codex Verifier in pane (non-interactive)
283
+ # Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
284
+ # Returns: 0 always
285
+ launch_verifier_codex() {
286
+ local pane_id="$1"
287
+ local prompt_file="$2"
288
+ local iter="$3"
289
+ local verifier_launch="$4"
290
+
291
+ log " Launching Verifier codex in pane $pane_id..."
292
+ paste_to_pane "$pane_id" "$verifier_launch"
293
+ tmux send-keys -t "$pane_id" Enter
294
+ sleep 3
295
+ return 0
296
+ }
297
+
298
+ # launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
299
+ # Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
300
+ # Returns: 0 on success, 1 if the Verifier TUI fails to become ready
301
+ launch_verifier_claude() {
302
+ local pane_id="$1"
303
+ local prompt_file="$2"
304
+ local iter="$3"
305
+ local verifier_launch="$4"
306
+
307
+ log " Launching Verifier claude in pane $pane_id..."
308
+ paste_to_pane "$pane_id" "$verifier_launch"
309
+ tmux send-keys -t "$pane_id" Enter
310
+
311
+ if ! wait_for_pane_ready "$pane_id" 30; then
312
+ log_error "Verifier failed to start"
313
+ return 1
127
314
  fi
315
+
316
+ sleep 3
317
+ local verifier_instruction="Read and execute the instructions in $prompt_file"
318
+ paste_to_pane "$pane_id" "$verifier_instruction"
319
+ tmux send-keys -t "$pane_id" Enter
320
+ log_debug "Verifier instruction sent directly"
321
+
322
+ # Submit loop — verify verifier started working
323
+ local submit_attempts=0
324
+ while (( submit_attempts < 15 )); do
325
+ sleep 2
326
+ local vs_check
327
+ vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
328
+ if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
329
+ log_debug "Verifier started working after $((submit_attempts + 1)) checks"
330
+ break
331
+ fi
332
+ if (( submit_attempts == 8 )); then
333
+ log_debug "Adaptive instruction retry: clearing line and re-typing"
334
+ tmux send-keys -t "$pane_id" C-u 2>/dev/null
335
+ sleep 0.1
336
+ paste_to_pane "$pane_id" "$verifier_instruction"
337
+ tmux send-keys -t "$pane_id" Enter
338
+ fi
339
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
340
+ sleep 0.3
341
+ tmux send-keys -t "$pane_id" C-m 2>/dev/null
342
+ (( submit_attempts++ ))
343
+ done
344
+ return 0
128
345
  }
129
346
 
130
- log_error() {
131
- echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
347
+ # handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
348
+ # On exit: check done-claim, auto-generate iter-signal.
349
+ # Args: $1=iteration $2=signal_file
350
+ # Returns: 0 (signal generated), 1 (error)
351
+ handle_worker_exit_codex() {
352
+ local iter="$1"
353
+ local signal_file="$2"
354
+
355
+ log " Codex worker process exited. Checking for done-claim..."
356
+ if [[ -f "$DONE_CLAIM_FILE" ]]; then
357
+ local dc_us_id
358
+ dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
359
+ log " Codex worker completed with done-claim (us_id=$dc_us_id). Auto-generating signal."
360
+ echo '{"iteration":'"$iter"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated after codex exec exit","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
361
+ else
362
+ log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
363
+ local current_us
364
+ current_us=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null || echo "US-001")
365
+ local mem_us
366
+ mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
367
+ [[ -n "$mem_us" ]] && current_us="$mem_us"
368
+ echo '{"iteration":'"$iter"',"status":"verify","us_id":"'"$current_us"'","summary":"auto-generated after codex exec exit (no done-claim)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
369
+ fi
370
+ return 0
132
371
  }
133
372
 
134
- # --- governance.md s7: Atomic file writes (tmux pattern) ---
135
- # All file writes by the Leader use tmp+mv to prevent corruption.
136
- atomic_write() {
137
- local target="$1"
138
- local tmp="${target}.tmp.$$"
139
- cat > "$tmp"
140
- mv "$tmp" "$target"
373
+ # handle_worker_exit_claude() — handle claude worker process exit (restart with backoff)
374
+ # Args: $1=pane_id $2=iteration $3=trigger_file
375
+ # Returns: 0 (restarted), 1 (max restarts exceeded)
376
+ handle_worker_exit_claude() {
377
+ local pane_id="$1"
378
+ local iter="$2"
379
+ local trigger_file="$3"
380
+
381
+ log_error "Worker exited without writing signal file"
382
+ if restart_worker "$pane_id" "$iter" "$trigger_file"; then
383
+ return 0
384
+ else
385
+ return 1
386
+ fi
141
387
  }
142
388
 
143
389
  # --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
@@ -148,12 +394,28 @@ replace_worker_pane() {
148
394
  log " Replacing dead $role pane $old_pane..."
149
395
  tmux kill-pane -t "$old_pane" 2>/dev/null
150
396
 
151
- # Create fresh pane via split-window off leader (omc-teams kill-and-replace pattern)
397
+ # Create fresh pane maintaining original layout: worker(top-right) / verifier(bottom-right)
152
398
  local new_pane
153
- new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
399
+ if [[ "$role" == "verifier" ]]; then
400
+ # Verifier goes below worker: split vertically from worker pane
401
+ if tmux display-message -t "$WORKER_PANE" -p '#{pane_id}' &>/dev/null; then
402
+ new_pane=$(tmux split-window -v -d -t "$WORKER_PANE" -P -F '#{pane_id}' -c "$ROOT")
403
+ else
404
+ # Fallback: worker pane also dead, split horizontally from leader
405
+ new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
406
+ fi
407
+ else
408
+ # Worker goes above verifier: split vertically before verifier pane
409
+ if tmux display-message -t "$VERIFIER_PANE" -p '#{pane_id}' &>/dev/null; then
410
+ new_pane=$(tmux split-window -v -b -d -t "$VERIFIER_PANE" -P -F '#{pane_id}' -c "$ROOT")
411
+ else
412
+ # Fallback: verifier pane also dead, split horizontally from leader
413
+ new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
414
+ fi
415
+ fi
154
416
 
155
417
  log " New $role pane: $new_pane (replaced $old_pane)"
156
- log_debug "[EXEC] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"
418
+ log_debug "[FLOW] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"
157
419
 
158
420
  # Update session-config.json with new pane ID
159
421
  if [[ -f "$SESSION_CONFIG" ]]; then
@@ -178,9 +440,13 @@ check_dependencies() {
178
440
  missing=1
179
441
  fi
180
442
 
181
- if ! command -v claude >/dev/null 2>&1; then
182
- log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
183
- missing=1
443
+ # claude required only when claude engine is used for Worker or Verifier execution;
444
+ # codex-only campaigns can run without claude — generate_sv_report degrades gracefully
445
+ if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
446
+ if ! command -v claude >/dev/null 2>&1; then
447
+ log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
448
+ missing=1
449
+ fi
184
450
  fi
185
451
 
186
452
  if ! command -v jq >/dev/null 2>&1; then
@@ -189,14 +455,9 @@ check_dependencies() {
189
455
  fi
190
456
 
191
457
  # Codex binary required only when engine=codex or consensus verification is enabled
192
- if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
458
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
193
459
  if ! command -v codex >/dev/null 2>&1; then
194
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
195
- log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
196
- else
197
- log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
198
- fi
199
- log_error "Install with: npm install -g @openai/codex"
460
+ log_error "codex CLI not found. Install: npm install -g @openai/codex"
200
461
  missing=1
201
462
  fi
202
463
  fi
@@ -205,52 +466,19 @@ check_dependencies() {
205
466
  exit 1
206
467
  fi
207
468
 
208
- # Resolve full path to claude binary for reliable launches
209
- CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
210
- log " Claude binary: $CLAUDE_BIN"
469
+ # Resolve full path to claude binary when claude engine is in use
470
+ if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
471
+ CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
472
+ log " Claude binary: $CLAUDE_BIN"
473
+ fi
211
474
 
212
475
  # Resolve codex binary if needed
213
- if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
476
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
214
477
  CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
215
478
  log " Codex binary: $CODEX_BIN"
216
479
  fi
217
480
  }
218
481
 
219
- # =============================================================================
220
- # Scaffold Validation
221
- # =============================================================================
222
-
223
- validate_scaffold() {
224
- local errors=0
225
-
226
- if [[ ! -f "$WORKER_PROMPT_BASE" ]]; then
227
- log_error "Worker prompt not found: $WORKER_PROMPT_BASE"
228
- errors=1
229
- fi
230
-
231
- if [[ ! -f "$VERIFIER_PROMPT_BASE" ]]; then
232
- log_error "Verifier prompt not found: $VERIFIER_PROMPT_BASE"
233
- errors=1
234
- fi
235
-
236
- if [[ ! -f "$CONTEXT_FILE" ]]; then
237
- log_error "Context file not found: $CONTEXT_FILE"
238
- errors=1
239
- fi
240
-
241
- if [[ ! -f "$MEMORY_FILE" ]]; then
242
- log_error "Memory file not found: $MEMORY_FILE"
243
- errors=1
244
- fi
245
-
246
- if (( errors )); then
247
- log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
248
- exit 1
249
- fi
250
-
251
- mkdir -p "$LOGS_DIR"
252
- }
253
-
254
482
  # =============================================================================
255
483
  # Session Management (tmux pattern: pane IDs)
256
484
  # =============================================================================
@@ -300,15 +528,42 @@ create_session() {
300
528
 
301
529
  fi
302
530
 
531
+ # Set pane titles and enable border labels for visual distinction
532
+ local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
533
+ local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
534
+ [[ "$VERIFY_CONSENSUS" = "1" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + codex:$VERIFIER_CODEX_MODEL)"
535
+ tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
536
+ tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
537
+ tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
538
+ # Color-coded pane borders: green=leader, blue=worker, yellow=verifier
539
+ tmux set-option -p -t "$LEADER_PANE" pane-border-style "fg=green" 2>/dev/null
540
+ tmux set-option -p -t "$WORKER_PANE" pane-border-style "fg=blue" 2>/dev/null
541
+ tmux set-option -p -t "$VERIFIER_PANE" pane-border-style "fg=yellow" 2>/dev/null
542
+ # Show pane titles in border
543
+ tmux set-option pane-border-status top 2>/dev/null
544
+ tmux set-option pane-border-format "#{?pane_active,#[fg=white bold],#[fg=grey]} #{pane_title} " 2>/dev/null
545
+
303
546
  log " Leader pane: $LEADER_PANE"
304
547
  log " Worker pane: $WORKER_PANE"
305
548
  log " Verifier pane: $VERIFIER_PANE"
306
549
 
550
+ # AC12: Capture baseline commit before writing session config
551
+ BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")
552
+
553
+ # Truncate cost-log for fresh run (previous data in versioned campaign reports)
554
+ > "$COST_LOG"
555
+
556
+ # SV flag warning for tmux mode
557
+ if (( WITH_SELF_VERIFICATION )); then
558
+ log " NOTE: --with-self-verification recorded but SV report generation is Agent-mode only"
559
+ fi
560
+
307
561
  # Write session config (atomic write)
308
562
  echo '{
309
563
  "session_name": "'"$SESSION_NAME"'",
310
564
  "slug": "'"$SLUG"'",
311
565
  "created_at": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",
566
+ "baseline_commit": "'"$BASELINE_COMMIT"'",
312
567
  "panes": {
313
568
  "leader": "'"$LEADER_PANE"'",
314
569
  "worker": "'"$WORKER_PANE"'",
@@ -340,7 +595,10 @@ create_session() {
340
595
  "heartbeat_stale_threshold": '"$HEARTBEAT_STALE_THRESHOLD"',
341
596
  "max_restarts": '"$MAX_RESTARTS"',
342
597
  "idle_nudge_threshold": '"$IDLE_NUDGE_THRESHOLD"',
343
- "max_nudges": '"$MAX_NUDGES"'
598
+ "max_nudges": '"$MAX_NUDGES"',
599
+ "cb_threshold": '"$CB_THRESHOLD"',
600
+ "effective_cb_threshold": '"$EFFECTIVE_CB_THRESHOLD"',
601
+ "with_self_verification": '"$WITH_SELF_VERIFICATION"'
344
602
  }
345
603
  }' | atomic_write "$SESSION_CONFIG"
346
604
 
@@ -366,6 +624,17 @@ check_copy_mode() {
366
624
  # Verification-Based Send Retry (tmux pattern)
367
625
  # =============================================================================
368
626
 
627
+ # --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
628
+ paste_to_pane() {
629
+ local pane_id="$1"
630
+ local text="$2"
631
+ local tmpbuf="/tmp/.rlp-desk-paste-$$.tmp"
632
+ echo -n "$text" > "$tmpbuf"
633
+ tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null
634
+ tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null
635
+ rm -f "$tmpbuf"
636
+ }
637
+
369
638
  # --- governance.md s7 step 5: Send with copy-mode guard and retry ---
370
639
  safe_send_keys() {
371
640
  local pane_id="$1"
@@ -403,9 +672,9 @@ safe_send_keys() {
403
672
  tmux send-keys -t "$pane_id" "2" Enter
404
673
  sleep 0.2
405
674
  fi
406
- # Send text in literal mode with -- separator
407
- log_debug " Sending text to pane $pane_id (${#text} chars)"
408
- tmux send-keys -t "$pane_id" -l -- "$text"
675
+ # Send text via buffer paste (reliable for long strings)
676
+ log_debug " Pasting text to pane $pane_id (${#text} chars)"
677
+ paste_to_pane "$pane_id" "$text"
409
678
 
410
679
  # Allow input buffer to settle (tmux: 150ms)
411
680
  sleep 0.15
@@ -415,9 +684,7 @@ safe_send_keys() {
415
684
  while (( round < 6 )); do
416
685
  sleep 0.1
417
686
  if (( round == 0 && pane_busy )); then
418
- # Busy pane: Tab+C-m queue semantics (tmux pattern)
419
- tmux send-keys -t "$pane_id" Tab
420
- sleep 0.08
687
+ # Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
421
688
  tmux send-keys -t "$pane_id" C-m
422
689
  else
423
690
  tmux send-keys -t "$pane_id" C-m
@@ -450,7 +717,7 @@ safe_send_keys() {
450
717
  if ! check_copy_mode "$pane_id"; then
451
718
  return 1
452
719
  fi
453
- tmux send-keys -t "$pane_id" -l -- "$text"
720
+ paste_to_pane "$pane_id" "$text"
454
721
  sleep 0.12
455
722
  local retry_round=0
456
723
  while (( retry_round < 4 )); do
@@ -598,12 +865,19 @@ check_and_nudge_idle_pane() {
598
865
  local now
599
866
  now=$(date +%s)
600
867
  if (( now - idle_since > IDLE_NUDGE_THRESHOLD )); then
601
- local count=${(P)nudge_count_var}
602
- if (( count < MAX_NUDGES )); then
603
- log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
604
- safe_send_keys "$pane_id" ""
605
- (( count++ ))
606
- eval "$nudge_count_var=$count"
868
+ # A12 fix: NEVER nudge if pane is busy (thinking/working) — nudge interrupts claude
869
+ local _nudge_capture
870
+ _nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
871
+ if echo "$_nudge_capture" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" 2>/dev/null; then
872
+ log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
873
+ else
874
+ local count=${(P)nudge_count_var}
875
+ if (( count < MAX_NUDGES )); then
876
+ log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
877
+ safe_send_keys "$pane_id" ""
878
+ (( count++ ))
879
+ eval "$nudge_count_var=$count"
880
+ fi
607
881
  fi
608
882
  fi
609
883
  else
@@ -621,6 +895,13 @@ restart_worker() {
621
895
  local pane_id="$1"
622
896
  local iter="$2"
623
897
  local trigger_file="$3"
898
+
899
+ # Codex workers are 1-shot exec; restart is not applicable
900
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
901
+ log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
902
+ return 1
903
+ fi
904
+
624
905
  local restart_count="${WORKER_RESTARTS[$iter]:-0}"
625
906
 
626
907
  if (( restart_count >= MAX_RESTARTS )); then
@@ -653,6 +934,25 @@ restart_worker() {
653
934
  # Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
654
935
  # =============================================================================
655
936
 
937
+ # Per-US PRD injection helper
938
+ # Substitutes the full PRD path with a per-US split path in the Worker prompt base.
939
+ # Falls back to the full PRD with a stderr warning if the split file is missing.
940
+ # Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
941
+ inject_per_us_prd() {
942
+ local prompt_base="$1"
943
+ local full_prd="$2"
944
+ local per_us_prd="${3:-}"
945
+
946
+ if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
947
+ sed "s|$full_prd|$per_us_prd|g" "$prompt_base"
948
+ else
949
+ if [[ -n "$per_us_prd" ]]; then
950
+ echo "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
951
+ fi
952
+ cat "$prompt_base"
953
+ fi
954
+ }
955
+
656
956
  # --- governance.md s7 step 4+5: Write prompt and trigger to files ---
657
957
  # NEVER send prompt content through tmux send-keys.
658
958
  # Write payloads to files, send only short trigger commands (<200 chars).
@@ -670,14 +970,31 @@ write_worker_trigger() {
670
970
  local prev_iter=$((iter - 1))
671
971
  local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' $prev_iter).fix-contract.md"
672
972
 
973
+ # Compute next unverified US before prompt assembly (required for per-US PRD injection)
974
+ local next_us=""
975
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
976
+ for us in $(echo "$US_LIST" | tr ',' ' '); do
977
+ if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
978
+ next_us="$us"
979
+ break
980
+ fi
981
+ done
982
+ fi
983
+
673
984
  {
674
- cat "$WORKER_PROMPT_BASE"
985
+ # Per-US PRD injection: substitute full PRD path with per-US split path when available
986
+ local per_us_prd=""
987
+ [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
988
+ inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
675
989
  echo ""
676
990
  echo "---"
677
991
  echo "## Iteration Context"
678
992
  echo "- **Iteration**: $iter"
679
993
  echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
680
994
  echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
995
+ if (( _PRD_CHANGED )); then
996
+ echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
997
+ fi
681
998
 
682
999
  # Include fix contract if previous verifier failed
683
1000
  if [[ -f "$fix_contract_file" ]]; then
@@ -692,23 +1009,25 @@ write_worker_trigger() {
692
1009
 
693
1010
  # Per-US mode: tell Worker exactly which US to work on
694
1011
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
695
- # Find next unverified US
696
- local next_us=""
697
- for us in $(echo "$US_LIST" | tr ',' ' '); do
698
- if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
699
- next_us="$us"
700
- break
701
- fi
702
- done
703
-
704
1012
  if [[ -n "$next_us" ]]; then
705
1013
  echo ""
706
1014
  echo "---"
707
- echo "## PER-US SCOPE LOCK (this iteration)"
1015
+ echo "## PER-US SCOPE LOCK (this iteration) — OVERRIDES memory contract"
1016
+ echo "**IGNORE the 'Next Iteration Contract' from memory if it references a different story.**"
1017
+ echo "The Leader has determined that **${next_us}** is the next unverified story."
708
1018
  echo "You MUST implement ONLY **${next_us}** in this iteration."
709
1019
  echo "Do NOT implement any other user stories."
1020
+ # Per-US test-spec injection: point Worker to scoped test-spec if available
1021
+ local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
1022
+ if [[ -f "$per_us_test_spec" ]]; then
1023
+ echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
1024
+ else
1025
+ echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
1026
+ fi
710
1027
  echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
711
1028
  echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
1029
+ echo ""
1030
+ echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
712
1031
  elif [[ -n "$VERIFIED_US" ]]; then
713
1032
  # All individual US verified — this is the final full verify iteration
714
1033
  echo ""
@@ -732,12 +1051,12 @@ write_worker_trigger() {
732
1051
  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
733
1052
  # Engine-specific launch command (expanded at write time)
734
1053
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
735
- local engine_cmd="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL \\
1054
+ local engine_cmd="${CODEX_BIN:-codex} exec \\
1055
+ -m $WORKER_CODEX_MODEL \\
736
1056
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
737
1057
  --dangerously-bypass-approvals-and-sandbox \\
738
- \"\$(cat $prompt_file)\" \\
739
- 2>&1 | tee $output_log"
740
- local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
1058
+ \"\$(cat $prompt_file)\""
1059
+ local engine_comment="# Run codex exec with fresh context (no pipe — codex requires terminal)"
741
1060
  else
742
1061
  local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
743
1062
  --model $WORKER_MODEL \\
@@ -868,106 +1187,6 @@ TRIGGER_EOF
868
1187
  log " Verifier trigger: $trigger_file"
869
1188
  }
870
1189
 
871
- # =============================================================================
872
- # Status Updates
873
- # =============================================================================
874
-
875
- # --- governance.md s7 step 8: Update status.json ---
876
- update_status() {
877
- local phase="$1"
878
- local last_result="$2"
879
-
880
- # Build verified_us as JSON array
881
- local verified_us_json="[]"
882
- if [[ -n "$VERIFIED_US" ]]; then
883
- verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
884
- fi
885
-
886
- # Build consensus fields
887
- local consensus_json=""
888
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
889
- consensus_json=',
890
- "consensus_scope": "'"$CONSENSUS_SCOPE"'",
891
- "consensus_round": '"$CONSENSUS_ROUND"',
892
- "claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
893
- "codex_verdict": "'"${CODEX_VERDICT:-}"'"'
894
- fi
895
-
896
- echo '{
897
- "slug": "'"$SLUG"'",
898
- "iteration": '"$ITERATION"',
899
- "max_iter": '"$MAX_ITER"',
900
- "phase": "'"$phase"'",
901
- "worker_model": "'"$WORKER_MODEL"'",
902
- "verifier_model": "'"$VERIFIER_MODEL"'",
903
- "worker_engine": "'"$WORKER_ENGINE"'",
904
- "verifier_engine": "'"$VERIFIER_ENGINE"'",
905
- "worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
906
- "worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
907
- "verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
908
- "verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
909
- "verify_mode": "'"$VERIFY_MODE"'",
910
- "verify_consensus": '"$VERIFY_CONSENSUS"',
911
- "last_result": "'"$last_result"'",
912
- "consecutive_failures": '"$CONSECUTIVE_FAILURES"',
913
- "verified_us": '"$verified_us_json"''"$consensus_json"',
914
- "updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
915
- }' | atomic_write "$STATUS_FILE"
916
- }
917
-
918
- # --- governance.md s7 step 8: Write result log ---
919
- write_result_log() {
920
- local iter="$1"
921
- local result="$2"
922
- local result_file="$LOGS_DIR/iter-$(printf '%03d' $iter).result.md"
923
-
924
- local git_diff=""
925
- git_diff=$(git diff --stat HEAD~1 HEAD 2>/dev/null || echo "(no git diff available)")
926
-
927
- {
928
- echo "# Iteration $iter Result"
929
- echo ""
930
- echo "## Status"
931
- echo "$result [leader-measured]"
932
- echo ""
933
- echo "## Files Changed"
934
- echo '```'
935
- echo "$git_diff"
936
- echo '```'
937
- echo "[git-measured]"
938
- echo ""
939
- echo "## Timestamp"
940
- echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
941
- } | atomic_write "$result_file"
942
- }
943
-
944
- # =============================================================================
945
- # Sentinel Writers
946
- # =============================================================================
947
-
948
- # --- governance.md s7: Only the Leader writes sentinels ---
949
- write_complete_sentinel() {
950
- local summary="$1"
951
- echo "# Campaign Complete
952
-
953
- Completed at iteration $ITERATION.
954
- $summary
955
-
956
- Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$COMPLETE_SENTINEL"
957
- log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
958
- }
959
-
960
- write_blocked_sentinel() {
961
- local reason="$1"
962
- echo "# Campaign Blocked
963
-
964
- Blocked at iteration $ITERATION.
965
- Reason: $reason
966
-
967
- Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
968
- log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
969
- }
970
-
971
1190
  # =============================================================================
972
1191
  # Cleanup (trap handler)
973
1192
  # =============================================================================
@@ -976,7 +1195,11 @@ cleanup() {
976
1195
  log "Cleaning up..."
977
1196
 
978
1197
  # Remove lockfile
979
- rm -f "$DESK/logs/.rlp-desk-$SLUG.lock" 2>/dev/null
1198
+ if (( LOCKFILE_ACQUIRED )); then
1199
+ rm -f "$LOCKFILE_PATH" 2>/dev/null
1200
+ else
1201
+ log_debug "cleanup: lockfile not owned by this process, skipping removal"
1202
+ fi
980
1203
 
981
1204
  # Kill claude processes then kill panes
982
1205
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
@@ -1002,6 +1225,12 @@ cleanup() {
1002
1225
  setopt local_options nonomatch 2>/dev/null
1003
1226
  rm -f "$LOGS_DIR"/*.tmp.* "$MEMOS_DIR"/*.tmp.* 2>/dev/null
1004
1227
 
1228
+ # AC4: Generate campaign report on all terminal states (always-on)
1229
+ generate_campaign_report
1230
+
1231
+ # US-001: Generate SV report after campaign report (tmux mode)
1232
+ generate_sv_report
1233
+
1005
1234
  # Print summary
1006
1235
  local end_time
1007
1236
  end_time=$(date +%s)
@@ -1014,17 +1243,24 @@ cleanup() {
1014
1243
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
1015
1244
  else final_status="TIMEOUT"; fi
1016
1245
 
1246
+ # --- Update metadata.json with final status ---
1247
+ if [[ -f "$METADATA_FILE" ]]; then
1248
+ jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1249
+ '.campaign_status = $status | .end_time = $end_time' \
1250
+ "$METADATA_FILE" > "${METADATA_FILE}.tmp" && mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
1251
+ fi
1252
+
1017
1253
  if (( DEBUG )); then
1018
1254
  local end_ts=$(date +%s)
1019
1255
  local elapsed=$((end_ts - START_TIME))
1020
1256
 
1021
- log_debug "[EXEC] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
1257
+ log_debug "[FLOW] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
1022
1258
 
1023
1259
  # --- Validation ---
1024
- log_debug "[VALIDATE] === Execution Validation ==="
1260
+ log_debug "[FLOW] === Execution Validation ==="
1025
1261
 
1026
1262
  # 1. Did the correct verify mode run?
1027
- log_debug "[VALIDATE] verify_mode=$VERIFY_MODE configured=true"
1263
+ log_debug "[FLOW] verify_mode=$VERIFY_MODE configured=true"
1028
1264
 
1029
1265
  # 2. Per-US: were all US individually verified?
1030
1266
  if [[ "$VERIFY_MODE" = "per-us" ]]; then
@@ -1038,39 +1274,39 @@ cleanup() {
1038
1274
 
1039
1275
  if [[ "$final_status" = "COMPLETE" ]]; then
1040
1276
  if (( verified_count >= expected_count )); then
1041
- log_debug "[VALIDATE] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
1277
+ log_debug "[FLOW] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
1042
1278
  else
1043
- log_debug "[VALIDATE] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
1279
+ log_debug "[FLOW] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
1044
1280
  fi
1045
1281
  else
1046
- log_debug "[VALIDATE] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
1282
+ log_debug "[FLOW] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
1047
1283
  fi
1048
1284
  fi
1049
1285
 
1050
1286
  # 3. Consensus: were both engines used?
1051
1287
  if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1052
1288
  if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
1053
- log_debug "[VALIDATE] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
1289
+ log_debug "[FLOW] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
1054
1290
  else
1055
- log_debug "[VALIDATE] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
1291
+ log_debug "[FLOW] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
1056
1292
  fi
1057
1293
  fi
1058
1294
 
1059
1295
  # 4. Engine match: did the configured engines actually run?
1060
- local worker_dispatches=$(grep -c '\[EXEC\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1061
- local verifier_dispatches=$(grep -c '\[EXEC\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1062
- log_debug "[VALIDATE] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
1296
+ local worker_dispatches=$(grep -c '\[FLOW\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1297
+ local verifier_dispatches=$(grep -c '\[FLOW\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1298
+ log_debug "[FLOW] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
1063
1299
 
1064
1300
  # 5. Fix loops: how many fix contracts were generated?
1065
- local fix_count=$(grep -c '\[EXEC\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
1066
- log_debug "[VALIDATE] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
1301
+ local fix_count=$(grep -c '\[DECIDE\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
1302
+ log_debug "[FLOW] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
1067
1303
 
1068
1304
  # 6. Circuit breakers: any triggered?
1069
- local cb_count=$(grep -c '\[EXEC\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
1070
- log_debug "[VALIDATE] circuit_breakers_triggered=$cb_count"
1305
+ local cb_count=$(grep -c '\[GOV\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
1306
+ log_debug "[FLOW] circuit_breakers_triggered=$cb_count"
1071
1307
 
1072
1308
  # 7. Overall result
1073
- log_debug "[VALIDATE] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
1309
+ log_debug "[FLOW] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
1074
1310
  fi
1075
1311
 
1076
1312
  echo ""
@@ -1110,6 +1346,7 @@ poll_for_signal() {
1110
1346
  local trigger_file="$4"
1111
1347
  local role="$5" # "worker" or "verifier"
1112
1348
  local nudge_count=0
1349
+ local api_retry_count=0
1113
1350
  local poll_start
1114
1351
  poll_start=$(date +%s)
1115
1352
 
@@ -1134,6 +1371,54 @@ poll_for_signal() {
1134
1371
  return 0 # success
1135
1372
  fi
1136
1373
 
1374
+ # A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
1375
+ # ONLY for Worker polling — Verifier waits for verdict file, not done-claim
1376
+ if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
1377
+ local dc_us_id
1378
+ dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
1379
+ if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
1380
+ log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
1381
+ log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
1382
+ echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim without signal)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
1383
+ return 0
1384
+ fi
1385
+ fi
1386
+
1387
+ # API transient-error recovery with bounded backoff
1388
+ local pane_output_for_retry
1389
+ pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
1390
+ local is_api_text_retry=0
1391
+ if [[ -n "$pane_output_for_retry" ]] &&
1392
+ ( echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])500([^[:digit:]]|$)' \
1393
+ || echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])529([^[:digit:]]|$)' \
1394
+ || echo "$pane_output_for_retry" | grep -qi 'overloaded' \
1395
+ || echo "$pane_output_for_retry" | grep -qi 'too many requests' \
1396
+ || echo "$pane_output_for_retry" | grep -qi 'service unavailable' ); then
1397
+ is_api_text_retry=1
1398
+ fi
1399
+
1400
+ if (( is_api_text_retry )) || is_api_error "$pane_id"; then
1401
+ (( api_retry_count++ ))
1402
+ log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
1403
+ if (( api_retry_count >= _API_MAX_RETRIES )); then
1404
+ log_error "API unavailable after ${_API_MAX_RETRIES} retries"
1405
+ write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries"
1406
+ return 2
1407
+ fi
1408
+ # A5: If pane shows "queued messages" or rate-limit corruption, restart pane
1409
+ if echo "$pane_output_for_retry" | grep -qi 'queued messages'; then
1410
+ log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
1411
+ log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
1412
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
1413
+ tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null; sleep 2
1414
+ wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
1415
+ fi
1416
+ sleep "$_API_RETRY_INTERVAL_S"
1417
+ continue
1418
+ else
1419
+ api_retry_count=0
1420
+ fi
1421
+
1137
1422
  # Check heartbeat freshness (tmux pattern)
1138
1423
  if [[ -f "$heartbeat_file" ]]; then
1139
1424
  if check_heartbeat_exited "$heartbeat_file"; then
@@ -1143,9 +1428,13 @@ poll_for_signal() {
1143
1428
  log " Signal file detected after process exit: $signal_file"
1144
1429
  return 0
1145
1430
  fi
1146
- log_error "$role exited without writing signal file"
1147
- # Attempt restart with exponential backoff
1148
- if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
1431
+ # Dispatch to engine-specific exit handler
1432
+ if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
1433
+ handle_worker_exit_codex "$ITERATION" "$signal_file"
1434
+ return 0
1435
+ fi
1436
+ # Claude path (or verifier of any engine)
1437
+ if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
1149
1438
  # Reset poll timer for the restart
1150
1439
  poll_start=$(date +%s)
1151
1440
  nudge_count=0
@@ -1163,7 +1452,7 @@ poll_for_signal() {
1163
1452
  (( HEARTBEAT_STALE_COUNT++ ))
1164
1453
  # Circuit breaker: 3 consecutive heartbeat stale events
1165
1454
  if (( HEARTBEAT_STALE_COUNT >= 3 )); then
1166
- log_debug "[EXEC] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
1455
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
1167
1456
  log_error "Circuit breaker: 3 consecutive heartbeat stale events"
1168
1457
  return 1
1169
1458
  fi
@@ -1181,12 +1470,23 @@ poll_for_signal() {
1181
1470
  fi
1182
1471
  fi
1183
1472
 
1473
+ # Dead pane detection during poll: check if claude/codex process died
1474
+ local poll_cmd
1475
+ poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
1476
+ # Dead pane detection — delegates to check_dead_pane() for engine-aware logic
1477
+ if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
1478
+ log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
1479
+ log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
1480
+ # Return failure so caller can handle recovery
1481
+ return 1
1482
+ fi
1483
+
1184
1484
  # Auto-approve permission prompts during poll
1185
1485
  local poll_capture
1186
1486
  poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
1187
1487
  if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
1188
1488
  log " Permission prompt detected during poll, auto-approving..."
1189
- log_debug "[EXEC] iter=$ITERATION permission_prompt_auto_approved=true"
1489
+ log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
1190
1490
  tmux send-keys -t "$pane_id" Enter
1191
1491
  sleep 0.5
1192
1492
  fi
@@ -1198,38 +1498,6 @@ poll_for_signal() {
1198
1498
  done
1199
1499
  }
1200
1500
 
1201
- # =============================================================================
1202
- # Circuit Breaker: Stale Context Detection
1203
- # =============================================================================
1204
-
1205
- # --- governance.md s7 step 8: Stale context detection ---
1206
- compute_context_hash() {
1207
- if [[ -f "$CONTEXT_FILE" ]]; then
1208
- md5 -q "$CONTEXT_FILE" 2>/dev/null || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1
1209
- else
1210
- echo "no-context"
1211
- fi
1212
- }
1213
-
1214
- check_stale_context() {
1215
- local current_hash
1216
- current_hash=$(compute_context_hash)
1217
-
1218
- if [[ "$current_hash" == "$PREV_CONTEXT_HASH" ]]; then
1219
- (( STALE_CONTEXT_COUNT++ ))
1220
- log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
1221
- if (( STALE_CONTEXT_COUNT >= 3 )); then
1222
- log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
1223
- return 1
1224
- fi
1225
- else
1226
- STALE_CONTEXT_COUNT=0
1227
- fi
1228
-
1229
- PREV_CONTEXT_HASH="$current_hash"
1230
- return 0
1231
- }
1232
-
1233
1501
  # =============================================================================
1234
1502
  # Consensus Verification (run two verifiers sequentially in same pane)
1235
1503
  # =============================================================================
@@ -1247,10 +1515,23 @@ run_single_verifier() {
1247
1515
  local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
1248
1516
  local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
1249
1517
 
1250
- # Clean previous Verifier session
1518
+ # Clean previous Verifier session (with dead pane detection)
1251
1519
  local verifier_cmd
1252
1520
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1253
- if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1521
+ if [[ -z "$verifier_cmd" ]]; then
1522
+ log " Verifier pane $VERIFIER_PANE is gone — replacing..."
1523
+ log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
1524
+ replace_worker_pane "$VERIFIER_PANE" "verifier"
1525
+ VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
1526
+ log " New verifier pane: $VERIFIER_PANE"
1527
+ elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
1528
+ log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
1529
+ log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
1530
+ tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
1531
+ sleep 0.2
1532
+ tmux send-keys -t "$VERIFIER_PANE" "clear" Enter 2>/dev/null
1533
+ sleep 0.3
1534
+ elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1254
1535
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1255
1536
  sleep 0.5
1256
1537
  tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
@@ -1265,55 +1546,19 @@ run_single_verifier() {
1265
1546
  # Remove previous verdict file
1266
1547
  rm -f "$VERDICT_FILE" 2>/dev/null
1267
1548
 
1268
- # Launch verifier
1549
+ # Launch verifier — dispatch to engine-specific function
1550
+ local verifier_launch
1269
1551
  if [[ "$engine" = "codex" ]]; then
1270
- # Codex: use non-interactive exec mode in pane (more reliable than TUI for sequential runs)
1271
- local codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1272
- log " Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
1273
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
1274
- tmux send-keys -t "$VERIFIER_PANE" Enter
1275
- log_debug "Verifier$suffix codex exec sent directly"
1552
+ verifier_launch="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1553
+ launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
1554
+ log_debug "Verifier$suffix codex exec dispatched"
1276
1555
  else
1277
- # Claude: use interactive TUI
1278
- local verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
1279
- log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
1280
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1281
- tmux send-keys -t "$VERIFIER_PANE" Enter
1282
-
1283
- if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1556
+ verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
1557
+ if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
1284
1558
  log_error "Verifier$suffix failed to start"
1285
1559
  return 1
1286
1560
  fi
1287
-
1288
- sleep 3
1289
- local verifier_instruction="Read and execute the instructions in $prompt_file"
1290
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1291
- tmux send-keys -t "$VERIFIER_PANE" Enter
1292
- log_debug "Verifier$suffix instruction sent directly"
1293
-
1294
- # Verify claude actually started working
1295
- local v_submit=0
1296
- while (( v_submit < 15 )); do
1297
- sleep 2
1298
- local v_check
1299
- v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
1300
- if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
1301
- log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
1302
- break
1303
- fi
1304
- # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1305
- if (( v_submit == 8 )); then
1306
- log_debug "Adaptive instruction retry: clearing line and re-typing"
1307
- tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
1308
- sleep 0.1
1309
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1310
- tmux send-keys -t "$VERIFIER_PANE" Enter
1311
- fi
1312
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1313
- sleep 0.3
1314
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1315
- (( v_submit++ ))
1316
- done
1561
+ log_debug "Verifier$suffix claude dispatched"
1317
1562
  fi
1318
1563
 
1319
1564
  # Poll for verdict
@@ -1341,6 +1586,10 @@ run_single_verifier() {
1341
1586
  # Claude: use full poll_for_signal with heartbeat/nudge
1342
1587
  log " Polling for verify-verdict.json ($suffix)..."
1343
1588
  if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
1589
+ local verifier_poll_rc=$?
1590
+ if (( verifier_poll_rc == 2 )); then
1591
+ return 1
1592
+ fi
1344
1593
  log_error "Verifier$suffix poll failed"
1345
1594
  return 1
1346
1595
  fi
@@ -1352,6 +1601,110 @@ run_single_verifier() {
1352
1601
  return 0
1353
1602
  }
1354
1603
 
# --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
# Usage:   run_sequential_final_verify <iter>
# Globals: reads  US_LIST, VERIFY_MODE, SIGNAL_FILE, VERDICT_FILE, LOGS_DIR,
#                 VERIFIER_PANE, VERIFIER_HEARTBEAT, VERIFIER_ENGINE, VERIFIER_MODEL,
#                 VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING, CODEX_BIN, CLAUDE_BIN,
#                 VERIFICATION_CMD
#          writes FAILED_US
# Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
# Sets FAILED_US global on failure (the failing story id, or "integration").
#
# The signal file is overwritten with a synthetic per-US signal while each story
# is verified; its previous content is snapshotted once up front and written
# back on every exit path, so the override really is temporary.
run_sequential_final_verify() {
  local iter="$1"
  # All locals are declared once here rather than inside the loop: re-running
  # `local name` on an already-set variable is noisy under some zsh versions.
  local us
  local orig_signal=""
  local rc=0
  FAILED_US=""

  log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
  log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"

  # Snapshot the real signal ONCE, before the first override. Capturing it
  # inside the loop would pick up the synthetic signal from the previous story.
  if [[ -f "$SIGNAL_FILE" ]]; then
    orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
  fi

  # US_LIST is a comma-separated list of story ids (no embedded whitespace).
  for us in $(echo "$US_LIST" | tr ',' ' '); do
    if ! _sequential_final_verify_one "$iter" "$us"; then
      FAILED_US="$us"
      rc=1
      break
    fi
  done

  # Put the original signal back (only when there was one to restore).
  if [[ -n "$orig_signal" ]]; then
    printf '%s\n' "$orig_signal" | atomic_write "$SIGNAL_FILE"
  fi

  if (( rc != 0 )); then
    return 1
  fi

  # Integration check: run tests if VERIFICATION_CMD is set.
  # VERIFICATION_CMD is an operator-supplied command line, hence the eval.
  if [[ -n "${VERIFICATION_CMD:-}" ]]; then
    log " Running integration test suite after sequential verify..."
    log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
    if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
      log " Integration test suite FAILED"
      FAILED_US="integration"
      return 2
    fi
    log " Integration test suite PASSED"
  fi

  log " Sequential final verify: ALL PASSED"
  return 0
}

# Verify one user story in the verifier pane.
# Usage:   _sequential_final_verify_one <iter> <us_id>
# Returns: 0 when the verdict is "pass"; 1 on launch failure, poll failure,
#          or any other verdict. The caller records FAILED_US.
_sequential_final_verify_one() {
  local iter="$1"
  local us="$2"
  local iter_tag verifier_prompt verifier_cmd verifier_launch verdict
  local poll_rc=0

  iter_tag=$(printf '%03d' "$iter")
  log " Final verify: checking $us..."

  # Temporarily override signal file to scope verifier to this US
  printf '{"status":"verify","us_id":"%s","summary":"sequential final verify"}\n' "$us" \
    | atomic_write "$SIGNAL_FILE"

  # Write scoped verifier trigger
  write_verifier_trigger "$iter"
  verifier_prompt="$LOGS_DIR/iter-${iter_tag}.verifier-prompt.md"

  # Clean verifier pane: stop a still-running agent before reusing the pane
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null; sleep 2
  fi
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true

  # Remove any stale verdict BEFORE launching, so a fast verifier's fresh
  # verdict cannot be deleted after the fact (same order as run_single_verifier).
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier
  if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
    # Same non-interactive `codex exec "<prompt>"` form used by run_single_verifier.
    verifier_launch="${CODEX_BIN:-codex} exec \"\$(cat $verifier_prompt)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
    launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
  else
    verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
    if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"; then
      log_error "Failed to launch verifier for $us"
      return 1
    fi
  fi

  # Poll for verdict. poll_for_signal takes
  #   <signal_file> <heartbeat_file> <pane_id> <trigger/launch> <role>
  # and the role must contain "erifier" so worker-only fallbacks stay disabled.
  poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier" || poll_rc=$?
  if (( poll_rc != 0 )); then
    log_error "Verifier poll failed for $us (rc=$poll_rc)"
    return 1
  fi

  # Check verdict
  verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
  if [[ "$verdict" != "pass" ]]; then
    log " Sequential final verify FAILED at $us"
    log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
    return 1
  fi
  log " Sequential final verify: $us PASSED"

  # Archive per-US final verdict (best effort)
  cp "$VERDICT_FILE" "$LOGS_DIR/iter-${iter_tag}.final-verdict-${us}.json" 2>/dev/null
  return 0
}
1689
+
# --- US-005: Decide whether this verify signal gets consensus verification ---
# Usage:   _should_use_consensus [us_id]
# Returns: 0 when consensus (both verifiers) should run, 1 for a single engine.
# Two independent switches can enable it:
#   - VERIFY_CONSENSUS=1, narrowed by CONSENSUS_SCOPE (all | final-only)
#   - FINAL_CONSENSUS=1, which applies to the final "ALL" verify only
_should_use_consensus() {
  local signal_us_id="${1:-}"

  if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
    # Scope "all": every verify signal uses consensus.
    if [[ "$CONSENSUS_SCOPE" == "all" ]]; then
      return 0
    fi
    # Scope "final-only": just the final ALL verify.
    if [[ "$CONSENSUS_SCOPE" == "final-only" && "$signal_us_id" == "ALL" ]]; then
      return 0
    fi
  fi

  # FINAL_CONSENSUS works regardless of VERIFY_CONSENSUS.
  if [[ "$FINAL_CONSENSUS" = "1" && "$signal_us_id" == "ALL" ]]; then
    return 0
  fi

  return 1
}
1707
+
1355
1708
  # --- US-004: Run consensus verification (claude + codex sequentially) ---
1356
1709
  run_consensus_verification() {
1357
1710
  local iter="$1"
@@ -1362,32 +1715,59 @@ run_consensus_verification() {
1362
1715
  CLAUDE_VERDICT=""
1363
1716
  CODEX_VERDICT=""
1364
1717
 
1365
- while (( CONSENSUS_ROUND < 3 )); do
1718
+ while (( CONSENSUS_ROUND < 6 )); do
1366
1719
  (( CONSENSUS_ROUND++ ))
1367
- log " Consensus round $CONSENSUS_ROUND/3..."
1720
+ log " Consensus round $CONSENSUS_ROUND/6..."
1368
1721
 
1369
1722
  # Run claude verifier first
1723
+ local _claude_t0=$(date +%s)
1370
1724
  if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
1371
1725
  log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
1372
1726
  return 1
1373
1727
  fi
1728
+ ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
1374
1729
  CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
1375
- log_debug "[EXEC] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
1730
+ # A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
1731
+ if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
1732
+ log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
1733
+ log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
1734
+ rm -f "$claude_verdict_file" 2>/dev/null
1735
+ if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
1736
+ log_error "Claude verifier retry also failed"
1737
+ return 1
1738
+ fi
1739
+ CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
1740
+ if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
1741
+ log_error "Claude verdict still null after retry — consensus cannot proceed"
1742
+ return 1
1743
+ fi
1744
+ fi
1745
+ log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
1746
+
1747
+ # F8: --consensus-fail-fast — skip second verifier if first fails
1748
+ if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
1749
+ log " Consensus fail-fast: claude=fail, skipping codex verifier"
1750
+ log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
1751
+ CODEX_VERDICT="skipped"
1752
+ return 2 # disagreement/fail signal
1753
+ fi
1376
1754
 
1377
1755
  # Run codex verifier second
1756
+ local _codex_t0=$(date +%s)
1378
1757
  if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
1379
1758
  log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
1380
1759
  return 1
1381
1760
  fi
1761
+ ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
1382
1762
  CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
1383
- log_debug "[EXEC] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
1763
+ log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
1384
1764
 
1385
1765
  log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
1386
1766
  local _combined_action="retry"
1387
1767
  if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then _combined_action="pass"
1388
- elif (( CONSENSUS_ROUND >= 3 )); then _combined_action="blocked"
1768
+ elif (( CONSENSUS_ROUND >= 6 )); then _combined_action="blocked"
1389
1769
  fi
1390
- log_debug "[EXEC] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"
1770
+ log_debug "[GOV] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"
1391
1771
 
1392
1772
  # Both pass → success
1393
1773
  if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
@@ -1409,7 +1789,7 @@ run_consensus_verification() {
1409
1789
  fi
1410
1790
 
1411
1791
  # Consensus disagreement
1412
- log_debug "[EXEC] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"
1792
+ log_debug "[GOV] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"
1413
1793
 
1414
1794
  # NOTE: pre_existing_failure heuristic was removed (v0.3.5).
1415
1795
  # It used unreliable grep-in-description string matching to classify
@@ -1442,14 +1822,19 @@ run_consensus_verification() {
1442
1822
 
1443
1823
  # If this is not the last round, the caller will dispatch the Worker with the fix contract
1444
1824
  # For now, write a fail verdict so the main loop can handle the fix loop
1445
- if (( CONSENSUS_ROUND < 3 )); then
1446
- # Create a merged fail verdict for the main loop
1825
+ if (( CONSENSUS_ROUND < 6 )); then
1826
+ # Create a merged fail verdict for the main loop — include issues from BOTH verdicts
1827
+ local merged_issues="[]"
1828
+ local claude_issues codex_issues
1829
+ claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_verdict_file" 2>/dev/null || echo '[]')
1830
+ codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_verdict_file" 2>/dev/null || echo '[]')
1831
+ merged_issues=$(echo "$claude_issues $codex_issues" | jq -s 'add // []')
1447
1832
  {
1448
1833
  echo '{'
1449
1834
  echo ' "verdict": "fail",'
1450
1835
  echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
1451
- echo ' "summary": "Consensus disagreement (round '"$CONSENSUS_ROUND"'/3): claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
1452
- echo ' "issues": [],'
1836
+ echo ' "summary": "Consensus disagreement (round '"$CONSENSUS_ROUND"'/6): claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
1837
+ echo ' "issues": '"$merged_issues"','
1453
1838
  echo ' "recommended_state_transition": "continue",'
1454
1839
  echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": '"$CONSENSUS_ROUND"' }'
1455
1840
  echo '}'
@@ -1458,56 +1843,91 @@ run_consensus_verification() {
1458
1843
  fi
1459
1844
  done
1460
1845
 
1461
- # Max consensus rounds exceeded
1462
- log_error "Consensus failed after 3 rounds"
1846
+ # Max consensus rounds exceeded — include issues from both verdicts
1847
+ log_error "Consensus failed after 6 rounds"
1848
+ local final_claude_issues final_codex_issues final_merged_issues
1849
+ final_claude_issues=$(jq -c '[.issues[]? | . + {"source": "claude"}]' "$claude_verdict_file" 2>/dev/null || echo '[]')
1850
+ final_codex_issues=$(jq -c '[.issues[]? | . + {"source": "codex"}]' "$codex_verdict_file" 2>/dev/null || echo '[]')
1851
+ final_merged_issues=$(echo "$final_claude_issues $final_codex_issues" | jq -s 'add // []')
1463
1852
  {
1464
1853
  echo '{'
1465
1854
  echo ' "verdict": "fail",'
1466
1855
  echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
1467
- echo ' "summary": "Consensus failed after 3 rounds: claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
1468
- echo ' "issues": [],'
1856
+ echo ' "summary": "Consensus failed after 6 rounds: claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
1857
+ echo ' "issues": '"$final_merged_issues"','
1469
1858
  echo ' "recommended_state_transition": "blocked",'
1470
- echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": 3 }'
1859
+ echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": 6 }'
1471
1860
  echo '}'
1472
1861
  } | atomic_write "$VERDICT_FILE"
1473
1862
  return 1
1474
1863
  }
1475
1864
 
1476
- # =============================================================================
1477
- # Security Warning
1478
- # =============================================================================
1479
-
1480
- print_security_warning() {
1481
- echo ""
1482
- echo "================================================================"
1483
- echo " WARNING: Running with --dangerously-skip-permissions"
1484
- echo ""
1485
- echo " The claude CLI will execute tools (file writes, shell commands)"
1486
- echo " without asking for confirmation. Only run this on code you"
1487
- echo " trust in an environment you control."
1488
- echo "================================================================"
1489
- echo ""
1490
- }
1491
-
1492
1865
  # =============================================================================
1493
1866
  # Main Leader Loop
1494
1867
  # =============================================================================
1495
1868
 
1496
1869
  main() {
1497
1870
  # --- Lockfile: prevent duplicate execution ---
1498
- local lockfile="$DESK/logs/.rlp-desk-$SLUG.lock"
1871
+ local lockfile="$LOCKFILE_PATH"
1499
1872
  mkdir -p "$(dirname "$lockfile")" 2>/dev/null
1500
1873
  if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
1501
1874
  local lock_pid
1502
1875
  lock_pid=$(cat "$lockfile" 2>/dev/null)
1503
1876
  if kill -0 "$lock_pid" 2>/dev/null; then
1504
- log_error "Another instance is already running (PID $lock_pid)"
1877
+ log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
1505
1878
  exit 1
1506
1879
  fi
1507
1880
  # Stale lock — overwrite
1881
+ log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
1508
1882
  echo $$ > "$lockfile"
1883
+ LOCKFILE_ACQUIRED=1
1884
+ else
1885
+ LOCKFILE_ACQUIRED=1
1886
+ fi
1887
+ trap cleanup EXIT INT TERM
1888
+ mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
1889
+
1890
+ # --- Analytics directory: create only when --debug or --with-self-verification ---
1891
+ if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
1892
+ mkdir -p "$ANALYTICS_DIR" 2>/dev/null
1893
+ fi
1894
+
1895
+ # --- debug.log versioning (in analytics dir) ---
1896
+ if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
1897
+ local dbg_n=1
1898
+ while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
1899
+ (( dbg_n++ ))
1900
+ done
1901
+ mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
1902
+ fi
1903
+
1904
+ # --- campaign.jsonl versioning (in analytics dir, after mkdir) ---
1905
+ if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
1906
+ if [[ -f "$CAMPAIGN_JSONL" ]]; then
1907
+ local cj_n=1
1908
+ while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
1909
+ (( cj_n++ ))
1910
+ done
1911
+ mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
1912
+ fi
1913
+ fi
1914
+
1915
+ # --- metadata.json: write at campaign start ---
1916
+ if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
1917
+ jq -n \
1918
+ --arg slug "$SLUG" \
1919
+ --arg project_root "$ROOT" \
1920
+ --arg campaign_status "running" \
1921
+ --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1922
+ --arg end_time "" \
1923
+ --arg worker_model "$WORKER_MODEL" \
1924
+ --arg verifier_model "$VERIFIER_MODEL" \
1925
+ --argjson debug "$DEBUG" \
1926
+ --argjson with_sv "$WITH_SELF_VERIFICATION" \
1927
+ --argjson consensus "$VERIFY_CONSENSUS" \
1928
+ '{slug: $slug, project_root: $project_root, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
1929
+ > "$METADATA_FILE"
1509
1930
  fi
1510
- mkdir -p "$LOGS_DIR" 2>/dev/null
1511
1931
 
1512
1932
  # --- Startup ---
1513
1933
  log "Ralph Desk Tmux Runner starting..."
@@ -1518,6 +1938,7 @@ main() {
1518
1938
  log " Verifier model: $VERIFIER_MODEL"
1519
1939
  log " Verify mode: $VERIFY_MODE"
1520
1940
  log " Verify consensus:$VERIFY_CONSENSUS"
1941
+ log " Final consensus: $FINAL_CONSENSUS"
1521
1942
  log " Consensus scope: $CONSENSUS_SCOPE"
1522
1943
  log " Poll interval: ${POLL_INTERVAL}s"
1523
1944
  log " Iter timeout: ${ITER_TIMEOUT}s"
@@ -1531,10 +1952,11 @@ main() {
1531
1952
  fi
1532
1953
  local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
1533
1954
 
1534
- log_debug "[PLAN] slug=$SLUG us_count=$us_count us_list=$us_list"
1535
- log_debug "[PLAN] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
1536
- log_debug "[PLAN] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
1537
- log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
1955
+ log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
1956
+ log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
1957
+ log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
1958
+ log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
1959
+ log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
1538
1960
 
1539
1961
  if [[ "$VERIFY_MODE" = "per-us" ]]; then
1540
1962
  # Build expected flow
@@ -1543,13 +1965,13 @@ main() {
1543
1965
  expected_flow="${expected_flow}worker->verify($us)->"
1544
1966
  done
1545
1967
  expected_flow="${expected_flow}verify(ALL)->COMPLETE"
1546
- log_debug "[PLAN] expected_flow=$expected_flow"
1968
+ log_debug "[OPTION] expected_flow=$expected_flow"
1547
1969
  else
1548
- log_debug "[PLAN] expected_flow=worker(all)->verify(ALL)->COMPLETE"
1970
+ log_debug "[OPTION] expected_flow=worker(all)->verify(ALL)->COMPLETE"
1549
1971
  fi
1550
1972
 
1551
1973
  if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1552
- log_debug "[PLAN] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
1974
+ log_debug "[OPTION] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
1553
1975
  fi
1554
1976
  fi
1555
1977
 
@@ -1559,8 +1981,35 @@ main() {
1559
1981
  if [[ -f "$prd_file" ]]; then
1560
1982
  US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1561
1983
  fi
1984
+
1985
+ # Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
1986
+ local memory_file="$DESK/memos/${SLUG}-memory.md"
1987
+ if [[ -f "$memory_file" ]]; then
1988
+ local completed_us
1989
+ completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
1990
+ if [[ -n "$completed_us" ]]; then
1991
+ VERIFIED_US="$completed_us"
1992
+ log " Loaded completed stories from memory: $VERIFIED_US"
1993
+ log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
1994
+ fi
1995
+ fi
1996
+
1997
+ # D1: Fallback — restore verified_us from status.json if memory had none
1998
+ if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
1999
+ local status_verified
2000
+ status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
2001
+ if [[ -n "$status_verified" ]]; then
2002
+ VERIFIED_US="$status_verified"
2003
+ log " Restored verified_us from status.json: $VERIFIED_US"
2004
+ log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
2005
+ fi
2006
+ fi
1562
2007
  fi
1563
2008
 
2009
+ # Initialize PRD snapshot state for live update detection
2010
+ PREV_PRD_HASH=$(compute_prd_hash)
2011
+ PREV_PRD_US_LIST=$(count_prd_us)
2012
+
1564
2013
  # Dependency checks
1565
2014
  check_dependencies
1566
2015
 
@@ -1583,7 +2032,7 @@ main() {
1583
2032
  PREV_CONTEXT_HASH=$(compute_context_hash)
1584
2033
 
1585
2034
  # --- governance.md s7: Leader Loop ---
1586
- local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # absolute max per iteration (no extensions beyond this)
2035
+ local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced — Worker extends indefinitely when active
1587
2036
 
1588
2037
  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
1589
2038
  log ""
@@ -1592,7 +2041,7 @@ main() {
1592
2041
  ITER_START_TIME=$(date +%s)
1593
2042
  local _iter_contract=""
1594
2043
  _iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
1595
- log_debug "[EXEC] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
2044
+ log_debug "[FLOW] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
1596
2045
 
1597
2046
  # --- governance.md s7 step 1: Check sentinels ---
1598
2047
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
@@ -1625,122 +2074,92 @@ main() {
1625
2074
  # Reset per-iteration state
1626
2075
  local worker_nudge_count=0
1627
2076
  local verifier_nudge_count=0
2077
+ ITER_VERIFIER_START=""
2078
+ ITER_VERIFIER_END=""
2079
+
2080
+ # --- US-004: detect PRD changes for live update + re-split ---
2081
+ check_prd_update
1628
2082
 
1629
2083
  # --- governance.md s7 step 4: Build worker prompt + trigger ---
1630
2084
  write_worker_trigger "$ITERATION"
1631
2085
  local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
1632
2086
 
2087
+ # AC1: capture worker start timestamp
2088
+ ITER_WORKER_START=$(date +%s)
2089
+
1633
2090
  update_status "worker" "running"
1634
2091
 
1635
- # --- governance.md s7 step 5: Execute Worker (interactive TUI, tmux pattern) ---
1636
- # Step 5a: Launch interactive worker engine in Worker pane
2092
+ # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
2093
+ log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
2094
+
1637
2095
  local worker_launch
1638
2096
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
1639
- worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1640
- log " Launching Worker codex in pane $WORKER_PANE..."
2097
+ local worker_trigger="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-trigger.sh"
2098
+ worker_launch="bash $worker_trigger"
2099
+ launch_worker_codex "$WORKER_PANE" "$worker_trigger" "$ITERATION"
1641
2100
  else
1642
2101
  worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
1643
- log " Launching Worker claude in pane $WORKER_PANE..."
1644
- fi
1645
- tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
1646
- tmux send-keys -t "$WORKER_PANE" Enter
1647
- log_debug "[EXEC] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
1648
-
1649
- # Step 5b: Wait for claude TUI to be ready (tmux pattern)
1650
- if ! wait_for_pane_ready "$WORKER_PANE" 30; then
1651
- log_error "Worker claude failed to start"
1652
- write_blocked_sentinel "Worker claude failed to start in pane"
1653
- update_status "blocked" "worker_start_failed"
1654
- return 1
1655
- fi
1656
-
1657
- # Step 5c: Wait for claude to fully initialize, then send instruction directly
1658
- sleep 3
1659
- local worker_instruction="Read and execute the instructions in $worker_prompt"
1660
- tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1661
- tmux send-keys -t "$WORKER_PANE" Enter
1662
- log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
1663
-
1664
- # Verify claude actually started working — keep sending C-m until activity detected
1665
- local submit_attempts=0
1666
- while (( submit_attempts < 15 )); do
1667
- sleep 2
1668
- local pane_check
1669
- pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
1670
- if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1671
- log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
1672
- log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
1673
- break
1674
- fi
1675
- # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1676
- if (( submit_attempts == 8 )); then
1677
- log_debug "Adaptive instruction retry: clearing line and re-typing"
1678
- tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
1679
- sleep 0.1
1680
- tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1681
- tmux send-keys -t "$WORKER_PANE" Enter
2102
+ if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
2103
+ write_blocked_sentinel "Worker claude failed to start in pane"
2104
+ update_status "blocked" "worker_start_failed"
2105
+ return 1
1682
2106
  fi
1683
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1684
- sleep 0.3
1685
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1686
- (( submit_attempts++ ))
1687
- done
1688
- if (( submit_attempts >= 15 )); then
1689
- log " WARNING: Could not confirm Worker started working after 15 attempts"
1690
- log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=15"
1691
2107
  fi
1692
2108
 
1693
2109
  # --- governance.md s7 step 5+6: Poll for Worker completion ---
1694
2110
  log " Polling for iter-signal.json..."
1695
2111
  local worker_poll_done=0
1696
2112
  while (( ! worker_poll_done )); do
2113
+ local worker_poll_rc=0
1697
2114
  if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
1698
2115
  worker_poll_done=1
1699
- log_debug "[EXEC] iter=$ITERATION poll_signal_received=true"
2116
+ log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
1700
2117
  else
2118
+ worker_poll_rc=$?
2119
+ if (( worker_poll_rc == 2 )); then
2120
+ return 1
2121
+ fi
1701
2122
  # Check if Worker is still actively running (not stuck)
1702
2123
  local worker_cmd
1703
2124
  worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
1704
2125
  if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
1705
- # Check hard ceiling before extending
2126
+ # Process alive — extend indefinitely (no hard ceiling kill)
2127
+ # Stale-context breaker and nudge system handle truly stuck workers
1706
2128
  local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
2129
+ local ceiling_exceeded=""
1707
2130
  if (( iter_elapsed >= HARD_CEILING )); then
1708
- log_error "Worker hit hard ceiling (${HARD_CEILING}s = 3x iter_timeout). Killing iteration."
1709
- log_debug "[EXEC] iter=$ITERATION hard_ceiling_hit=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd"
1710
- tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1711
- sleep 1
1712
- WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
1713
- update_status "worker" "hard_timeout"
1714
- worker_poll_done=1
1715
- break
2131
+ ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s not enforced, logged only]"
2132
+ log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
2133
+ log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
1716
2134
  fi
1717
- log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s/${HARD_CEILING}s)"
1718
- log_debug "[EXEC] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s"
1719
- log_debug "[EXEC] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
2135
+ log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
2136
+ log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
2137
+ log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
1720
2138
  update_status "worker" "slow"
1721
2139
  # Loop continues — re-poll same iteration
1722
2140
  else
1723
2141
  # Worker is truly dead/stuck
1724
2142
  (( MONITOR_FAILURE_COUNT++ ))
1725
- log_debug "[EXEC] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
2143
+ log_debug "[GOV] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
1726
2144
  if (( MONITOR_FAILURE_COUNT >= 3 )); then
1727
- log_debug "[EXEC] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
2145
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
1728
2146
  write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
1729
2147
  update_status "blocked" "monitor_failures"
1730
2148
  return 1
1731
2149
  fi
1732
2150
  log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
1733
2151
  update_status "worker" "poll_failed"
1734
- worker_poll_done=1 # exit poll loop, continue to next iteration
1735
- log_debug "[EXEC] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
1736
- # Worker is truly dead/stuck kill and replace pane (omc-teams pattern)
1737
- WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
2152
+ log_debug "[FLOW] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
2153
+ # Worker is truly dead/stuck — BLOCK and let user decide
2154
+ write_blocked_sentinel "Worker process dead/stuck (poll failed). Pane preserved for inspection."
2155
+ update_status "blocked" "worker_dead"
2156
+ return 1
1738
2157
  fi
1739
2158
  fi
1740
2159
  done
1741
2160
 
1742
2161
  if [[ ! -f "$SIGNAL_FILE" ]]; then
1743
- log_debug "[EXEC] iter=$ITERATION no_signal_after_poll=true continuing"
2162
+ log_debug "[FLOW] iter=$ITERATION no_signal_after_poll=true continuing"
1744
2163
  # No signal — monitor failure, go to next iteration
1745
2164
  continue
1746
2165
  fi
@@ -1748,6 +2167,11 @@ main() {
1748
2167
  # Reset monitor failure count on success
1749
2168
  MONITOR_FAILURE_COUNT=0
1750
2169
 
2170
+ # AC1: capture worker end timestamp; reset consensus timing
2171
+ ITER_WORKER_END=$(date +%s)
2172
+ ITER_VERIFIER_CLAUDE_DURATION_S=""
2173
+ ITER_VERIFIER_CODEX_DURATION_S=""
2174
+
1751
2175
  # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
1752
2176
  local signal_status
1753
2177
  signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
@@ -1759,7 +2183,7 @@ main() {
1759
2183
  # Read us_id early for the [FLOW] worker_signal debug log (also used later in verify branch)
1760
2184
  local signal_us_id_early=""
1761
2185
  signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
1762
- log_debug "[EXEC] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
2186
+ log_debug "[FLOW] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
1763
2187
 
1764
2188
  case "$signal_status" in
1765
2189
  continue)
@@ -1774,17 +2198,34 @@ main() {
1774
2198
  signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
1775
2199
  log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
1776
2200
 
2201
+ # AC1: capture verifier start timestamp
2202
+ ITER_VERIFIER_START=$(date +%s)
2203
+
1777
2204
  update_status "verifier" "running"
1778
2205
 
1779
- # --- Consensus scope check ---
1780
- local use_consensus=0
1781
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1782
- case "$CONSENSUS_SCOPE" in
1783
- all) use_consensus=1 ;;
1784
- final-only) [[ "$signal_us_id" == "ALL" ]] && use_consensus=1 ;;
1785
- esac
2206
+ # --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
2207
+ if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
2208
+ log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
2209
+ local seq_rc=0
2210
+ run_sequential_final_verify "$ITERATION" || seq_rc=$?
2211
+ if (( seq_rc == 0 )); then
2212
+ write_complete_sentinel "Sequential final verify passed (all US verified individually)"
2213
+ update_status "complete" "pass"
2214
+ write_campaign_jsonl "$ITERATION" "ALL" "pass"
2215
+ return 0
2216
+ else
2217
+ # Sequential verify failed — fall through to fix loop with failed US
2218
+ log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
2219
+ signal_us_id="${FAILED_US:-ALL}"
2220
+ # Synthesize a fail verdict for the fix loop
2221
+ echo "{\"verdict\":\"fail\",\"summary\":\"Sequential final verify failed at ${FAILED_US:-unknown}\",\"issues\":[{\"severity\":\"critical\",\"criterion\":\"${FAILED_US:-ALL}\",\"description\":\"Failed during sequential final verification\"}]}" | atomic_write "$VERDICT_FILE"
2222
+ fi
1786
2223
  fi
1787
2224
 
2225
+ # --- Consensus scope check (US-005: _should_use_consensus handles VERIFY_CONSENSUS + FINAL_CONSENSUS) ---
2226
+ local use_consensus=0
2227
+ _should_use_consensus "$signal_us_id" && use_consensus=1
2228
+
1788
2229
  # --- Consensus vs single verification ---
1789
2230
  if (( use_consensus )); then
1790
2231
  # US-004: Run consensus verification (claude + codex sequentially)
@@ -1806,78 +2247,65 @@ main() {
1806
2247
  write_verifier_trigger "$ITERATION"
1807
2248
  local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
1808
2249
 
1809
- # Step 7a: Clean previous Verifier session if running
2250
+ # Step 7a: Clean previous Verifier session (with dead pane detection)
1810
2251
  local verifier_cmd
1811
2252
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1812
- if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
2253
+ if [[ -z "$verifier_cmd" ]]; then
2254
+ log " Verifier pane $VERIFIER_PANE is gone — replacing..."
2255
+ log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
2256
+ replace_worker_pane "$VERIFIER_PANE" "verifier"
2257
+ VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
2258
+ log " New verifier pane: $VERIFIER_PANE"
2259
+ elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
2260
+ log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
2261
+ log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
2262
+ tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
2263
+ sleep 0.2
2264
+ tmux send-keys -t "$VERIFIER_PANE" "clear" Enter 2>/dev/null
2265
+ sleep 0.3
2266
+ elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1813
2267
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1814
2268
  sleep 0.5
1815
2269
  tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1816
2270
  sleep 2
1817
- wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
1818
2271
  fi
2272
+ wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
1819
2273
 
1820
2274
  local verifier_launch
1821
2275
  if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
1822
2276
  verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1823
- log " Launching Verifier codex in pane $VERIFIER_PANE..."
1824
2277
  else
1825
2278
  verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
1826
- log " Launching Verifier claude in pane $VERIFIER_PANE..."
1827
2279
  fi
1828
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1829
- tmux send-keys -t "$VERIFIER_PANE" Enter
1830
- log_debug "[EXEC] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
1831
-
1832
- # Step 7b: Wait for TUI to be ready
1833
- if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1834
- log_error "Verifier failed to start"
1835
- update_status "verifier" "start_failed"
1836
- continue
1837
- fi
1838
-
1839
- # Step 7c: Send instruction
1840
- sleep 3
1841
- local verifier_instruction="Read and execute the instructions in $verifier_prompt"
1842
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1843
- tmux send-keys -t "$VERIFIER_PANE" Enter
1844
- log_debug "Verifier instruction sent directly"
2280
+ log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
1845
2281
 
1846
- # Verify verifier actually started working
1847
- local vs_submit=0
1848
- while (( vs_submit < 15 )); do
1849
- sleep 2
1850
- local vs_check
1851
- vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
1852
- if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1853
- log_debug "Verifier started working after $((vs_submit + 1)) checks"
1854
- break
1855
- fi
1856
- # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1857
- if (( vs_submit == 8 )); then
1858
- log_debug "Adaptive instruction retry: clearing line and re-typing"
1859
- tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
1860
- sleep 0.1
1861
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1862
- tmux send-keys -t "$VERIFIER_PANE" Enter
2282
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2283
+ launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
2284
+ else
2285
+ if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
2286
+ update_status "verifier" "start_failed"
2287
+ continue
1863
2288
  fi
1864
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1865
- sleep 0.3
1866
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1867
- (( vs_submit++ ))
1868
- done
2289
+ fi
1869
2290
 
1870
2291
  # Poll for verify-verdict.json
1871
2292
  log " Polling for verify-verdict.json..."
1872
2293
  if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
2294
+ local verifier_poll_rc=$?
2295
+ if (( verifier_poll_rc == 2 )); then
2296
+ return 1
2297
+ fi
1873
2298
  log_error "Verifier poll failed"
1874
- update_status "verifier" "poll_failed"
1875
- # Verifier is dead/stuck kill and replace pane (omc-teams pattern)
1876
- VERIFIER_PANE=$(replace_worker_pane "$VERIFIER_PANE" "verifier")
1877
- continue
2299
+ # Verifier is dead/stuck — BLOCK and let user decide
2300
+ write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection."
2301
+ update_status "blocked" "verifier_dead"
2302
+ return 1
1878
2303
  fi
1879
2304
  fi
1880
2305
 
2306
+ # AC1: capture verifier end timestamp
2307
+ ITER_VERIFIER_END=$(date +%s)
2308
+
1881
2309
  # --- governance.md s7 step 7: Read verdict via jq ---
1882
2310
  local verdict
1883
2311
  verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
@@ -1889,12 +2317,24 @@ main() {
1889
2317
  log " Verifier: verdict=$verdict recommended=$recommended"
1890
2318
  log " Verifier summary: \"$verdict_summary\""
1891
2319
  local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
1892
- log_debug "[EXEC] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
2320
+ log_debug "[GOV] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
1893
2321
 
1894
2322
  case "$verdict" in
1895
2323
  pass)
1896
2324
  CONSECUTIVE_FAILURES=0
1897
2325
  CONSENSUS_ROUND=0
2326
+ _SAME_US_FAIL_COUNT=0
2327
+ _LAST_FAILED_US=""
2328
+ if (( _MODEL_UPGRADED )); then
2329
+ log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
2330
+ log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
2331
+ WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
2332
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
2333
+ WORKER_CODEX_MODEL="$WORKER_MODEL"
2334
+ WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
2335
+ fi
2336
+ _MODEL_UPGRADED=0
2337
+ fi
1898
2338
 
1899
2339
  # --- Per-US tracking ---
1900
2340
  if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
@@ -1905,13 +2345,14 @@ main() {
1905
2345
  VERIFIED_US="$signal_us_id"
1906
2346
  fi
1907
2347
  log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
1908
- log_debug "[EXEC] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
2348
+ log_debug "[FLOW] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
1909
2349
  update_status "verifier" "pass_us"
1910
2350
  # Worker will do next US on next iteration
1911
2351
  elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
1912
2352
  # Final full verify passed or complete recommended
1913
2353
  write_complete_sentinel "$verdict_summary"
1914
2354
  update_status "complete" "pass"
2355
+ write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
1915
2356
  return 0
1916
2357
  else
1917
2358
  log " Verifier passed but did not recommend complete. Continuing."
@@ -1921,6 +2362,7 @@ main() {
1921
2362
  fail)
1922
2363
  # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
1923
2364
  (( CONSECUTIVE_FAILURES++ ))
2365
+ check_model_upgrade "${signal_us_id:-unknown}"
1924
2366
  local verdict_summary_fail
1925
2367
  verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
1926
2368
  log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
@@ -1940,13 +2382,21 @@ main() {
1940
2382
  jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
1941
2383
  } | atomic_write "$fix_contract"
1942
2384
  log " Fix contract: $fix_contract"
1943
- log_debug "[EXEC] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
1944
-
1945
- # Circuit breaker: consecutive failures
1946
- if (( CONSECUTIVE_FAILURES >= 3 )); then
1947
- log_debug "[EXEC] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"3 consecutive verification failures\""
1948
- log_error "Circuit breaker: 3 consecutive verification failures"
1949
- write_blocked_sentinel "3 consecutive verification failures"
2385
+ log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
2386
+
2387
+ # Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
2388
+ if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
2389
+ # For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
2390
+ _ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
2391
+ if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
2392
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
2393
+ log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
2394
+ write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2395
+ else
2396
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
2397
+ log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2398
+ write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2399
+ fi
1950
2400
  update_status "blocked" "consecutive_failures"
1951
2401
  return 1
1952
2402
  fi
@@ -1985,12 +2435,19 @@ main() {
1985
2435
  ;;
1986
2436
  esac
1987
2437
 
2438
+ # --- step 7d: Archive iteration artifacts before cleanup ---
2439
+ archive_iter_artifacts "$ITERATION"
2440
+
2441
+ # --- AC5: Write per-iteration cost estimate ---
2442
+ write_cost_log "$ITERATION"
2443
+ write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
2444
+
1988
2445
  # --- governance.md s7 step 8: Write result log ---
1989
2446
  write_result_log "$ITERATION" "$signal_status"
1990
2447
 
1991
2448
  # --- governance.md s7 step 8: Circuit breaker - stale context check ---
1992
2449
  if ! check_stale_context; then
1993
- log_debug "[EXEC] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
2450
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
1994
2451
  write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
1995
2452
  update_status "blocked" "stale_context"
1996
2453
  return 1
@@ -2010,6 +2467,45 @@ main() {
2010
2467
  # Entry Point
2011
2468
  # =============================================================================
2012
2469
 
2470
+ # --- CLI: parse --worker-model / --verifier-model / --lock-worker-model / --final-consensus flags ---
2471
+ # These flags override env-var defaults (WORKER_ENGINE, WORKER_MODEL, etc.); value flags consume the following argument, switch flags just set their variable to 1.
2472
+ # Format: "model:reasoning" → codex engine; "model-name" → claude engine. parse_model_flag (defined elsewhere) appears to print "engine model [reasoning]" separated by spaces, judging by the splitting below — TODO confirm.
2473
+ _cli_i=1 # index of the positional parameter currently being examined (1-based)
2474
+ while (( _cli_i <= $# )); do # NOTE(review): "${@[n]}" subscripting below is zsh syntax (bash rejects it) — confirm this script always runs under zsh
2475
+ case "${@[$_cli_i]}" in
2476
+ --worker-model)
2477
+ (( _cli_i++ )) # step onto the flag's value
2478
+ _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "worker") || exit 1 # a missing value is passed as ""; a non-zero status from parse_model_flag aborts the script
2479
+ WORKER_ENGINE="${_cli_parsed%% *}"
2480
+ _cli_rest="${_cli_parsed#* }"
2481
+ WORKER_MODEL="${_cli_rest%% *}"
2482
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then # codex additionally tracks model and reasoning in dedicated variables
2483
+ WORKER_CODEX_MODEL="$WORKER_MODEL"
2484
+ WORKER_CODEX_REASONING="${_cli_rest##* }"
2485
+ fi
2486
+ ;;
2487
+ --verifier-model)
2488
+ (( _cli_i++ )) # step onto the flag's value
2489
+ _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "verifier") || exit 1 # same handling as --worker-model, applied to the VERIFIER_* variables
2490
+ VERIFIER_ENGINE="${_cli_parsed%% *}"
2491
+ _cli_rest="${_cli_parsed#* }"
2492
+ VERIFIER_MODEL="${_cli_rest%% *}"
2493
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2494
+ VERIFIER_CODEX_MODEL="$VERIFIER_MODEL"
2495
+ VERIFIER_CODEX_REASONING="${_cli_rest##* }"
2496
+ fi
2497
+ ;;
2498
+ --lock-worker-model)
2499
+ LOCK_WORKER_MODEL=1 # switch flag, takes no value
2500
+ ;;
2501
+ --final-consensus)
2502
+ FINAL_CONSENSUS=1 # switch flag, takes no value; overrides the FINAL_CONSENSUS env default
2503
+ ;;
2504
+ esac # no default arm: arguments not matched above are skipped without error
2505
+ (( _cli_i++ )) # advance to the next argument
2506
+ done
2507
+ unset _cli_i _cli_parsed _cli_rest # drop the parser's temporaries so they do not linger at top level
2508
+
2013
2509
  # Require tmux — tmux mode only works inside an active tmux session
2014
2510
  if [[ -z "${TMUX:-}" ]]; then
2015
2511
  echo "ERROR: tmux mode requires running inside a tmux session."