@ai-dev-methodologies/rlp-desk 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,14 +69,18 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
69
69
  # --- Verify Mode ---
70
70
  VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
71
71
  VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
72
+ FINAL_CONSENSUS="${FINAL_CONSENSUS:-0}" # 0|1 — consensus for final ALL verify only (independent of VERIFY_CONSENSUS)
72
73
  CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
73
- CB_THRESHOLD="${CB_THRESHOLD:-3}" # consecutive failures before BLOCKED (default: 3)
74
+ CONSENSUS_FAIL_FAST="${CONSENSUS_FAIL_FAST:-0}" # 0|1 skip second verifier if first fails
75
+ CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
74
76
  # Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
75
77
  if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
76
78
  EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
77
79
  else
78
80
  EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
79
81
  fi
82
+ _API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
83
+ _API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
80
84
 
81
85
  # --- Derived Paths ---
82
86
  DESK="$ROOT/.claude/ralph-desk"
@@ -84,6 +88,14 @@ PROMPTS_DIR="$DESK/prompts"
84
88
  CONTEXT_DIR="$DESK/context"
85
89
  MEMOS_DIR="$DESK/memos"
86
90
  LOGS_DIR="$DESK/logs/$SLUG"
91
+ RUNTIME_DIR="$LOGS_DIR/runtime"
92
+ PRD_FILE="$DESK/plans/prd-$SLUG.md"
93
+ TEST_SPEC_FILE="$DESK/plans/test-spec-$SLUG.md"
94
+ # --- Analytics Directory (user-level, cross-project) ---
95
+ ANALYTICS_SLUG_HASH=$(echo -n "$ROOT" | md5 -q 2>/dev/null || md5sum <<< "$ROOT" | cut -d' ' -f1)
96
+ ANALYTICS_DIR="$HOME/.claude/ralph-desk/analytics/${SLUG}--${ANALYTICS_SLUG_HASH:0:8}"
97
+ CAMPAIGN_JSONL="$ANALYTICS_DIR/campaign.jsonl"
98
+ METADATA_FILE="$ANALYTICS_DIR/metadata.json"
87
99
  WORKER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.worker.prompt.md"
88
100
  VERIFIER_PROMPT_BASE="$PROMPTS_DIR/${SLUG}.verifier.prompt.md"
89
101
  CONTEXT_FILE="$CONTEXT_DIR/${SLUG}-latest.md"
@@ -93,10 +105,11 @@ DONE_CLAIM_FILE="$MEMOS_DIR/${SLUG}-done-claim.json"
93
105
  VERDICT_FILE="$MEMOS_DIR/${SLUG}-verify-verdict.json"
94
106
  COMPLETE_SENTINEL="$MEMOS_DIR/${SLUG}-complete.md"
95
107
  BLOCKED_SENTINEL="$MEMOS_DIR/${SLUG}-blocked.md"
96
- STATUS_FILE="$LOGS_DIR/status.json"
97
- SESSION_CONFIG="$LOGS_DIR/session-config.json"
98
- WORKER_HEARTBEAT="$LOGS_DIR/worker-heartbeat.json"
99
- VERIFIER_HEARTBEAT="$LOGS_DIR/verifier-heartbeat.json"
108
+ LOCKFILE_PATH="$DESK/logs/.rlp-desk-${SLUG}.lock"
109
+ STATUS_FILE="$RUNTIME_DIR/status.json"
110
+ SESSION_CONFIG="$RUNTIME_DIR/session-config.json"
111
+ WORKER_HEARTBEAT="$RUNTIME_DIR/worker-heartbeat.json"
112
+ VERIFIER_HEARTBEAT="$RUNTIME_DIR/verifier-heartbeat.json"
100
113
  COST_LOG="$LOGS_DIR/cost-log.jsonl"
101
114
 
102
115
  # --- Session Naming ---
@@ -112,43 +125,265 @@ HEARTBEAT_STALE_COUNT=0
112
125
  MONITOR_FAILURE_COUNT=0
113
126
  CONSECUTIVE_FAILURES=0
114
127
  PREV_CONTEXT_HASH=""
128
+ PREV_PRD_HASH=""
129
+ PREV_PRD_US_LIST=""
130
+ _PRD_CHANGED=0
115
131
  ITERATION=0
116
132
  START_TIME=$(date +%s)
117
133
  BASELINE_COMMIT="" # git HEAD at campaign start (captured before loop)
118
134
  CAMPAIGN_REPORT_GENERATED=0 # guard against double-generation in cleanup trap
135
+ SV_REPORT_GENERATED=0 # guard against double-generation in generate_sv_report
119
136
  VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
120
137
  CONSENSUS_ROUND=0 # current consensus round for current US
121
138
  US_LIST="" # comma-separated US IDs from PRD (per-us mode)
139
+ LOCKFILE_ACQUIRED=0
140
+ LOCK_WORKER_MODEL="${LOCK_WORKER_MODEL:-0}" # 0|1 — set by --lock-worker-model; disables progressive upgrade
141
+ _SAME_US_FAIL_COUNT=0 # consecutive same-US fail counter (upgrade trigger at >= 2)
142
+ _LAST_FAILED_US="" # last failed US ID (same-US tracking for upgrade logic)
143
+ _MODEL_UPGRADED=0 # 1 if Worker model was auto-upgraded during campaign
144
+ _ORIGINAL_WORKER_MODEL="" # WORKER_MODEL saved before first upgrade (for restore on pass)
145
+ _ORIGINAL_WORKER_CODEX_REASONING="" # WORKER_CODEX_REASONING saved before first upgrade
122
146
 
123
147
  # =============================================================================
124
148
  # Utility Functions
125
149
  # =============================================================================
126
150
 
127
151
  DEBUG="${DEBUG:-0}"
128
- DEBUG_LOG="$ROOT/.claude/ralph-desk/logs/${LOOP_NAME:-unknown}/debug.log"
152
+ DEBUG_LOG="$ANALYTICS_DIR/debug.log"
129
153
 
130
- log() {
131
- echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
154
+ # Source shared business logic
155
+ LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
156
+ source "$LIB_DIR/lib_ralph_desk.zsh"
157
+
158
+ # A16: Warn if running in foreground (may conflict with Claude Code pane)
159
+ if [[ -z "${RLP_BACKGROUND:-}" ]]; then
160
+ echo "⚠ WARNING: Running in foreground. This may conflict with Claude Code's pane." >&2
161
+ echo " Recommended: launch via Bash tool with run_in_background: true" >&2
162
+ echo " Set RLP_BACKGROUND=1 to suppress this warning." >&2
163
+ fi
164
+
165
# check_dead_pane() — decide whether a pane's current command means the engine
# process has exited and only a shell is left behind.
# An empty command or a bare "zsh" always counts as dead. "bash" counts as dead
# for every engine except codex, because the codex trigger itself runs in bash.
# Args: $1=pane_current_command
#       $2=engine (claude|codex, default: claude)
#       $3=role (worker|verifier, default: worker) — accepted but not consulted
# Returns: 0 if dead, 1 if alive
check_dead_pane() {
  local current_cmd="$1"
  local engine_name="${2:-claude}"
  local pane_role="${3:-worker}"

  case "$current_cmd" in
    "" | zsh)
      # Nothing running, or only the bare login shell remains.
      return 0
      ;;
    bash)
      # bash is the expected foreground command while a codex trigger runs.
      if [[ "$engine_name" == "codex" ]]; then
        return 1
      fi
      return 0
      ;;
  esac

  return 1
}
133
184
 
134
- log_debug() {
135
- if (( DEBUG )); then
136
- mkdir -p "$(dirname "$DEBUG_LOG")" 2>/dev/null
137
- echo "[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG: $*" >> "$DEBUG_LOG"
185
# launch_worker_codex() — launch codex Worker via trigger script (non-interactive exec)
# Pastes "bash <trigger_file>" into the pane and presses Enter. The trigger path
# is shell-quoted first so that a path containing spaces or other shell
# metacharacters is still passed to bash as a single argument; plain paths are
# pasted unchanged.
# Args: $1=pane_id $2=trigger_file $3=iteration (accepted for signature parity, unused)
# Returns: 0 always (codex failures detected by poll_for_signal)
launch_worker_codex() {
  local pane_id="$1"
  local trigger_file="$2"
  local quoted_trigger

  # %q yields a form the receiving shell re-reads as exactly one word.
  quoted_trigger=$(printf '%q' "$trigger_file")

  log " Launching Worker codex via trigger script in pane $pane_id..."
  paste_to_pane "$pane_id" "bash $quoted_trigger"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker codex trigger sent: $trigger_file"
  sleep 3 # brief wait for codex to start
  return 0
}
200
+
201
# launch_worker_claude() — launch claude Worker TUI, send instruction, verify submission
# Flow: paste the launch command, wait for the pane to become ready, paste the
# "Read and execute ..." instruction, then poll the pane up to 15 times for signs
# of activity, pressing Enter again between polls and re-pasting the instruction
# every third failed poll. If no activity ever shows, claude is restarted once
# and the instruction is sent again.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=worker_launch_cmd
# Returns: 0 on success, 1 on fatal failure (caller writes BLOCKED) — that is,
#          when the pane never becomes ready at first launch or after the restart.
launch_worker_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local iter="$3"
  local worker_launch="$4"

  # Activity markers, matched case-insensitively as an extended regex.
  # "exec" must stand alone as a word: the instruction we paste contains
  # "execute", which would otherwise satisfy the check before claude has
  # consumed anything.
  local busy_re='esc to interrupt|thinking|working|kneading|crunching|clauding|billowing|brewing|tinkering|burrowing|saut|Exploring|Running|(^|[^[:alnum:]_])exec([^[:alnum:]_]|$)|Explored|Prestidigitating|Undulating|Reading|Bash|Edit|Write|Grep|Glob'

  log " Launching Worker claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$worker_launch"
  tmux send-keys -t "$pane_id" C-m

  # Wait for claude TUI to be ready
  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Worker claude failed to start"
    return 1
  fi

  # Send instruction to claude TUI
  sleep 3
  local worker_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$worker_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"

  # 15-iteration submit loop — verify claude started working
  local submit_attempts=0
  local pane_check
  while (( submit_attempts < 15 )); do
    sleep 2
    pane_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Remove our own instruction text so words inside it (or inside the prompt
    # path) cannot be mistaken for activity.
    pane_check=${pane_check//"$worker_instruction"/}
    if printf '%s\n' "$pane_check" | grep -qiE "$busy_re" 2>/dev/null; then
      log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
      log_debug "[FLOW] iter=$iter worker_submit_check=OK attempts=$((submit_attempts + 1))"
      break
    fi
    # Every 3 failed attempts, re-send full instruction
    if (( submit_attempts > 0 && submit_attempts % 3 == 0 )); then
      log_debug "Re-sending full worker instruction (attempt $submit_attempts)"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.2
      paste_to_pane "$pane_id" "$worker_instruction"
      sleep 0.15
      tmux send-keys -t "$pane_id" C-m
      sleep 1
    fi
    # Press Enter twice in case the first keypress was swallowed.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done

  # If 15 attempts failed, restart claude and retry
  if (( submit_attempts >= 15 )); then
    log " WARNING: Worker instruction not consumed after 15 attempts — restarting claude"
    log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
    tmux send-keys -t "$pane_id" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
    sleep 2
    # Best effort: give the pane a moment to settle; the outcome is not acted on.
    wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
    paste_to_pane "$pane_id" "$worker_launch"
    tmux send-keys -t "$pane_id" C-m
    if wait_for_pane_ready "$pane_id" 30; then
      sleep 3
      paste_to_pane "$pane_id" "$worker_instruction"
      tmux send-keys -t "$pane_id" C-m
      log " Worker restarted and instruction re-sent"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
    else
      log_error "Worker restart failed — pane not ready"
      log_debug "[FLOW] iter=$iter worker_restart_recovery=failed"
      # No worker is running at this point; report the fatal failure instead
      # of letting the caller wait for a signal that cannot arrive.
      return 1
    fi
  fi

  return 0
}
140
281
 
141
- log_error() {
142
- echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2
282
# launch_verifier_codex() — start the codex Verifier in a pane (non-interactive).
# Pastes the prepared launch command, presses Enter, and pauses briefly.
# Args: $1=pane_id $2=prompt_file $3=iteration $4=launch_cmd
#       ($2 and $3 are part of the shared launcher signature; this function
#        does not read them)
# Returns: 0 always
launch_verifier_codex() {
  local target_pane="$1"
  local launch_cmd="$4"

  log " Launching Verifier codex in pane ${target_pane}..."

  paste_to_pane "$target_pane" "$launch_cmd"
  tmux send-keys -t "$target_pane" C-m

  # Short pause before returning.
  sleep 3

  return 0
}
144
297
 
145
- # --- governance.md s7: Atomic file writes (tmux pattern) ---
146
- # All file writes by the Leader use tmp+mv to prevent corruption.
147
- atomic_write() {
148
- local target="$1"
149
- local tmp="${target}.tmp.$$"
150
- cat > "$tmp"
151
- mv "$tmp" "$target"
298
# launch_verifier_claude() — launch claude Verifier TUI, send instruction, verify submission
# Flow: paste the launch command, wait for the pane to become ready, paste the
# "Read and execute ..." instruction, then poll the pane up to 15 times for signs
# of activity, pressing Enter again between polls. On the poll where the attempt
# counter equals 8 the input line is cleared and the instruction is re-pasted.
# Args: $1=pane_id $2=prompt_file $3=iteration (unused here) $4=launch_cmd
# Returns: 1 if the TUI never became ready; otherwise 0 once the instruction has
#          been sent — including when no activity was observed within 15 polls.
launch_verifier_claude() {
  local pane_id="$1"
  local prompt_file="$2"
  local verifier_launch="$4"

  # Activity markers, matched case-insensitively as an extended regex.
  # "exec" must stand alone as a word: the instruction we paste contains
  # "execute", which would otherwise satisfy the check before claude has
  # consumed anything.
  local busy_re='esc to interrupt|thinking|working|kneading|crunching|clauding|billowing|brewing|tinkering|burrowing|saut|Exploring|Running|(^|[^[:alnum:]_])exec([^[:alnum:]_]|$)|Explored'

  log " Launching Verifier claude in pane $pane_id..."
  paste_to_pane "$pane_id" "$verifier_launch"
  tmux send-keys -t "$pane_id" C-m

  if ! wait_for_pane_ready "$pane_id" 30; then
    log_error "Verifier failed to start"
    return 1
  fi

  sleep 3
  local verifier_instruction="Read and execute the instructions in $prompt_file"
  paste_to_pane "$pane_id" "$verifier_instruction"
  tmux send-keys -t "$pane_id" C-m
  log_debug "Verifier instruction sent directly"

  # Submit loop — verify verifier started working
  local submit_attempts=0
  local vs_check
  while (( submit_attempts < 15 )); do
    sleep 2
    vs_check=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
    # Remove our own instruction text so words inside it (or inside the prompt
    # path) cannot be mistaken for activity.
    vs_check=${vs_check//"$verifier_instruction"/}
    if printf '%s\n' "$vs_check" | grep -qiE "$busy_re" 2>/dev/null; then
      log_debug "Verifier started working after $((submit_attempts + 1)) checks"
      break
    fi
    if (( submit_attempts == 8 )); then
      log_debug "Adaptive instruction retry: clearing line and re-typing"
      tmux send-keys -t "$pane_id" C-u 2>/dev/null
      sleep 0.1
      paste_to_pane "$pane_id" "$verifier_instruction"
      tmux send-keys -t "$pane_id" C-m
    fi
    # Press Enter twice in case the first keypress was swallowed.
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    sleep 0.3
    tmux send-keys -t "$pane_id" C-m 2>/dev/null
    submit_attempts=$(( submit_attempts + 1 ))
  done
  return 0
}
346
+
347
# handle_worker_exit_codex() — handle codex worker process exit (1-shot exec)
# On exit: check done-claim, auto-generate iter-signal.
# The us_id for the signal is chosen as follows:
#   - done-claim present: its .us_id, or "unknown" when absent/unreadable;
#   - no done-claim: .us_id of the existing iter-signal file (default "US-001"),
#     overridden by the first "Next ... US-<n>" line found in the memory file.
# The signal is assembled with jq so that every value is correctly JSON-escaped.
# Globals read: DONE_CLAIM_FILE, DESK, SLUG
# Args: $1=iteration (must be a JSON number) $2=signal_file
# Returns: 0 (signal generated), 1 (error — signal could not be built or written;
#          an existing signal file is left untouched when building fails)
handle_worker_exit_codex() {
  local iter="$1"
  local signal_file="$2"
  local us_id summary mem_us signal_json

  log " Codex worker process exited. Checking for done-claim..."
  if [[ -f "$DONE_CLAIM_FILE" ]]; then
    us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
    # jq prints nothing when the done-claim is not valid JSON.
    [[ -n "$us_id" ]] || us_id="unknown"
    summary="auto-generated after codex exec exit"
    log " Codex worker completed with done-claim (us_id=$us_id). Auto-generating signal."
  else
    log " WARNING: Codex worker exited without done-claim. Generating verify signal for current US."
    us_id=$(jq -r '.us_id // "US-001"' "$DESK/memos/${SLUG}-iter-signal.json" 2>/dev/null)
    [[ -n "$us_id" ]] || us_id="US-001"
    mem_us=$(sed -n 's/.*Next.*US-\([0-9]*\).*/US-\1/p' "$DESK/memos/${SLUG}-memory.md" 2>/dev/null | head -1)
    if [[ -n "$mem_us" ]]; then
      us_id="$mem_us"
    fi
    summary="auto-generated after codex exec exit (no done-claim)"
  fi

  # Build the JSON first so a failure never truncates the signal file.
  if ! signal_json=$(jq -cn \
      --argjson iteration "$iter" \
      --arg us_id "$us_id" \
      --arg summary "$summary" \
      --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{iteration: $iteration, status: "verify", us_id: $us_id, summary: $summary, timestamp: $timestamp}' \
      2>/dev/null); then
    log_error "Failed to build iter-signal JSON (iteration=$iter us_id=$us_id)"
    return 1
  fi

  printf '%s\n' "$signal_json" > "$signal_file" || return 1
  return 0
}
372
+
373
# handle_worker_exit_claude() — react to a claude worker that exited without
# producing its signal file: log the problem and delegate to restart_worker.
# Args: $1=pane_id $2=iteration $3=trigger_file
# Returns: 0 when restart_worker succeeds, 1 when it reports any failure
handle_worker_exit_claude() {
  local target_pane="$1"
  local iteration="$2"
  local trigger_path="$3"

  log_error "Worker exited without writing signal file"

  # Collapse every non-zero restart status into a plain 1 for the caller.
  restart_worker "$target_pane" "$iteration" "$trigger_path" || return 1
  return 0
}
153
388
 
154
389
  # --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
@@ -205,9 +440,13 @@ check_dependencies() {
205
440
  missing=1
206
441
  fi
207
442
 
208
- if ! command -v claude >/dev/null 2>&1; then
209
- log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
210
- missing=1
443
+ # claude required only when claude engine is used for Worker or Verifier execution;
444
+ # codex-only campaigns can run without claude — generate_sv_report degrades gracefully
445
+ if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
446
+ if ! command -v claude >/dev/null 2>&1; then
447
+ log_error "claude CLI is required but not found. See: https://docs.anthropic.com/en/docs/claude-cli"
448
+ missing=1
449
+ fi
211
450
  fi
212
451
 
213
452
  if ! command -v jq >/dev/null 2>&1; then
@@ -216,14 +455,9 @@ check_dependencies() {
216
455
  fi
217
456
 
218
457
  # Codex binary required only when engine=codex or consensus verification is enabled
219
- if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
458
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
220
459
  if ! command -v codex >/dev/null 2>&1; then
221
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
222
- log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
223
- else
224
- log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
225
- fi
226
- log_error "Install with: npm install -g @openai/codex"
460
+ log_error "codex CLI not found. Install: npm install -g @openai/codex"
227
461
  missing=1
228
462
  fi
229
463
  fi
@@ -232,52 +466,19 @@ check_dependencies() {
232
466
  exit 1
233
467
  fi
234
468
 
235
- # Resolve full path to claude binary for reliable launches
236
- CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
237
- log " Claude binary: $CLAUDE_BIN"
469
+ # Resolve full path to claude binary when claude engine is in use
470
+ if [[ "$WORKER_ENGINE" != "codex" || "$VERIFIER_ENGINE" != "codex" ]]; then
471
+ CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
472
+ log " Claude binary: $CLAUDE_BIN"
473
+ fi
238
474
 
239
475
  # Resolve codex binary if needed
240
- if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
476
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
241
477
  CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
242
478
  log " Codex binary: $CODEX_BIN"
243
479
  fi
244
480
  }
245
481
 
246
- # =============================================================================
247
- # Scaffold Validation
248
- # =============================================================================
249
-
250
- validate_scaffold() {
251
- local errors=0
252
-
253
- if [[ ! -f "$WORKER_PROMPT_BASE" ]]; then
254
- log_error "Worker prompt not found: $WORKER_PROMPT_BASE"
255
- errors=1
256
- fi
257
-
258
- if [[ ! -f "$VERIFIER_PROMPT_BASE" ]]; then
259
- log_error "Verifier prompt not found: $VERIFIER_PROMPT_BASE"
260
- errors=1
261
- fi
262
-
263
- if [[ ! -f "$CONTEXT_FILE" ]]; then
264
- log_error "Context file not found: $CONTEXT_FILE"
265
- errors=1
266
- fi
267
-
268
- if [[ ! -f "$MEMORY_FILE" ]]; then
269
- log_error "Memory file not found: $MEMORY_FILE"
270
- errors=1
271
- fi
272
-
273
- if (( errors )); then
274
- log_error "Scaffold validation failed. Run init_ralph_desk.zsh first."
275
- exit 1
276
- fi
277
-
278
- mkdir -p "$LOGS_DIR"
279
- }
280
-
281
482
  # =============================================================================
282
483
  # Session Management (tmux pattern: pane IDs)
283
484
  # =============================================================================
@@ -423,6 +624,17 @@ check_copy_mode() {
423
624
  # Verification-Based Send Retry (tmux pattern)
424
625
  # =============================================================================
425
626
 
627
# --- Reliable text paste via tmux buffer (avoids send-keys -l char-by-char issues) ---
# paste_to_pane() — write the text to a private temp file, load it into the
# named tmux buffer "rlp-paste", and paste that buffer into the target pane
# (the buffer is deleted after pasting via -d).
# The temp file comes from mktemp rather than a predictable name, and the text
# is written with printf '%s' so that content such as "-n", "-e" or backslashes
# reaches tmux unmodified.
# Args: $1=pane_id $2=text
# Returns: 0 when the text was pasted or was empty (nothing to paste);
#          1 when the temp file, the buffer load, or the paste failed.
#          On a failed load no paste is attempted.
paste_to_pane() {
  local pane_id="$1"
  local text="$2"
  local tmpbuf
  local rc=0

  # Nothing to paste.
  if [[ -z "$text" ]]; then
    return 0
  fi

  tmpbuf=$(mktemp "${TMPDIR:-/tmp}/rlp-desk-paste.XXXXXX") || return 1

  if printf '%s' "$text" > "$tmpbuf" \
      && tmux load-buffer -b rlp-paste "$tmpbuf" 2>/dev/null; then
    tmux paste-buffer -b rlp-paste -d -t "$pane_id" 2>/dev/null || rc=1
  else
    rc=1
  fi

  rm -f -- "$tmpbuf"
  return "$rc"
}
637
+
426
638
  # --- governance.md s7 step 5: Send with copy-mode guard and retry ---
427
639
  safe_send_keys() {
428
640
  local pane_id="$1"
@@ -451,18 +663,18 @@ safe_send_keys() {
451
663
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
452
664
  if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
453
665
  log_debug " Permission prompt detected, auto-approving"
454
- tmux send-keys -t "$pane_id" Enter
666
+ tmux send-keys -t "$pane_id" C-m
455
667
  sleep 0.3
456
668
  fi
457
669
  # Auto-dismiss codex update prompt (select Skip)
458
670
  if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
459
671
  log_debug " Codex update prompt detected, selecting Skip"
460
- tmux send-keys -t "$pane_id" "2" Enter
672
+ tmux send-keys -t "$pane_id" "2" C-m
461
673
  sleep 0.2
462
674
  fi
463
- # Send text in literal mode with -- separator
464
- log_debug " Sending text to pane $pane_id (${#text} chars)"
465
- tmux send-keys -t "$pane_id" -l -- "$text"
675
+ # Send text via buffer paste (reliable for long strings)
676
+ log_debug " Pasting text to pane $pane_id (${#text} chars)"
677
+ paste_to_pane "$pane_id" "$text"
466
678
 
467
679
  # Allow input buffer to settle (tmux: 150ms)
468
680
  sleep 0.15
@@ -472,9 +684,7 @@ safe_send_keys() {
472
684
  while (( round < 6 )); do
473
685
  sleep 0.1
474
686
  if (( round == 0 && pane_busy )); then
475
- # Busy pane: Tab+C-m queue semantics (tmux pattern)
476
- tmux send-keys -t "$pane_id" Tab
477
- sleep 0.08
687
+ # Busy pane: just C-m (DO NOT send Tab — it toggles Claude Code permission mode)
478
688
  tmux send-keys -t "$pane_id" C-m
479
689
  else
480
690
  tmux send-keys -t "$pane_id" C-m
@@ -507,7 +717,7 @@ safe_send_keys() {
507
717
  if ! check_copy_mode "$pane_id"; then
508
718
  return 1
509
719
  fi
510
- tmux send-keys -t "$pane_id" -l -- "$text"
720
+ paste_to_pane "$pane_id" "$text"
511
721
  sleep 0.12
512
722
  local retry_round=0
513
723
  while (( retry_round < 4 )); do
@@ -551,9 +761,9 @@ wait_for_pane_ready() {
551
761
  # Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
552
762
  if echo "$captured" | grep -q "Do you trust" 2>/dev/null; then
553
763
  log " Trust prompt detected, auto-dismissing..."
554
- tmux send-keys -t "$pane_id" Enter
764
+ tmux send-keys -t "$pane_id" C-m
555
765
  sleep 0.12
556
- tmux send-keys -t "$pane_id" Enter
766
+ tmux send-keys -t "$pane_id" C-m
557
767
  sleep 2
558
768
  continue
559
769
  fi
@@ -561,7 +771,7 @@ wait_for_pane_ready() {
561
771
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
562
772
  if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
563
773
  log " Permission prompt detected, auto-approving..."
564
- tmux send-keys -t "$pane_id" Enter
774
+ tmux send-keys -t "$pane_id" C-m
565
775
  sleep 0.5
566
776
  continue
567
777
  fi
@@ -569,7 +779,7 @@ wait_for_pane_ready() {
569
779
  # Auto-dismiss codex update prompt (select Skip = option 2)
570
780
  if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
571
781
  log " Codex update prompt detected, selecting Skip..."
572
- tmux send-keys -t "$pane_id" "2" Enter
782
+ tmux send-keys -t "$pane_id" "2" C-m
573
783
  sleep 0.5
574
784
  continue
575
785
  fi
@@ -655,12 +865,19 @@ check_and_nudge_idle_pane() {
655
865
  local now
656
866
  now=$(date +%s)
657
867
  if (( now - idle_since > IDLE_NUDGE_THRESHOLD )); then
658
- local count=${(P)nudge_count_var}
659
- if (( count < MAX_NUDGES )); then
660
- log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
661
- safe_send_keys "$pane_id" ""
662
- (( count++ ))
663
- eval "$nudge_count_var=$count"
868
+ # A12 fix: NEVER nudge if pane is busy (thinking/working) — nudge interrupts claude
869
+ local _nudge_capture
870
+ _nudge_capture=$(tmux capture-pane -t "$pane_id" -p -S -5 2>/dev/null)
871
+ if echo "$_nudge_capture" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|razzle\|bunning\|zesting\|fermenting\|actualizing\|composing\|evaporating\|churning" 2>/dev/null; then
872
+ log_debug " Pane $pane_id appears busy (thinking/working), skipping nudge"
873
+ else
874
+ local count=${(P)nudge_count_var}
875
+ if (( count < MAX_NUDGES )); then
876
+ log " Nudging idle pane $pane_id (nudge $((count + 1))/$MAX_NUDGES)"
877
+ safe_send_keys "$pane_id" ""
878
+ (( count++ ))
879
+ eval "$nudge_count_var=$count"
880
+ fi
664
881
  fi
665
882
  fi
666
883
  else
@@ -678,6 +895,13 @@ restart_worker() {
678
895
  local pane_id="$1"
679
896
  local iter="$2"
680
897
  local trigger_file="$3"
898
+
899
+ # Codex workers are 1-shot exec; restart is not applicable
900
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
901
+ log_debug "restart_worker called for codex engine — no-op (1-shot exec)"
902
+ return 1
903
+ fi
904
+
681
905
  local restart_count="${WORKER_RESTARTS[$iter]:-0}"
682
906
 
683
907
  if (( restart_count >= MAX_RESTARTS )); then
@@ -693,7 +917,7 @@ restart_worker() {
693
917
 
694
918
  # Kill existing claude, wait for shell prompt
695
919
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
696
- tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
920
+ tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
697
921
  sleep 2
698
922
 
699
923
  # Re-launch worker (tmux interactive pattern)
@@ -710,6 +934,25 @@ restart_worker() {
710
934
  # Write-Then-Notify: Trigger Script Generation (tmux CRITICAL pattern)
711
935
  # =============================================================================
712
936
 
937
# Per-US PRD injection helper
# Prints the Worker prompt base to stdout with every occurrence of the full PRD
# path replaced by the per-US split path.
# The replacement is a literal string substitution: characters such as ".", "&"
# or "|" in either path have no special meaning.
# Falls back to the full PRD with a stderr warning if the split file is missing.
# When no per-US path is given, or the full PRD path is empty, the prompt base
# is printed unchanged.
# Args: $1=prompt_base_file $2=full_prd_path $3=per_us_prd_path (empty = no substitution)
# Returns: exit status of the command that printed the prompt (awk or cat)
inject_per_us_prd() {
  local prompt_base="$1"
  local full_prd="$2"
  local per_us_prd="${3:-}"

  if [[ -n "$per_us_prd" && -f "$per_us_prd" ]]; then
    # Paths travel through the environment so awk performs no escape processing
    # on them; index()/substr() then do a purely literal replace.
    RLP_PRD_FROM="$full_prd" RLP_PRD_TO="$per_us_prd" awk '
      BEGIN {
        from = ENVIRON["RLP_PRD_FROM"]
        to = ENVIRON["RLP_PRD_TO"]
        from_len = length(from)
      }
      from_len == 0 { print; next }
      {
        rest = $0
        out = ""
        while ((pos = index(rest, from)) > 0) {
          out = out substr(rest, 1, pos - 1) to
          rest = substr(rest, pos + from_len)
        }
        print out rest
      }
    ' "$prompt_base"
  else
    if [[ -n "$per_us_prd" ]]; then
      printf '%s\n' "WARNING: per-US split file not found: $per_us_prd — falling back to full PRD injection" >&2
    fi
    cat "$prompt_base"
  fi
}
955
+
713
956
  # --- governance.md s7 step 4+5: Write prompt and trigger to files ---
714
957
  # NEVER send prompt content through tmux send-keys.
715
958
  # Write payloads to files, send only short trigger commands (<200 chars).
@@ -727,14 +970,31 @@ write_worker_trigger() {
727
970
  local prev_iter=$((iter - 1))
728
971
  local fix_contract_file="$LOGS_DIR/iter-$(printf '%03d' $prev_iter).fix-contract.md"
729
972
 
973
+ # Compute next unverified US before prompt assembly (required for per-US PRD injection)
974
+ local next_us=""
975
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
976
+ for us in $(echo "$US_LIST" | tr ',' ' '); do
977
+ if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
978
+ next_us="$us"
979
+ break
980
+ fi
981
+ done
982
+ fi
983
+
730
984
  {
731
- cat "$WORKER_PROMPT_BASE"
985
+ # Per-US PRD injection: substitute full PRD path with per-US split path when available
986
+ local per_us_prd=""
987
+ [[ -n "$next_us" ]] && per_us_prd="$DESK/plans/prd-${SLUG}-${next_us}.md"
988
+ inject_per_us_prd "$WORKER_PROMPT_BASE" "$DESK/plans/prd-${SLUG}.md" "$per_us_prd"
732
989
  echo ""
733
990
  echo "---"
734
991
  echo "## Iteration Context"
735
992
  echo "- **Iteration**: $iter"
736
993
  echo "- **Memory Stop Status**: $(sed -n '/^## Stop Status$/,/^$/{ /^## /d; /^$/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1)"
737
994
  echo "- **Next Iteration Contract**: ${contract:-Start from the beginning}"
995
+ if (( _PRD_CHANGED )); then
996
+ echo "NOTE: PRD was updated since last iteration. New/changed US may exist."
997
+ fi
738
998
 
739
999
  # Include fix contract if previous verifier failed
740
1000
  if [[ -f "$fix_contract_file" ]]; then
@@ -749,15 +1009,6 @@ write_worker_trigger() {
749
1009
 
750
1010
  # Per-US mode: tell Worker exactly which US to work on
751
1011
  if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
752
- # Find next unverified US
753
- local next_us=""
754
- for us in $(echo "$US_LIST" | tr ',' ' '); do
755
- if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
756
- next_us="$us"
757
- break
758
- fi
759
- done
760
-
761
1012
  if [[ -n "$next_us" ]]; then
762
1013
  echo ""
763
1014
  echo "---"
@@ -766,6 +1017,13 @@ write_worker_trigger() {
766
1017
  echo "The Leader has determined that **${next_us}** is the next unverified story."
767
1018
  echo "You MUST implement ONLY **${next_us}** in this iteration."
768
1019
  echo "Do NOT implement any other user stories."
1020
+ # Per-US test-spec injection: point Worker to scoped test-spec if available
1021
+ local per_us_test_spec="$DESK/plans/test-spec-${SLUG}-${next_us}.md"
1022
+ if [[ -f "$per_us_test_spec" ]]; then
1023
+ echo "- **Test Spec**: Read ONLY \`$per_us_test_spec\` (scoped to ${next_us})"
1024
+ else
1025
+ echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
1026
+ fi
769
1027
  echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
770
1028
  echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
771
1029
  echo ""
@@ -793,12 +1051,12 @@ write_worker_trigger() {
793
1051
  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
794
1052
  # Engine-specific launch command (expanded at write time)
795
1053
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
796
- local engine_cmd="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL \\
1054
+ local engine_cmd="${CODEX_BIN:-codex} exec \\
1055
+ -m $WORKER_CODEX_MODEL \\
797
1056
  -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
798
1057
  --dangerously-bypass-approvals-and-sandbox \\
799
- \"\$(cat $prompt_file)\" \\
800
- 2>&1 | tee $output_log"
801
- local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
1058
+ \"\$(cat $prompt_file)\""
1059
+ local engine_comment="# Run codex exec with fresh context (no pipe — codex requires terminal)"
802
1060
  else
803
1061
  local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
804
1062
  --model $WORKER_MODEL \\
@@ -929,282 +1187,6 @@ TRIGGER_EOF
929
1187
  log " Verifier trigger: $trigger_file"
930
1188
  }
931
1189
 
932
- # =============================================================================
933
- # Status Updates
934
- # =============================================================================
935
-
936
- # --- governance.md s7 step 8: Update status.json ---
937
- update_status() {
938
- local phase="$1"
939
- local last_result="$2"
940
-
941
- # Build verified_us as JSON array
942
- local verified_us_json="[]"
943
- if [[ -n "$VERIFIED_US" ]]; then
944
- verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
945
- fi
946
-
947
- # Build consensus fields
948
- local consensus_json=""
949
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
950
- consensus_json=',
951
- "consensus_scope": "'"$CONSENSUS_SCOPE"'",
952
- "consensus_round": '"$CONSENSUS_ROUND"',
953
- "claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
954
- "codex_verdict": "'"${CODEX_VERDICT:-}"'"'
955
- fi
956
-
957
- echo '{
958
- "slug": "'"$SLUG"'",
959
- "baseline_commit": "'"${BASELINE_COMMIT:-none}"'",
960
- "iteration": '"$ITERATION"',
961
- "max_iter": '"$MAX_ITER"',
962
- "phase": "'"$phase"'",
963
- "worker_model": "'"$WORKER_MODEL"'",
964
- "verifier_model": "'"$VERIFIER_MODEL"'",
965
- "worker_engine": "'"$WORKER_ENGINE"'",
966
- "verifier_engine": "'"$VERIFIER_ENGINE"'",
967
- "worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
968
- "worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
969
- "verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
970
- "verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
971
- "verify_mode": "'"$VERIFY_MODE"'",
972
- "verify_consensus": '"$VERIFY_CONSENSUS"',
973
- "last_result": "'"$last_result"'",
974
- "consecutive_failures": '"$CONSECUTIVE_FAILURES"',
975
- "verified_us": '"$verified_us_json"''"$consensus_json"',
976
- "updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
977
- }' | atomic_write "$STATUS_FILE"
978
- }
979
-
980
- # --- governance.md s7 step 8: Write result log ---
981
- write_result_log() {
982
- local iter="$1"
983
- local result="$2"
984
- local result_file="$LOGS_DIR/iter-$(printf '%03d' $iter).result.md"
985
-
986
- local git_diff=""
987
- if git -C "$ROOT" rev-parse HEAD &>/dev/null; then
988
- git_diff=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(no git diff available)")
989
- else
990
- git_diff="(no commits in repo — cannot diff)"
991
- fi
992
- # Include untracked new files in result log
993
- local result_untracked
994
- result_untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
995
- if [[ -n "$result_untracked" ]]; then
996
- git_diff="${git_diff}
997
-
998
- Untracked new files:
999
- ${result_untracked}"
1000
- fi
1001
-
1002
- {
1003
- echo "# Iteration $iter Result"
1004
- echo ""
1005
- echo "## Status"
1006
- echo "$result [leader-measured]"
1007
- echo ""
1008
- echo "## Files Changed"
1009
- echo '```'
1010
- echo "$git_diff"
1011
- echo '```'
1012
- echo "[git-measured]"
1013
- echo ""
1014
- echo "## Timestamp"
1015
- echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
1016
- } | atomic_write "$result_file"
1017
- }
1018
-
1019
- # --- step 7d: Archive iteration artifacts (done-claim + verdict) to logs/ ---
1020
- archive_iter_artifacts() {
1021
- local iter="$1"
1022
- local iter_padded
1023
- iter_padded=$(printf '%03d' "$iter")
1024
- if [[ -f "$DONE_CLAIM_FILE" ]]; then
1025
- cp "$DONE_CLAIM_FILE" "$LOGS_DIR/iter-${iter_padded}-done-claim.json" 2>/dev/null
1026
- fi
1027
- if [[ -f "$VERDICT_FILE" ]]; then
1028
- cp "$VERDICT_FILE" "$LOGS_DIR/iter-${iter_padded}-verify-verdict.json" 2>/dev/null
1029
- fi
1030
- }
1031
-
1032
- # --- AC5: Write per-iteration cost estimate to cost-log.jsonl ---
1033
- write_cost_log() {
1034
- local iter="$1"
1035
- local iter_padded
1036
- iter_padded=$(printf '%03d' "$iter")
1037
-
1038
- local prompt_bytes=0 claim_bytes=0 verdict_bytes=0
1039
- local worker_prompt_file="$LOGS_DIR/iter-${iter_padded}.worker-prompt.md"
1040
- [[ -f "$worker_prompt_file" ]] && prompt_bytes=$(wc -c < "$worker_prompt_file" 2>/dev/null || echo 0)
1041
- [[ -f "$DONE_CLAIM_FILE" ]] && claim_bytes=$(wc -c < "$DONE_CLAIM_FILE" 2>/dev/null || echo 0)
1042
- [[ -f "$VERDICT_FILE" ]] && verdict_bytes=$(wc -c < "$VERDICT_FILE" 2>/dev/null || echo 0)
1043
-
1044
- local estimated_tokens=$(( (prompt_bytes + claim_bytes + verdict_bytes) / 4 ))
1045
-
1046
- echo '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
1047
- }
1048
-
1049
- # --- AC4: Generate campaign-report.md on all terminal states ---
1050
- generate_campaign_report() {
1051
- # Guard: idempotent — only generate once per campaign run
1052
- if (( CAMPAIGN_REPORT_GENERATED )); then return 0; fi
1053
- CAMPAIGN_REPORT_GENERATED=1
1054
-
1055
- local final_status="UNKNOWN"
1056
- if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
1057
- elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
1058
- else final_status="TIMEOUT"; fi
1059
-
1060
- local report_file="$LOGS_DIR/campaign-report.md"
1061
-
1062
- # AC9: Version existing report before writing new one
1063
- if [[ -f "$report_file" ]]; then
1064
- local v=1
1065
- while [[ -f "${report_file%.md}-v${v}.md" ]]; do (( v++ )); done
1066
- mv "$report_file" "${report_file%.md}-v${v}.md"
1067
- fi
1068
-
1069
- local end_time
1070
- end_time=$(date +%s)
1071
- local elapsed=$(( end_time - START_TIME ))
1072
-
1073
- local baseline_commit_val="${BASELINE_COMMIT:-none}"
1074
- local files_changed=""
1075
- if [[ "$baseline_commit_val" != "none" ]]; then
1076
- files_changed=$(git -C "$ROOT" diff --stat "${baseline_commit_val}" 2>/dev/null || echo "(git diff unavailable)")
1077
- elif git -C "$ROOT" rev-parse HEAD &>/dev/null; then
1078
- files_changed=$(git -C "$ROOT" diff --stat HEAD 2>/dev/null || echo "(git diff unavailable)")
1079
- else
1080
- files_changed="(no commits in repo — cannot diff)"
1081
- fi
1082
- # Include untracked new files
1083
- local untracked
1084
- untracked=$(git -C "$ROOT" ls-files --others --exclude-standard 2>/dev/null | head -20)
1085
- if [[ -n "$untracked" ]]; then
1086
- files_changed="${files_changed}
1087
-
1088
- Untracked new files:
1089
- ${untracked}"
1090
- fi
1091
-
1092
- local sv_summary=""
1093
- if (( WITH_SELF_VERIFICATION )); then
1094
- local sv_report
1095
- sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
1096
- if [[ -n "$sv_report" ]]; then
1097
- sv_summary="See: $(basename "$sv_report")"
1098
- else
1099
- sv_summary="SV report generation requires Agent mode. Flag recorded in session-config."
1100
- fi
1101
- else
1102
- sv_summary="N/A — --with-self-verification not enabled"
1103
- fi
1104
-
1105
- {
1106
- echo "# Campaign Report: $SLUG"
1107
- echo ""
1108
- echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ) | Status: $final_status | Iterations: $ITERATION"
1109
- echo ""
1110
- echo "## Objective"
1111
- local prd_file="$DESK/plans/prd-$SLUG.md"
1112
- if [[ -f "$prd_file" ]]; then
1113
- grep '^## Objective' -A3 "$prd_file" 2>/dev/null | tail -n +2 | head -3
1114
- else
1115
- echo "(PRD not found)"
1116
- fi
1117
- echo ""
1118
- echo "## Execution Summary"
1119
- echo "- Terminal state: $final_status"
1120
- echo "- Iterations run: $ITERATION / $MAX_ITER"
1121
- echo "- Elapsed: ${elapsed}s"
1122
- echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
1123
- echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
1124
- echo ""
1125
- echo "## US Status"
1126
- echo "- Verified: ${VERIFIED_US:-none}"
1127
- echo "- Consecutive failures at end: $CONSECUTIVE_FAILURES"
1128
- echo ""
1129
- echo "## Verification Results"
1130
- local ri=1
1131
- while (( ri <= ITERATION )); do
1132
- local iter_dc="$LOGS_DIR/iter-$(printf '%03d' $ri)-done-claim.json"
1133
- if [[ -f "$iter_dc" ]]; then
1134
- local us_id
1135
- us_id=$(jq -r '.us_id // "unknown"' "$iter_dc" 2>/dev/null)
1136
- echo "- $(basename "$iter_dc"): us_id=$us_id"
1137
- fi
1138
- (( ri++ ))
1139
- done
1140
- echo ""
1141
- echo "## Issues Encountered"
1142
- local fi_found=0
1143
- local fi_i=1
1144
- while (( fi_i <= ITERATION )); do
1145
- local fix_f="$LOGS_DIR/iter-$(printf '%03d' $fi_i).fix-contract.md"
1146
- if [[ -f "$fix_f" ]]; then
1147
- echo "- $(basename "$fix_f")"
1148
- fi_found=1
1149
- fi
1150
- (( fi_i++ ))
1151
- done
1152
- (( fi_found == 0 )) && echo "- None"
1153
- echo ""
1154
- echo "## Cost & Performance"
1155
- if [[ -f "$COST_LOG" ]]; then
1156
- local total_tokens=0
1157
- while IFS= read -r line; do
1158
- local t
1159
- t=$(echo "$line" | jq -r '.estimated_tokens // 0' 2>/dev/null || echo 0)
1160
- total_tokens=$(( total_tokens + t ))
1161
- done < "$COST_LOG"
1162
- echo "- Total estimated tokens: $total_tokens (source: estimated, tmux mode)"
1163
- echo "- See: cost-log.jsonl for per-iteration breakdown"
1164
- else
1165
- echo "- No cost data available"
1166
- fi
1167
- echo ""
1168
- echo "## SV Summary"
1169
- echo "$sv_summary"
1170
- echo ""
1171
- echo "## Files Changed"
1172
- echo '```'
1173
- echo "$files_changed"
1174
- echo '```'
1175
- echo "Note: Files Changed may include pre-existing uncommitted changes if the campaign started in a dirty worktree."
1176
- } | atomic_write "$report_file"
1177
-
1178
- log "Campaign report written: $report_file"
1179
- }
1180
-
1181
- # =============================================================================
1182
- # Sentinel Writers
1183
- # =============================================================================
1184
-
1185
- # --- governance.md s7: Only the Leader writes sentinels ---
1186
- write_complete_sentinel() {
1187
- local summary="$1"
1188
- echo "# Campaign Complete
1189
-
1190
- Completed at iteration $ITERATION.
1191
- $summary
1192
-
1193
- Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$COMPLETE_SENTINEL"
1194
- log "COMPLETE sentinel written: $COMPLETE_SENTINEL"
1195
- }
1196
-
1197
- write_blocked_sentinel() {
1198
- local reason="$1"
1199
- echo "# Campaign Blocked
1200
-
1201
- Blocked at iteration $ITERATION.
1202
- Reason: $reason
1203
-
1204
- Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
1205
- log "BLOCKED sentinel written: $BLOCKED_SENTINEL"
1206
- }
1207
-
1208
1190
  # =============================================================================
1209
1191
  # Cleanup (trap handler)
1210
1192
  # =============================================================================
@@ -1213,17 +1195,21 @@ cleanup() {
1213
1195
  log "Cleaning up..."
1214
1196
 
1215
1197
  # Remove lockfile
1216
- rm -f "$DESK/logs/.rlp-desk-$SLUG.lock" 2>/dev/null
1198
+ if (( LOCKFILE_ACQUIRED )); then
1199
+ rm -f "$LOCKFILE_PATH" 2>/dev/null
1200
+ else
1201
+ log_debug "cleanup: lockfile not owned by this process, skipping removal"
1202
+ fi
1217
1203
 
1218
1204
  # Kill claude processes then kill panes
1219
1205
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
1220
1206
  if [[ -n "${WORKER_PANE:-}" ]]; then
1221
1207
  tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1222
- tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
1208
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
1223
1209
  fi
1224
1210
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
1225
1211
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1226
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1212
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
1227
1213
  fi
1228
1214
  sleep 2
1229
1215
  # Kill panes on completion
@@ -1242,6 +1228,9 @@ cleanup() {
1242
1228
  # AC4: Generate campaign report on all terminal states (always-on)
1243
1229
  generate_campaign_report
1244
1230
 
1231
+ # US-001: Generate SV report after campaign report (tmux mode)
1232
+ generate_sv_report
1233
+
1245
1234
  # Print summary
1246
1235
  local end_time
1247
1236
  end_time=$(date +%s)
@@ -1254,6 +1243,13 @@ cleanup() {
1254
1243
  elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
1255
1244
  else final_status="TIMEOUT"; fi
1256
1245
 
1246
+ # --- Update metadata.json with final status ---
1247
+ if [[ -f "$METADATA_FILE" ]]; then
1248
+ jq --arg status "$final_status" --arg end_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1249
+ '.campaign_status = $status | .end_time = $end_time' \
1250
+ "$METADATA_FILE" > "${METADATA_FILE}.tmp" && mv "${METADATA_FILE}.tmp" "$METADATA_FILE"
1251
+ fi
1252
+
1257
1253
  if (( DEBUG )); then
1258
1254
  local end_ts=$(date +%s)
1259
1255
  local elapsed=$((end_ts - START_TIME))
@@ -1350,6 +1346,7 @@ poll_for_signal() {
1350
1346
  local trigger_file="$4"
1351
1347
  local role="$5" # "worker" or "verifier"
1352
1348
  local nudge_count=0
1349
+ local api_retry_count=0
1353
1350
  local poll_start
1354
1351
  poll_start=$(date +%s)
1355
1352
 
@@ -1374,6 +1371,54 @@ poll_for_signal() {
1374
1371
  return 0 # success
1375
1372
  fi
1376
1373
 
1374
+ # A4 fallback: done-claim exists but no signal → Worker forgot iter-signal
1375
+ # ONLY for Worker polling — Verifier waits for verdict file, not done-claim
1376
+ if [[ "$role" != *erifier* && -f "$DONE_CLAIM_FILE" && ! -f "$signal_file" ]]; then
1377
+ local dc_us_id
1378
+ dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
1379
+ if [[ -n "$dc_us_id" && "$dc_us_id" != "null" ]]; then
1380
+ log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Auto-generating signal (A4 fallback)."
1381
+ log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
1382
+ echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim without signal)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
1383
+ return 0
1384
+ fi
1385
+ fi
1386
+
1387
+ # API transient-error recovery with bounded backoff
1388
+ local pane_output_for_retry
1389
+ pane_output_for_retry=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null || true)
1390
+ local is_api_text_retry=0
1391
+ if [[ -n "$pane_output_for_retry" ]] &&
1392
+ ( echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])500([^[:digit:]]|$)' \
1393
+ || echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])529([^[:digit:]]|$)' \
1394
+ || echo "$pane_output_for_retry" | grep -qi 'overloaded' \
1395
+ || echo "$pane_output_for_retry" | grep -qi 'too many requests' \
1396
+ || echo "$pane_output_for_retry" | grep -qi 'service unavailable' ); then
1397
+ is_api_text_retry=1
1398
+ fi
1399
+
1400
+ if (( is_api_text_retry )) || is_api_error "$pane_id"; then
1401
+ (( api_retry_count++ ))
1402
+ log_debug "[FLOW] iter=$ITERATION api_retry=${api_retry_count}/${_API_MAX_RETRIES} role=${role} reason=tmux_pane_api_error"
1403
+ if (( api_retry_count >= _API_MAX_RETRIES )); then
1404
+ log_error "API unavailable after ${_API_MAX_RETRIES} retries"
1405
+ write_blocked_sentinel "API unavailable after ${_API_MAX_RETRIES} retries"
1406
+ return 2
1407
+ fi
1408
+ # A5: If pane shows "queued messages" or rate-limit corruption, restart pane
1409
+ if echo "$pane_output_for_retry" | grep -qi 'queued messages'; then
1410
+ log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
1411
+ log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
1412
+ tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
1413
+ tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
1414
+ wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
1415
+ fi
1416
+ sleep "$_API_RETRY_INTERVAL_S"
1417
+ continue
1418
+ else
1419
+ api_retry_count=0
1420
+ fi
1421
+
1377
1422
  # Check heartbeat freshness (tmux pattern)
1378
1423
  if [[ -f "$heartbeat_file" ]]; then
1379
1424
  if check_heartbeat_exited "$heartbeat_file"; then
@@ -1383,9 +1428,13 @@ poll_for_signal() {
1383
1428
  log " Signal file detected after process exit: $signal_file"
1384
1429
  return 0
1385
1430
  fi
1386
- log_error "$role exited without writing signal file"
1387
- # Attempt restart with exponential backoff
1388
- if restart_worker "$pane_id" "$ITERATION" "$trigger_file"; then
1431
+ # Dispatch to engine-specific exit handler
1432
+ if [[ "$WORKER_ENGINE" = "codex" && "$role" != *erifier* ]]; then
1433
+ handle_worker_exit_codex "$ITERATION" "$signal_file"
1434
+ return 0
1435
+ fi
1436
+ # Claude path (or verifier of any engine)
1437
+ if handle_worker_exit_claude "$pane_id" "$ITERATION" "$trigger_file"; then
1389
1438
  # Reset poll timer for the restart
1390
1439
  poll_start=$(date +%s)
1391
1440
  nudge_count=0
@@ -1421,13 +1470,24 @@ poll_for_signal() {
1421
1470
  fi
1422
1471
  fi
1423
1472
 
1473
+ # Dead pane detection during poll: check if claude/codex process died
1474
+ local poll_cmd
1475
+ poll_cmd=$(tmux display-message -p -t "$pane_id" '#{pane_current_command}' 2>/dev/null)
1476
+ # Dead pane detection — delegates to check_dead_pane() for engine-aware logic
1477
+ if check_dead_pane "$poll_cmd" "$WORKER_ENGINE" "$role"; then
1478
+ log " WARNING: $role pane $pane_id has bare shell ($poll_cmd) — process died during execution"
1479
+ log_debug "[GOV] iter=$ITERATION pane_dead_during_poll=true pane=$pane_id cmd=$poll_cmd role=$role"
1480
+ # Return failure so caller can handle recovery
1481
+ return 1
1482
+ fi
1483
+
1424
1484
  # Auto-approve permission prompts during poll
1425
1485
  local poll_capture
1426
1486
  poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
1427
1487
  if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
1428
1488
  log " Permission prompt detected during poll, auto-approving..."
1429
1489
  log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
1430
- tmux send-keys -t "$pane_id" Enter
1490
+ tmux send-keys -t "$pane_id" C-m
1431
1491
  sleep 0.5
1432
1492
  fi
1433
1493
 
@@ -1438,38 +1498,6 @@ poll_for_signal() {
1438
1498
  done
1439
1499
  }
1440
1500
 
1441
- # =============================================================================
1442
- # Circuit Breaker: Stale Context Detection
1443
- # =============================================================================
1444
-
1445
- # --- governance.md s7 step 8: Stale context detection ---
1446
- compute_context_hash() {
1447
- if [[ -f "$CONTEXT_FILE" ]]; then
1448
- md5 -q "$CONTEXT_FILE" 2>/dev/null || md5sum "$CONTEXT_FILE" 2>/dev/null | cut -d' ' -f1
1449
- else
1450
- echo "no-context"
1451
- fi
1452
- }
1453
-
1454
- check_stale_context() {
1455
- local current_hash
1456
- current_hash=$(compute_context_hash)
1457
-
1458
- if [[ "$current_hash" == "$PREV_CONTEXT_HASH" ]]; then
1459
- (( STALE_CONTEXT_COUNT++ ))
1460
- log " WARNING: Context unchanged ($STALE_CONTEXT_COUNT/3 stale iterations)"
1461
- if (( STALE_CONTEXT_COUNT >= 3 )); then
1462
- log_error "Circuit breaker: context unchanged for 3 consecutive iterations"
1463
- return 1
1464
- fi
1465
- else
1466
- STALE_CONTEXT_COUNT=0
1467
- fi
1468
-
1469
- PREV_CONTEXT_HASH="$current_hash"
1470
- return 0
1471
- }
1472
-
1473
1501
  # =============================================================================
1474
1502
  # Consensus Verification (run two verifiers sequentially in same pane)
1475
1503
  # =============================================================================
@@ -1487,13 +1515,26 @@ run_single_verifier() {
1487
1515
  local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
1488
1516
  local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
1489
1517
 
1490
- # Clean previous Verifier session
1518
+ # Clean previous Verifier session (with dead pane detection)
1491
1519
  local verifier_cmd
1492
1520
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1493
- if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1521
+ if [[ -z "$verifier_cmd" ]]; then
1522
+ log " Verifier pane $VERIFIER_PANE is gone — replacing..."
1523
+ log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
1524
+ replace_worker_pane "$VERIFIER_PANE" "verifier"
1525
+ VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
1526
+ log " New verifier pane: $VERIFIER_PANE"
1527
+ elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
1528
+ log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
1529
+ log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
1530
+ tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
1531
+ sleep 0.2
1532
+ tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
1533
+ sleep 0.3
1534
+ elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1494
1535
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1495
1536
  sleep 0.5
1496
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1537
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
1497
1538
  sleep 2
1498
1539
  fi
1499
1540
  # Always ensure clean shell state before launching new verifier
@@ -1505,55 +1546,19 @@ run_single_verifier() {
1505
1546
  # Remove previous verdict file
1506
1547
  rm -f "$VERDICT_FILE" 2>/dev/null
1507
1548
 
1508
- # Launch verifier
1549
+ # Launch verifier — dispatch to engine-specific function
1550
+ local verifier_launch
1509
1551
  if [[ "$engine" = "codex" ]]; then
1510
- # Codex: use non-interactive exec mode in pane (more reliable than TUI for sequential runs)
1511
- local codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1512
- log " Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
1513
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
1514
- tmux send-keys -t "$VERIFIER_PANE" Enter
1515
- log_debug "Verifier$suffix codex exec sent directly"
1552
+ verifier_launch="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1553
+ launch_verifier_codex "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"
1554
+ log_debug "Verifier$suffix codex exec dispatched"
1516
1555
  else
1517
- # Claude: use interactive TUI
1518
- local verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
1519
- log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
1520
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1521
- tmux send-keys -t "$VERIFIER_PANE" Enter
1522
-
1523
- if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1556
+ verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
1557
+ if ! launch_verifier_claude "$VERIFIER_PANE" "$prompt_file" "$iter" "$verifier_launch"; then
1524
1558
  log_error "Verifier$suffix failed to start"
1525
1559
  return 1
1526
1560
  fi
1527
-
1528
- sleep 3
1529
- local verifier_instruction="Read and execute the instructions in $prompt_file"
1530
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1531
- tmux send-keys -t "$VERIFIER_PANE" Enter
1532
- log_debug "Verifier$suffix instruction sent directly"
1533
-
1534
- # Verify claude actually started working
1535
- local v_submit=0
1536
- while (( v_submit < 15 )); do
1537
- sleep 2
1538
- local v_check
1539
- v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
1540
- if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
1541
- log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
1542
- break
1543
- fi
1544
- # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1545
- if (( v_submit == 8 )); then
1546
- log_debug "Adaptive instruction retry: clearing line and re-typing"
1547
- tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
1548
- sleep 0.1
1549
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1550
- tmux send-keys -t "$VERIFIER_PANE" Enter
1551
- fi
1552
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1553
- sleep 0.3
1554
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1555
- (( v_submit++ ))
1556
- done
1561
+ log_debug "Verifier$suffix claude dispatched"
1557
1562
  fi
1558
1563
 
1559
1564
  # Poll for verdict
@@ -1581,6 +1586,10 @@ run_single_verifier() {
1581
1586
  # Claude: use full poll_for_signal with heartbeat/nudge
1582
1587
  log " Polling for verify-verdict.json ($suffix)..."
1583
1588
  if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
1589
+ local verifier_poll_rc=$?
1590
+ if (( verifier_poll_rc == 2 )); then
1591
+ return 1
1592
+ fi
1584
1593
  log_error "Verifier$suffix poll failed"
1585
1594
  return 1
1586
1595
  fi
@@ -1592,6 +1601,110 @@ run_single_verifier() {
1592
1601
  return 0
1593
1602
  }
1594
1603
 
1604
+ # --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
1605
+ # Args: $1 = iteration number. Returns 0 if every US and the integration check pass, 1 if a US fails (launch, poll, or verdict != "pass"), 2 if the integration check fails.
1606
+ # Sets FAILED_US global on failure (US id, or "integration"). NOTE(review): SIGNAL_FILE is overwritten per US and never restored from orig_signal — confirm callers do not need the original.
1607
+ run_sequential_final_verify() {
1608
+ local iter="$1"
1609
+ FAILED_US=""
1610
+
1611
+ log " Sequential final verify: ${US_LIST} (${VERIFY_MODE} mode)"
1612
+ log_debug "[FLOW] iter=$iter phase=sequential_final_verify us_list=$US_LIST"
1613
+
1614
+ for us in $(echo "$US_LIST" | tr ',' ' '); do
1615
+ log " Final verify: checking $us..."
1616
+
1617
+ # Temporarily override signal file to scope verifier to this US
1618
+ local orig_signal
1619
+ orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
1620
+ echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"
1621
+
1622
+ # Write scoped verifier trigger
1623
+ write_verifier_trigger "$iter"
1624
+ local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"
1625
+
1626
+ # Clean verifier pane
1627
+ local verifier_cmd
1628
+ verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1629
+ if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1630
+ tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
1631
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
1632
+ fi
1633
+ wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
1634
+
1635
+ # Launch verifier (previous verdict is removed first, as run_single_verifier does, so a fresh verdict is never deleted)
1636
+ rm -f "$VERDICT_FILE"
1637
+ local verifier_launch
1638
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
1639
+ verifier_launch="${CODEX_BIN:-codex} exec \"\$(cat $verifier_prompt)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1640
+ launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
1641
+ else
1642
+ verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
1643
+ launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
1644
+ log_error "Failed to launch verifier for $us"
1645
+ FAILED_US="$us"
1646
+ return 1
1647
+ }
1648
+ fi
1649
+
1650
+ # Poll for verdict — same 5-arg call shape as run_single_verifier: signal file, heartbeat file, pane id, launch command, role
1651
+ local poll_rc=0
1652
+ poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier" || poll_rc=$?
1653
+ if (( poll_rc != 0 )); then
1654
+ log_error "Verifier poll failed for $us (rc=$poll_rc)"
1655
+ FAILED_US="$us"
1656
+ return 1
1657
+ fi
1658
+
1659
+ # Check verdict
1660
+ local verdict
1661
+ verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
1662
+ if [[ "$verdict" != "pass" ]]; then
1663
+ FAILED_US="$us"
1664
+ log " Sequential final verify FAILED at $us"
1665
+ log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
1666
+ return 1
1667
+ fi
1668
+ log " Sequential final verify: $us PASSED"
1669
+
1670
+ # Archive per-US final verdict
1671
+ cp "$VERDICT_FILE" "$LOGS_DIR/iter-$(printf '%03d' $iter).final-verdict-${us}.json" 2>/dev/null
1672
+ done
1673
+
1674
+ # Integration check: run tests if VERIFICATION_CMD is set
1675
+ if [[ -n "${VERIFICATION_CMD:-}" ]]; then
1676
+ log " Running integration test suite after sequential verify..."
1677
+ log_debug "[FLOW] iter=$iter phase=integration_check cmd=$VERIFICATION_CMD"
1678
+ if ! eval "$VERIFICATION_CMD" > /dev/null 2>&1; then
1679
+ log " Integration test suite FAILED"
1680
+ FAILED_US="integration"
1681
+ return 2
1682
+ fi
1683
+ log " Integration test suite PASSED"
1684
+ fi
1685
+
1686
+ log " Sequential final verify: ALL PASSED"
1687
+ return 0
1688
+ }
1689
+
1690
+ # --- US-005: Determine whether consensus verification should run for this signal ---
1691
+ # Args: $1 = us_id taken from the signal (optional; treated as empty when omitted). Returns 0 (use consensus) or 1 (single engine).
1692
+ # VERIFY_CONSENSUS=1 with CONSENSUS_SCOPE=all selects consensus for every signal; with CONSENSUS_SCOPE=final-only, only when us_id is "ALL".
1693
+ # FINAL_CONSENSUS independently enables consensus for the final ALL verify only.
1694
+ _should_use_consensus() {
1695
+ local signal_us_id="${1:-}"
1696
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1697
+ case "$CONSENSUS_SCOPE" in
1698
+ all) return 0 ;;
1699
+ final-only) [[ "$signal_us_id" == "ALL" ]] && return 0 ;; # a non-ALL us_id falls through to the FINAL_CONSENSUS check below
1700
+ esac
1701
+ fi
1702
+ if [[ "$FINAL_CONSENSUS" = "1" && "$signal_us_id" == "ALL" ]]; then
1703
+ return 0
1704
+ fi
1705
+ return 1 # neither setting selected consensus for this us_id
1706
+ }
1707
+
1595
1708
  # --- US-004: Run consensus verification (claude + codex sequentially) ---
1596
1709
  run_consensus_verification() {
1597
1710
  local iter="$1"
@@ -1607,18 +1720,45 @@ run_consensus_verification() {
1607
1720
  log " Consensus round $CONSENSUS_ROUND/6..."
1608
1721
 
1609
1722
  # Run claude verifier first
1723
+ local _claude_t0=$(date +%s)
1610
1724
  if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
1611
1725
  log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
1612
1726
  return 1
1613
1727
  fi
1728
+ ITER_VERIFIER_CLAUDE_DURATION_S=$(( $(date +%s) - _claude_t0 ))
1614
1729
  CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
1730
+ # A12 fix: validate claude verdict is not null/empty — if so, retry once before proceeding
1731
+ if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
1732
+ log " WARNING: Claude verdict is '$CLAUDE_VERDICT' — likely interrupted. Retrying claude verifier..."
1733
+ log_debug "[GOV] iter=$iter phase=consensus_claude_retry reason=null_verdict"
1734
+ rm -f "$claude_verdict_file" 2>/dev/null
1735
+ if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
1736
+ log_error "Claude verifier retry also failed"
1737
+ return 1
1738
+ fi
1739
+ CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
1740
+ if [[ -z "$CLAUDE_VERDICT" || "$CLAUDE_VERDICT" == "null" ]]; then
1741
+ log_error "Claude verdict still null after retry — consensus cannot proceed"
1742
+ return 1
1743
+ fi
1744
+ fi
1615
1745
  log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
1616
1746
 
1747
+ # F8: --consensus-fail-fast — skip second verifier if first fails
1748
+ if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
1749
+ log " Consensus fail-fast: claude=fail, skipping codex verifier"
1750
+ log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
1751
+ CODEX_VERDICT="skipped"
1752
+ return 2 # disagreement/fail signal
1753
+ fi
1754
+
1617
1755
  # Run codex verifier second
1756
+ local _codex_t0=$(date +%s)
1618
1757
  if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
1619
1758
  log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
1620
1759
  return 1
1621
1760
  fi
1761
+ ITER_VERIFIER_CODEX_DURATION_S=$(( $(date +%s) - _codex_t0 ))
1622
1762
  CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
1623
1763
  log_debug "[GOV] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
1624
1764
 
@@ -1722,43 +1862,35 @@ run_consensus_verification() {
1722
1862
  return 1
1723
1863
  }
1724
1864
 
1725
- # =============================================================================
1726
- # Security Warning
1727
- # =============================================================================
1728
-
1729
- print_security_warning() {
1730
- echo ""
1731
- echo "================================================================"
1732
- echo " WARNING: Running with --dangerously-skip-permissions"
1733
- echo ""
1734
- echo " The claude CLI will execute tools (file writes, shell commands)"
1735
- echo " without asking for confirmation. Only run this on code you"
1736
- echo " trust in an environment you control."
1737
- echo "================================================================"
1738
- echo ""
1739
- }
1740
-
1741
1865
  # =============================================================================
1742
1866
  # Main Leader Loop
1743
1867
  # =============================================================================
1744
1868
 
1745
1869
  main() {
1746
1870
  # --- Lockfile: prevent duplicate execution ---
1747
- local lockfile="$DESK/logs/.rlp-desk-$SLUG.lock"
1871
+ local lockfile="$LOCKFILE_PATH"
1748
1872
  mkdir -p "$(dirname "$lockfile")" 2>/dev/null
1749
1873
  if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
1750
1874
  local lock_pid
1751
1875
  lock_pid=$(cat "$lockfile" 2>/dev/null)
1752
1876
  if kill -0 "$lock_pid" 2>/dev/null; then
1753
- log_error "Another instance is already running (PID $lock_pid)"
1877
+ log_error "Another instance is already running (PID $lock_pid). Kill $lock_pid or rm $lockfile"
1754
1878
  exit 1
1755
1879
  fi
1756
1880
  # Stale lock — overwrite
1881
+ log "Stale lock detected (PID ${lock_pid:-unknown} not running), recovering"
1757
1882
  echo $$ > "$lockfile"
1883
+ LOCKFILE_ACQUIRED=1
1884
+ else
1885
+ LOCKFILE_ACQUIRED=1
1758
1886
  fi
1759
- mkdir -p "$LOGS_DIR" 2>/dev/null
1887
+ trap cleanup EXIT INT TERM
1888
+ mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
1889
+
1890
+ # --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
1891
+ mkdir -p "$ANALYTICS_DIR" 2>/dev/null
1760
1892
 
1761
- # --- AC7: debug.log versioning: rename existing debug.log before fresh run ---
1893
+ # --- debug.log versioning (in analytics dir, --debug only) ---
1762
1894
  if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
1763
1895
  local dbg_n=1
1764
1896
  while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
@@ -1767,6 +1899,31 @@ main() {
1767
1899
  mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
1768
1900
  fi
1769
1901
 
1902
+ # --- campaign.jsonl versioning (always-on) ---
1903
+ if [[ -f "$CAMPAIGN_JSONL" ]]; then
1904
+ local cj_n=1
1905
+ while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
1906
+ (( cj_n++ ))
1907
+ done
1908
+ mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
1909
+ fi
1910
+
1911
+ # --- metadata.json: always write at campaign start (cross-project identification) ---
1912
+ jq -n \
1913
+ --arg slug "$SLUG" \
1914
+ --arg project_root "$ROOT" \
1915
+ --arg project_name "$(basename "$ROOT")" \
1916
+ --arg campaign_status "running" \
1917
+ --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1918
+ --arg end_time "" \
1919
+ --arg worker_model "$WORKER_MODEL" \
1920
+ --arg verifier_model "$VERIFIER_MODEL" \
1921
+ --argjson debug "$DEBUG" \
1922
+ --argjson with_sv "$WITH_SELF_VERIFICATION" \
1923
+ --argjson consensus "$VERIFY_CONSENSUS" \
1924
+ '{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
1925
+ > "$METADATA_FILE"
1926
+
1770
1927
  # --- Startup ---
1771
1928
  log "Ralph Desk Tmux Runner starting..."
1772
1929
  log " Slug: $SLUG"
@@ -1776,6 +1933,7 @@ main() {
1776
1933
  log " Verifier model: $VERIFIER_MODEL"
1777
1934
  log " Verify mode: $VERIFY_MODE"
1778
1935
  log " Verify consensus:$VERIFY_CONSENSUS"
1936
+ log " Final consensus: $FINAL_CONSENSUS"
1779
1937
  log " Consensus scope: $CONSENSUS_SCOPE"
1780
1938
  log " Poll interval: ${POLL_INTERVAL}s"
1781
1939
  log " Iter timeout: ${ITER_TIMEOUT}s"
@@ -1819,9 +1977,9 @@ main() {
1819
1977
  US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1820
1978
  fi
1821
1979
 
1822
- # Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
1823
- local memory_file="$DESK/memos/${SLUG}-memory.md"
1824
- if [[ -f "$memory_file" ]]; then
1980
+ # Initialize VERIFIED_US from memory's Completed Stories (carry over previous runs)
1981
+ local memory_file="$DESK/memos/${SLUG}-memory.md"
1982
+ if [[ -f "$memory_file" ]]; then
1825
1983
  local completed_us
1826
1984
  completed_us=$(sed -n '/^## Completed Stories$/,/^## /p' "$memory_file" 2>/dev/null | grep '^- US-' | sed 's/^- \(US-[0-9]*\):.*/\1/' | sort -u | tr '\n' ',' | sed 's/,$//')
1827
1985
  if [[ -n "$completed_us" ]]; then
@@ -1830,8 +1988,23 @@ main() {
1830
1988
  log_debug "[FLOW] loaded_verified_us_from_memory=$VERIFIED_US"
1831
1989
  fi
1832
1990
  fi
1991
+
1992
+ # D1: Fallback — restore verified_us from status.json if memory had none
1993
+ if [[ -z "$VERIFIED_US" && -f "$STATUS_FILE" ]]; then
1994
+ local status_verified
1995
+ status_verified=$(jq -r '.verified_us // [] | join(",")' "$STATUS_FILE" 2>/dev/null)
1996
+ if [[ -n "$status_verified" ]]; then
1997
+ VERIFIED_US="$status_verified"
1998
+ log " Restored verified_us from status.json: $VERIFIED_US"
1999
+ log_debug "[FLOW] restored_verified_us_from_status=$VERIFIED_US"
2000
+ fi
2001
+ fi
1833
2002
  fi
1834
2003
 
2004
+ # Initialize PRD snapshot state for live update detection
2005
+ PREV_PRD_HASH=$(compute_prd_hash)
2006
+ PREV_PRD_US_LIST=$(count_prd_us)
2007
+
1835
2008
  # Dependency checks
1836
2009
  check_dependencies
1837
2010
 
@@ -1854,7 +2027,7 @@ main() {
1854
2027
  PREV_CONTEXT_HASH=$(compute_context_hash)
1855
2028
 
1856
2029
  # --- governance.md s7: Leader Loop ---
1857
- local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # absolute max per iteration (no extensions beyond this)
2030
+ local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # logged but NOT enforced Worker extends indefinitely when active
1858
2031
 
1859
2032
  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
1860
2033
  log ""
@@ -1887,7 +2060,7 @@ main() {
1887
2060
  # Send C-c first (in case claude is mid-task), then /exit
1888
2061
  tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1889
2062
  sleep 1
1890
- tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
2063
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
1891
2064
  sleep 2
1892
2065
  # Wait for shell prompt before proceeding
1893
2066
  wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
@@ -1896,96 +2069,66 @@ main() {
1896
2069
  # Reset per-iteration state
1897
2070
  local worker_nudge_count=0
1898
2071
  local verifier_nudge_count=0
2072
+ ITER_VERIFIER_START=""
2073
+ ITER_VERIFIER_END=""
2074
+
2075
+ # --- US-004: detect PRD changes for live update + re-split ---
2076
+ check_prd_update
1899
2077
 
1900
2078
  # --- governance.md s7 step 4: Build worker prompt + trigger ---
1901
2079
  write_worker_trigger "$ITERATION"
1902
2080
  local worker_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-prompt.md"
1903
2081
 
2082
+ # AC1: capture worker start timestamp
2083
+ ITER_WORKER_START=$(date +%s)
2084
+
1904
2085
  update_status "worker" "running"
1905
2086
 
1906
- # --- governance.md s7 step 5: Execute Worker (interactive TUI, tmux pattern) ---
1907
- # Step 5a: Launch interactive worker engine in Worker pane
2087
+ # --- governance.md s7 step 5: Execute Worker (dispatched to engine-specific function) ---
2088
+ log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
2089
+
1908
2090
  local worker_launch
1909
2091
  if [[ "$WORKER_ENGINE" = "codex" ]]; then
1910
- worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1911
- log " Launching Worker codex in pane $WORKER_PANE..."
2092
+ local worker_trigger="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).worker-trigger.sh"
2093
+ worker_launch="bash $worker_trigger"
2094
+ launch_worker_codex "$WORKER_PANE" "$worker_trigger" "$ITERATION"
1912
2095
  else
1913
2096
  worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
1914
- log " Launching Worker claude in pane $WORKER_PANE..."
1915
- fi
1916
- tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
1917
- tmux send-keys -t "$WORKER_PANE" Enter
1918
- log_debug "[FLOW] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
1919
-
1920
- # Step 5b: Wait for claude TUI to be ready (tmux pattern)
1921
- if ! wait_for_pane_ready "$WORKER_PANE" 30; then
1922
- log_error "Worker claude failed to start"
1923
- write_blocked_sentinel "Worker claude failed to start in pane"
1924
- update_status "blocked" "worker_start_failed"
1925
- return 1
1926
- fi
1927
-
1928
- # Step 5c: Wait for claude to fully initialize, then send instruction directly
1929
- sleep 3
1930
- local worker_instruction="Read and execute the instructions in $worker_prompt"
1931
- tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1932
- tmux send-keys -t "$WORKER_PANE" Enter
1933
- log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
1934
-
1935
- # Verify claude actually started working — keep sending C-m until activity detected
1936
- local submit_attempts=0
1937
- while (( submit_attempts < 15 )); do
1938
- sleep 2
1939
- local pane_check
1940
- pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
1941
- if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1942
- log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
1943
- log_debug "[FLOW] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
1944
- break
1945
- fi
1946
- # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1947
- if (( submit_attempts == 8 )); then
1948
- log_debug "Adaptive instruction retry: clearing line and re-typing"
1949
- tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
1950
- sleep 0.1
1951
- tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1952
- tmux send-keys -t "$WORKER_PANE" Enter
2097
+ if ! launch_worker_claude "$WORKER_PANE" "$worker_prompt" "$ITERATION" "$worker_launch"; then
2098
+ write_blocked_sentinel "Worker claude failed to start in pane"
2099
+ update_status "blocked" "worker_start_failed"
2100
+ return 1
1953
2101
  fi
1954
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1955
- sleep 0.3
1956
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1957
- (( submit_attempts++ ))
1958
- done
1959
- if (( submit_attempts >= 15 )); then
1960
- log " WARNING: Could not confirm Worker started working after 15 attempts"
1961
- log_debug "[FLOW] iter=$ITERATION worker_submit_check=FAILED attempts=15"
1962
2102
  fi
1963
2103
 
1964
2104
  # --- governance.md s7 step 5+6: Poll for Worker completion ---
1965
2105
  log " Polling for iter-signal.json..."
1966
2106
  local worker_poll_done=0
1967
2107
  while (( ! worker_poll_done )); do
2108
+ local worker_poll_rc=0
1968
2109
  if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
1969
2110
  worker_poll_done=1
1970
2111
  log_debug "[FLOW] iter=$ITERATION poll_signal_received=true"
1971
2112
  else
2113
+ worker_poll_rc=$?
2114
+ if (( worker_poll_rc == 2 )); then
2115
+ return 1
2116
+ fi
1972
2117
  # Check if Worker is still actively running (not stuck)
1973
2118
  local worker_cmd
1974
2119
  worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
1975
2120
  if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
1976
- # Check hard ceiling before extending
2121
+ # Process alive — extend indefinitely (no hard ceiling kill)
2122
+ # Stale-context breaker and nudge system handle truly stuck workers
1977
2123
  local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
2124
+ local ceiling_exceeded=""
1978
2125
  if (( iter_elapsed >= HARD_CEILING )); then
1979
- log_error "Worker hit hard ceiling (${HARD_CEILING}s = 3x iter_timeout). Killing iteration."
1980
- log_debug "[GOV] iter=$ITERATION hard_ceiling_hit=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd"
1981
- tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1982
- sleep 1
1983
- write_blocked_sentinel "Worker hit hard ceiling (${HARD_CEILING}s). Pane preserved for inspection."
1984
- update_status "blocked" "hard_timeout"
1985
- return 1
2126
+ ceiling_exceeded=" [EXCEEDED hard_ceiling=${HARD_CEILING}s not enforced, logged only]"
2127
+ log " WARNING: Worker exceeded soft hard-ceiling (${iter_elapsed}s >= ${HARD_CEILING}s) but still active. Continuing..."
2128
+ log_debug "[GOV] iter=$ITERATION hard_ceiling_exceeded=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd action=log_only_no_kill"
1986
2129
  fi
1987
- log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s/${HARD_CEILING}s)"
1988
- log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s"
2130
+ log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s, no ceiling)${ceiling_exceeded}"
2131
+ log_debug "[GOV] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s action=extend_indefinitely"
1989
2132
  log_debug "[FLOW] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
1990
2133
  update_status "worker" "slow"
1991
2134
  # Loop continues — re-poll same iteration
@@ -2019,6 +2162,11 @@ main() {
2019
2162
  # Reset monitor failure count on success
2020
2163
  MONITOR_FAILURE_COUNT=0
2021
2164
 
2165
+ # AC1: capture worker end timestamp; reset consensus timing
2166
+ ITER_WORKER_END=$(date +%s)
2167
+ ITER_VERIFIER_CLAUDE_DURATION_S=""
2168
+ ITER_VERIFIER_CODEX_DURATION_S=""
2169
+
2022
2170
  # --- governance.md s7 step 6: Read iter-signal.json via jq (JSON only, no markdown) ---
2023
2171
  local signal_status
2024
2172
  signal_status=$(jq -r '.status' "$SIGNAL_FILE" 2>/dev/null)
@@ -2045,17 +2193,34 @@ main() {
2045
2193
  signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
2046
2194
  log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
2047
2195
 
2196
+ # AC1: capture verifier start timestamp
2197
+ ITER_VERIFIER_START=$(date +%s)
2198
+
2048
2199
  update_status "verifier" "running"
2049
2200
 
2050
- # --- Consensus scope check ---
2051
- local use_consensus=0
2052
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
2053
- case "$CONSENSUS_SCOPE" in
2054
- all) use_consensus=1 ;;
2055
- final-only) [[ "$signal_us_id" == "ALL" ]] && use_consensus=1 ;;
2056
- esac
2201
# --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
# Taken only when the Worker signalled ALL, VERIFY_MODE is per-us, and US_LIST
# is non-empty. On success the campaign is completed right here; on failure the
# scope is narrowed to the failing story and a fail verdict is written to
# VERDICT_FILE before execution continues below.
if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
  log " Final ALL verify: using sequential per-US strategy (timeout prevention)"
  local seq_rc=0
  # `|| seq_rc=$?` captures the real exit status without tripping errexit.
  run_sequential_final_verify "$ITERATION" || seq_rc=$?
  if (( seq_rc == 0 )); then
    write_complete_sentinel "Sequential final verify passed (all US verified individually)"
    update_status "complete" "pass"
    write_campaign_jsonl "$ITERATION" "ALL" "pass"
    return 0
  else
    # Sequential verify failed — continue with the failed US as the scope.
    # NOTE(review): FAILED_US is presumably set by run_sequential_final_verify — confirm.
    log " Sequential final verify failed at ${FAILED_US:-unknown}. Entering fix loop."
    signal_us_id="${FAILED_US:-ALL}"
    # Synthesize a fail verdict for the fix loop. Built with jq (compact output)
    # instead of string interpolation so the values are always JSON-escaped.
    jq -cn \
      --arg summary "Sequential final verify failed at ${FAILED_US:-unknown}" \
      --arg criterion "${FAILED_US:-ALL}" \
      '{verdict: "fail", summary: $summary, issues: [{severity: "critical", criterion: $criterion, description: "Failed during sequential final verification"}]}' \
      | atomic_write "$VERDICT_FILE"
  fi
fi
2058
2219
 
2220
+ # --- Consensus scope check (US-005: _should_use_consensus handles VERIFY_CONSENSUS + FINAL_CONSENSUS) ---
2221
+ local use_consensus=0
2222
+ _should_use_consensus "$signal_us_id" && use_consensus=1
2223
+
2059
2224
  # --- Consensus vs single verification ---
2060
2225
  if (( use_consensus )); then
2061
2226
  # US-004: Run consensus verification (claude + codex sequentially)
@@ -2077,70 +2242,54 @@ main() {
2077
2242
  write_verifier_trigger "$ITERATION"
2078
2243
  local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
2079
2244
 
2080
- # Step 7a: Clean previous Verifier session if running
2245
+ # Step 7a: Clean previous Verifier session (with dead pane detection)
2081
2246
  local verifier_cmd
2082
2247
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
2083
- if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
2248
+ if [[ -z "$verifier_cmd" ]]; then
2249
+ log " Verifier pane $VERIFIER_PANE is gone — replacing..."
2250
+ log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE action=replace_pane"
2251
+ replace_worker_pane "$VERIFIER_PANE" "verifier"
2252
+ VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
2253
+ log " New verifier pane: $VERIFIER_PANE"
2254
+ elif [[ "$verifier_cmd" == "zsh" || "$verifier_cmd" == "bash" ]]; then
2255
+ log " Verifier pane $VERIFIER_PANE has bare shell ($verifier_cmd) — resetting..."
2256
+ log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
2257
+ tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
2258
+ sleep 0.2
2259
+ tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
2260
+ sleep 0.3
2261
+ elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
2084
2262
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
2085
2263
  sleep 0.5
2086
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
2264
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
2087
2265
  sleep 2
2088
- wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
2089
2266
  fi
2267
+ wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
2090
2268
 
2091
2269
  local verifier_launch
2092
2270
  if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2093
2271
  verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
2094
- log " Launching Verifier codex in pane $VERIFIER_PANE..."
2095
2272
  else
2096
2273
  verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
2097
- log " Launching Verifier claude in pane $VERIFIER_PANE..."
2098
2274
  fi
2099
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
2100
- tmux send-keys -t "$VERIFIER_PANE" Enter
2101
2275
  log_debug "[FLOW] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
2102
2276
 
2103
- # Step 7b: Wait for TUI to be ready
2104
- if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
2105
- log_error "Verifier failed to start"
2106
- update_status "verifier" "start_failed"
2107
- continue
2108
- fi
2109
-
2110
- # Step 7c: Send instruction
2111
- sleep 3
2112
- local verifier_instruction="Read and execute the instructions in $verifier_prompt"
2113
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
2114
- tmux send-keys -t "$VERIFIER_PANE" Enter
2115
- log_debug "Verifier instruction sent directly"
2116
-
2117
- # Verify verifier actually started working
2118
- local vs_submit=0
2119
- while (( vs_submit < 15 )); do
2120
- sleep 2
2121
- local vs_check
2122
- vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
2123
- if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
2124
- log_debug "Verifier started working after $((vs_submit + 1)) checks"
2125
- break
2126
- fi
2127
- # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
2128
- if (( vs_submit == 8 )); then
2129
- log_debug "Adaptive instruction retry: clearing line and re-typing"
2130
- tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
2131
- sleep 0.1
2132
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
2133
- tmux send-keys -t "$VERIFIER_PANE" Enter
2277
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
2278
+ launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"
2279
+ else
2280
+ if ! launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$ITERATION" "$verifier_launch"; then
2281
+ update_status "verifier" "start_failed"
2282
+ continue
2134
2283
  fi
2135
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
2136
- sleep 0.3
2137
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
2138
- (( vs_submit++ ))
2139
- done
2284
+ fi
2140
2285
 
2141
2286
  # Poll for verify-verdict.json
2142
2287
  log " Polling for verify-verdict.json..."
2143
2288
  if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
2289
+ local verifier_poll_rc=$?
2290
+ if (( verifier_poll_rc == 2 )); then
2291
+ return 1
2292
+ fi
2144
2293
  log_error "Verifier poll failed"
2145
2294
  # Verifier is dead/stuck — BLOCK and let user decide
2146
2295
  write_blocked_sentinel "Verifier process dead/stuck (poll failed). Pane preserved for inspection."
@@ -2149,6 +2298,9 @@ main() {
2149
2298
  fi
2150
2299
  fi
2151
2300
 
2301
+ # AC1: capture verifier end timestamp
2302
+ ITER_VERIFIER_END=$(date +%s)
2303
+
2152
2304
  # --- governance.md s7 step 7: Read verdict via jq ---
2153
2305
  local verdict
2154
2306
  verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
@@ -2166,6 +2318,18 @@ main() {
2166
2318
  pass)
2167
2319
  CONSECUTIVE_FAILURES=0
2168
2320
  CONSENSUS_ROUND=0
2321
+ _SAME_US_FAIL_COUNT=0
2322
+ _LAST_FAILED_US=""
2323
+ if (( _MODEL_UPGRADED )); then
2324
+ log " Worker model restored: ${WORKER_MODEL} → ${_ORIGINAL_WORKER_MODEL} (pass verdict)"
2325
+ log_debug "[DECIDE] iter=$ITERATION phase=model_select model_restore=true from=${WORKER_MODEL} to=${_ORIGINAL_WORKER_MODEL}"
2326
+ WORKER_MODEL="$_ORIGINAL_WORKER_MODEL"
2327
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
2328
+ WORKER_CODEX_MODEL="$WORKER_MODEL"
2329
+ WORKER_CODEX_REASONING="$_ORIGINAL_WORKER_CODEX_REASONING"
2330
+ fi
2331
+ _MODEL_UPGRADED=0
2332
+ fi
2169
2333
 
2170
2334
  # --- Per-US tracking ---
2171
2335
  if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
@@ -2183,6 +2347,7 @@ main() {
2183
2347
  # Final full verify passed or complete recommended
2184
2348
  write_complete_sentinel "$verdict_summary"
2185
2349
  update_status "complete" "pass"
2350
+ write_campaign_jsonl "$ITERATION" "${signal_us_id:-ALL}" "pass"
2186
2351
  return 0
2187
2352
  else
2188
2353
  log " Verifier passed but did not recommend complete. Continuing."
@@ -2192,6 +2357,7 @@ main() {
2192
2357
  fail)
2193
2358
  # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
2194
2359
  (( CONSECUTIVE_FAILURES++ ))
2360
+ check_model_upgrade "${signal_us_id:-unknown}"
2195
2361
  local verdict_summary_fail
2196
2362
  verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
2197
2363
  log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
@@ -2213,11 +2379,19 @@ main() {
2213
2379
  log " Fix contract: $fix_contract"
2214
2380
  log_debug "[DECIDE] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
2215
2381
 
2216
- # Circuit breaker: consecutive failures
2382
+ # Circuit breaker: consecutive failures (with architecture escalation when at model ceiling)
2217
2383
  if (( CONSECUTIVE_FAILURES >= EFFECTIVE_CB_THRESHOLD )); then
2218
- log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
2219
- log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2220
- write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2384
+ # For codex: use full model:reasoning string (WORKER_MODEL loses reasoning suffix after upgrade)
2385
+ _ceiling_model_str="$([[ "$WORKER_ENGINE" = "codex" ]] && echo "${WORKER_CODEX_MODEL}:${WORKER_CODEX_REASONING}" || echo "$WORKER_MODEL")"
2386
+ if (( _MODEL_UPGRADED )) && [[ -z "$(get_next_model "$_ceiling_model_str")" ]]; then
2387
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"architecture escalation: Worker at ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures\""
2388
+ log_error "Circuit breaker: architecture escalation — Worker upgraded to ceiling (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive failures"
2389
+ write_blocked_sentinel "architecture escalation: Worker upgraded to ceiling model (${WORKER_MODEL}), ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2390
+ else
2391
+ log_debug "[GOV] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"${EFFECTIVE_CB_THRESHOLD} consecutive verification failures\""
2392
+ log_error "Circuit breaker: ${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2393
+ write_blocked_sentinel "${EFFECTIVE_CB_THRESHOLD} consecutive verification failures"
2394
+ fi
2221
2395
  update_status "blocked" "consecutive_failures"
2222
2396
  return 1
2223
2397
  fi
@@ -2261,6 +2435,7 @@ main() {
2261
2435
 
2262
2436
  # --- AC5: Write per-iteration cost estimate ---
2263
2437
  write_cost_log "$ITERATION"
2438
+ write_campaign_jsonl "$ITERATION" "${signal_us_id:-unknown}" "${signal_status:-unknown}"
2264
2439
 
2265
2440
  # --- governance.md s7 step 8: Write result log ---
2266
2441
  write_result_log "$ITERATION" "$signal_status"
@@ -2279,7 +2454,6 @@ main() {
2279
2454
 
2280
2455
  # Max iterations reached
2281
2456
  log "Max iterations ($MAX_ITER) reached."
2282
- generate_campaign_report # AC4: TIMEOUT terminal path
2283
2457
  update_status "timeout" "max_iter"
2284
2458
  return 1
2285
2459
  }
@@ -2288,6 +2462,45 @@ main() {
2288
2462
  # Entry Point
2289
2463
  # =============================================================================
2290
2464
 
2465
# --- CLI: apply --worker-model / --verifier-model / --lock-worker-model / --final-consensus ---
# Walks the positional parameters by index. Values given here override the
# env-var defaults (WORKER_ENGINE, WORKER_MODEL, etc.); arguments that match
# none of the four flags are skipped, not rejected.
# Format: "model:reasoning" → codex engine; "model-name" → claude engine
# parse_model_flag's stdout is consumed as space-separated fields: the first is
# the engine, the second the model, and (codex only) the last is the reasoning.
_cli_i=1
while (( _cli_i <= $# )); do
  _cli_flag="${@[$_cli_i]}"
  if [[ "$_cli_flag" == "--worker-model" ]]; then
    # The flag's value is the next positional parameter (empty when absent).
    _cli_i=$(( _cli_i + 1 ))
    _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "worker") || exit 1
    _cli_rest="${_cli_parsed#* }"
    WORKER_ENGINE="${_cli_parsed%% *}"
    WORKER_MODEL="${_cli_rest%% *}"
    if [[ "$WORKER_ENGINE" = "codex" ]]; then
      WORKER_CODEX_MODEL="$WORKER_MODEL"
      WORKER_CODEX_REASONING="${_cli_rest##* }"
    fi
  elif [[ "$_cli_flag" == "--verifier-model" ]]; then
    # Same value handling as --worker-model, applied to the verifier settings.
    _cli_i=$(( _cli_i + 1 ))
    _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "verifier") || exit 1
    _cli_rest="${_cli_parsed#* }"
    VERIFIER_ENGINE="${_cli_parsed%% *}"
    VERIFIER_MODEL="${_cli_rest%% *}"
    if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
      VERIFIER_CODEX_MODEL="$VERIFIER_MODEL"
      VERIFIER_CODEX_REASONING="${_cli_rest##* }"
    fi
  elif [[ "$_cli_flag" == "--lock-worker-model" ]]; then
    LOCK_WORKER_MODEL=1
  elif [[ "$_cli_flag" == "--final-consensus" ]]; then
    FINAL_CONSENSUS=1
  fi
  _cli_i=$(( _cli_i + 1 ))
done
# Drop the scratch variables so they do not linger in the script's global scope.
unset _cli_i _cli_flag _cli_parsed _cli_rest
2503
+
2291
2504
  # Require tmux — tmux mode only works inside an active tmux session
2292
2505
  if [[ -z "${TMUX:-}" ]]; then
2293
2506
  echo "ERROR: tmux mode requires running inside a tmux session."