@ai-dev-methodologies/rlp-desk 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -95,12 +95,7 @@ get_next_model() {
95
95
  gpt-5.3-codex-spark:medium) echo "gpt-5.3-codex-spark:high" ;;
96
96
  gpt-5.3-codex-spark:high) echo "gpt-5.3-codex-spark:xhigh" ;;
97
97
  gpt-5.3-codex-spark:xhigh) echo "" ;; # spark ceiling (full name)
98
- # Codex standard (gpt-5.3-codex) upgrade path
99
- gpt-5.3-codex:low) echo "gpt-5.3-codex:medium" ;;
100
- gpt-5.3-codex:medium) echo "gpt-5.3-codex:high" ;;
101
- gpt-5.3-codex:high) echo "gpt-5.3-codex:xhigh" ;;
102
- gpt-5.3-codex:xhigh) echo "" ;; # codex ceiling
103
- # Codex Non-Pro / upper path
98
+ # Codex Non-Pro upgrade path
104
99
  gpt-5.4:low) echo "gpt-5.4:medium" ;;
105
100
  gpt-5.4:medium) echo "gpt-5.4:high" ;;
106
101
  gpt-5.4:high) echo "gpt-5.4:xhigh" ;;
@@ -160,6 +155,7 @@ check_model_upgrade() {
160
155
  fi
161
156
 
162
157
  log " Worker model upgraded: ${_ORIGINAL_WORKER_MODEL} → ${WORKER_MODEL} (same-US consecutive fail threshold)"
158
+ log " [WARN] Same AC failing repeatedly — consider IL-2 re-assessment of AC quality (spec quality check)"
163
159
  log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${_ORIGINAL_WORKER_MODEL} to=${WORKER_MODEL}"
164
160
  _SAME_US_FAIL_COUNT=0 # Reset counter after upgrade
165
161
  fi
@@ -167,6 +163,26 @@ check_model_upgrade() {
167
163
  return 0
168
164
  }
169
165
 
166
+ # record_us_failure() — track per-US cumulative failure count (dual counter, Option D)
167
+ # Unlike CONSECUTIVE_FAILURES which resets on pass, US_FAIL_HISTORY persists across phases.
168
+ # This enables prior-failure warnings when a US that struggled in per-US mode fails again in final verify.
169
+ # Usage: record_us_failure <us_id>
170
+ record_us_failure() {
171
+ local us_id="$1"
172
+ [[ -z "$us_id" || "$us_id" = "unknown" ]] && return 0
173
+
174
+ local prev_count="${US_FAIL_HISTORY[$us_id]:-0}"
175
+ US_FAIL_HISTORY[$us_id]=$(( prev_count + 1 ))
176
+
177
+ # Prior-failure warning: if this US has failed before, it's showing fragility
178
+ if (( prev_count > 0 )); then
179
+ log " [WARN] US $us_id has prior failure history (${US_FAIL_HISTORY[$us_id]} total failures) — consider IL-2 AC quality re-assessment"
180
+ log_debug "[GOV] iter=${ITERATION:-0} us_prior_failures=$us_id count=${US_FAIL_HISTORY[$us_id]}"
181
+ fi
182
+
183
+ return 0
184
+ }
185
+
170
186
  # --- governance.md s7: Atomic file writes (tmux pattern) ---
171
187
  # All file writes by the Leader use tmp+mv to prevent corruption.
172
188
  atomic_write() {
@@ -228,7 +244,7 @@ update_status() {
228
244
 
229
245
  # Build consensus fields
230
246
  local consensus_json=""
231
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
247
+ if [[ "$CONSENSUS_MODE" != "off" ]]; then
232
248
  consensus_json=',
233
249
  "consensus_scope": "'"$CONSENSUS_SCOPE"'",
234
250
  "consensus_round": '"$CONSENSUS_ROUND"',
@@ -251,7 +267,7 @@ update_status() {
251
267
  "verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
252
268
  "verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
253
269
  "verify_mode": "'"$VERIFY_MODE"'",
254
- "verify_consensus": '"$VERIFY_CONSENSUS"',
270
+ "consensus_mode": "'"$CONSENSUS_MODE"'",
255
271
  "last_result": "'"$last_result"'",
256
272
  "consecutive_failures": '"$CONSECUTIVE_FAILURES"',
257
273
  "verified_us": '"$verified_us_json"''"$consensus_json"',
@@ -351,9 +367,8 @@ write_cost_log() {
351
367
  echo '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"worker_start_time":"'"$worker_start_time"'","worker_end_time":"'"$worker_end_time"'","worker_duration_s":'"$worker_duration_s"',"verifier_start_time":"'"$verifier_start_time"'","verifier_end_time":"'"$verifier_end_time"'","verifier_duration_s":'"$verifier_duration_s"''"$consensus_fields"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
352
368
  }
353
369
 
354
- # --- Analytics: write per-iteration structured data to campaign.jsonl ---
370
+ # --- Analytics: write per-iteration structured data to campaign.jsonl (always-on) ---
355
371
  write_campaign_jsonl() {
356
- if (( ! DEBUG )) && (( ! WITH_SELF_VERIFICATION )); then return 0; fi
357
372
  local iter="$1"
358
373
  local us_id="${2:-unknown}"
359
374
  local verdict="${3:-unknown}"
@@ -367,6 +382,19 @@ write_campaign_jsonl() {
367
382
  verifier_duration_s=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
368
383
  fi
369
384
 
385
+ # Build us_fail_history JSON object from associative array
386
+ local us_fail_history_json="{}"
387
+ if (( ${#US_FAIL_HISTORY[@]} > 0 )); then
388
+ us_fail_history_json="{"
389
+ local first=1
390
+ for key in "${(@k)US_FAIL_HISTORY}"; do
391
+ (( first )) || us_fail_history_json+=","
392
+ us_fail_history_json+="\"$key\":${US_FAIL_HISTORY[$key]}"
393
+ first=0
394
+ done
395
+ us_fail_history_json+="}"
396
+ fi
397
+
370
398
  jq -nc \
371
399
  --argjson iter "$iter" \
372
400
  --arg us_id "$us_id" \
@@ -375,13 +403,16 @@ write_campaign_jsonl() {
375
403
  --arg verifier_engine "$VERIFIER_ENGINE" \
376
404
  --arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
377
405
  --arg codex_verdict "${CODEX_VERDICT:-N/A}" \
378
- --argjson consensus "$VERIFY_CONSENSUS" \
406
+ --arg consensus_mode "$CONSENSUS_MODE" \
407
+ --argjson consecutive_failures "$CONSECUTIVE_FAILURES" \
408
+ --argjson model_upgraded "${_MODEL_UPGRADED:-0}" \
409
+ --argjson us_fail_history "$us_fail_history_json" \
379
410
  --argjson duration_worker_s "$worker_duration_s" \
380
411
  --argjson duration_verifier_s "$verifier_duration_s" \
381
412
  --arg project_root "$ROOT" \
382
413
  --arg slug "$SLUG" \
383
414
  --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
384
- '{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus: $consensus, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
415
+ '{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus_mode: $consensus_mode, consecutive_failures: $consecutive_failures, model_upgraded: $model_upgraded, us_fail_history: $us_fail_history, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
385
416
  >> "$CAMPAIGN_JSONL"
386
417
  }
387
418
 
@@ -431,7 +462,7 @@ ${untracked}"
431
462
  local sv_summary=""
432
463
  if (( WITH_SELF_VERIFICATION )); then
433
464
  local sv_report
434
- sv_report=$(ls -t "$ANALYTICS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
465
+ sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
435
466
  if [[ -n "$sv_report" ]]; then
436
467
  sv_summary="See: $sv_report"
437
468
  else
@@ -460,7 +491,7 @@ ${untracked}"
460
491
  echo "- Elapsed: ${elapsed}s"
461
492
  echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
462
493
  echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
463
- echo "- Consensus: verify_consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE final_consensus=$FINAL_CONSENSUS"
494
+ echo "- Consensus: mode=$CONSENSUS_MODE model=$CONSENSUS_MODEL final_model=$FINAL_CONSENSUS_MODEL"
464
495
  echo ""
465
496
  echo "## US Status"
466
497
  echo "- Verified: ${VERIFIED_US:-none}"
@@ -47,8 +47,9 @@ set -uo pipefail
47
47
  SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
48
48
  ROOT="${ROOT:-$PWD}"
49
49
  MAX_ITER="${MAX_ITER:-20}"
50
- WORKER_MODEL="${WORKER_MODEL:-sonnet}"
51
- VERIFIER_MODEL="${VERIFIER_MODEL:-opus}"
50
+ WORKER_MODEL="${WORKER_MODEL:-haiku}"
51
+ VERIFIER_MODEL="${VERIFIER_MODEL:-sonnet}"
52
+ FINAL_VERIFIER_MODEL="${FINAL_VERIFIER_MODEL:-opus}"
52
53
  POLL_INTERVAL="${POLL_INTERVAL:-5}"
53
54
  ITER_TIMEOUT="${ITER_TIMEOUT:-600}"
54
55
  HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
@@ -60,6 +61,7 @@ WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
60
61
  # --- Engine Selection ---
61
62
  WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
62
63
  VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
64
+ FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}" # claude|codex (derived from FINAL_VERIFIER_MODEL)
63
65
  WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
64
66
  WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
65
67
  VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
@@ -68,13 +70,19 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
68
70
 
69
71
  # --- Verify Mode ---
70
72
  VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
71
- VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
72
- FINAL_CONSENSUS="${FINAL_CONSENSUS:-0}" # 0|1 — consensus for final ALL verify only (independent of VERIFY_CONSENSUS)
73
- CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
74
- CONSENSUS_FAIL_FAST="${CONSENSUS_FAIL_FAST:-0}" # 0|1 — skip second verifier if first fails
75
- CB_THRESHOLD="${CB_THRESHOLD:-3}" # consecutive failures before BLOCKED (default: 3)
76
- # Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
73
+ # Consensus: off|all|final-only (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
74
+ CONSENSUS_MODE="${CONSENSUS_MODE:-off}" # off|all|final-only
75
+ CONSENSUS_MODEL="${CONSENSUS_MODEL:-gpt-5.4:medium}" # per-US cross-verifier (lighter)
76
+ FINAL_CONSENSUS_MODEL="${FINAL_CONSENSUS_MODEL:-gpt-5.4:high}" # final cross-verifier (stricter)
77
+ # Legacy compat: map old flags to CONSENSUS_MODE
77
78
  if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
79
+ CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
80
+ elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
81
+ CONSENSUS_MODE="final-only"
82
+ fi
83
+ CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
84
+ # Effective CB threshold: doubled when consensus mode active
85
+ if [[ "$CONSENSUS_MODE" != "off" ]]; then
78
86
  EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
79
87
  else
80
88
  EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
@@ -120,6 +128,7 @@ SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"
120
128
  typeset -A LAST_PANE_CONTENT
121
129
  typeset -A PANE_IDLE_SINCE
122
130
  typeset -A WORKER_RESTARTS
131
+ typeset -A US_FAIL_HISTORY
123
132
  STALE_CONTEXT_COUNT=0
124
133
  HEARTBEAT_STALE_COUNT=0
125
134
  MONITOR_FAILURE_COUNT=0
@@ -192,7 +201,7 @@ launch_worker_codex() {
192
201
 
193
202
  log " Launching Worker codex via trigger script in pane $pane_id..."
194
203
  paste_to_pane "$pane_id" "bash $trigger_file"
195
- tmux send-keys -t "$pane_id" Enter
204
+ tmux send-keys -t "$pane_id" C-m
196
205
  log_debug "Worker codex trigger sent: $trigger_file"
197
206
  sleep 3 # brief wait for codex to start
198
207
  return 0
@@ -211,7 +220,7 @@ launch_worker_claude() {
211
220
 
212
221
  log " Launching Worker claude in pane $pane_id..."
213
222
  paste_to_pane "$pane_id" "$worker_launch"
214
- tmux send-keys -t "$pane_id" Enter
223
+ tmux send-keys -t "$pane_id" C-m
215
224
 
216
225
  # Wait for claude TUI to be ready
217
226
  if ! wait_for_pane_ready "$pane_id" 30; then
@@ -223,7 +232,7 @@ launch_worker_claude() {
223
232
  sleep 3
224
233
  local worker_instruction="Read and execute the instructions in $prompt_file"
225
234
  paste_to_pane "$pane_id" "$worker_instruction"
226
- tmux send-keys -t "$pane_id" Enter
235
+ tmux send-keys -t "$pane_id" C-m
227
236
  log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
228
237
 
229
238
  # 15-iteration submit loop — verify claude started working
@@ -244,7 +253,7 @@ launch_worker_claude() {
244
253
  sleep 0.2
245
254
  paste_to_pane "$pane_id" "$worker_instruction"
246
255
  sleep 0.15
247
- tmux send-keys -t "$pane_id" Enter
256
+ tmux send-keys -t "$pane_id" C-m
248
257
  sleep 1
249
258
  fi
250
259
  tmux send-keys -t "$pane_id" C-m 2>/dev/null
@@ -259,15 +268,15 @@ launch_worker_claude() {
259
268
  log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
260
269
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
261
270
  sleep 0.5
262
- tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
271
+ tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
263
272
  sleep 2
264
273
  wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
265
274
  paste_to_pane "$pane_id" "$worker_launch"
266
- tmux send-keys -t "$pane_id" Enter
275
+ tmux send-keys -t "$pane_id" C-m
267
276
  if wait_for_pane_ready "$pane_id" 30; then
268
277
  sleep 3
269
278
  paste_to_pane "$pane_id" "$worker_instruction"
270
- tmux send-keys -t "$pane_id" Enter
279
+ tmux send-keys -t "$pane_id" C-m
271
280
  log " Worker restarted and instruction re-sent"
272
281
  log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
273
282
  else
@@ -290,7 +299,7 @@ launch_verifier_codex() {
290
299
 
291
300
  log " Launching Verifier codex in pane $pane_id..."
292
301
  paste_to_pane "$pane_id" "$verifier_launch"
293
- tmux send-keys -t "$pane_id" Enter
302
+ tmux send-keys -t "$pane_id" C-m
294
303
  sleep 3
295
304
  return 0
296
305
  }
@@ -306,7 +315,7 @@ launch_verifier_claude() {
306
315
 
307
316
  log " Launching Verifier claude in pane $pane_id..."
308
317
  paste_to_pane "$pane_id" "$verifier_launch"
309
- tmux send-keys -t "$pane_id" Enter
318
+ tmux send-keys -t "$pane_id" C-m
310
319
 
311
320
  if ! wait_for_pane_ready "$pane_id" 30; then
312
321
  log_error "Verifier failed to start"
@@ -316,7 +325,7 @@ launch_verifier_claude() {
316
325
  sleep 3
317
326
  local verifier_instruction="Read and execute the instructions in $prompt_file"
318
327
  paste_to_pane "$pane_id" "$verifier_instruction"
319
- tmux send-keys -t "$pane_id" Enter
328
+ tmux send-keys -t "$pane_id" C-m
320
329
  log_debug "Verifier instruction sent directly"
321
330
 
322
331
  # Submit loop — verify verifier started working
@@ -334,7 +343,7 @@ launch_verifier_claude() {
334
343
  tmux send-keys -t "$pane_id" C-u 2>/dev/null
335
344
  sleep 0.1
336
345
  paste_to_pane "$pane_id" "$verifier_instruction"
337
- tmux send-keys -t "$pane_id" Enter
346
+ tmux send-keys -t "$pane_id" C-m
338
347
  fi
339
348
  tmux send-keys -t "$pane_id" C-m 2>/dev/null
340
349
  sleep 0.3
@@ -455,7 +464,7 @@ check_dependencies() {
455
464
  fi
456
465
 
457
466
  # Codex binary required only when engine=codex or consensus verification is enabled
458
- if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
467
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
459
468
  if ! command -v codex >/dev/null 2>&1; then
460
469
  log_error "codex CLI not found. Install: npm install -g @openai/codex"
461
470
  missing=1
@@ -473,7 +482,7 @@ check_dependencies() {
473
482
  fi
474
483
 
475
484
  # Resolve codex binary if needed
476
- if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" || "$FINAL_CONSENSUS" = "1" ]]; then
485
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
477
486
  CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
478
487
  log " Codex binary: $CODEX_BIN"
479
488
  fi
@@ -531,7 +540,7 @@ create_session() {
531
540
  # Set pane titles and enable border labels for visual distinction
532
541
  local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
533
542
  local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
534
- [[ "$VERIFY_CONSENSUS" = "1" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + codex:$VERIFIER_CODEX_MODEL)"
543
+ [[ "$CONSENSUS_MODE" != "off" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
535
544
  tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
536
545
  tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
537
546
  tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
@@ -585,8 +594,7 @@ create_session() {
585
594
  },
586
595
  "verification": {
587
596
  "verify_mode": "'"$VERIFY_MODE"'",
588
- "verify_consensus": '"$VERIFY_CONSENSUS"',
589
- "consensus_scope": "'"$CONSENSUS_SCOPE"'"
597
+ "consensus_mode": "'"$CONSENSUS_MODE"'"
590
598
  },
591
599
  "config": {
592
600
  "max_iter": '"$MAX_ITER"',
@@ -663,13 +671,13 @@ safe_send_keys() {
663
671
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
664
672
  if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
665
673
  log_debug " Permission prompt detected, auto-approving"
666
- tmux send-keys -t "$pane_id" Enter
674
+ tmux send-keys -t "$pane_id" C-m
667
675
  sleep 0.3
668
676
  fi
669
677
  # Auto-dismiss codex update prompt (select Skip)
670
678
  if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
671
679
  log_debug " Codex update prompt detected, selecting Skip"
672
- tmux send-keys -t "$pane_id" "2" Enter
680
+ tmux send-keys -t "$pane_id" "2" C-m
673
681
  sleep 0.2
674
682
  fi
675
683
  # Send text via buffer paste (reliable for long strings)
@@ -761,9 +769,9 @@ wait_for_pane_ready() {
761
769
  # Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
762
770
  if echo "$captured" | grep -q "Do you trust" 2>/dev/null; then
763
771
  log " Trust prompt detected, auto-dismissing..."
764
- tmux send-keys -t "$pane_id" Enter
772
+ tmux send-keys -t "$pane_id" C-m
765
773
  sleep 0.12
766
- tmux send-keys -t "$pane_id" Enter
774
+ tmux send-keys -t "$pane_id" C-m
767
775
  sleep 2
768
776
  continue
769
777
  fi
@@ -771,7 +779,7 @@ wait_for_pane_ready() {
771
779
  # Auto-approve permission prompts ("Do you want to create/overwrite X?")
772
780
  if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
773
781
  log " Permission prompt detected, auto-approving..."
774
- tmux send-keys -t "$pane_id" Enter
782
+ tmux send-keys -t "$pane_id" C-m
775
783
  sleep 0.5
776
784
  continue
777
785
  fi
@@ -779,7 +787,7 @@ wait_for_pane_ready() {
779
787
  # Auto-dismiss codex update prompt (select Skip = option 2)
780
788
  if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
781
789
  log " Codex update prompt detected, selecting Skip..."
782
- tmux send-keys -t "$pane_id" "2" Enter
790
+ tmux send-keys -t "$pane_id" "2" C-m
783
791
  sleep 0.5
784
792
  continue
785
793
  fi
@@ -917,7 +925,7 @@ restart_worker() {
917
925
 
918
926
  # Kill existing claude, wait for shell prompt
919
927
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
920
- tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
928
+ tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
921
929
  sleep 2
922
930
 
923
931
  # Re-launch worker (tmux interactive pattern)
@@ -1205,11 +1213,11 @@ cleanup() {
1205
1213
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
1206
1214
  if [[ -n "${WORKER_PANE:-}" ]]; then
1207
1215
  tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1208
- tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
1216
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
1209
1217
  fi
1210
1218
  if [[ -n "${VERIFIER_PANE:-}" ]]; then
1211
1219
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1212
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1220
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
1213
1221
  fi
1214
1222
  sleep 2
1215
1223
  # Kill panes on completion
@@ -1284,11 +1292,11 @@ cleanup() {
1284
1292
  fi
1285
1293
 
1286
1294
  # 3. Consensus: were both engines used?
1287
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1295
+ if [[ "$CONSENSUS_MODE" != "off" ]]; then
1288
1296
  if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
1289
- log_debug "[FLOW] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
1297
+ log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
1290
1298
  else
1291
- log_debug "[FLOW] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
1299
+ log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
1292
1300
  fi
1293
1301
  fi
1294
1302
 
@@ -1410,7 +1418,7 @@ poll_for_signal() {
1410
1418
  log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
1411
1419
  log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
1412
1420
  tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
1413
- tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null; sleep 2
1421
+ tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
1414
1422
  wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
1415
1423
  fi
1416
1424
  sleep "$_API_RETRY_INTERVAL_S"
@@ -1487,7 +1495,7 @@ poll_for_signal() {
1487
1495
  if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
1488
1496
  log " Permission prompt detected during poll, auto-approving..."
1489
1497
  log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
1490
- tmux send-keys -t "$pane_id" Enter
1498
+ tmux send-keys -t "$pane_id" C-m
1491
1499
  sleep 0.5
1492
1500
  fi
1493
1501
 
@@ -1529,12 +1537,12 @@ run_single_verifier() {
1529
1537
  log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
1530
1538
  tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
1531
1539
  sleep 0.2
1532
- tmux send-keys -t "$VERIFIER_PANE" "clear" Enter 2>/dev/null
1540
+ tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
1533
1541
  sleep 0.3
1534
1542
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1535
1543
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1536
1544
  sleep 0.5
1537
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1545
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
1538
1546
  sleep 2
1539
1547
  fi
1540
1548
  # Always ensure clean shell state before launching new verifier
@@ -1628,7 +1636,7 @@ run_sequential_final_verify() {
1628
1636
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1629
1637
  if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1630
1638
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
1631
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null; sleep 2
1639
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
1632
1640
  fi
1633
1641
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
1634
1642
 
@@ -1689,20 +1697,14 @@ run_sequential_final_verify() {
1689
1697
 
1690
1698
  # --- US-005: Determine whether consensus verification should run for this signal ---
1691
1699
  # Returns 0 (use consensus) or 1 (single engine).
1692
- # VERIFY_CONSENSUS + CONSENSUS_SCOPE handles per-US consensus.
1693
- # FINAL_CONSENSUS independently enables consensus for the final ALL verify only.
1700
+ # Uses unified CONSENSUS_MODE: off|all|final-only
1694
1701
  _should_use_consensus() {
1695
1702
  local signal_us_id="${1:-}"
1696
- if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1697
- case "$CONSENSUS_SCOPE" in
1698
- all) return 0 ;;
1699
- final-only) [[ "$signal_us_id" == "ALL" ]] && return 0 ;;
1700
- esac
1701
- fi
1702
- if [[ "$FINAL_CONSENSUS" = "1" && "$signal_us_id" == "ALL" ]]; then
1703
- return 0
1704
- fi
1705
- return 1
1703
+ case "$CONSENSUS_MODE" in
1704
+ all) return 0 ;;
1705
+ final-only) [[ "$signal_us_id" == "ALL" ]] && return 0 ;;
1706
+ off|*) return 1 ;;
1707
+ esac
1706
1708
  }
1707
1709
 
1708
1710
  # --- US-004: Run consensus verification (claude + codex sequentially) ---
@@ -1744,13 +1746,7 @@ run_consensus_verification() {
1744
1746
  fi
1745
1747
  log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
1746
1748
 
1747
- # F8: --consensus-fail-fast skip second verifier if first fails
1748
- if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
1749
- log " Consensus fail-fast: claude=fail, skipping codex verifier"
1750
- log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
1751
- CODEX_VERDICT="skipped"
1752
- return 2 # disagreement/fail signal
1753
- fi
1749
+ # consensus-fail-fast removed (complexity vs value too low)
1754
1750
 
1755
1751
  # Run codex verifier second
1756
1752
  local _codex_t0=$(date +%s)
@@ -1887,12 +1883,10 @@ main() {
1887
1883
  trap cleanup EXIT INT TERM
1888
1884
  mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
1889
1885
 
1890
- # --- Analytics directory: create only when --debug or --with-self-verification ---
1891
- if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
1892
- mkdir -p "$ANALYTICS_DIR" 2>/dev/null
1893
- fi
1886
+ # --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
1887
+ mkdir -p "$ANALYTICS_DIR" 2>/dev/null
1894
1888
 
1895
- # --- debug.log versioning (in analytics dir) ---
1889
+ # --- debug.log versioning (in analytics dir, --debug only) ---
1896
1890
  if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
1897
1891
  local dbg_n=1
1898
1892
  while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
@@ -1901,33 +1895,30 @@ main() {
1901
1895
  mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
1902
1896
  fi
1903
1897
 
1904
- # --- campaign.jsonl versioning (in analytics dir, after mkdir) ---
1905
- if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
1906
- if [[ -f "$CAMPAIGN_JSONL" ]]; then
1907
- local cj_n=1
1908
- while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
1909
- (( cj_n++ ))
1910
- done
1911
- mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
1912
- fi
1898
+ # --- campaign.jsonl versioning (always-on) ---
1899
+ if [[ -f "$CAMPAIGN_JSONL" ]]; then
1900
+ local cj_n=1
1901
+ while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
1902
+ (( cj_n++ ))
1903
+ done
1904
+ mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
1913
1905
  fi
1914
1906
 
1915
- # --- metadata.json: write at campaign start ---
1916
- if (( DEBUG )) || (( WITH_SELF_VERIFICATION )); then
1917
- jq -n \
1918
- --arg slug "$SLUG" \
1919
- --arg project_root "$ROOT" \
1920
- --arg campaign_status "running" \
1921
- --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1922
- --arg end_time "" \
1923
- --arg worker_model "$WORKER_MODEL" \
1924
- --arg verifier_model "$VERIFIER_MODEL" \
1925
- --argjson debug "$DEBUG" \
1926
- --argjson with_sv "$WITH_SELF_VERIFICATION" \
1927
- --argjson consensus "$VERIFY_CONSENSUS" \
1928
- '{slug: $slug, project_root: $project_root, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
1929
- > "$METADATA_FILE"
1930
- fi
1907
+ # --- metadata.json: always write at campaign start (cross-project identification) ---
1908
+ jq -n \
1909
+ --arg slug "$SLUG" \
1910
+ --arg project_root "$ROOT" \
1911
+ --arg project_name "$(basename "$ROOT")" \
1912
+ --arg campaign_status "running" \
1913
+ --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1914
+ --arg end_time "" \
1915
+ --arg worker_model "$WORKER_MODEL" \
1916
+ --arg verifier_model "$VERIFIER_MODEL" \
1917
+ --argjson debug "$DEBUG" \
1918
+ --argjson with_sv "$WITH_SELF_VERIFICATION" \
1919
+ --argjson consensus "$VERIFY_CONSENSUS" \
1920
+ '{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus}' \
1921
+ > "$METADATA_FILE"
1931
1922
 
1932
1923
  # --- Startup ---
1933
1924
  log "Ralph Desk Tmux Runner starting..."
@@ -1935,11 +1926,10 @@ main() {
1935
1926
  log " Root: $ROOT"
1936
1927
  log " Max iterations: $MAX_ITER"
1937
1928
  log " Worker model: $WORKER_MODEL"
1938
- log " Verifier model: $VERIFIER_MODEL"
1929
+ log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
1939
1930
  log " Verify mode: $VERIFY_MODE"
1940
- log " Verify consensus:$VERIFY_CONSENSUS"
1941
- log " Final consensus: $FINAL_CONSENSUS"
1942
- log " Consensus scope: $CONSENSUS_SCOPE"
1931
+ log " Consensus mode: $CONSENSUS_MODE"
1932
+ log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
1943
1933
  log " Poll interval: ${POLL_INTERVAL}s"
1944
1934
  log " Iter timeout: ${ITER_TIMEOUT}s"
1945
1935
  # --- Debug: Log execution plan ---
@@ -1955,7 +1945,7 @@ main() {
1955
1945
  log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
1956
1946
  log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
1957
1947
  log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
1958
- log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
1948
+ log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
1959
1949
  log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
1960
1950
 
1961
1951
  if [[ "$VERIFY_MODE" = "per-us" ]]; then
@@ -2065,7 +2055,7 @@ main() {
2065
2055
  # Send C-c first (in case claude is mid-task), then /exit
2066
2056
  tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
2067
2057
  sleep 1
2068
- tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
2058
+ tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
2069
2059
  sleep 2
2070
2060
  # Wait for shell prompt before proceeding
2071
2061
  wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
@@ -2222,7 +2212,7 @@ main() {
2222
2212
  fi
2223
2213
  fi
2224
2214
 
2225
- # --- Consensus scope check (US-005: _should_use_consensus handles VERIFY_CONSENSUS + FINAL_CONSENSUS) ---
2215
+ # --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
2226
2216
  local use_consensus=0
2227
2217
  _should_use_consensus "$signal_us_id" && use_consensus=1
2228
2218
 
@@ -2261,12 +2251,12 @@ main() {
2261
2251
  log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
2262
2252
  tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
2263
2253
  sleep 0.2
2264
- tmux send-keys -t "$VERIFIER_PANE" "clear" Enter 2>/dev/null
2254
+ tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
2265
2255
  sleep 0.3
2266
2256
  elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
2267
2257
  tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
2268
2258
  sleep 0.5
2269
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
2259
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
2270
2260
  sleep 2
2271
2261
  fi
2272
2262
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
@@ -2362,7 +2352,14 @@ main() {
2362
2352
  fail)
2363
2353
  # --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
2364
2354
  (( CONSECUTIVE_FAILURES++ ))
2355
+ record_us_failure "${signal_us_id:-unknown}"
2365
2356
  check_model_upgrade "${signal_us_id:-unknown}"
2357
+
2358
+ # Mid-CB warning: alert at halfway point (governance §8 early warning)
2359
+ if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
2360
+ log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
2361
+ log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
2362
+ fi
2366
2363
  local verdict_summary_fail
2367
2364
  verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
2368
2365
  log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
@@ -2498,8 +2495,36 @@ while (( _cli_i <= $# )); do
2498
2495
  --lock-worker-model)
2499
2496
  LOCK_WORKER_MODEL=1
2500
2497
  ;;
2498
+ --final-verifier-model)
2499
+ (( _cli_i++ ))
2500
+ _cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "final-verifier") || exit 1
2501
+ FINAL_VERIFIER_ENGINE="${_cli_parsed%% *}"
2502
+ _cli_rest="${_cli_parsed#* }"
2503
+ FINAL_VERIFIER_MODEL="${_cli_rest%% *}"
2504
+ if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
2505
+ FINAL_VERIFIER_CODEX_MODEL="$FINAL_VERIFIER_MODEL"
2506
+ FINAL_VERIFIER_CODEX_REASONING="${_cli_rest##* }"
2507
+ fi
2508
+ ;;
2509
+ --consensus)
2510
+ (( _cli_i++ ))
2511
+ CONSENSUS_MODE="${@[$_cli_i]:-off}"
2512
+ ;;
2513
+ --consensus-model)
2514
+ (( _cli_i++ ))
2515
+ CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:medium}"
2516
+ ;;
2517
+ --final-consensus-model)
2518
+ (( _cli_i++ ))
2519
+ FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:high}"
2520
+ ;;
2501
2521
  --final-consensus)
2502
- FINAL_CONSENSUS=1
2522
+ # Legacy: map to new --consensus final-only
2523
+ CONSENSUS_MODE="final-only"
2524
+ ;;
2525
+ --verify-consensus)
2526
+ # Legacy: map to new --consensus all
2527
+ CONSENSUS_MODE="all"
2503
2528
  ;;
2504
2529
  esac
2505
2530
  (( _cli_i++ ))