@ai-dev-methodologies/rlp-desk 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,6 +30,7 @@ set -uo pipefail
30
30
  # MAX_NUDGES - max nudges per pane per iteration (default: 3)
31
31
  #
32
32
  # Dependencies: tmux, claude CLI, jq
33
+ # Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
33
34
  # =============================================================================
34
35
 
35
36
  # --- Environment Variables ---
@@ -45,6 +46,17 @@ MAX_RESTARTS="${MAX_RESTARTS:-3}"
45
46
  IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
46
47
  MAX_NUDGES="${MAX_NUDGES:-3}"
47
48
 
49
+ # --- Engine Selection ---
50
+ WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
51
+ VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
52
+ CODEX_MODEL="${CODEX_MODEL:-gpt-5.4}"
53
+ CODEX_REASONING="${CODEX_REASONING:-high}" # low|medium|high
54
+ CODEX_BIN="" # resolved by check_dependencies when engine=codex
55
+
56
+ # --- Verify Mode ---
57
+ VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
58
+ VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
59
+
48
60
  # --- Derived Paths ---
49
61
  DESK="$ROOT/.claude/ralph-desk"
50
62
  PROMPTS_DIR="$DESK/prompts"
@@ -80,6 +92,9 @@ CONSECUTIVE_FAILURES=0
80
92
  PREV_CONTEXT_HASH=""
81
93
  ITERATION=0
82
94
  START_TIME=$(date +%s)
95
+ VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
96
+ CONSENSUS_ROUND=0 # current consensus round for current US
97
+ US_LIST="" # comma-separated US IDs from PRD (per-us mode)
83
98
 
84
99
  # =============================================================================
85
100
  # Utility Functions
@@ -135,6 +150,19 @@ check_dependencies() {
135
150
  missing=1
136
151
  fi
137
152
 
153
+ # Codex binary required only when engine=codex or consensus verification is enabled
154
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
155
+ if ! command -v codex >/dev/null 2>&1; then
156
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
157
+ log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
158
+ else
159
+ log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
160
+ fi
161
+ log_error "Install with: npm install -g @openai/codex"
162
+ missing=1
163
+ fi
164
+ fi
165
+
138
166
  if (( missing )); then
139
167
  exit 1
140
168
  fi
@@ -142,6 +170,12 @@ check_dependencies() {
142
170
  # Resolve full path to claude binary for reliable launches
143
171
  CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
144
172
  log " Claude binary: $CLAUDE_BIN"
173
+
174
+ # Resolve codex binary if needed
175
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
176
+ CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
177
+ log " Codex binary: $CODEX_BIN"
178
+ fi
145
179
  }
146
180
 
147
181
  # =============================================================================
@@ -305,10 +339,19 @@ safe_send_keys() {
305
339
  log_debug " Trust prompt detected, dismissing"
306
340
  tmux send-keys -t "$pane_id" C-m
307
341
  sleep 0.12
308
- tmux send-keys -t "$pane_id" C-m
342
+ fi
343
+ # Auto-approve permission prompts ("Do you want to create/overwrite X?")
344
+ if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
345
+ log_debug " Permission prompt detected, auto-approving"
346
+ tmux send-keys -t "$pane_id" Enter
347
+ sleep 0.3
348
+ fi
349
+ # Auto-dismiss codex update prompt (select Skip)
350
+ if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
351
+ log_debug " Codex update prompt detected, selecting Skip"
352
+ tmux send-keys -t "$pane_id" "2" Enter
309
353
  sleep 0.2
310
354
  fi
311
-
312
355
  # Send text in literal mode with -- separator
313
356
  log_debug " Sending text to pane $pane_id (${#text} chars)"
314
357
  tmux send-keys -t "$pane_id" -l -- "$text"
@@ -407,6 +450,22 @@ wait_for_pane_ready() {
407
450
  continue
408
451
  fi
409
452
 
453
+ # Auto-approve permission prompts ("Do you want to create/overwrite X?")
454
+ if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
455
+ log " Permission prompt detected, auto-approving..."
456
+ tmux send-keys -t "$pane_id" Enter
457
+ sleep 0.5
458
+ continue
459
+ fi
460
+
461
+ # Auto-dismiss codex update prompt (select Skip = option 2)
462
+ if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
463
+ log " Codex update prompt detected, selecting Skip..."
464
+ tmux send-keys -t "$pane_id" "2" Enter
465
+ sleep 0.5
466
+ continue
467
+ fi
468
+
410
469
  # tmux paneLooksReady: check each line for prompt char at line start
411
470
  local ready=0
412
471
  echo "$captured" | while IFS= read -r line; do
@@ -529,8 +588,12 @@ restart_worker() {
529
588
  tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
530
589
  sleep 2
531
590
 
532
- # Re-launch claude (tmux interactive pattern)
533
- safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
591
+ # Re-launch worker (tmux interactive pattern)
592
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
593
+ safe_send_keys "$pane_id" "${CODEX_BIN:-codex} -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
594
+ else
595
+ safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
596
+ fi
534
597
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
535
598
  return 0
536
599
  }
@@ -575,9 +638,64 @@ write_worker_trigger() {
575
638
  echo ""
576
639
  cat "$fix_contract_file"
577
640
  fi
641
+
642
+ # Per-US mode: tell Worker exactly which US to work on
643
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
644
+ # Find next unverified US
645
+ local next_us=""
646
+ for us in $(echo "$US_LIST" | tr ',' ' '); do
647
+ if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
648
+ next_us="$us"
649
+ break
650
+ fi
651
+ done
652
+
653
+ if [[ -n "$next_us" ]]; then
654
+ echo ""
655
+ echo "---"
656
+ echo "## PER-US SCOPE LOCK (this iteration)"
657
+ echo "You MUST implement ONLY **${next_us}** in this iteration."
658
+ echo "Do NOT implement any other user stories."
659
+ echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
660
+ echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
661
+ elif [[ -n "$VERIFIED_US" ]]; then
662
+ # All individual US verified — this is the final full verify iteration
663
+ echo ""
664
+ echo "---"
665
+ echo "## FINAL VERIFICATION ITERATION"
666
+ echo "All individual US have been verified: $VERIFIED_US"
667
+ echo "Run all tests and verification commands to confirm everything works together."
668
+ echo "Signal verify with us_id=\"ALL\" for the final full verification."
669
+ fi
670
+ elif [[ "$VERIFY_MODE" = "batch" ]]; then
671
+ echo ""
672
+ echo "---"
673
+ echo "## BATCH MODE OVERRIDE"
674
+ echo "Ignore any per-US signal instructions above. In batch mode:"
675
+ echo "- Implement ALL user stories in this iteration"
676
+ echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
677
+ echo "- Do NOT signal verify after individual stories"
678
+ fi
578
679
  } | atomic_write "$prompt_file"
579
680
 
580
681
  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
682
+ # Engine-specific launch command (expanded at write time)
683
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
684
+ local engine_cmd="${CODEX_BIN:-codex} -m $CODEX_MODEL \\
685
+ -c model_reasoning_effort=\"$CODEX_REASONING\" \\
686
+ --dangerously-bypass-approvals-and-sandbox \\
687
+ \"\$(cat $prompt_file)\" \\
688
+ 2>&1 | tee $output_log"
689
+ local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
690
+ else
691
+ local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
692
+ --model $WORKER_MODEL \\
693
+ --dangerously-skip-permissions \\
694
+ --output-format text \\
695
+ 2>&1 | tee $output_log"
696
+ local engine_comment="# Run claude with fresh context (governance.md s7 step 5)"
697
+ fi
698
+
581
699
  {
582
700
  cat <<TRIGGER_EOF
583
701
  #!/bin/zsh
@@ -596,12 +714,8 @@ HEARTBEAT_FILE="$WORKER_HEARTBEAT"
596
714
  ) &
597
715
  HEARTBEAT_PID=\$!
598
716
 
599
- # Run claude with fresh context (governance.md s7 step 5)
600
- claude -p "\$(cat $prompt_file)" \\
601
- --model $WORKER_MODEL \\
602
- --dangerously-skip-permissions \\
603
- --output-format text \\
604
- 2>&1 | tee $output_log
717
+ $engine_comment
718
+ $engine_cmd
605
719
 
606
720
  # Cleanup heartbeat writer
607
721
  kill \$HEARTBEAT_PID 2>/dev/null
@@ -618,11 +732,20 @@ TRIGGER_EOF
618
732
 
619
733
  write_verifier_trigger() {
620
734
  local iter="$1"
621
- local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"
622
- local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-trigger.sh"
623
- local output_log="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-output.log"
735
+ local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
736
+ local verifier_model="${3:-$VERIFIER_MODEL}"
737
+ local suffix="${4:-}" # optional suffix for consensus (e.g., "-claude", "-codex")
738
+ local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
739
+ local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
740
+ local output_log="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-output.log"
741
+
742
+ # Read us_id from iter-signal.json for per-US scoping
743
+ local us_id=""
744
+ if [[ -f "$SIGNAL_FILE" ]]; then
745
+ us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
746
+ fi
624
747
 
625
- # Build verifier prompt from base
748
+ # Build verifier prompt from base with US scope
626
749
  {
627
750
  cat "$VERIFIER_PROMPT_BASE"
628
751
  echo ""
@@ -630,13 +753,40 @@ write_verifier_trigger() {
630
753
  echo "## Verification Context"
631
754
  echo "- **Iteration**: $iter"
632
755
  echo "- **Done Claim**: $DONE_CLAIM_FILE"
756
+ echo "- **Verify Mode**: $VERIFY_MODE"
757
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$us_id" ]]; then
758
+ if [[ "$us_id" = "ALL" ]]; then
759
+ echo "- **Scope**: FINAL FULL VERIFY — check ALL acceptance criteria from the PRD"
760
+ echo "- **Previously verified US**: $VERIFIED_US"
761
+ else
762
+ echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
763
+ echo "- **Previously verified US**: $VERIFIED_US"
764
+ fi
765
+ fi
633
766
  } | atomic_write "$prompt_file"
634
767
 
635
768
  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
769
+ # Engine-specific launch command (expanded at write time)
770
+ if [[ "$verifier_engine" = "codex" ]]; then
771
+ local engine_cmd="${CODEX_BIN:-codex} -m $CODEX_MODEL \\
772
+ -c model_reasoning_effort=\"$CODEX_REASONING\" \\
773
+ --dangerously-bypass-approvals-and-sandbox \\
774
+ \"\$(cat $prompt_file)\" \\
775
+ 2>&1 | tee $output_log"
776
+ local engine_comment="# Run codex with fresh context (governance.md s7 step 7)"
777
+ else
778
+ local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
779
+ --model $verifier_model \\
780
+ --dangerously-skip-permissions \\
781
+ --output-format text \\
782
+ 2>&1 | tee $output_log"
783
+ local engine_comment="# Run claude with fresh context (governance.md s7 step 7)"
784
+ fi
785
+
636
786
  {
637
787
  cat <<TRIGGER_EOF
638
788
  #!/bin/zsh
639
- # Trigger for iteration $iter verifier - generated by run_ralph_desk.zsh
789
+ # Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
640
790
  # DO NOT use exec here -- it breaks heartbeat cleanup
641
791
 
642
792
  HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
@@ -651,12 +801,8 @@ HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
651
801
  ) &
652
802
  HEARTBEAT_PID=\$!
653
803
 
654
- # Run claude with fresh context (governance.md s7 step 7)
655
- claude -p "\$(cat $prompt_file)" \\
656
- --model $VERIFIER_MODEL \\
657
- --dangerously-skip-permissions \\
658
- --output-format text \\
659
- 2>&1 | tee $output_log
804
+ $engine_comment
805
+ $engine_cmd
660
806
 
661
807
  # Cleanup heartbeat writer
662
808
  kill \$HEARTBEAT_PID 2>/dev/null
@@ -680,6 +826,21 @@ update_status() {
680
826
  local phase="$1"
681
827
  local last_result="$2"
682
828
 
829
+ # Build verified_us as JSON array
830
+ local verified_us_json="[]"
831
+ if [[ -n "$VERIFIED_US" ]]; then
832
+ verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
833
+ fi
834
+
835
+ # Build consensus fields
836
+ local consensus_json=""
837
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
838
+ consensus_json=',
839
+ "consensus_round": '"$CONSENSUS_ROUND"',
840
+ "claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
841
+ "codex_verdict": "'"${CODEX_VERDICT:-}"'"'
842
+ fi
843
+
683
844
  echo '{
684
845
  "slug": "'"$SLUG"'",
685
846
  "iteration": '"$ITERATION"',
@@ -687,8 +848,13 @@ update_status() {
687
848
  "phase": "'"$phase"'",
688
849
  "worker_model": "'"$WORKER_MODEL"'",
689
850
  "verifier_model": "'"$VERIFIER_MODEL"'",
851
+ "worker_engine": "'"$WORKER_ENGINE"'",
852
+ "verifier_engine": "'"$VERIFIER_ENGINE"'",
853
+ "verify_mode": "'"$VERIFY_MODE"'",
854
+ "verify_consensus": '"$VERIFY_CONSENSUS"',
690
855
  "last_result": "'"$last_result"'",
691
856
  "consecutive_failures": '"$CONSECUTIVE_FAILURES"',
857
+ "verified_us": '"$verified_us_json"''"$consensus_json"',
692
858
  "updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
693
859
  }' | atomic_write "$STATUS_FILE"
694
860
  }
@@ -753,6 +919,9 @@ Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
753
919
  cleanup() {
754
920
  log "Cleaning up..."
755
921
 
922
+ # Remove lockfile
923
+ rm -f "$DESK/logs/.rlp-desk-$SLUG.lock" 2>/dev/null
924
+
756
925
  # Kill claude processes then kill panes
757
926
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
758
927
  if [[ -n "${WORKER_PANE:-}" ]]; then
@@ -764,10 +933,14 @@ cleanup() {
764
933
  tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
765
934
  fi
766
935
  sleep 2
767
- # Kill the panes themselves
768
- log_debug "cleanup: killing panes $WORKER_PANE $VERIFIER_PANE"
769
- tmux kill-pane -t "$WORKER_PANE" 2>&1 | while read -r line; do log_debug "kill worker: $line"; done
770
- tmux kill-pane -t "$VERIFIER_PANE" 2>&1 | while read -r line; do log_debug "kill verifier: $line"; done
936
+ # Kill panes on completion
937
+ if [[ -n "${WORKER_PANE:-}" ]]; then
938
+ tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
939
+ fi
940
+ if [[ -n "${VERIFIER_PANE:-}" ]]; then
941
+ tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
942
+ fi
943
+ log " Panes cleaned up."
771
944
 
772
945
  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
773
946
  setopt local_options nonomatch 2>/dev/null
@@ -780,6 +953,70 @@ cleanup() {
780
953
  local minutes=$(( elapsed / 60 ))
781
954
  local seconds=$(( elapsed % 60 ))
782
955
 
956
+ local final_status="UNKNOWN"
957
+ if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
958
+ elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
959
+ else final_status="TIMEOUT"; fi
960
+
961
+ if (( DEBUG )); then
962
+ local end_ts=$(date +%s)
963
+ local elapsed=$((end_ts - START_TIME))
964
+
965
+ log_debug "[EXEC] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
966
+
967
+ # --- Validation ---
968
+ log_debug "[VALIDATE] === Execution Validation ==="
969
+
970
+ # 1. Did the correct verify mode run?
971
+ log_debug "[VALIDATE] verify_mode=$VERIFY_MODE configured=true"
972
+
973
+ # 2. Per-US: were all US individually verified?
974
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
975
+ local prd_file="$DESK/plans/prd-$SLUG.md"
976
+ local expected_us=""
977
+ if [[ -f "$prd_file" ]]; then
978
+ expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
979
+ fi
980
+ local verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
981
+ local expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
982
+
983
+ if [[ "$final_status" = "COMPLETE" ]]; then
984
+ if (( verified_count >= expected_count )); then
985
+ log_debug "[VALIDATE] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
986
+ else
987
+ log_debug "[VALIDATE] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
988
+ fi
989
+ else
990
+ log_debug "[VALIDATE] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
991
+ fi
992
+ fi
993
+
994
+ # 3. Consensus: were both engines used?
995
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
996
+ if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
997
+ log_debug "[VALIDATE] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
998
+ else
999
+ log_debug "[VALIDATE] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
1000
+ fi
1001
+ fi
1002
+
1003
+ # 4. Engine match: did the configured engines actually run?
1004
+ local worker_dispatches=$(grep -c '\[EXEC\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1005
+ local verifier_dispatches=$(grep -c '\[EXEC\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1006
+ log_debug "[VALIDATE] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
1007
+
1008
+ # 5. Fix loops: how many fix contracts were generated?
1009
+ local fix_count=$(grep -c '\[EXEC\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
1010
+ log_debug "[VALIDATE] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
1011
+
1012
+ # 6. Circuit breakers: any triggered?
1013
+ local cb_count=$(grep -c '\[EXEC\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
1014
+ log_debug "[VALIDATE] circuit_breakers_triggered=$cb_count"
1015
+
1016
+ # 7. Overall result
1017
+ log_debug "[VALIDATE] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
1018
+ fi
1019
+
783
1020
  echo ""
784
1021
  echo "============================================================"
785
1022
  echo " Ralph Desk Tmux Runner - Session Complete"
@@ -870,6 +1107,7 @@ poll_for_signal() {
870
1107
  (( HEARTBEAT_STALE_COUNT++ ))
871
1108
  # Circuit breaker: 3 consecutive heartbeat stale events
872
1109
  if (( HEARTBEAT_STALE_COUNT >= 3 )); then
1110
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
873
1111
  log_error "Circuit breaker: 3 consecutive heartbeat stale events"
874
1112
  return 1
875
1113
  fi
@@ -887,6 +1125,16 @@ poll_for_signal() {
887
1125
  fi
888
1126
  fi
889
1127
 
1128
+ # Auto-approve permission prompts during poll
1129
+ local poll_capture
1130
+ poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
1131
+ if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
1132
+ log " Permission prompt detected during poll, auto-approving..."
1133
+ log_debug "[EXEC] iter=$ITERATION permission_prompt_auto_approved=true"
1134
+ tmux send-keys -t "$pane_id" Enter
1135
+ sleep 0.5
1136
+ fi
1137
+
890
1138
  # Idle pane nudging (tmux pattern)
891
1139
  check_and_nudge_idle_pane "$pane_id" "nudge_count"
892
1140
 
@@ -926,6 +1174,218 @@ check_stale_context() {
926
1174
  return 0
927
1175
  }
928
1176
 
1177
+ # =============================================================================
1178
+ # Consensus Verification (run two verifiers sequentially in same pane)
1179
+ # =============================================================================
1180
+
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
#
# Writes the prompt/trigger files for one engine, resets the Verifier pane to
# a clean shell, launches the engine, waits for $VERDICT_FILE to appear, and
# copies it to the caller-supplied destination.
#
# Arguments:
#   $1  iteration number
#   $2  engine: claude|codex
#   $3  model used for the claude launch (the codex launch uses $CODEX_MODEL)
#   $4  file-name suffix, e.g. "-claude" or "-codex"
#   $5  path the verdict file is copied to on success
# Globals read:
#   LOGS_DIR, VERIFIER_PANE, VERDICT_FILE, VERIFIER_HEARTBEAT, CLAUDE_BIN,
#   CODEX_BIN, CODEX_MODEL, CODEX_REASONING, ITER_TIMEOUT, POLL_INTERVAL
# Returns:
#   0 when a verdict was produced and saved to $5
#   1 when the verifier failed to start, polling failed or timed out, or the
#     verdict could not be copied to $5
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Every local is declared exactly once, up front. Under zsh (TYPESET_SILENT
  # unset) re-running a bare `local name` inside a loop prints the current
  # value of `name`, which would dump whole pane captures to stdout.
  local iter_tag prompt_file quoted_prompt
  local verifier_cmd verifier_launch verifier_instruction codex_cmd
  local v_submit v_check codex_poll_start codex_elapsed

  # Write trigger + prompt for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  iter_tag=$(printf '%03d' "$iter")
  prompt_file="$LOGS_DIR/iter-${iter_tag}.verifier${suffix}-prompt.md"

  # Clean previous Verifier session: interrupt, then ask the TUI to exit
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier (best effort)
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file so the poll below only sees a fresh one
  rm -f -- "$VERDICT_FILE" 2>/dev/null

  # Launch verifier
  if [[ "$engine" = "codex" ]]; then
    # Codex: use non-interactive exec mode in pane (more reliable than TUI for sequential runs).
    # The prompt path is shell-quoted because the command line is re-parsed by
    # the shell running inside the pane.
    quoted_prompt=$(printf '%q' "$prompt_file")
    codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $quoted_prompt)\" -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
    log " Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix codex exec sent directly"
  else
    # Claude: use interactive TUI
    verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
    log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
    tmux send-keys -t "$VERIFIER_PANE" Enter

    if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi

    sleep 3
    verifier_instruction="Read and execute the instructions in $prompt_file"
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix instruction sent directly"

    # Verify claude actually started working; re-submit with C-m up to 10 times
    v_submit=0
    while (( v_submit < 10 )); do
      sleep 2
      v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
      if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
        log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
        break
      fi
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      sleep 0.3
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      v_submit=$(( v_submit + 1 ))
    done
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: simple file poll (non-interactive, no heartbeat/nudge needed)
    log " Polling for verify-verdict.json ($suffix, codex exec)..."
    codex_poll_start=$(date +%s)
    while true; do
      # Accept the file only once it parses as JSON (it may be mid-write)
      if [[ -f "$VERDICT_FILE" ]] && jq . "$VERDICT_FILE" >/dev/null 2>&1; then
        log " Verdict file detected: $VERDICT_FILE"
        break
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log " Polling for verify-verdict.json ($suffix)..."
    if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination; a failed copy must not be reported as success
  if ! cp -- "$VERDICT_FILE" "$verdict_dest"; then
    log_error "Verifier$suffix verdict could not be copied to $verdict_dest"
    return 1
  fi
  log " Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
1290
+
# Print one markdown bullet per entry of the .issues array in verdict file $1.
# Prints a single placeholder bullet when the file has no readable issues.
_consensus_issue_lines() {
  local verdict_file="$1"
  local bullets
  bullets=$(jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$verdict_file" 2>/dev/null)
  if [[ -n "$bullets" ]]; then
    printf '%s\n' "$bullets"
  else
    echo "- (no structured issues)"
  fi
}

# Write a merged "fail" verdict to $VERDICT_FILE.
#   $1 summary text, $2 recommended_state_transition, $3 round number
# jq builds the document so verdict strings are always escaped correctly.
_consensus_write_fail_verdict() {
  local summary="$1"
  local transition="$2"
  local round="$3"
  jq -n \
    --arg verified_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg summary "$summary" \
    --arg transition "$transition" \
    --arg claude "$CLAUDE_VERDICT" \
    --arg codex "$CODEX_VERDICT" \
    --argjson round "$round" \
    '{
      verdict: "fail",
      verified_at_utc: $verified_at,
      summary: $summary,
      issues: [],
      recommended_state_transition: $transition,
      consensus: { claude: $claude, codex: $codex, round: $round }
    }' | atomic_write "$VERDICT_FILE"
}

# --- US-004: Run consensus verification (claude + codex sequentially) ---
#
# Runs one consensus round: the claude verifier, then the codex verifier, both
# through run_single_verifier. Both must report "pass" for the round to pass.
#
# Arguments:
#   $1  iteration number
#   $2  (optional, default 0) number of consensus rounds already spent on the
#       current unit of work. With the default, every call is "round 1" and
#       the blocked outcome below cannot be reached, which matches the
#       previous behaviour. Passing the prior round count lets the third
#       disagreement end in "blocked".
#       NOTE(review): the caller is not visible here; confirm whether it
#       should pass the previous CONSENSUS_ROUND on retries.
# Globals written: CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT
# Files written:
#   iter-NNN.verify-verdict-claude.json / -codex.json (per-engine verdicts)
#   iter-NNN.fix-contract.md (when the round does not pass)
#   $VERDICT_FILE (claude's verdict on pass, a merged fail verdict otherwise)
# Returns:
#   0  both verifiers passed
#   2  disagreement/failure with rounds remaining; caller should retry
#   1  a verifier could not be run, or the round limit was reached (blocked)
run_consensus_verification() {
  local iter="$1"
  local start_round="${2:-0}"
  local max_rounds=3
  local iter_tag claude_verdict_file codex_verdict_file fix_contract
  local _combined_action

  iter_tag=$(printf '%03d' "$iter")
  claude_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-claude.json"
  codex_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-codex.json"

  # Anything that is not a plain non-negative integer falls back to 0.
  # 10# forces decimal so a value such as "08" is not read as octal.
  if [[ "$start_round" =~ ^[0-9]+$ ]]; then
    CONSENSUS_ROUND=$(( 10#$start_round ))
  else
    CONSENSUS_ROUND=0
  fi
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""

  if (( CONSENSUS_ROUND < max_rounds )); then
    CONSENSUS_ROUND=$(( CONSENSUS_ROUND + 1 ))
    log " Consensus round $CONSENSUS_ROUND/${max_rounds}..."

    # Run claude verifier first
    if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
      log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)

    # Run codex verifier second
    if ! run_single_verifier "$iter" "codex" "$CODEX_MODEL" "-codex" "$codex_verdict_file"; then
      log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
      return 1
    fi
    CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)

    log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
    _combined_action="retry"
    if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
      _combined_action="pass"
    elif (( CONSENSUS_ROUND >= max_rounds )); then
      _combined_action="blocked"
    fi
    log_debug "[EXEC] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"

    # Both pass -> success; claude's verdict becomes the primary verdict
    if [[ "$_combined_action" = "pass" ]]; then
      cp -- "$claude_verdict_file" "$VERDICT_FILE"
      return 0
    fi

    # Either did not pass -> build combined fix contract. Issues are listed for
    # any non-pass verdict so the Worker always gets whatever detail exists.
    fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"
    {
      echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
      echo ""
      echo "## Claude Verdict: $CLAUDE_VERDICT"
      if [[ "$CLAUDE_VERDICT" != "pass" ]]; then
        echo "### Claude Issues"
        _consensus_issue_lines "$claude_verdict_file"
      fi
      echo ""
      echo "## Codex Verdict: $CODEX_VERDICT"
      if [[ "$CODEX_VERDICT" != "pass" ]]; then
        echo "### Codex Issues"
        _consensus_issue_lines "$codex_verdict_file"
      fi
      echo ""
      echo "## Traceability"
      echo "Only changes that resolve a listed issue are allowed."
    } | atomic_write "$fix_contract"

    log " Combined fix contract: $fix_contract"

    # Rounds remain: hand a merged fail verdict back so the main loop can run
    # the fix loop with the contract written above.
    if (( CONSENSUS_ROUND < max_rounds )); then
      _consensus_write_fail_verdict \
        "Consensus disagreement (round $CONSENSUS_ROUND/${max_rounds}): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
        "continue" \
        "$CONSENSUS_ROUND"
      return 2 # special return: consensus disagreement, needs retry
    fi
  fi

  # Max consensus rounds exceeded
  log_error "Consensus failed after ${max_rounds} rounds"
  _consensus_write_fail_verdict \
    "Consensus failed after ${max_rounds} rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT" \
    "blocked" \
    "$max_rounds"
  return 1
}
1388
+
929
1389
  # =============================================================================
930
1390
  # Security Warning
931
1391
  # =============================================================================
@@ -947,6 +1407,21 @@ print_security_warning() {
947
1407
  # =============================================================================
948
1408
 
949
1409
  main() {
1410
+ # --- Lockfile: prevent duplicate execution ---
1411
+ local lockfile="$DESK/logs/.rlp-desk-$SLUG.lock"
1412
+ mkdir -p "$(dirname "$lockfile")" 2>/dev/null
1413
+ if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
1414
+ local lock_pid
1415
+ lock_pid=$(cat "$lockfile" 2>/dev/null)
1416
+ if kill -0 "$lock_pid" 2>/dev/null; then
1417
+ log_error "Another instance is already running (PID $lock_pid)"
1418
+ exit 1
1419
+ fi
1420
+ # Stale lock — overwrite
1421
+ echo $$ > "$lockfile"
1422
+ fi
1423
+ mkdir -p "$LOGS_DIR" 2>/dev/null
1424
+
950
1425
  # --- Startup ---
951
1426
  log "Ralph Desk Tmux Runner starting..."
952
1427
  log " Slug: $SLUG"
@@ -954,8 +1429,49 @@ main() {
954
1429
  log " Max iterations: $MAX_ITER"
955
1430
  log " Worker model: $WORKER_MODEL"
956
1431
  log " Verifier model: $VERIFIER_MODEL"
1432
+ log " Verify mode: $VERIFY_MODE"
1433
+ log " Verify consensus:$VERIFY_CONSENSUS"
957
1434
  log " Poll interval: ${POLL_INTERVAL}s"
958
1435
  log " Iter timeout: ${ITER_TIMEOUT}s"
1436
+ # --- Debug: Log execution plan ---
1437
+ if (( DEBUG )); then
1438
+ # Extract US IDs from PRD
1439
+ local prd_file="$DESK/plans/prd-$SLUG.md"
1440
+ local us_list=""
1441
+ if [[ -f "$prd_file" ]]; then
1442
+ us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1443
+ fi
1444
+ local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
1445
+
1446
+ log_debug "[PLAN] slug=$SLUG us_count=$us_count us_list=$us_list"
1447
+ log_debug "[PLAN] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
1448
+ log_debug "[PLAN] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
1449
+ log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS max_iter=$MAX_ITER"
1450
+
1451
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
1452
+ # Build expected flow
1453
+ local expected_flow=""
1454
+ for us in $(echo "$us_list" | tr ',' ' '); do
1455
+ expected_flow="${expected_flow}worker->verify($us)->"
1456
+ done
1457
+ expected_flow="${expected_flow}verify(ALL)->COMPLETE"
1458
+ log_debug "[PLAN] expected_flow=$expected_flow"
1459
+ else
1460
+ log_debug "[PLAN] expected_flow=worker(all)->verify(ALL)->COMPLETE"
1461
+ fi
1462
+
1463
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1464
+ log_debug "[PLAN] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
1465
+ fi
1466
+ fi
1467
+
1468
+ # Extract US list for per-US sequencing
1469
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
1470
+ local prd_file="$DESK/plans/prd-$SLUG.md"
1471
+ if [[ -f "$prd_file" ]]; then
1472
+ US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1473
+ fi
1474
+ fi
959
1475
 
960
1476
  # Dependency checks
961
1477
  check_dependencies
@@ -982,6 +1498,9 @@ main() {
982
1498
  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
983
1499
  log ""
984
1500
  log "========== Iteration $ITERATION / $MAX_ITER =========="
1501
+ local _iter_contract=""
1502
+ _iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
1503
+ log_debug "[EXEC] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
985
1504
 
986
1505
  # --- governance.md s7 step 1: Check sentinels ---
987
1506
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
@@ -1021,12 +1540,19 @@ main() {
1021
1540
 
1022
1541
  update_status "worker" "running"
1023
1542
 
1024
- # --- governance.md s7 step 5: Execute Worker (interactive claude, tmux pattern) ---
1025
- # Step 5a: Launch interactive claude in Worker pane
1026
- local worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
1027
- log " Launching Worker claude in pane $WORKER_PANE..."
1543
+ # --- governance.md s7 step 5: Execute Worker (interactive TUI, tmux pattern) ---
1544
+ # Step 5a: Launch interactive worker engine in Worker pane
1545
+ local worker_launch
1546
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
1547
+ worker_launch="${CODEX_BIN:-codex} -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1548
+ log " Launching Worker codex in pane $WORKER_PANE..."
1549
+ else
1550
+ worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
1551
+ log " Launching Worker claude in pane $WORKER_PANE..."
1552
+ fi
1028
1553
  tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
1029
1554
  tmux send-keys -t "$WORKER_PANE" Enter
1555
+ log_debug "[EXEC] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
1030
1556
 
1031
1557
  # Step 5b: Wait for the worker engine's TUI (claude or codex) to be ready (tmux pattern)
1032
1558
  if ! wait_for_pane_ready "$WORKER_PANE" 30; then
@@ -1036,39 +1562,76 @@ main() {
1036
1562
  return 1
1037
1563
  fi
1038
1564
 
1039
- # Step 5c: Wait for claude to fully initialize, then send instruction
1565
+ # Step 5c: Wait for the worker engine to fully initialize, then send the instruction directly
1040
1566
  sleep 3
1041
1567
  local worker_instruction="Read and execute the instructions in $worker_prompt"
1042
- if ! safe_send_keys "$WORKER_PANE" "$worker_instruction"; then
1043
- log_error "Failed to send instruction to Worker"
1568
+ tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1569
+ tmux send-keys -t "$WORKER_PANE" Enter
1570
+ log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
1571
+
1572
+ # Verify the worker actually started working — resend C-m (up to 10 checks) until activity is detected
1573
+ local submit_attempts=0
1574
+ while (( submit_attempts < 10 )); do
1575
+ sleep 2
1576
+ local pane_check
1577
+ pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
1578
+ if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1579
+ log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
1580
+ log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
1581
+ break
1582
+ fi
1583
+ tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1584
+ sleep 0.3
1585
+ tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1586
+ (( submit_attempts++ ))
1587
+ done
1588
+ if (( submit_attempts >= 10 )); then
1589
+ log " WARNING: Could not confirm Worker started working after 10 attempts"
1590
+ log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=10"
1044
1591
  fi
1045
- # Extra C-m to ensure submission (long text may false-positive the consumed check)
1046
- sleep 0.5
1047
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1048
- sleep 0.3
1049
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1050
1592
 
1051
1593
  # --- governance.md s7 step 5+6: Poll for Worker completion ---
1052
1594
  log " Polling for iter-signal.json..."
1053
- if ! poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
1054
- # Check if Worker is still actively running (not stuck)
1055
- local worker_cmd
1056
- worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
1057
- if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" ]]; then
1058
- # Worker is still active — timeout but not a failure, just slow
1059
- log " Worker timed out but still active ($worker_cmd). Extending..."
1060
- update_status "worker" "slow"
1061
- continue
1062
- fi
1063
- # Worker is truly dead/stuck
1064
- (( MONITOR_FAILURE_COUNT++ ))
1065
- if (( MONITOR_FAILURE_COUNT >= 3 )); then
1066
- write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
1067
- update_status "blocked" "monitor_failures"
1068
- return 1
1595
+ local worker_poll_done=0
1596
+ while (( ! worker_poll_done )); do
1597
+ if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
1598
+ worker_poll_done=1
1599
+ log_debug "[EXEC] iter=$ITERATION poll_signal_received=true"
1600
+ else
1601
+ # Check if Worker is still actively running (not stuck)
1602
+ local worker_cmd
1603
+ worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
1604
+ if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
1605
+ log " Worker timed out but still active ($worker_cmd). Extending poll..."
1606
+ log_debug "[EXEC] iter=$ITERATION timeout_active=true process=$worker_cmd"
1607
+ log_debug "[EXEC] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
1608
+ update_status "worker" "slow"
1609
+ # Loop continues — re-poll same iteration
1610
+ else
1611
+ # Worker is truly dead/stuck
1612
+ (( MONITOR_FAILURE_COUNT++ ))
1613
+ log_debug "[EXEC] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
1614
+ if (( MONITOR_FAILURE_COUNT >= 3 )); then
1615
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
1616
+ write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
1617
+ update_status "blocked" "monitor_failures"
1618
+ return 1
1619
+ fi
1620
+ log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
1621
+ update_status "worker" "poll_failed"
1622
+ worker_poll_done=1 # exit poll loop, continue to next iteration
1623
+ log_debug "[EXEC] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
1624
+ # Kill dead worker session so next iteration starts fresh
1625
+ tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
1626
+ tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
1627
+ sleep 1
1628
+ fi
1069
1629
  fi
1070
- log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
1071
- update_status "worker" "poll_failed"
1630
+ done
1631
+
1632
+ if [[ ! -f "$SIGNAL_FILE" ]]; then
1633
+ log_debug "[EXEC] iter=$ITERATION no_signal_after_poll=true continuing"
1634
+ # No signal — monitor failure, go to next iteration
1072
1635
  continue
1073
1636
  fi
1074
1637
 
@@ -1083,6 +1646,11 @@ main() {
1083
1646
 
1084
1647
  log " Worker signal: status=$signal_status summary=\"$signal_summary\""
1085
1648
 
1649
+ # Read us_id early for EXEC logging (the verify branch re-reads it into signal_us_id)
1650
+ local signal_us_id_early=""
1651
+ signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
1652
+ log_debug "[EXEC] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
1653
+
1086
1654
  case "$signal_status" in
1087
1655
  continue)
1088
1656
  # --- governance.md s7 step 6: continue -> go to step 8 ---
@@ -1091,52 +1659,94 @@ main() {
1091
1659
  ;;
1092
1660
  verify)
1093
1661
  # --- governance.md s7 step 7: Execute Verifier ---
1094
- log " Worker claims done. Dispatching Verifier..."
1095
-
1096
- write_verifier_trigger "$ITERATION"
1097
- local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
1662
+ # Read us_id from signal for per-US scoping
1663
+ local signal_us_id=""
1664
+ signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
1665
+ log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
1098
1666
 
1099
1667
  update_status "verifier" "running"
1100
1668
 
1101
- # Step 7a: Clean previous Verifier session if claude is running
1102
- local verifier_cmd
1103
- verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1104
- if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" ]]; then
1105
- tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1106
- sleep 0.5
1107
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1108
- sleep 2
1109
- wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
1110
- fi
1111
-
1112
- local verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
1113
- log " Launching Verifier claude in pane $VERIFIER_PANE..."
1114
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1115
- tmux send-keys -t "$VERIFIER_PANE" Enter
1116
-
1117
- # Step 7b: Wait for claude TUI to be ready
1118
- if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1119
- log_error "Verifier claude failed to start"
1120
- update_status "verifier" "start_failed"
1121
- continue
1122
- fi
1123
-
1124
- # Step 7c: Wait for claude to fully initialize, then send instruction
1125
- sleep 3
1126
- local verifier_instruction="Read and execute the instructions in $verifier_prompt"
1127
- safe_send_keys "$VERIFIER_PANE" "$verifier_instruction"
1128
- # Extra C-m to ensure submission
1129
- sleep 0.5
1130
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1131
- sleep 0.3
1132
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1133
-
1134
- # Poll for verify-verdict.json
1135
- log " Polling for verify-verdict.json..."
1136
- if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
1137
- log_error "Verifier poll failed"
1138
- update_status "verifier" "poll_failed"
1139
- continue
1669
+ # --- Consensus vs single verification ---
1670
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1671
+ # US-004: Run consensus verification (claude + codex sequentially)
1672
+ local consensus_rc=0
1673
+ run_consensus_verification "$ITERATION" || consensus_rc=$?
1674
+
1675
+ if (( consensus_rc == 2 )); then
1676
+ # Consensus disagreement — treat as fail, fix loop will handle
1677
+ log " Consensus disagreement, treating as fail."
1678
+ elif (( consensus_rc != 0 )); then
1679
+ # Consensus verification failed entirely
1680
+ log_error "Consensus verification failed"
1681
+ write_blocked_sentinel "Consensus verification failed after max rounds"
1682
+ update_status "blocked" "consensus_failed"
1683
+ return 1
1684
+ fi
1685
+ else
1686
+ # Standard single-engine verification
1687
+ write_verifier_trigger "$ITERATION"
1688
+ local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
1689
+
1690
+ # Step 7a: Clean previous Verifier session if running
1691
+ local verifier_cmd
1692
+ verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1693
+ if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1694
+ tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1695
+ sleep 0.5
1696
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1697
+ sleep 2
1698
+ wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
1699
+ fi
1700
+
1701
+ local verifier_launch
1702
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
1703
+ verifier_launch="${CODEX_BIN:-codex} -m $CODEX_MODEL -c model_reasoning_effort=\"$CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1704
+ log " Launching Verifier codex in pane $VERIFIER_PANE..."
1705
+ else
1706
+ verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
1707
+ log " Launching Verifier claude in pane $VERIFIER_PANE..."
1708
+ fi
1709
+ tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1710
+ tmux send-keys -t "$VERIFIER_PANE" Enter
1711
+ log_debug "[EXEC] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
1712
+
1713
+ # Step 7b: Wait for TUI to be ready
1714
+ if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1715
+ log_error "Verifier failed to start"
1716
+ update_status "verifier" "start_failed"
1717
+ continue
1718
+ fi
1719
+
1720
+ # Step 7c: Send instruction
1721
+ sleep 3
1722
+ local verifier_instruction="Read and execute the instructions in $verifier_prompt"
1723
+ tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1724
+ tmux send-keys -t "$VERIFIER_PANE" Enter
1725
+ log_debug "Verifier instruction sent directly"
1726
+
1727
+ # Verify verifier actually started working
1728
+ local vs_submit=0
1729
+ while (( vs_submit < 10 )); do
1730
+ sleep 2
1731
+ local vs_check
1732
+ vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
1733
+ if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1734
+ log_debug "Verifier started working after $((vs_submit + 1)) checks"
1735
+ break
1736
+ fi
1737
+ tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1738
+ sleep 0.3
1739
+ tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1740
+ (( vs_submit++ ))
1741
+ done
1742
+
1743
+ # Poll for verify-verdict.json
1744
+ log " Polling for verify-verdict.json..."
1745
+ if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
1746
+ log_error "Verifier poll failed"
1747
+ update_status "verifier" "poll_failed"
1748
+ continue
1749
+ fi
1140
1750
  fi
1141
1751
 
1142
1752
  # --- governance.md s7 step 7: Read verdict via jq ---
@@ -1149,12 +1759,28 @@ main() {
1149
1759
 
1150
1760
  log " Verifier: verdict=$verdict recommended=$recommended"
1151
1761
  log " Verifier summary: \"$verdict_summary\""
1762
+ local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
1763
+ log_debug "[EXEC] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
1152
1764
 
1153
1765
  case "$verdict" in
1154
1766
  pass)
1155
1767
  CONSECUTIVE_FAILURES=0
1156
- if [[ "$recommended" == "complete" ]]; then
1157
- # Write COMPLETE sentinel (only Leader writes sentinels)
1768
+ CONSENSUS_ROUND=0
1769
+
1770
+ # --- Per-US tracking ---
1771
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
1772
+ # Add this US to verified list
1773
+ if [[ -n "$VERIFIED_US" ]]; then
1774
+ VERIFIED_US="${VERIFIED_US},${signal_us_id}"
1775
+ else
1776
+ VERIFIED_US="$signal_us_id"
1777
+ fi
1778
+ log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
1779
+ log_debug "[EXEC] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
1780
+ update_status "verifier" "pass_us"
1781
+ # Worker will do next US on next iteration
1782
+ elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
1783
+ # Final full verify passed or complete recommended
1158
1784
  write_complete_sentinel "$verdict_summary"
1159
1785
  update_status "complete" "pass"
1160
1786
  return 0
@@ -1185,9 +1811,11 @@ main() {
1185
1811
  jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
1186
1812
  } | atomic_write "$fix_contract"
1187
1813
  log " Fix contract: $fix_contract"
1814
+ log_debug "[EXEC] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
1188
1815
 
1189
1816
  # Circuit breaker: consecutive failures
1190
1817
  if (( CONSECUTIVE_FAILURES >= 3 )); then
1818
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"3 consecutive verification failures\""
1191
1819
  log_error "Circuit breaker: 3 consecutive verification failures"
1192
1820
  write_blocked_sentinel "3 consecutive verification failures"
1193
1821
  update_status "blocked" "consecutive_failures"
@@ -1233,6 +1861,7 @@ main() {
1233
1861
 
1234
1862
  # --- governance.md s7 step 8: Circuit breaker - stale context check ---
1235
1863
  if ! check_stale_context; then
1864
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
1236
1865
  write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
1237
1866
  update_status "blocked" "stale_context"
1238
1867
  return 1