@ai-dev-methodologies/rlp-desk 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,18 @@ set -uo pipefail
29
29
  # IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
30
30
  # MAX_NUDGES - max nudges per pane per iteration (default: 3)
31
31
  #
32
+ # Per-role codex config:
33
+ # WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.4)
34
+ # WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
35
+ # VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.4)
36
+ # VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
37
+ #
38
+ # Consensus scope:
39
+ # CONSENSUS_SCOPE - when consensus applies (default: all)
40
+ # all = consensus on every verify; final-only = consensus only on the final us_id="ALL" verify
41
+ #
32
42
  # Dependencies: tmux, claude CLI, jq
43
+ # Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
33
44
  # =============================================================================
34
45
 
35
46
  # --- Environment Variables ---
@@ -45,6 +56,20 @@ MAX_RESTARTS="${MAX_RESTARTS:-3}"
45
56
  IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
46
57
  MAX_NUDGES="${MAX_NUDGES:-3}"
47
58
 
59
+ # --- Engine Selection ---
60
+ WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
61
+ VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
62
+ WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
63
+ WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
64
+ VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
65
+ VERIFIER_CODEX_REASONING="${VERIFIER_CODEX_REASONING:-high}" # low|medium|high
66
+ CODEX_BIN="" # resolved by check_dependencies when engine=codex
67
+
68
+ # --- Verify Mode ---
69
+ VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
70
+ VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
71
+ CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
72
+
48
73
  # --- Derived Paths ---
49
74
  DESK="$ROOT/.claude/ralph-desk"
50
75
  PROMPTS_DIR="$DESK/prompts"
@@ -80,6 +105,9 @@ CONSECUTIVE_FAILURES=0
80
105
  PREV_CONTEXT_HASH=""
81
106
  ITERATION=0
82
107
  START_TIME=$(date +%s)
108
+ VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
109
+ CONSENSUS_ROUND=0 # current consensus round for current US
110
+ US_LIST="" # comma-separated US IDs from PRD (per-us mode)
83
111
 
84
112
  # =============================================================================
85
113
  # Utility Functions
@@ -112,6 +140,31 @@ atomic_write() {
112
140
  mv "$tmp" "$target"
113
141
  }
114
142
 
143
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
# Kills the given pane and opens a replacement by splitting off the leader
# pane, then records the new pane id in session-config.json.
#
# Arguments:
#   $1 - tmux pane id of the pane to replace
#   $2 - role key to update under .panes ("worker" or "verifier")
# Globals read: LEADER_PANE, ROOT, SESSION_CONFIG, ITERATION
# Outputs:  the new pane id on stdout
#           (assumes log/log_debug/log_error do not write to stdout — TODO confirm)
# Returns:  0 on success, 1 if tmux could not create the replacement pane
replace_worker_pane() {
  local old_pane="$1"
  local role="$2"  # "worker" or "verifier"
  local new_pane updated_config

  log " Replacing dead $role pane $old_pane..."
  # Best effort: the old pane may already be gone.
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Create fresh pane via split-window off leader (omc-teams kill-and-replace pattern)
  new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
  if [[ -z "$new_pane" ]]; then
    # Without a pane id there is nothing valid to record or return.
    log_error "Failed to create replacement $role pane (old pane: $old_pane)"
    return 1
  fi

  log " New $role pane: $new_pane (replaced $old_pane)"
  log_debug "[EXEC] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"

  # Update session-config.json with new pane ID. The jq result is captured
  # first so a jq failure never feeds empty input to atomic_write.
  if [[ -f "$SESSION_CONFIG" ]]; then
    if updated_config=$(jq --arg role "$role" --arg pane "$new_pane" \
        '.panes[$role] = $pane' "$SESSION_CONFIG" 2>/dev/null) \
        && [[ -n "$updated_config" ]]; then
      printf '%s\n' "$updated_config" | atomic_write "$SESSION_CONFIG"
      log_debug "Updated session-config.json: $role pane → $new_pane"
    else
      log_error "Could not update $SESSION_CONFIG with new $role pane $new_pane; file left unchanged"
    fi
  fi

  echo "$new_pane"
}
167
+
115
168
  # =============================================================================
116
169
  # Dependency Checks
117
170
  # =============================================================================
@@ -135,6 +188,19 @@ check_dependencies() {
135
188
  missing=1
136
189
  fi
137
190
 
191
+ # Codex binary required only when engine=codex or consensus verification is enabled
192
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
193
+ if ! command -v codex >/dev/null 2>&1; then
194
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
195
+ log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
196
+ else
197
+ log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
198
+ fi
199
+ log_error "Install with: npm install -g @openai/codex"
200
+ missing=1
201
+ fi
202
+ fi
203
+
138
204
  if (( missing )); then
139
205
  exit 1
140
206
  fi
@@ -142,6 +208,12 @@ check_dependencies() {
142
208
  # Resolve full path to claude binary for reliable launches
143
209
  CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
144
210
  log " Claude binary: $CLAUDE_BIN"
211
+
212
+ # Resolve codex binary if needed
213
+ if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
214
+ CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
215
+ log " Codex binary: $CODEX_BIN"
216
+ fi
145
217
  }
146
218
 
147
219
  # =============================================================================
@@ -248,6 +320,19 @@ create_session() {
248
320
  "worker": "'"$WORKER_MODEL"'",
249
321
  "verifier": "'"$VERIFIER_MODEL"'"
250
322
  },
323
+ "engines": {
324
+ "worker": "'"$WORKER_ENGINE"'",
325
+ "verifier": "'"$VERIFIER_ENGINE"'",
326
+ "worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
327
+ "worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
328
+ "verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
329
+ "verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'"
330
+ },
331
+ "verification": {
332
+ "verify_mode": "'"$VERIFY_MODE"'",
333
+ "verify_consensus": '"$VERIFY_CONSENSUS"',
334
+ "consensus_scope": "'"$CONSENSUS_SCOPE"'"
335
+ },
251
336
  "config": {
252
337
  "max_iter": '"$MAX_ITER"',
253
338
  "poll_interval": '"$POLL_INTERVAL"',
@@ -305,10 +390,19 @@ safe_send_keys() {
305
390
  log_debug " Trust prompt detected, dismissing"
306
391
  tmux send-keys -t "$pane_id" C-m
307
392
  sleep 0.12
308
- tmux send-keys -t "$pane_id" C-m
393
+ fi
394
+ # Auto-approve permission prompts ("Do you want to create/overwrite X?")
395
+ if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
396
+ log_debug " Permission prompt detected, auto-approving"
397
+ tmux send-keys -t "$pane_id" Enter
398
+ sleep 0.3
399
+ fi
400
+ # Auto-dismiss codex update prompt (select Skip)
401
+ if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
402
+ log_debug " Codex update prompt detected, selecting Skip"
403
+ tmux send-keys -t "$pane_id" "2" Enter
309
404
  sleep 0.2
310
405
  fi
311
-
312
406
  # Send text in literal mode with -- separator
313
407
  log_debug " Sending text to pane $pane_id (${#text} chars)"
314
408
  tmux send-keys -t "$pane_id" -l -- "$text"
@@ -407,6 +501,22 @@ wait_for_pane_ready() {
407
501
  continue
408
502
  fi
409
503
 
504
+ # Auto-approve permission prompts ("Do you want to create/overwrite X?")
505
+ if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
506
+ log " Permission prompt detected, auto-approving..."
507
+ tmux send-keys -t "$pane_id" Enter
508
+ sleep 0.5
509
+ continue
510
+ fi
511
+
512
+ # Auto-dismiss codex update prompt (select Skip = option 2)
513
+ if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
514
+ log " Codex update prompt detected, selecting Skip..."
515
+ tmux send-keys -t "$pane_id" "2" Enter
516
+ sleep 0.5
517
+ continue
518
+ fi
519
+
410
520
  # tmux paneLooksReady: check each line for prompt char at line start
411
521
  local ready=0
412
522
  echo "$captured" | while IFS= read -r line; do
@@ -529,8 +639,12 @@ restart_worker() {
529
639
  tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
530
640
  sleep 2
531
641
 
532
- # Re-launch claude (tmux interactive pattern)
533
- safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
642
+ # Re-launch worker (tmux interactive pattern)
643
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
644
+ safe_send_keys "$pane_id" "${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
645
+ else
646
+ safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
647
+ fi
534
648
  WORKER_RESTARTS[$iter]=$((restart_count + 1))
535
649
  return 0
536
650
  }
@@ -575,9 +689,64 @@ write_worker_trigger() {
575
689
  echo ""
576
690
  cat "$fix_contract_file"
577
691
  fi
692
+
693
+ # Per-US mode: tell Worker exactly which US to work on
694
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
695
+ # Find next unverified US
696
+ local next_us=""
697
+ for us in $(echo "$US_LIST" | tr ',' ' '); do
698
+ if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
699
+ next_us="$us"
700
+ break
701
+ fi
702
+ done
703
+
704
+ if [[ -n "$next_us" ]]; then
705
+ echo ""
706
+ echo "---"
707
+ echo "## PER-US SCOPE LOCK (this iteration)"
708
+ echo "You MUST implement ONLY **${next_us}** in this iteration."
709
+ echo "Do NOT implement any other user stories."
710
+ echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
711
+ echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
712
+ elif [[ -n "$VERIFIED_US" ]]; then
713
+ # All individual US verified — this is the final full verify iteration
714
+ echo ""
715
+ echo "---"
716
+ echo "## FINAL VERIFICATION ITERATION"
717
+ echo "All individual US have been verified: $VERIFIED_US"
718
+ echo "Run all tests and verification commands to confirm everything works together."
719
+ echo "Signal verify with us_id=\"ALL\" for the final full verification."
720
+ fi
721
+ elif [[ "$VERIFY_MODE" = "batch" ]]; then
722
+ echo ""
723
+ echo "---"
724
+ echo "## BATCH MODE OVERRIDE"
725
+ echo "Ignore any per-US signal instructions above. In batch mode:"
726
+ echo "- Implement ALL user stories in this iteration"
727
+ echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
728
+ echo "- Do NOT signal verify after individual stories"
729
+ fi
578
730
  } | atomic_write "$prompt_file"
579
731
 
580
732
  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
733
+ # Engine-specific launch command (expanded at write time)
734
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
735
+ local engine_cmd="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL \\
736
+ -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
737
+ --dangerously-bypass-approvals-and-sandbox \\
738
+ \"\$(cat $prompt_file)\" \\
739
+ 2>&1 | tee $output_log"
740
+ local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
741
+ else
742
+ local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
743
+ --model $WORKER_MODEL \\
744
+ --dangerously-skip-permissions \\
745
+ --output-format text \\
746
+ 2>&1 | tee $output_log"
747
+ local engine_comment="# Run claude with fresh context (governance.md s7 step 5)"
748
+ fi
749
+
581
750
  {
582
751
  cat <<TRIGGER_EOF
583
752
  #!/bin/zsh
@@ -596,12 +765,8 @@ HEARTBEAT_FILE="$WORKER_HEARTBEAT"
596
765
  ) &
597
766
  HEARTBEAT_PID=\$!
598
767
 
599
- # Run claude with fresh context (governance.md s7 step 5)
600
- claude -p "\$(cat $prompt_file)" \\
601
- --model $WORKER_MODEL \\
602
- --dangerously-skip-permissions \\
603
- --output-format text \\
604
- 2>&1 | tee $output_log
768
+ $engine_comment
769
+ $engine_cmd
605
770
 
606
771
  # Cleanup heartbeat writer
607
772
  kill \$HEARTBEAT_PID 2>/dev/null
@@ -618,11 +783,20 @@ TRIGGER_EOF
618
783
 
619
784
  write_verifier_trigger() {
620
785
  local iter="$1"
621
- local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"
622
- local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-trigger.sh"
623
- local output_log="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-output.log"
786
+ local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
787
+ local verifier_model="${3:-$VERIFIER_MODEL}"
788
+ local suffix="${4:-}" # optional suffix for consensus (e.g., "-claude", "-codex")
789
+ local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
790
+ local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
791
+ local output_log="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-output.log"
792
+
793
+ # Read us_id from iter-signal.json for per-US scoping
794
+ local us_id=""
795
+ if [[ -f "$SIGNAL_FILE" ]]; then
796
+ us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
797
+ fi
624
798
 
625
- # Build verifier prompt from base
799
+ # Build verifier prompt from base with US scope
626
800
  {
627
801
  cat "$VERIFIER_PROMPT_BASE"
628
802
  echo ""
@@ -630,13 +804,40 @@ write_verifier_trigger() {
630
804
  echo "## Verification Context"
631
805
  echo "- **Iteration**: $iter"
632
806
  echo "- **Done Claim**: $DONE_CLAIM_FILE"
807
+ echo "- **Verify Mode**: $VERIFY_MODE"
808
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$us_id" ]]; then
809
+ if [[ "$us_id" = "ALL" ]]; then
810
+ echo "- **Scope**: FINAL FULL VERIFY — check ALL acceptance criteria from the PRD"
811
+ echo "- **Previously verified US**: $VERIFIED_US"
812
+ else
813
+ echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
814
+ echo "- **Previously verified US**: $VERIFIED_US"
815
+ fi
816
+ fi
633
817
  } | atomic_write "$prompt_file"
634
818
 
635
819
  # Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
820
+ # Engine-specific launch command (expanded at write time)
821
+ if [[ "$verifier_engine" = "codex" ]]; then
822
+ local engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
823
+ -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
824
+ --dangerously-bypass-approvals-and-sandbox \\
825
+ \"\$(cat $prompt_file)\" \\
826
+ 2>&1 | tee $output_log"
827
+ local engine_comment="# Run codex with fresh context (governance.md s7 step 7)"
828
+ else
829
+ local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
830
+ --model $verifier_model \\
831
+ --dangerously-skip-permissions \\
832
+ --output-format text \\
833
+ 2>&1 | tee $output_log"
834
+ local engine_comment="# Run claude with fresh context (governance.md s7 step 7)"
835
+ fi
836
+
636
837
  {
637
838
  cat <<TRIGGER_EOF
638
839
  #!/bin/zsh
639
- # Trigger for iteration $iter verifier - generated by run_ralph_desk.zsh
840
+ # Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
640
841
  # DO NOT use exec here -- it breaks heartbeat cleanup
641
842
 
642
843
  HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
@@ -651,12 +852,8 @@ HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
651
852
  ) &
652
853
  HEARTBEAT_PID=\$!
653
854
 
654
- # Run claude with fresh context (governance.md s7 step 7)
655
- claude -p "\$(cat $prompt_file)" \\
656
- --model $VERIFIER_MODEL \\
657
- --dangerously-skip-permissions \\
658
- --output-format text \\
659
- 2>&1 | tee $output_log
855
+ $engine_comment
856
+ $engine_cmd
660
857
 
661
858
  # Cleanup heartbeat writer
662
859
  kill \$HEARTBEAT_PID 2>/dev/null
@@ -680,6 +877,22 @@ update_status() {
680
877
  local phase="$1"
681
878
  local last_result="$2"
682
879
 
880
+ # Build verified_us as JSON array
881
+ local verified_us_json="[]"
882
+ if [[ -n "$VERIFIED_US" ]]; then
883
+ verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
884
+ fi
885
+
886
+ # Build consensus fields
887
+ local consensus_json=""
888
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
889
+ consensus_json=',
890
+ "consensus_scope": "'"$CONSENSUS_SCOPE"'",
891
+ "consensus_round": '"$CONSENSUS_ROUND"',
892
+ "claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
893
+ "codex_verdict": "'"${CODEX_VERDICT:-}"'"'
894
+ fi
895
+
683
896
  echo '{
684
897
  "slug": "'"$SLUG"'",
685
898
  "iteration": '"$ITERATION"',
@@ -687,8 +900,17 @@ update_status() {
687
900
  "phase": "'"$phase"'",
688
901
  "worker_model": "'"$WORKER_MODEL"'",
689
902
  "verifier_model": "'"$VERIFIER_MODEL"'",
903
+ "worker_engine": "'"$WORKER_ENGINE"'",
904
+ "verifier_engine": "'"$VERIFIER_ENGINE"'",
905
+ "worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
906
+ "worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
907
+ "verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
908
+ "verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
909
+ "verify_mode": "'"$VERIFY_MODE"'",
910
+ "verify_consensus": '"$VERIFY_CONSENSUS"',
690
911
  "last_result": "'"$last_result"'",
691
912
  "consecutive_failures": '"$CONSECUTIVE_FAILURES"',
913
+ "verified_us": '"$verified_us_json"''"$consensus_json"',
692
914
  "updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
693
915
  }' | atomic_write "$STATUS_FILE"
694
916
  }
@@ -753,6 +975,9 @@ Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
753
975
  cleanup() {
754
976
  log "Cleaning up..."
755
977
 
978
+ # Remove lockfile
979
+ rm -f "$DESK/logs/.rlp-desk-$SLUG.lock" 2>/dev/null
980
+
756
981
  # Kill claude processes then kill panes
757
982
  log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
758
983
  if [[ -n "${WORKER_PANE:-}" ]]; then
@@ -764,10 +989,14 @@ cleanup() {
764
989
  tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
765
990
  fi
766
991
  sleep 2
767
- # Kill the panes themselves
768
- log_debug "cleanup: killing panes $WORKER_PANE $VERIFIER_PANE"
769
- tmux kill-pane -t "$WORKER_PANE" 2>&1 | while read -r line; do log_debug "kill worker: $line"; done
770
- tmux kill-pane -t "$VERIFIER_PANE" 2>&1 | while read -r line; do log_debug "kill verifier: $line"; done
992
+ # Kill panes on completion
993
+ if [[ -n "${WORKER_PANE:-}" ]]; then
994
+ tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
995
+ fi
996
+ if [[ -n "${VERIFIER_PANE:-}" ]]; then
997
+ tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
998
+ fi
999
+ log " Panes cleaned up."
771
1000
 
772
1001
  # Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
773
1002
  setopt local_options nonomatch 2>/dev/null
@@ -780,6 +1009,70 @@ cleanup() {
780
1009
  local minutes=$(( elapsed / 60 ))
781
1010
  local seconds=$(( elapsed % 60 ))
782
1011
 
1012
+ local final_status="UNKNOWN"
1013
+ if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
1014
+ elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
1015
+ else final_status="TIMEOUT"; fi
1016
+
1017
+ if (( DEBUG )); then
1018
+ local end_ts=$(date +%s)
1019
+ local elapsed=$((end_ts - START_TIME))
1020
+
1021
+ log_debug "[EXEC] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
1022
+
1023
+ # --- Validation ---
1024
+ log_debug "[VALIDATE] === Execution Validation ==="
1025
+
1026
+ # 1. Did the correct verify mode run?
1027
+ log_debug "[VALIDATE] verify_mode=$VERIFY_MODE configured=true"
1028
+
1029
+ # 2. Per-US: were all US individually verified?
1030
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
1031
+ local prd_file="$DESK/plans/prd-$SLUG.md"
1032
+ local expected_us=""
1033
+ if [[ -f "$prd_file" ]]; then
1034
+ expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1035
+ fi
1036
+ local verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
1037
+ local expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
1038
+
1039
+ if [[ "$final_status" = "COMPLETE" ]]; then
1040
+ if (( verified_count >= expected_count )); then
1041
+ log_debug "[VALIDATE] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
1042
+ else
1043
+ log_debug "[VALIDATE] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
1044
+ fi
1045
+ else
1046
+ log_debug "[VALIDATE] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
1047
+ fi
1048
+ fi
1049
+
1050
+ # 3. Consensus: were both engines used?
1051
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1052
+ if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
1053
+ log_debug "[VALIDATE] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
1054
+ else
1055
+ log_debug "[VALIDATE] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
1056
+ fi
1057
+ fi
1058
+
1059
+ # 4. Engine match: did the configured engines actually run?
1060
+ local worker_dispatches=$(grep -c '\[EXEC\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1061
+ local verifier_dispatches=$(grep -c '\[EXEC\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
1062
+ log_debug "[VALIDATE] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
1063
+
1064
+ # 5. Fix loops: how many fix contracts were generated?
1065
+ local fix_count=$(grep -c '\[EXEC\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
1066
+ log_debug "[VALIDATE] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
1067
+
1068
+ # 6. Circuit breakers: any triggered?
1069
+ local cb_count=$(grep -c '\[EXEC\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
1070
+ log_debug "[VALIDATE] circuit_breakers_triggered=$cb_count"
1071
+
1072
+ # 7. Overall result
1073
+ log_debug "[VALIDATE] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
1074
+ fi
1075
+
783
1076
  echo ""
784
1077
  echo "============================================================"
785
1078
  echo " Ralph Desk Tmux Runner - Session Complete"
@@ -870,6 +1163,7 @@ poll_for_signal() {
870
1163
  (( HEARTBEAT_STALE_COUNT++ ))
871
1164
  # Circuit breaker: 3 consecutive heartbeat stale events
872
1165
  if (( HEARTBEAT_STALE_COUNT >= 3 )); then
1166
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
873
1167
  log_error "Circuit breaker: 3 consecutive heartbeat stale events"
874
1168
  return 1
875
1169
  fi
@@ -887,6 +1181,16 @@ poll_for_signal() {
887
1181
  fi
888
1182
  fi
889
1183
 
1184
+ # Auto-approve permission prompts during poll
1185
+ local poll_capture
1186
+ poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
1187
+ if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
1188
+ log " Permission prompt detected during poll, auto-approving..."
1189
+ log_debug "[EXEC] iter=$ITERATION permission_prompt_auto_approved=true"
1190
+ tmux send-keys -t "$pane_id" Enter
1191
+ sleep 0.5
1192
+ fi
1193
+
890
1194
  # Idle pane nudging (tmux pattern)
891
1195
  check_and_nudge_idle_pane "$pane_id" "nudge_count"
892
1196
 
@@ -926,6 +1230,243 @@ check_stale_context() {
926
1230
  return 0
927
1231
  }
928
1232
 
1233
+ # =============================================================================
1234
+ # Consensus Verification (run two verifiers sequentially in same pane)
1235
+ # =============================================================================
1236
+
1237
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
# Writes the verifier trigger/prompt, resets the Verifier pane, launches the
# requested engine there, waits for VERDICT_FILE and copies it to $5.
#
# Arguments:
#   $1 - iteration number
#   $2 - engine: claude|codex
#   $3 - model handed to write_verifier_trigger and to the claude CLI
#        (the codex launch line uses VERIFIER_CODEX_MODEL)
#   $4 - artifact suffix: "-claude" or "-codex"
#   $5 - destination path for the copy of the verdict file
# Globals read: LOGS_DIR, VERIFIER_PANE, VERDICT_FILE, VERIFIER_HEARTBEAT,
#   CLAUDE_BIN, CODEX_BIN, VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING,
#   ITER_TIMEOUT, POLL_INTERVAL
# Returns: 0 when a verdict was produced and copied; 1 when the verifier did
#   not start, the poll failed or timed out, or the verdict could not be copied
run_single_verifier() {
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Declared once up front: under zsh, re-running a bare `local name` inside a
  # loop prints the existing value to stdout unless TYPESET_SILENT is set.
  local verifier_cmd codex_cmd verifier_launch verifier_instruction
  local v_check v_submit=0 v_started=0
  local codex_poll_start codex_elapsed

  # Write trigger for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  local prompt_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier${suffix}-prompt.md"

  # Clean previous Verifier session
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file so the poll below only sees a fresh one
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier
  if [[ "$engine" = "codex" ]]; then
    # Codex: use non-interactive exec mode in pane (more reliable than TUI for sequential runs)
    codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
    log " Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix codex exec sent directly"
  else
    # Claude: use interactive TUI
    verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
    log " Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
    tmux send-keys -t "$VERIFIER_PANE" Enter

    if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi

    sleep 3
    verifier_instruction="Read and execute the instructions in $prompt_file"
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix instruction sent directly"

    # Verify claude actually started working: look for a busy indicator in the
    # pane, re-submitting with C-m between checks (up to 15 checks).
    while (( v_submit < 15 )); do
      sleep 2
      v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
      if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
        log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
        v_started=1
        break
      fi
      # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
      if (( v_submit == 8 )); then
        log_debug "Adaptive instruction retry: clearing line and re-typing"
        tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
        sleep 0.1
        tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
        tmux send-keys -t "$VERIFIER_PANE" Enter
      fi
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      sleep 0.3
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      v_submit=$(( v_submit + 1 ))
    done
    if (( ! v_started )); then
      # Not fatal: the verdict poll below still decides success or failure.
      log_debug "Verifier$suffix showed no working indicator after $v_submit checks; polling anyway"
    fi
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: simple file poll (non-interactive, no heartbeat/nudge needed)
    log " Polling for verify-verdict.json ($suffix, codex exec)..."
    codex_poll_start=$(date +%s)
    while true; do
      # Accept the file only once it parses as JSON (it may be mid-write).
      if [[ -f "$VERDICT_FILE" ]] && jq . "$VERDICT_FILE" >/dev/null 2>&1; then
        log " Verdict file detected: $VERDICT_FILE"
        break
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log " Polling for verify-verdict.json ($suffix)..."
    if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination; a failed copy must not be reported as success
  if ! cp "$VERDICT_FILE" "$verdict_dest"; then
    log_error "Verifier$suffix verdict could not be copied to $verdict_dest"
    return 1
  fi
  log " Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
1354
+
1355
# --- US-004: Run consensus verification (claude + codex sequentially) ---
# Runs ONE consensus round per call: the claude verifier, then the codex
# verifier, then merges their verdicts into VERDICT_FILE.
#
# The round counter persists across calls while a retry cycle is open, so the
# 3-round cap is actually reachable: a call that returns 2 leaves the cycle
# open and the next call continues with the next round. A cycle ends (and the
# next call starts again at round 1) after a pass, after the blocked result,
# or after a verifier failure. Previously the counter was reset on every call,
# which made every call "round 1" and the blocked path unreachable.
#
# Arguments:
#   $1 - iteration number (used in artifact file names)
# Globals read:    LOGS_DIR, VERDICT_FILE, VERIFIER_MODEL, VERIFIER_CODEX_MODEL,
#                  VERIFIER_CODEX_REASONING
# Globals written: CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT,
#                  CONSENSUS_CYCLE_OPEN (1 while a retry cycle is in progress)
# Returns:
#   0 - both verifiers passed (merged pass verdict written)
#   2 - verifiers did not both pass and rounds remain (fix contract and merged
#       fail verdict written; caller is expected to run the fix loop)
#   1 - a verifier failed to run, or the round cap was reached (blocked verdict)
run_consensus_verification() {
  local iter="$1"
  local max_rounds=3
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local claude_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-codex.json"
  local fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"
  local _combined_action="retry"
  local issue_filter='.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"'

  # Start a fresh cycle unless the previous call ended with "retry" (return 2).
  if [[ "${CONSENSUS_CYCLE_OPEN:-0}" != "1" ]]; then
    CONSENSUS_ROUND=0
  fi
  # Closed by default; only the retry path below re-opens the cycle, so any
  # early return 1 also ends the cycle.
  CONSENSUS_CYCLE_OPEN=0
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""

  CONSENSUS_ROUND=$(( CONSENSUS_ROUND + 1 ))
  log " Consensus round $CONSENSUS_ROUND/${max_rounds}..."

  # Run claude verifier first
  if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
    log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
    return 1
  fi
  CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
  log_debug "[EXEC] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"

  # Run codex verifier second
  if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
    log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
    return 1
  fi
  CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
  log_debug "[EXEC] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"

  log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
  if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
    _combined_action="pass"
  elif (( CONSENSUS_ROUND >= max_rounds )); then
    _combined_action="blocked"
  fi
  log_debug "[EXEC] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"

  # Both pass → success
  if [[ "$_combined_action" = "pass" ]]; then
    # Create merged verdict with per-engine details
    {
      echo '{'
      echo ' "verdict": "pass",'
      echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
      echo ' "summary": "Consensus PASS: both claude and codex verified independently",'
      echo ' "recommended_state_transition": "complete",'
      echo ' "consensus": {'
      echo ' "claude": { "verdict": "pass", "file": "'"$claude_verdict_file"'" },'
      echo ' "codex": { "verdict": "pass", "file": "'"$codex_verdict_file"'" },'
      echo ' "round": '"$CONSENSUS_ROUND"
      echo ' }'
      echo '}'
    } | atomic_write "$VERDICT_FILE"
    return 0
  fi

  # Consensus disagreement
  log_debug "[EXEC] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"

  # Either fails → build combined fix contract
  {
    echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
    echo ""
    echo "## Claude Verdict: $CLAUDE_VERDICT"
    if [[ "$CLAUDE_VERDICT" = "fail" ]]; then
      echo "### Claude Issues"
      jq -r "$issue_filter" "$claude_verdict_file" 2>/dev/null || echo "- (no structured issues)"
    fi
    echo ""
    echo "## Codex Verdict: $CODEX_VERDICT"
    if [[ "$CODEX_VERDICT" = "fail" ]]; then
      echo "### Codex Issues"
      jq -r "$issue_filter" "$codex_verdict_file" 2>/dev/null || echo "- (no structured issues)"
    fi
    echo ""
    echo "## Traceability"
    echo "Only changes that resolve a listed issue are allowed."
  } | atomic_write "$fix_contract"

  log " Combined fix contract: $fix_contract"

  # Rounds remain: write a merged fail verdict so the main loop can run the
  # fix loop, and keep the cycle open so the next call counts as the next round.
  if [[ "$_combined_action" = "retry" ]]; then
    {
      echo '{'
      echo ' "verdict": "fail",'
      echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
      echo ' "summary": "Consensus disagreement (round '"$CONSENSUS_ROUND"'/3): claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
      echo ' "issues": [],'
      echo ' "recommended_state_transition": "continue",'
      echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": '"$CONSENSUS_ROUND"' }'
      echo '}'
    } | atomic_write "$VERDICT_FILE"
    CONSENSUS_CYCLE_OPEN=1
    return 2  # special return: consensus disagreement, needs retry
  fi

  # Max consensus rounds exceeded
  log_error "Consensus failed after 3 rounds"
  {
    echo '{'
    echo ' "verdict": "fail",'
    echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
    echo ' "summary": "Consensus failed after 3 rounds: claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'",'
    echo ' "issues": [],'
    echo ' "recommended_state_transition": "blocked",'
    echo ' "consensus": { "claude": "'"$CLAUDE_VERDICT"'", "codex": "'"$CODEX_VERDICT"'", "round": 3 }'
    echo '}'
  } | atomic_write "$VERDICT_FILE"
  return 1
}
1469
+
929
1470
  # =============================================================================
930
1471
  # Security Warning
931
1472
  # =============================================================================
@@ -947,6 +1488,21 @@ print_security_warning() {
947
1488
  # =============================================================================
948
1489
 
949
1490
  main() {
1491
+ # --- Lockfile: prevent duplicate execution ---
1492
+ local lockfile="$DESK/logs/.rlp-desk-$SLUG.lock"
1493
+ mkdir -p "$(dirname "$lockfile")" 2>/dev/null
1494
+ if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
1495
+ local lock_pid
1496
+ lock_pid=$(cat "$lockfile" 2>/dev/null)
1497
+ if kill -0 "$lock_pid" 2>/dev/null; then
1498
+ log_error "Another instance is already running (PID $lock_pid)"
1499
+ exit 1
1500
+ fi
1501
+ # Stale lock — overwrite
1502
+ echo $$ > "$lockfile"
1503
+ fi
1504
+ mkdir -p "$LOGS_DIR" 2>/dev/null
1505
+
950
1506
  # --- Startup ---
951
1507
  log "Ralph Desk Tmux Runner starting..."
952
1508
  log " Slug: $SLUG"
@@ -954,8 +1510,50 @@ main() {
954
1510
  log " Max iterations: $MAX_ITER"
955
1511
  log " Worker model: $WORKER_MODEL"
956
1512
  log " Verifier model: $VERIFIER_MODEL"
1513
+ log " Verify mode: $VERIFY_MODE"
1514
+ log " Verify consensus:$VERIFY_CONSENSUS"
1515
+ log " Consensus scope: $CONSENSUS_SCOPE"
957
1516
  log " Poll interval: ${POLL_INTERVAL}s"
958
1517
  log " Iter timeout: ${ITER_TIMEOUT}s"
1518
+ # --- Debug: Log execution plan ---
1519
+ if (( DEBUG )); then
1520
+ # Extract US IDs from PRD
1521
+ local prd_file="$DESK/plans/prd-$SLUG.md"
1522
+ local us_list=""
1523
+ if [[ -f "$prd_file" ]]; then
1524
+ us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1525
+ fi
1526
+ local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
1527
+
1528
+ log_debug "[PLAN] slug=$SLUG us_count=$us_count us_list=$us_list"
1529
+ log_debug "[PLAN] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
1530
+ log_debug "[PLAN] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
1531
+ log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
1532
+
1533
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
1534
+ # Build expected flow
1535
+ local expected_flow=""
1536
+ for us in $(echo "$us_list" | tr ',' ' '); do
1537
+ expected_flow="${expected_flow}worker->verify($us)->"
1538
+ done
1539
+ expected_flow="${expected_flow}verify(ALL)->COMPLETE"
1540
+ log_debug "[PLAN] expected_flow=$expected_flow"
1541
+ else
1542
+ log_debug "[PLAN] expected_flow=worker(all)->verify(ALL)->COMPLETE"
1543
+ fi
1544
+
1545
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1546
+ log_debug "[PLAN] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
1547
+ fi
1548
+ fi
1549
+
1550
+ # Extract US list for per-US sequencing
1551
+ if [[ "$VERIFY_MODE" = "per-us" ]]; then
1552
+ local prd_file="$DESK/plans/prd-$SLUG.md"
1553
+ if [[ -f "$prd_file" ]]; then
1554
+ US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
1555
+ fi
1556
+ fi
959
1557
 
960
1558
  # Dependency checks
961
1559
  check_dependencies
@@ -982,6 +1580,9 @@ main() {
982
1580
  for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
983
1581
  log ""
984
1582
  log "========== Iteration $ITERATION / $MAX_ITER =========="
1583
+ local _iter_contract=""
1584
+ _iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
1585
+ log_debug "[EXEC] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
985
1586
 
986
1587
  # --- governance.md s7 step 1: Check sentinels ---
987
1588
  if [[ -f "$COMPLETE_SENTINEL" ]]; then
@@ -1021,12 +1622,19 @@ main() {
1021
1622
 
1022
1623
  update_status "worker" "running"
1023
1624
 
1024
- # --- governance.md s7 step 5: Execute Worker (interactive claude, tmux pattern) ---
1025
- # Step 5a: Launch interactive claude in Worker pane
1026
- local worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
1027
- log " Launching Worker claude in pane $WORKER_PANE..."
1625
+ # --- governance.md s7 step 5: Execute Worker (interactive TUI, tmux pattern) ---
1626
+ # Step 5a: Launch interactive worker engine in Worker pane
1627
+ local worker_launch
1628
+ if [[ "$WORKER_ENGINE" = "codex" ]]; then
1629
+ worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1630
+ log " Launching Worker codex in pane $WORKER_PANE..."
1631
+ else
1632
+ worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
1633
+ log " Launching Worker claude in pane $WORKER_PANE..."
1634
+ fi
1028
1635
  tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
1029
1636
  tmux send-keys -t "$WORKER_PANE" Enter
1637
+ log_debug "[EXEC] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
1030
1638
 
1031
1639
  # Step 5b: Wait for claude TUI to be ready (tmux pattern)
1032
1640
  if ! wait_for_pane_ready "$WORKER_PANE" 30; then
@@ -1036,39 +1644,82 @@ main() {
1036
1644
  return 1
1037
1645
  fi
1038
1646
 
1039
- # Step 5c: Wait for claude to fully initialize, then send instruction
1647
+ # Step 5c: Wait for the Worker engine (claude or codex) to fully initialize, then send the instruction directly
1040
1648
  sleep 3
1041
1649
  local worker_instruction="Read and execute the instructions in $worker_prompt"
1042
- if ! safe_send_keys "$WORKER_PANE" "$worker_instruction"; then
1043
- log_error "Failed to send instruction to Worker"
1650
+ tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1651
+ tmux send-keys -t "$WORKER_PANE" Enter
1652
+ log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
1653
+
1654
+ # Verify the Worker engine actually started working — keep sending C-m until activity is detected (at most 15 checks)
1655
+ local submit_attempts=0
1656
+ while (( submit_attempts < 15 )); do
1657
+ sleep 2
1658
+ local pane_check
1659
+ pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
1660
+ if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1661
+ log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
1662
+ log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
1663
+ break
1664
+ fi
1665
+ # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1666
+ if (( submit_attempts == 8 )); then
1667
+ log_debug "Adaptive instruction retry: clearing line and re-typing"
1668
+ tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
1669
+ sleep 0.1
1670
+ tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
1671
+ tmux send-keys -t "$WORKER_PANE" Enter
1672
+ fi
1673
+ tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1674
+ sleep 0.3
1675
+ tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1676
+ (( submit_attempts++ ))
1677
+ done
1678
+ if (( submit_attempts >= 15 )); then
1679
+ log " WARNING: Could not confirm Worker started working after 15 attempts"
1680
+ log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=15"
1044
1681
  fi
1045
- # Extra C-m to ensure submission (long text may false-positive the consumed check)
1046
- sleep 0.5
1047
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1048
- sleep 0.3
1049
- tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
1050
1682
 
1051
1683
  # --- governance.md s7 step 5+6: Poll for Worker completion ---
1052
1684
  log " Polling for iter-signal.json..."
1053
- if ! poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
1054
- # Check if Worker is still actively running (not stuck)
1055
- local worker_cmd
1056
- worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
1057
- if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" ]]; then
1058
- # Worker is still active — timeout but not a failure, just slow
1059
- log " Worker timed out but still active ($worker_cmd). Extending..."
1060
- update_status "worker" "slow"
1061
- continue
1062
- fi
1063
- # Worker is truly dead/stuck
1064
- (( MONITOR_FAILURE_COUNT++ ))
1065
- if (( MONITOR_FAILURE_COUNT >= 3 )); then
1066
- write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
1067
- update_status "blocked" "monitor_failures"
1068
- return 1
1685
+ local worker_poll_done=0
1686
+ while (( ! worker_poll_done )); do
1687
+ if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
1688
+ worker_poll_done=1
1689
+ log_debug "[EXEC] iter=$ITERATION poll_signal_received=true"
1690
+ else
1691
+ # Check if Worker is still actively running (not stuck)
1692
+ local worker_cmd
1693
+ worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
1694
+ if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
1695
+ log " Worker timed out but still active ($worker_cmd). Extending poll..."
1696
+ log_debug "[EXEC] iter=$ITERATION timeout_active=true process=$worker_cmd"
1697
+ log_debug "[EXEC] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
1698
+ update_status "worker" "slow"
1699
+ # Loop continues — re-poll same iteration
1700
+ else
1701
+ # Worker is truly dead/stuck
1702
+ (( MONITOR_FAILURE_COUNT++ ))
1703
+ log_debug "[EXEC] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
1704
+ if (( MONITOR_FAILURE_COUNT >= 3 )); then
1705
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
1706
+ write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
1707
+ update_status "blocked" "monitor_failures"
1708
+ return 1
1709
+ fi
1710
+ log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
1711
+ update_status "worker" "poll_failed"
1712
+ worker_poll_done=1 # exit poll loop, continue to next iteration
1713
+ log_debug "[EXEC] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
1714
+ # Worker is truly dead/stuck — kill and replace pane (omc-teams pattern)
1715
+ WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
1716
+ fi
1069
1717
  fi
1070
- log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
1071
- update_status "worker" "poll_failed"
1718
+ done
1719
+
1720
+ if [[ ! -f "$SIGNAL_FILE" ]]; then
1721
+ log_debug "[EXEC] iter=$ITERATION no_signal_after_poll=true continuing"
1722
+ # No signal — monitor failure, go to next iteration
1072
1723
  continue
1073
1724
  fi
1074
1725
 
@@ -1083,6 +1734,11 @@ main() {
1083
1734
 
1084
1735
  log " Worker signal: status=$signal_status summary=\"$signal_summary\""
1085
1736
 
1737
+ # Read us_id early for EXEC logging (the verify branch below reads it again into signal_us_id)
1738
+ local signal_us_id_early=""
1739
+ signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
1740
+ log_debug "[EXEC] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
1741
+
1086
1742
  case "$signal_status" in
1087
1743
  continue)
1088
1744
  # --- governance.md s7 step 6: continue -> go to step 8 ---
@@ -1091,52 +1747,113 @@ main() {
1091
1747
  ;;
1092
1748
  verify)
1093
1749
  # --- governance.md s7 step 7: Execute Verifier ---
1094
- log " Worker claims done. Dispatching Verifier..."
1095
-
1096
- write_verifier_trigger "$ITERATION"
1097
- local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
1750
+ # Read us_id from signal for per-US scoping
1751
+ local signal_us_id=""
1752
+ signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
1753
+ log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
1098
1754
 
1099
1755
  update_status "verifier" "running"
1100
1756
 
1101
- # Step 7a: Clean previous Verifier session if claude is running
1102
- local verifier_cmd
1103
- verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1104
- if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" ]]; then
1105
- tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1106
- sleep 0.5
1107
- tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1108
- sleep 2
1109
- wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
1110
- fi
1111
-
1112
- local verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
1113
- log " Launching Verifier claude in pane $VERIFIER_PANE..."
1114
- tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1115
- tmux send-keys -t "$VERIFIER_PANE" Enter
1116
-
1117
- # Step 7b: Wait for claude TUI to be ready
1118
- if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1119
- log_error "Verifier claude failed to start"
1120
- update_status "verifier" "start_failed"
1121
- continue
1757
+ # --- Consensus scope check ---
1758
+ local use_consensus=0
1759
+ if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
1760
+ case "$CONSENSUS_SCOPE" in
1761
+ all) use_consensus=1 ;;
1762
+ final-only) [[ "$signal_us_id" == "ALL" ]] && use_consensus=1 ;;
1763
+ esac
1122
1764
  fi
1123
1765
 
1124
- # Step 7c: Wait for claude to fully initialize, then send instruction
1125
- sleep 3
1126
- local verifier_instruction="Read and execute the instructions in $verifier_prompt"
1127
- safe_send_keys "$VERIFIER_PANE" "$verifier_instruction"
1128
- # Extra C-m to ensure submission
1129
- sleep 0.5
1130
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1131
- sleep 0.3
1132
- tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1133
-
1134
- # Poll for verify-verdict.json
1135
- log " Polling for verify-verdict.json..."
1136
- if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
1137
- log_error "Verifier poll failed"
1138
- update_status "verifier" "poll_failed"
1139
- continue
1766
+ # --- Consensus vs single verification ---
1767
+ if (( use_consensus )); then
1768
+ # US-004: Run consensus verification (claude + codex sequentially)
1769
+ local consensus_rc=0
1770
+ run_consensus_verification "$ITERATION" || consensus_rc=$?
1771
+
1772
+ if (( consensus_rc == 2 )); then
1773
+ # Consensus disagreement — treat as fail, fix loop will handle
1774
+ log " Consensus disagreement, treating as fail."
1775
+ elif (( consensus_rc != 0 )); then
1776
+ # Consensus verification failed entirely
1777
+ log_error "Consensus verification failed"
1778
+ write_blocked_sentinel "Consensus verification failed after max rounds"
1779
+ update_status "blocked" "consensus_failed"
1780
+ return 1
1781
+ fi
1782
+ else
1783
+ # Standard single-engine verification
1784
+ write_verifier_trigger "$ITERATION"
1785
+ local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
1786
+
1787
+ # Step 7a: Clean previous Verifier session if running
1788
+ local verifier_cmd
1789
+ verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
1790
+ if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
1791
+ tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
1792
+ sleep 0.5
1793
+ tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
1794
+ sleep 2
1795
+ wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
1796
+ fi
1797
+
1798
+ local verifier_launch
1799
+ if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
1800
+ verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
1801
+ log " Launching Verifier codex in pane $VERIFIER_PANE..."
1802
+ else
1803
+ verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
1804
+ log " Launching Verifier claude in pane $VERIFIER_PANE..."
1805
+ fi
1806
+ tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
1807
+ tmux send-keys -t "$VERIFIER_PANE" Enter
1808
+ log_debug "[EXEC] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
1809
+
1810
+ # Step 7b: Wait for TUI to be ready
1811
+ if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
1812
+ log_error "Verifier failed to start"
1813
+ update_status "verifier" "start_failed"
1814
+ continue
1815
+ fi
1816
+
1817
+ # Step 7c: Send instruction
1818
+ sleep 3
1819
+ local verifier_instruction="Read and execute the instructions in $verifier_prompt"
1820
+ tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1821
+ tmux send-keys -t "$VERIFIER_PANE" Enter
1822
+ log_debug "Verifier instruction sent directly"
1823
+
1824
+ # Verify verifier actually started working
1825
+ local vs_submit=0
1826
+ while (( vs_submit < 15 )); do
1827
+ sleep 2
1828
+ local vs_check
1829
+ vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
1830
+ if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
1831
+ log_debug "Verifier started working after $((vs_submit + 1)) checks"
1832
+ break
1833
+ fi
1834
+ # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
1835
+ if (( vs_submit == 8 )); then
1836
+ log_debug "Adaptive instruction retry: clearing line and re-typing"
1837
+ tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
1838
+ sleep 0.1
1839
+ tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
1840
+ tmux send-keys -t "$VERIFIER_PANE" Enter
1841
+ fi
1842
+ tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1843
+ sleep 0.3
1844
+ tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
1845
+ (( vs_submit++ ))
1846
+ done
1847
+
1848
+ # Poll for verify-verdict.json
1849
+ log " Polling for verify-verdict.json..."
1850
+ if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
1851
+ log_error "Verifier poll failed"
1852
+ update_status "verifier" "poll_failed"
1853
+ # Verifier is dead/stuck — kill and replace pane (omc-teams pattern)
1854
+ VERIFIER_PANE=$(replace_worker_pane "$VERIFIER_PANE" "verifier")
1855
+ continue
1856
+ fi
1140
1857
  fi
1141
1858
 
1142
1859
  # --- governance.md s7 step 7: Read verdict via jq ---
@@ -1149,12 +1866,28 @@ main() {
1149
1866
 
1150
1867
  log " Verifier: verdict=$verdict recommended=$recommended"
1151
1868
  log " Verifier summary: \"$verdict_summary\""
1869
+ local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
1870
+ log_debug "[EXEC] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
1152
1871
 
1153
1872
  case "$verdict" in
1154
1873
  pass)
1155
1874
  CONSECUTIVE_FAILURES=0
1156
- if [[ "$recommended" == "complete" ]]; then
1157
- # Write COMPLETE sentinel (only Leader writes sentinels)
1875
+ CONSENSUS_ROUND=0
1876
+
1877
+ # --- Per-US tracking ---
1878
+ if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
1879
+ # Add this US to verified list
1880
+ if [[ -n "$VERIFIED_US" ]]; then
1881
+ VERIFIED_US="${VERIFIED_US},${signal_us_id}"
1882
+ else
1883
+ VERIFIED_US="$signal_us_id"
1884
+ fi
1885
+ log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
1886
+ log_debug "[EXEC] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
1887
+ update_status "verifier" "pass_us"
1888
+ # Worker will do next US on next iteration
1889
+ elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
1890
+ # Final full verify passed or complete recommended
1158
1891
  write_complete_sentinel "$verdict_summary"
1159
1892
  update_status "complete" "pass"
1160
1893
  return 0
@@ -1185,9 +1918,11 @@ main() {
1185
1918
  jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
1186
1919
  } | atomic_write "$fix_contract"
1187
1920
  log " Fix contract: $fix_contract"
1921
+ log_debug "[EXEC] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
1188
1922
 
1189
1923
  # Circuit breaker: consecutive failures
1190
1924
  if (( CONSECUTIVE_FAILURES >= 3 )); then
1925
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"3 consecutive verification failures\""
1191
1926
  log_error "Circuit breaker: 3 consecutive verification failures"
1192
1927
  write_blocked_sentinel "3 consecutive verification failures"
1193
1928
  update_status "blocked" "consecutive_failures"
@@ -1233,6 +1968,7 @@ main() {
1233
1968
 
1234
1969
  # --- governance.md s7 step 8: Circuit breaker - stale context check ---
1235
1970
  if ! check_stale_context; then
1971
+ log_debug "[EXEC] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
1236
1972
  write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
1237
1973
  update_status "blocked" "stale_context"
1238
1974
  return 1