@ai-dev-methodologies/rlp-desk 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -0
- package/docs/protocol-reference.md +90 -3
- package/package.json +1 -1
- package/src/commands/rlp-desk.md +114 -10
- package/src/governance.md +87 -10
- package/src/scripts/init_ralph_desk.zsh +22 -6
- package/src/scripts/run_ralph_desk.zsh +834 -98
|
@@ -29,7 +29,18 @@ set -uo pipefail
|
|
|
29
29
|
# IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
|
|
30
30
|
# MAX_NUDGES - max nudges per pane per iteration (default: 3)
|
|
31
31
|
#
|
|
32
|
+
# Per-role codex config:
|
|
33
|
+
# WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.4)
|
|
34
|
+
# WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
|
|
35
|
+
# VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.4)
|
|
36
|
+
# VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
|
|
37
|
+
#
|
|
38
|
+
# Consensus scope:
|
|
39
|
+
# CONSENSUS_SCOPE - when consensus applies (default: all)
|
|
40
|
+
# all=every verify, final-only=final ALL only
|
|
41
|
+
#
|
|
32
42
|
# Dependencies: tmux, claude CLI, jq
|
|
43
|
+
# Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
|
|
33
44
|
# =============================================================================
|
|
34
45
|
|
|
35
46
|
# --- Environment Variables ---
|
|
@@ -45,6 +56,20 @@ MAX_RESTARTS="${MAX_RESTARTS:-3}"
|
|
|
45
56
|
IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
|
|
46
57
|
MAX_NUDGES="${MAX_NUDGES:-3}"
|
|
47
58
|
|
|
59
|
+
# --- Engine Selection ---
|
|
60
|
+
WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
|
|
61
|
+
VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
|
|
62
|
+
WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
|
|
63
|
+
WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
|
|
64
|
+
VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
|
|
65
|
+
VERIFIER_CODEX_REASONING="${VERIFIER_CODEX_REASONING:-high}" # low|medium|high
|
|
66
|
+
CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
67
|
+
|
|
68
|
+
# --- Verify Mode ---
|
|
69
|
+
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
70
|
+
VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
|
|
71
|
+
CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
|
|
72
|
+
|
|
48
73
|
# --- Derived Paths ---
|
|
49
74
|
DESK="$ROOT/.claude/ralph-desk"
|
|
50
75
|
PROMPTS_DIR="$DESK/prompts"
|
|
@@ -80,6 +105,9 @@ CONSECUTIVE_FAILURES=0
|
|
|
80
105
|
PREV_CONTEXT_HASH=""
|
|
81
106
|
ITERATION=0
|
|
82
107
|
START_TIME=$(date +%s)
|
|
108
|
+
VERIFIED_US="" # comma-separated list of verified US IDs (per-us mode)
|
|
109
|
+
CONSENSUS_ROUND=0 # current consensus round for current US
|
|
110
|
+
US_LIST="" # comma-separated US IDs from PRD (per-us mode)
|
|
83
111
|
|
|
84
112
|
# =============================================================================
|
|
85
113
|
# Utility Functions
|
|
@@ -112,6 +140,31 @@ atomic_write() {
|
|
|
112
140
|
mv "$tmp" "$target"
|
|
113
141
|
}
|
|
114
142
|
|
|
143
|
+
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
|
|
144
|
+
replace_worker_pane() {
  # Kill a dead/stuck pane and open a fresh one split off the leader pane
  # (omc-teams kill-and-replace pattern).
  #
  # Arguments:
  #   $1 - tmux pane id of the pane to replace
  #   $2 - role, "worker" or "verifier"; used as the key under .panes in
  #        session-config.json and in log messages
  # Globals read: LEADER_PANE, ROOT, ITERATION, SESSION_CONFIG
  # Outputs:  the new pane id on stdout (nothing on failure)
  # Returns:  0 on success, 1 if tmux could not create the replacement pane
  local old_pane="$1"
  local role="$2"   # "worker" or "verifier"

  # stdout is this function's return channel (the new pane id). The log
  # helpers' destination is not visible from here, so their output is sent
  # to stderr to guarantee it cannot end up in a caller's $(...) capture.
  log "  Replacing dead $role pane $old_pane..." >&2

  # Best effort: the old pane may already be gone.
  tmux kill-pane -t "$old_pane" 2>/dev/null

  # Create fresh pane via split-window off leader. -P -F prints the new
  # pane's id; -d keeps focus on the current pane.
  local new_pane
  if ! new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT") \
      || [[ -z "$new_pane" ]]; then
    # Without this check an empty pane id would be recorded and returned.
    log_error "  Failed to create replacement $role pane (old pane: $old_pane)" >&2
    return 1
  fi

  log "  New $role pane: $new_pane (replaced $old_pane)" >&2
  log_debug "[EXEC] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane" >&2

  # Update session-config.json with new pane ID. The jq result is captured
  # first so that a jq failure leaves the existing config untouched instead
  # of replacing it with empty output.
  if [[ -f "$SESSION_CONFIG" ]]; then
    local updated_config
    if updated_config=$(jq --arg role "$role" --arg pane "$new_pane" \
          '.panes[$role] = $pane' "$SESSION_CONFIG") \
        && [[ -n "$updated_config" ]]; then
      printf '%s\n' "$updated_config" | atomic_write "$SESSION_CONFIG"
      log_debug "Updated session-config.json: $role pane → $new_pane" >&2
    else
      log_error "  Could not update $SESSION_CONFIG with new $role pane $new_pane; leaving it unchanged" >&2
    fi
  fi

  printf '%s\n' "$new_pane"
}
|
|
167
|
+
|
|
115
168
|
# =============================================================================
|
|
116
169
|
# Dependency Checks
|
|
117
170
|
# =============================================================================
|
|
@@ -135,6 +188,19 @@ check_dependencies() {
|
|
|
135
188
|
missing=1
|
|
136
189
|
fi
|
|
137
190
|
|
|
191
|
+
# Codex binary required only when engine=codex or consensus verification is enabled
|
|
192
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
193
|
+
if ! command -v codex >/dev/null 2>&1; then
|
|
194
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
195
|
+
log_error "codex CLI is required for consensus verification (VERIFY_CONSENSUS=1)."
|
|
196
|
+
else
|
|
197
|
+
log_error "codex CLI is required when WORKER_ENGINE or VERIFIER_ENGINE is 'codex'."
|
|
198
|
+
fi
|
|
199
|
+
log_error "Install with: npm install -g @openai/codex"
|
|
200
|
+
missing=1
|
|
201
|
+
fi
|
|
202
|
+
fi
|
|
203
|
+
|
|
138
204
|
if (( missing )); then
|
|
139
205
|
exit 1
|
|
140
206
|
fi
|
|
@@ -142,6 +208,12 @@ check_dependencies() {
|
|
|
142
208
|
# Resolve full path to claude binary for reliable launches
|
|
143
209
|
CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "claude")
|
|
144
210
|
log " Claude binary: $CLAUDE_BIN"
|
|
211
|
+
|
|
212
|
+
# Resolve codex binary if needed
|
|
213
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
214
|
+
CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
|
|
215
|
+
log " Codex binary: $CODEX_BIN"
|
|
216
|
+
fi
|
|
145
217
|
}
|
|
146
218
|
|
|
147
219
|
# =============================================================================
|
|
@@ -248,6 +320,19 @@ create_session() {
|
|
|
248
320
|
"worker": "'"$WORKER_MODEL"'",
|
|
249
321
|
"verifier": "'"$VERIFIER_MODEL"'"
|
|
250
322
|
},
|
|
323
|
+
"engines": {
|
|
324
|
+
"worker": "'"$WORKER_ENGINE"'",
|
|
325
|
+
"verifier": "'"$VERIFIER_ENGINE"'",
|
|
326
|
+
"worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
|
|
327
|
+
"worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
|
|
328
|
+
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
329
|
+
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'"
|
|
330
|
+
},
|
|
331
|
+
"verification": {
|
|
332
|
+
"verify_mode": "'"$VERIFY_MODE"'",
|
|
333
|
+
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
334
|
+
"consensus_scope": "'"$CONSENSUS_SCOPE"'"
|
|
335
|
+
},
|
|
251
336
|
"config": {
|
|
252
337
|
"max_iter": '"$MAX_ITER"',
|
|
253
338
|
"poll_interval": '"$POLL_INTERVAL"',
|
|
@@ -305,10 +390,19 @@ safe_send_keys() {
|
|
|
305
390
|
log_debug " Trust prompt detected, dismissing"
|
|
306
391
|
tmux send-keys -t "$pane_id" C-m
|
|
307
392
|
sleep 0.12
|
|
308
|
-
|
|
393
|
+
fi
|
|
394
|
+
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
395
|
+
if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
396
|
+
log_debug " Permission prompt detected, auto-approving"
|
|
397
|
+
tmux send-keys -t "$pane_id" Enter
|
|
398
|
+
sleep 0.3
|
|
399
|
+
fi
|
|
400
|
+
# Auto-dismiss codex update prompt (select Skip)
|
|
401
|
+
if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
402
|
+
log_debug " Codex update prompt detected, selecting Skip"
|
|
403
|
+
tmux send-keys -t "$pane_id" "2" Enter
|
|
309
404
|
sleep 0.2
|
|
310
405
|
fi
|
|
311
|
-
|
|
312
406
|
# Send text in literal mode with -- separator
|
|
313
407
|
log_debug " Sending text to pane $pane_id (${#text} chars)"
|
|
314
408
|
tmux send-keys -t "$pane_id" -l -- "$text"
|
|
@@ -407,6 +501,22 @@ wait_for_pane_ready() {
|
|
|
407
501
|
continue
|
|
408
502
|
fi
|
|
409
503
|
|
|
504
|
+
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
505
|
+
if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
|
|
506
|
+
log " Permission prompt detected, auto-approving..."
|
|
507
|
+
tmux send-keys -t "$pane_id" Enter
|
|
508
|
+
sleep 0.5
|
|
509
|
+
continue
|
|
510
|
+
fi
|
|
511
|
+
|
|
512
|
+
# Auto-dismiss codex update prompt (select Skip = option 2)
|
|
513
|
+
if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
514
|
+
log " Codex update prompt detected, selecting Skip..."
|
|
515
|
+
tmux send-keys -t "$pane_id" "2" Enter
|
|
516
|
+
sleep 0.5
|
|
517
|
+
continue
|
|
518
|
+
fi
|
|
519
|
+
|
|
410
520
|
# tmux paneLooksReady: check each line for prompt char at line start
|
|
411
521
|
local ready=0
|
|
412
522
|
echo "$captured" | while IFS= read -r line; do
|
|
@@ -529,8 +639,12 @@ restart_worker() {
|
|
|
529
639
|
tmux send-keys -t "$pane_id" "/exit" Enter 2>/dev/null
|
|
530
640
|
sleep 2
|
|
531
641
|
|
|
532
|
-
# Re-launch
|
|
533
|
-
|
|
642
|
+
# Re-launch worker (tmux interactive pattern)
|
|
643
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
644
|
+
safe_send_keys "$pane_id" "${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
645
|
+
else
|
|
646
|
+
safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
647
|
+
fi
|
|
534
648
|
WORKER_RESTARTS[$iter]=$((restart_count + 1))
|
|
535
649
|
return 0
|
|
536
650
|
}
|
|
@@ -575,9 +689,64 @@ write_worker_trigger() {
|
|
|
575
689
|
echo ""
|
|
576
690
|
cat "$fix_contract_file"
|
|
577
691
|
fi
|
|
692
|
+
|
|
693
|
+
# Per-US mode: tell Worker exactly which US to work on
|
|
694
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$US_LIST" ]]; then
|
|
695
|
+
# Find next unverified US
|
|
696
|
+
local next_us=""
|
|
697
|
+
for us in $(echo "$US_LIST" | tr ',' ' '); do
|
|
698
|
+
if ! echo ",$VERIFIED_US," | grep -q ",$us,"; then
|
|
699
|
+
next_us="$us"
|
|
700
|
+
break
|
|
701
|
+
fi
|
|
702
|
+
done
|
|
703
|
+
|
|
704
|
+
if [[ -n "$next_us" ]]; then
|
|
705
|
+
echo ""
|
|
706
|
+
echo "---"
|
|
707
|
+
echo "## PER-US SCOPE LOCK (this iteration)"
|
|
708
|
+
echo "You MUST implement ONLY **${next_us}** in this iteration."
|
|
709
|
+
echo "Do NOT implement any other user stories."
|
|
710
|
+
echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
|
|
711
|
+
echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
|
|
712
|
+
elif [[ -n "$VERIFIED_US" ]]; then
|
|
713
|
+
# All individual US verified — this is the final full verify iteration
|
|
714
|
+
echo ""
|
|
715
|
+
echo "---"
|
|
716
|
+
echo "## FINAL VERIFICATION ITERATION"
|
|
717
|
+
echo "All individual US have been verified: $VERIFIED_US"
|
|
718
|
+
echo "Run all tests and verification commands to confirm everything works together."
|
|
719
|
+
echo "Signal verify with us_id=\"ALL\" for the final full verification."
|
|
720
|
+
fi
|
|
721
|
+
elif [[ "$VERIFY_MODE" = "batch" ]]; then
|
|
722
|
+
echo ""
|
|
723
|
+
echo "---"
|
|
724
|
+
echo "## BATCH MODE OVERRIDE"
|
|
725
|
+
echo "Ignore any per-US signal instructions above. In batch mode:"
|
|
726
|
+
echo "- Implement ALL user stories in this iteration"
|
|
727
|
+
echo '- Signal verify with us_id="ALL" only when ALL stories are complete'
|
|
728
|
+
echo "- Do NOT signal verify after individual stories"
|
|
729
|
+
fi
|
|
578
730
|
} | atomic_write "$prompt_file"
|
|
579
731
|
|
|
580
732
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
733
|
+
# Engine-specific launch command (expanded at write time)
|
|
734
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
735
|
+
local engine_cmd="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL \\
|
|
736
|
+
-c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
|
|
737
|
+
--dangerously-bypass-approvals-and-sandbox \\
|
|
738
|
+
\"\$(cat $prompt_file)\" \\
|
|
739
|
+
2>&1 | tee $output_log"
|
|
740
|
+
local engine_comment="# Run codex with fresh context (governance.md s7 step 5)"
|
|
741
|
+
else
|
|
742
|
+
local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
|
|
743
|
+
--model $WORKER_MODEL \\
|
|
744
|
+
--dangerously-skip-permissions \\
|
|
745
|
+
--output-format text \\
|
|
746
|
+
2>&1 | tee $output_log"
|
|
747
|
+
local engine_comment="# Run claude with fresh context (governance.md s7 step 5)"
|
|
748
|
+
fi
|
|
749
|
+
|
|
581
750
|
{
|
|
582
751
|
cat <<TRIGGER_EOF
|
|
583
752
|
#!/bin/zsh
|
|
@@ -596,12 +765,8 @@ HEARTBEAT_FILE="$WORKER_HEARTBEAT"
|
|
|
596
765
|
) &
|
|
597
766
|
HEARTBEAT_PID=\$!
|
|
598
767
|
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
--model $WORKER_MODEL \\
|
|
602
|
-
--dangerously-skip-permissions \\
|
|
603
|
-
--output-format text \\
|
|
604
|
-
2>&1 | tee $output_log
|
|
768
|
+
$engine_comment
|
|
769
|
+
$engine_cmd
|
|
605
770
|
|
|
606
771
|
# Cleanup heartbeat writer
|
|
607
772
|
kill \$HEARTBEAT_PID 2>/dev/null
|
|
@@ -618,11 +783,20 @@ TRIGGER_EOF
|
|
|
618
783
|
|
|
619
784
|
write_verifier_trigger() {
|
|
620
785
|
local iter="$1"
|
|
621
|
-
local
|
|
622
|
-
local
|
|
623
|
-
local
|
|
786
|
+
local verifier_engine="${2:-$VERIFIER_ENGINE}" # allow override for consensus
|
|
787
|
+
local verifier_model="${3:-$VERIFIER_MODEL}"
|
|
788
|
+
local suffix="${4:-}" # optional suffix for consensus (e.g., "-claude", "-codex")
|
|
789
|
+
local prompt_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-prompt.md"
|
|
790
|
+
local trigger_file="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-trigger.sh"
|
|
791
|
+
local output_log="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier${suffix}-output.log"
|
|
792
|
+
|
|
793
|
+
# Read us_id from iter-signal.json for per-US scoping
|
|
794
|
+
local us_id=""
|
|
795
|
+
if [[ -f "$SIGNAL_FILE" ]]; then
|
|
796
|
+
us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
797
|
+
fi
|
|
624
798
|
|
|
625
|
-
# Build verifier prompt from base
|
|
799
|
+
# Build verifier prompt from base with US scope
|
|
626
800
|
{
|
|
627
801
|
cat "$VERIFIER_PROMPT_BASE"
|
|
628
802
|
echo ""
|
|
@@ -630,13 +804,40 @@ write_verifier_trigger() {
|
|
|
630
804
|
echo "## Verification Context"
|
|
631
805
|
echo "- **Iteration**: $iter"
|
|
632
806
|
echo "- **Done Claim**: $DONE_CLAIM_FILE"
|
|
807
|
+
echo "- **Verify Mode**: $VERIFY_MODE"
|
|
808
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$us_id" ]]; then
|
|
809
|
+
if [[ "$us_id" = "ALL" ]]; then
|
|
810
|
+
echo "- **Scope**: FINAL FULL VERIFY — check ALL acceptance criteria from the PRD"
|
|
811
|
+
echo "- **Previously verified US**: $VERIFIED_US"
|
|
812
|
+
else
|
|
813
|
+
echo "- **Scope**: Verify ONLY the acceptance criteria for **${us_id}**"
|
|
814
|
+
echo "- **Previously verified US**: $VERIFIED_US"
|
|
815
|
+
fi
|
|
816
|
+
fi
|
|
633
817
|
} | atomic_write "$prompt_file"
|
|
634
818
|
|
|
635
819
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
820
|
+
# Engine-specific launch command (expanded at write time)
|
|
821
|
+
if [[ "$verifier_engine" = "codex" ]]; then
|
|
822
|
+
local engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
|
|
823
|
+
-c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
|
|
824
|
+
--dangerously-bypass-approvals-and-sandbox \\
|
|
825
|
+
\"\$(cat $prompt_file)\" \\
|
|
826
|
+
2>&1 | tee $output_log"
|
|
827
|
+
local engine_comment="# Run codex with fresh context (governance.md s7 step 7)"
|
|
828
|
+
else
|
|
829
|
+
local engine_cmd="$CLAUDE_BIN -p \"\$(cat $prompt_file)\" \\
|
|
830
|
+
--model $verifier_model \\
|
|
831
|
+
--dangerously-skip-permissions \\
|
|
832
|
+
--output-format text \\
|
|
833
|
+
2>&1 | tee $output_log"
|
|
834
|
+
local engine_comment="# Run claude with fresh context (governance.md s7 step 7)"
|
|
835
|
+
fi
|
|
836
|
+
|
|
636
837
|
{
|
|
637
838
|
cat <<TRIGGER_EOF
|
|
638
839
|
#!/bin/zsh
|
|
639
|
-
# Trigger for iteration $iter verifier - generated by run_ralph_desk.zsh
|
|
840
|
+
# Trigger for iteration $iter verifier${suffix} - generated by run_ralph_desk.zsh
|
|
640
841
|
# DO NOT use exec here -- it breaks heartbeat cleanup
|
|
641
842
|
|
|
642
843
|
HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
|
|
@@ -651,12 +852,8 @@ HEARTBEAT_FILE="$VERIFIER_HEARTBEAT"
|
|
|
651
852
|
) &
|
|
652
853
|
HEARTBEAT_PID=\$!
|
|
653
854
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
--model $VERIFIER_MODEL \\
|
|
657
|
-
--dangerously-skip-permissions \\
|
|
658
|
-
--output-format text \\
|
|
659
|
-
2>&1 | tee $output_log
|
|
855
|
+
$engine_comment
|
|
856
|
+
$engine_cmd
|
|
660
857
|
|
|
661
858
|
# Cleanup heartbeat writer
|
|
662
859
|
kill \$HEARTBEAT_PID 2>/dev/null
|
|
@@ -680,6 +877,22 @@ update_status() {
|
|
|
680
877
|
local phase="$1"
|
|
681
878
|
local last_result="$2"
|
|
682
879
|
|
|
880
|
+
# Build verified_us as JSON array
|
|
881
|
+
local verified_us_json="[]"
|
|
882
|
+
if [[ -n "$VERIFIED_US" ]]; then
|
|
883
|
+
verified_us_json=$(echo "$VERIFIED_US" | tr ',' '\n' | jq -R . | jq -s .)
|
|
884
|
+
fi
|
|
885
|
+
|
|
886
|
+
# Build consensus fields
|
|
887
|
+
local consensus_json=""
|
|
888
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
889
|
+
consensus_json=',
|
|
890
|
+
"consensus_scope": "'"$CONSENSUS_SCOPE"'",
|
|
891
|
+
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
892
|
+
"claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
|
|
893
|
+
"codex_verdict": "'"${CODEX_VERDICT:-}"'"'
|
|
894
|
+
fi
|
|
895
|
+
|
|
683
896
|
echo '{
|
|
684
897
|
"slug": "'"$SLUG"'",
|
|
685
898
|
"iteration": '"$ITERATION"',
|
|
@@ -687,8 +900,17 @@ update_status() {
|
|
|
687
900
|
"phase": "'"$phase"'",
|
|
688
901
|
"worker_model": "'"$WORKER_MODEL"'",
|
|
689
902
|
"verifier_model": "'"$VERIFIER_MODEL"'",
|
|
903
|
+
"worker_engine": "'"$WORKER_ENGINE"'",
|
|
904
|
+
"verifier_engine": "'"$VERIFIER_ENGINE"'",
|
|
905
|
+
"worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
|
|
906
|
+
"worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
|
|
907
|
+
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
908
|
+
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
|
|
909
|
+
"verify_mode": "'"$VERIFY_MODE"'",
|
|
910
|
+
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
690
911
|
"last_result": "'"$last_result"'",
|
|
691
912
|
"consecutive_failures": '"$CONSECUTIVE_FAILURES"',
|
|
913
|
+
"verified_us": '"$verified_us_json"''"$consensus_json"',
|
|
692
914
|
"updated_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"
|
|
693
915
|
}' | atomic_write "$STATUS_FILE"
|
|
694
916
|
}
|
|
@@ -753,6 +975,9 @@ Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$BLOCKED_SENTINEL"
|
|
|
753
975
|
cleanup() {
|
|
754
976
|
log "Cleaning up..."
|
|
755
977
|
|
|
978
|
+
# Remove lockfile
|
|
979
|
+
rm -f "$DESK/logs/.rlp-desk-$SLUG.lock" 2>/dev/null
|
|
980
|
+
|
|
756
981
|
# Kill claude processes then kill panes
|
|
757
982
|
log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
|
|
758
983
|
if [[ -n "${WORKER_PANE:-}" ]]; then
|
|
@@ -764,10 +989,14 @@ cleanup() {
|
|
|
764
989
|
tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
|
|
765
990
|
fi
|
|
766
991
|
sleep 2
|
|
767
|
-
# Kill
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
992
|
+
# Kill panes on completion
|
|
993
|
+
if [[ -n "${WORKER_PANE:-}" ]]; then
|
|
994
|
+
tmux kill-pane -t "$WORKER_PANE" 2>/dev/null
|
|
995
|
+
fi
|
|
996
|
+
if [[ -n "${VERIFIER_PANE:-}" ]]; then
|
|
997
|
+
tmux kill-pane -t "$VERIFIER_PANE" 2>/dev/null
|
|
998
|
+
fi
|
|
999
|
+
log " Panes cleaned up."
|
|
771
1000
|
|
|
772
1001
|
# Remove any leftover tmp files (setopt nonomatch to avoid zsh glob errors)
|
|
773
1002
|
setopt local_options nonomatch 2>/dev/null
|
|
@@ -780,6 +1009,70 @@ cleanup() {
|
|
|
780
1009
|
local minutes=$(( elapsed / 60 ))
|
|
781
1010
|
local seconds=$(( elapsed % 60 ))
|
|
782
1011
|
|
|
1012
|
+
local final_status="UNKNOWN"
|
|
1013
|
+
if [[ -f "$COMPLETE_SENTINEL" ]]; then final_status="COMPLETE"
|
|
1014
|
+
elif [[ -f "$BLOCKED_SENTINEL" ]]; then final_status="BLOCKED"
|
|
1015
|
+
else final_status="TIMEOUT"; fi
|
|
1016
|
+
|
|
1017
|
+
if (( DEBUG )); then
|
|
1018
|
+
local end_ts=$(date +%s)
|
|
1019
|
+
local elapsed=$((end_ts - START_TIME))
|
|
1020
|
+
|
|
1021
|
+
log_debug "[EXEC] final status=$final_status iterations=$ITERATION elapsed=${elapsed}s"
|
|
1022
|
+
|
|
1023
|
+
# --- Validation ---
|
|
1024
|
+
log_debug "[VALIDATE] === Execution Validation ==="
|
|
1025
|
+
|
|
1026
|
+
# 1. Did the correct verify mode run?
|
|
1027
|
+
log_debug "[VALIDATE] verify_mode=$VERIFY_MODE configured=true"
|
|
1028
|
+
|
|
1029
|
+
# 2. Per-US: were all US individually verified?
|
|
1030
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1031
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1032
|
+
local expected_us=""
|
|
1033
|
+
if [[ -f "$prd_file" ]]; then
|
|
1034
|
+
expected_us=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1035
|
+
fi
|
|
1036
|
+
local verified_count=$(echo "$VERIFIED_US" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
|
|
1037
|
+
local expected_count=$(echo "$expected_us" | tr ',' '\n' | grep -c 'US-' 2>/dev/null || echo 0)
|
|
1038
|
+
|
|
1039
|
+
if [[ "$final_status" = "COMPLETE" ]]; then
|
|
1040
|
+
if (( verified_count >= expected_count )); then
|
|
1041
|
+
log_debug "[VALIDATE] per_us_coverage=PASS verified=$verified_count/$expected_count us=$VERIFIED_US"
|
|
1042
|
+
else
|
|
1043
|
+
log_debug "[VALIDATE] per_us_coverage=FAIL verified=$verified_count/$expected_count expected=$expected_us got=$VERIFIED_US"
|
|
1044
|
+
fi
|
|
1045
|
+
else
|
|
1046
|
+
log_debug "[VALIDATE] per_us_coverage=INCOMPLETE verified=$verified_count/$expected_count status=$final_status"
|
|
1047
|
+
fi
|
|
1048
|
+
fi
|
|
1049
|
+
|
|
1050
|
+
# 3. Consensus: were both engines used?
|
|
1051
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1052
|
+
if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
|
|
1053
|
+
log_debug "[VALIDATE] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
1054
|
+
else
|
|
1055
|
+
log_debug "[VALIDATE] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1056
|
+
fi
|
|
1057
|
+
fi
|
|
1058
|
+
|
|
1059
|
+
# 4. Engine match: did the configured engines actually run?
|
|
1060
|
+
local worker_dispatches=$(grep -c '\[EXEC\].*phase=worker.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1061
|
+
local verifier_dispatches=$(grep -c '\[EXEC\].*phase=verifier.*dispatched=true' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1062
|
+
log_debug "[VALIDATE] dispatches worker=$worker_dispatches verifier=$verifier_dispatches"
|
|
1063
|
+
|
|
1064
|
+
# 5. Fix loops: how many fix contracts were generated?
|
|
1065
|
+
local fix_count=$(grep -c '\[EXEC\].*phase=fix_loop' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1066
|
+
log_debug "[VALIDATE] fix_loops=$fix_count consecutive_failures=$CONSECUTIVE_FAILURES"
|
|
1067
|
+
|
|
1068
|
+
# 6. Circuit breakers: any triggered?
|
|
1069
|
+
local cb_count=$(grep -c '\[EXEC\].*circuit_breaker=' "$DEBUG_LOG" 2>/dev/null || echo 0)
|
|
1070
|
+
log_debug "[VALIDATE] circuit_breakers_triggered=$cb_count"
|
|
1071
|
+
|
|
1072
|
+
# 7. Overall result
|
|
1073
|
+
log_debug "[VALIDATE] result=$final_status iterations=$ITERATION elapsed=${elapsed}s verified_us=$VERIFIED_US"
|
|
1074
|
+
fi
|
|
1075
|
+
|
|
783
1076
|
echo ""
|
|
784
1077
|
echo "============================================================"
|
|
785
1078
|
echo " Ralph Desk Tmux Runner - Session Complete"
|
|
@@ -870,6 +1163,7 @@ poll_for_signal() {
|
|
|
870
1163
|
(( HEARTBEAT_STALE_COUNT++ ))
|
|
871
1164
|
# Circuit breaker: 3 consecutive heartbeat stale events
|
|
872
1165
|
if (( HEARTBEAT_STALE_COUNT >= 3 )); then
|
|
1166
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=heartbeat_stale detail=\"3 consecutive heartbeat stale events\""
|
|
873
1167
|
log_error "Circuit breaker: 3 consecutive heartbeat stale events"
|
|
874
1168
|
return 1
|
|
875
1169
|
fi
|
|
@@ -887,6 +1181,16 @@ poll_for_signal() {
|
|
|
887
1181
|
fi
|
|
888
1182
|
fi
|
|
889
1183
|
|
|
1184
|
+
# Auto-approve permission prompts during poll
|
|
1185
|
+
local poll_capture
|
|
1186
|
+
poll_capture=$(tmux capture-pane -t "$pane_id" -p 2>/dev/null)
|
|
1187
|
+
if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
1188
|
+
log " Permission prompt detected during poll, auto-approving..."
|
|
1189
|
+
log_debug "[EXEC] iter=$ITERATION permission_prompt_auto_approved=true"
|
|
1190
|
+
tmux send-keys -t "$pane_id" Enter
|
|
1191
|
+
sleep 0.5
|
|
1192
|
+
fi
|
|
1193
|
+
|
|
890
1194
|
# Idle pane nudging (tmux pattern)
|
|
891
1195
|
check_and_nudge_idle_pane "$pane_id" "nudge_count"
|
|
892
1196
|
|
|
@@ -926,6 +1230,243 @@ check_stale_context() {
|
|
|
926
1230
|
return 0
|
|
927
1231
|
}
|
|
928
1232
|
|
|
1233
|
+
# =============================================================================
|
|
1234
|
+
# Consensus Verification (run two verifiers sequentially in same pane)
|
|
1235
|
+
# =============================================================================
|
|
1236
|
+
|
|
1237
|
+
# --- US-004: Run a single verifier in the Verifier pane and poll for verdict ---
|
|
1238
|
+
run_single_verifier() {
  # Run one verifier engine in the Verifier pane, wait for its verdict file,
  # and copy that verdict to a per-engine destination.
  #
  # Arguments:
  #   $1 - iteration number
  #   $2 - engine: claude|codex
  #   $3 - model for this verifier
  #   $4 - suffix used in per-engine file names: "-claude" or "-codex"
  #   $5 - path the verdict file is copied to
  # Globals read: LOGS_DIR, VERIFIER_PANE, VERDICT_FILE, VERIFIER_HEARTBEAT,
  #   CLAUDE_BIN, CODEX_BIN, VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING,
  #   ITER_TIMEOUT, POLL_INTERVAL
  # Returns: 0 once the verdict has been copied to $5; 1 if the claude
  #   verifier failed to start, polling failed or timed out, or the copy failed.
  local iter="$1"
  local engine="$2"        # claude|codex
  local model="$3"         # model for this verifier
  local suffix="$4"        # "-claude" or "-codex"
  local verdict_dest="$5"  # where to copy the verdict file

  # Write trigger for this engine
  write_verifier_trigger "$iter" "$engine" "$model" "$suffix"
  # trigger_file is not referenced further in this function.
  local trigger_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier${suffix}-trigger.sh"
  local prompt_file="$LOGS_DIR/iter-$(printf '%03d' "$iter").verifier${suffix}-prompt.md"

  # All loop-scoped locals are declared once, up front. In zsh, re-running a
  # bare `local name` on an already-set parameter prints "name=value" to
  # stdout (unless TYPESET_SILENT is set), which would dump the captured pane
  # contents on every pass of the loops below.
  local verifier_launch=""
  local verifier_instruction=""
  local v_submit=0
  local v_check=""
  local codex_poll_start=0
  local codex_elapsed=0

  # Clean previous Verifier session: interrupt and exit whatever CLI is
  # still running in the pane.
  local verifier_cmd
  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
  if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
    sleep 0.5
    tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
    sleep 2
  fi
  # Always ensure clean shell state before launching new verifier
  # (best effort: a not-ready pane is not treated as fatal here).
  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
  # Clear pane to avoid residual text interference
  tmux send-keys -t "$VERIFIER_PANE" C-l 2>/dev/null
  sleep 0.5

  # Remove previous verdict file so the poll below only sees a fresh one
  rm -f "$VERDICT_FILE" 2>/dev/null

  # Launch verifier
  if [[ "$engine" = "codex" ]]; then
    # Codex: use non-interactive exec mode in pane (more reliable than TUI for sequential runs).
    # NOTE(review): this uses VERIFIER_CODEX_MODEL rather than "$model"; the
    # two agree only if callers pass VERIFIER_CODEX_MODEL for codex — confirm.
    local codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
    log "  Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix codex exec sent directly"
  else
    # Claude: use interactive TUI
    verifier_launch="$CLAUDE_BIN --model $model --dangerously-skip-permissions"
    log "  Launching $suffix verifier (claude) in pane $VERIFIER_PANE..."
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
    tmux send-keys -t "$VERIFIER_PANE" Enter

    if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
      log_error "Verifier$suffix failed to start"
      return 1
    fi

    sleep 3
    verifier_instruction="Read and execute the instructions in $prompt_file"
    tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
    tmux send-keys -t "$VERIFIER_PANE" Enter
    log_debug "Verifier$suffix instruction sent directly"

    # Verify claude actually started working: look for a busy indicator in
    # the pane, re-submitting with Enter between checks (up to 15 checks).
    v_submit=0
    while (( v_submit < 15 )); do
      sleep 2
      v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
      if echo "$v_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut" 2>/dev/null; then
        log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
        break
      fi
      # After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
      if (( v_submit == 8 )); then
        log_debug "Adaptive instruction retry: clearing line and re-typing"
        tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
        sleep 0.1
        tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
        tmux send-keys -t "$VERIFIER_PANE" Enter
      fi
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      sleep 0.3
      tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
      (( v_submit++ ))
    done
  fi

  # Poll for verdict
  if [[ "$engine" = "codex" ]]; then
    # Codex exec: simple file poll (non-interactive, no heartbeat/nudge needed)
    log "  Polling for verify-verdict.json ($suffix, codex exec)..."
    codex_poll_start=$(date +%s)
    while true; do
      # Accept the file only once it parses as JSON.
      if [[ -f "$VERDICT_FILE" ]] && jq . "$VERDICT_FILE" >/dev/null 2>&1; then
        log "  Verdict file detected: $VERDICT_FILE"
        break
      fi
      codex_elapsed=$(( $(date +%s) - codex_poll_start ))
      if (( codex_elapsed >= ITER_TIMEOUT )); then
        log_error "Codex verifier$suffix timed out after ${ITER_TIMEOUT}s"
        return 1
      fi
      sleep "$POLL_INTERVAL"
    done
  else
    # Claude: use full poll_for_signal with heartbeat/nudge
    log "  Polling for verify-verdict.json ($suffix)..."
    if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier$suffix"; then
      log_error "Verifier$suffix poll failed"
      return 1
    fi
  fi

  # Copy verdict to destination. A failed copy must not be reported as
  # success: the caller reads the verdict from $verdict_dest.
  if ! cp "$VERDICT_FILE" "$verdict_dest"; then
    log_error "Verifier$suffix verdict could not be copied to $verdict_dest"
    return 1
  fi
  log "  Verifier$suffix verdict saved to $verdict_dest"
  return 0
}
|
|
1354
|
+
|
|
1355
|
+
# --- US-004: Run consensus verification (claude + codex sequentially) ---
#
# Runs the claude verifier, then the codex verifier, for one iteration and
# merges the two verdicts into $VERDICT_FILE. Performs exactly ONE consensus
# round per call: on disagreement it returns to the caller so the Worker can
# act on the fix contract before the next round.
#
# CONSENSUS_ROUND is therefore NOT reset here. It must survive across calls,
# otherwise every call would be "round 1" and the 3-round cap could never be
# reached. The caller resets it to 0 when a verdict passes.
#
# Globals read:    LOGS_DIR, VERDICT_FILE, VERIFIER_MODEL,
#                  VERIFIER_CODEX_MODEL, VERIFIER_CODEX_REASONING
# Globals written: CONSENSUS_ROUND, CLAUDE_VERDICT, CODEX_VERDICT
# Arguments:       $1 - iteration number (used to name per-iteration log files)
# Files written:   $VERDICT_FILE (merged verdict) and, on any non-pass result,
#                  $LOGS_DIR/iter-NNN.fix-contract.md
# Returns:
#   0 - both engines returned "pass"
#   1 - a verifier run failed, or still no consensus at the final round
#   2 - disagreement with rounds remaining (caller should run a fix loop)

# Print one markdown bullet per entry of .issues in verdict file $1, or a
# placeholder bullet when the file has no readable structured issues.
_consensus_issue_lines() {
  local verdict_file="$1"
  local lines
  lines=$(jq -r '.issues[]? | "- [\(.severity // "unknown")] \(.criterion // "?"): \(.description // "no description")\(if .fix_hint then " (hint: \(.fix_hint))" else "" end)"' "$verdict_file" 2>/dev/null)
  if [[ -n "$lines" ]]; then
    printf '%s\n' "$lines"
  else
    # jq exits 0 on an empty issue list, so test the output, not the status.
    echo "- (no structured issues)"
  fi
}

# Write a merged "fail" verdict to $VERDICT_FILE.
#   $1 - recommended_state_transition value (continue|blocked)
#   $2 - human-readable summary
# jq builds the document so verdict strings are always JSON-escaped.
_consensus_write_fail_verdict() {
  local transition="$1"
  local summary="$2"
  jq -n \
    --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg summary "$summary" \
    --arg transition "$transition" \
    --arg claude "$CLAUDE_VERDICT" \
    --arg codex "$CODEX_VERDICT" \
    --argjson round "$CONSENSUS_ROUND" \
    '{
      verdict: "fail",
      verified_at_utc: $ts,
      summary: $summary,
      issues: [],
      recommended_state_transition: $transition,
      consensus: { claude: $claude, codex: $codex, round: $round }
    }' | atomic_write "$VERDICT_FILE"
}

run_consensus_verification() {
  local iter="$1"
  local max_rounds=3
  local iter_tag
  iter_tag=$(printf '%03d' "$iter")
  local claude_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-claude.json"
  local codex_verdict_file="$LOGS_DIR/iter-${iter_tag}.verify-verdict-codex.json"
  local fix_contract="$LOGS_DIR/iter-${iter_tag}.fix-contract.md"

  # Continue the round count from previous calls (see header comment).
  CONSENSUS_ROUND=$(( ${CONSENSUS_ROUND:-0} + 1 ))
  CLAUDE_VERDICT=""
  CODEX_VERDICT=""
  log "  Consensus round $CONSENSUS_ROUND/$max_rounds..."

  # Run claude verifier first
  if ! run_single_verifier "$iter" "claude" "$VERIFIER_MODEL" "-claude" "$claude_verdict_file"; then
    log_error "Claude verifier failed in consensus round $CONSENSUS_ROUND"
    return 1
  fi
  CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
  log_debug "[EXEC] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"

  # Run codex verifier second
  if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
    log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
    return 1
  fi
  CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
  log_debug "[EXEC] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"

  log "  Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"

  # Decide the outcome once; everything below branches on it.
  local _combined_action="retry"
  if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
    _combined_action="pass"
  elif (( CONSENSUS_ROUND >= max_rounds )); then
    _combined_action="blocked"
  fi
  log_debug "[EXEC] iter=$iter phase=consensus round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT combined_action=$_combined_action"

  # Both pass → merged verdict with per-engine details
  if [[ "$_combined_action" = "pass" ]]; then
    jq -n \
      --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      --arg claude_file "$claude_verdict_file" \
      --arg codex_file "$codex_verdict_file" \
      --argjson round "$CONSENSUS_ROUND" \
      '{
        verdict: "pass",
        verified_at_utc: $ts,
        summary: "Consensus PASS: both claude and codex verified independently",
        recommended_state_transition: "complete",
        consensus: {
          claude: { verdict: "pass", file: $claude_file },
          codex: { verdict: "pass", file: $codex_file },
          round: $round
        }
      }' | atomic_write "$VERDICT_FILE"
    return 0
  fi

  # Consensus disagreement
  log_debug "[EXEC] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"

  # At least one engine did not pass → build the combined fix contract.
  # Issues are listed for any non-pass verdict, not only the literal "fail".
  {
    echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
    echo ""
    echo "## Claude Verdict: $CLAUDE_VERDICT"
    if [[ "$CLAUDE_VERDICT" != "pass" ]]; then
      echo "### Claude Issues"
      _consensus_issue_lines "$claude_verdict_file"
    fi
    echo ""
    echo "## Codex Verdict: $CODEX_VERDICT"
    if [[ "$CODEX_VERDICT" != "pass" ]]; then
      echo "### Codex Issues"
      _consensus_issue_lines "$codex_verdict_file"
    fi
    echo ""
    echo "## Traceability"
    echo "Only changes that resolve a listed issue are allowed."
  } | atomic_write "$fix_contract"

  log "  Combined fix contract: $fix_contract"

  # Rounds remain: hand a merged fail verdict back so the main loop can run
  # the fix loop and call us again for the next round.
  if [[ "$_combined_action" = "retry" ]]; then
    _consensus_write_fail_verdict "continue" \
      "Consensus disagreement (round $CONSENSUS_ROUND/$max_rounds): claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
    return 2  # special return: consensus disagreement, needs retry
  fi

  # Max consensus rounds exceeded
  log_error "Consensus failed after $max_rounds rounds"
  _consensus_write_fail_verdict "blocked" \
    "Consensus failed after $max_rounds rounds: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
  return 1
}
|
|
1469
|
+
|
|
929
1470
|
# =============================================================================
|
|
930
1471
|
# Security Warning
|
|
931
1472
|
# =============================================================================
|
|
@@ -947,6 +1488,21 @@ print_security_warning() {
|
|
|
947
1488
|
# =============================================================================
|
|
948
1489
|
|
|
949
1490
|
main() {
|
|
1491
|
+
# --- Lockfile: prevent duplicate execution ---
|
|
1492
|
+
local lockfile="$DESK/logs/.rlp-desk-$SLUG.lock"
|
|
1493
|
+
mkdir -p "$(dirname "$lockfile")" 2>/dev/null
|
|
1494
|
+
if ! (set -C; echo $$ > "$lockfile") 2>/dev/null; then
|
|
1495
|
+
local lock_pid
|
|
1496
|
+
lock_pid=$(cat "$lockfile" 2>/dev/null)
|
|
1497
|
+
if kill -0 "$lock_pid" 2>/dev/null; then
|
|
1498
|
+
log_error "Another instance is already running (PID $lock_pid)"
|
|
1499
|
+
exit 1
|
|
1500
|
+
fi
|
|
1501
|
+
# Stale lock — overwrite
|
|
1502
|
+
echo $$ > "$lockfile"
|
|
1503
|
+
fi
|
|
1504
|
+
mkdir -p "$LOGS_DIR" 2>/dev/null
|
|
1505
|
+
|
|
950
1506
|
# --- Startup ---
|
|
951
1507
|
log "Ralph Desk Tmux Runner starting..."
|
|
952
1508
|
log " Slug: $SLUG"
|
|
@@ -954,8 +1510,50 @@ main() {
|
|
|
954
1510
|
log " Max iterations: $MAX_ITER"
|
|
955
1511
|
log " Worker model: $WORKER_MODEL"
|
|
956
1512
|
log " Verifier model: $VERIFIER_MODEL"
|
|
1513
|
+
log " Verify mode: $VERIFY_MODE"
|
|
1514
|
+
log " Verify consensus:$VERIFY_CONSENSUS"
|
|
1515
|
+
log " Consensus scope: $CONSENSUS_SCOPE"
|
|
957
1516
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
958
1517
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
1518
|
+
# --- Debug: Log execution plan ---
|
|
1519
|
+
if (( DEBUG )); then
|
|
1520
|
+
# Extract US IDs from PRD
|
|
1521
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1522
|
+
local us_list=""
|
|
1523
|
+
if [[ -f "$prd_file" ]]; then
|
|
1524
|
+
us_list=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1525
|
+
fi
|
|
1526
|
+
local us_count=$(echo "$us_list" | tr ',' '\n' | grep -c 'US-')
|
|
1527
|
+
|
|
1528
|
+
log_debug "[PLAN] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
1529
|
+
log_debug "[PLAN] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
1530
|
+
log_debug "[PLAN] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
1531
|
+
log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
|
|
1532
|
+
|
|
1533
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1534
|
+
# Build expected flow
|
|
1535
|
+
local expected_flow=""
|
|
1536
|
+
for us in $(echo "$us_list" | tr ',' ' '); do
|
|
1537
|
+
expected_flow="${expected_flow}worker->verify($us)->"
|
|
1538
|
+
done
|
|
1539
|
+
expected_flow="${expected_flow}verify(ALL)->COMPLETE"
|
|
1540
|
+
log_debug "[PLAN] expected_flow=$expected_flow"
|
|
1541
|
+
else
|
|
1542
|
+
log_debug "[PLAN] expected_flow=worker(all)->verify(ALL)->COMPLETE"
|
|
1543
|
+
fi
|
|
1544
|
+
|
|
1545
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1546
|
+
log_debug "[PLAN] consensus_flow=each_verify_runs_claude+codex_both_must_pass"
|
|
1547
|
+
fi
|
|
1548
|
+
fi
|
|
1549
|
+
|
|
1550
|
+
# Extract US list for per-US sequencing
|
|
1551
|
+
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1552
|
+
local prd_file="$DESK/plans/prd-$SLUG.md"
|
|
1553
|
+
if [[ -f "$prd_file" ]]; then
|
|
1554
|
+
US_LIST=$(grep -oE 'US-[0-9]+' "$prd_file" | sort -u | tr '\n' ',' | sed 's/,$//')
|
|
1555
|
+
fi
|
|
1556
|
+
fi
|
|
959
1557
|
|
|
960
1558
|
# Dependency checks
|
|
961
1559
|
check_dependencies
|
|
@@ -982,6 +1580,9 @@ main() {
|
|
|
982
1580
|
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
983
1581
|
log ""
|
|
984
1582
|
log "========== Iteration $ITERATION / $MAX_ITER =========="
|
|
1583
|
+
local _iter_contract=""
|
|
1584
|
+
_iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
|
|
1585
|
+
log_debug "[EXEC] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
|
|
985
1586
|
|
|
986
1587
|
# --- governance.md s7 step 1: Check sentinels ---
|
|
987
1588
|
if [[ -f "$COMPLETE_SENTINEL" ]]; then
|
|
@@ -1021,12 +1622,19 @@ main() {
|
|
|
1021
1622
|
|
|
1022
1623
|
update_status "worker" "running"
|
|
1023
1624
|
|
|
1024
|
-
# --- governance.md s7 step 5: Execute Worker (interactive
|
|
1025
|
-
# Step 5a: Launch interactive
|
|
1026
|
-
local worker_launch
|
|
1027
|
-
|
|
1625
|
+
# --- governance.md s7 step 5: Execute Worker (interactive TUI, tmux pattern) ---
|
|
1626
|
+
# Step 5a: Launch interactive worker engine in Worker pane
|
|
1627
|
+
local worker_launch
|
|
1628
|
+
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
1629
|
+
worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1630
|
+
log " Launching Worker codex in pane $WORKER_PANE..."
|
|
1631
|
+
else
|
|
1632
|
+
worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
1633
|
+
log " Launching Worker claude in pane $WORKER_PANE..."
|
|
1634
|
+
fi
|
|
1028
1635
|
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_launch"
|
|
1029
1636
|
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1637
|
+
log_debug "[EXEC] iter=$ITERATION phase=worker engine=$WORKER_ENGINE model=$WORKER_MODEL dispatched=true"
|
|
1030
1638
|
|
|
1031
1639
|
# Step 5b: Wait for claude TUI to be ready (tmux pattern)
|
|
1032
1640
|
if ! wait_for_pane_ready "$WORKER_PANE" 30; then
|
|
@@ -1036,39 +1644,82 @@ main() {
|
|
|
1036
1644
|
return 1
|
|
1037
1645
|
fi
|
|
1038
1646
|
|
|
1039
|
-
# Step 5c: Wait for claude to fully initialize, then send instruction
|
|
1647
|
+
# Step 5c: Wait for claude to fully initialize, then send instruction directly
|
|
1040
1648
|
sleep 3
|
|
1041
1649
|
local worker_instruction="Read and execute the instructions in $worker_prompt"
|
|
1042
|
-
|
|
1043
|
-
|
|
1650
|
+
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1651
|
+
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1652
|
+
log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
|
|
1653
|
+
|
|
1654
|
+
# Verify claude actually started working — keep sending C-m until activity detected
|
|
1655
|
+
local submit_attempts=0
|
|
1656
|
+
while (( submit_attempts < 15 )); do
|
|
1657
|
+
sleep 2
|
|
1658
|
+
local pane_check
|
|
1659
|
+
pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
|
|
1660
|
+
if echo "$pane_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1661
|
+
log_debug "Worker started working after $((submit_attempts + 1)) submit checks"
|
|
1662
|
+
log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
|
|
1663
|
+
break
|
|
1664
|
+
fi
|
|
1665
|
+
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1666
|
+
if (( submit_attempts == 8 )); then
|
|
1667
|
+
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1668
|
+
tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
|
|
1669
|
+
sleep 0.1
|
|
1670
|
+
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1671
|
+
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1672
|
+
fi
|
|
1673
|
+
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1674
|
+
sleep 0.3
|
|
1675
|
+
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1676
|
+
(( submit_attempts++ ))
|
|
1677
|
+
done
|
|
1678
|
+
if (( submit_attempts >= 15 )); then
|
|
1679
|
+
log " WARNING: Could not confirm Worker started working after 15 attempts"
|
|
1680
|
+
log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=15"
|
|
1044
1681
|
fi
|
|
1045
|
-
# Extra C-m to ensure submission (long text may false-positive the consumed check)
|
|
1046
|
-
sleep 0.5
|
|
1047
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1048
|
-
sleep 0.3
|
|
1049
|
-
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1050
1682
|
|
|
1051
1683
|
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
1052
1684
|
log " Polling for iter-signal.json..."
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1685
|
+
local worker_poll_done=0
|
|
1686
|
+
while (( ! worker_poll_done )); do
|
|
1687
|
+
if poll_for_signal "$SIGNAL_FILE" "$WORKER_HEARTBEAT" "$WORKER_PANE" "$worker_launch" "Worker"; then
|
|
1688
|
+
worker_poll_done=1
|
|
1689
|
+
log_debug "[EXEC] iter=$ITERATION poll_signal_received=true"
|
|
1690
|
+
else
|
|
1691
|
+
# Check if Worker is still actively running (not stuck)
|
|
1692
|
+
local worker_cmd
|
|
1693
|
+
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1694
|
+
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
1695
|
+
log " Worker timed out but still active ($worker_cmd). Extending poll..."
|
|
1696
|
+
log_debug "[EXEC] iter=$ITERATION timeout_active=true process=$worker_cmd"
|
|
1697
|
+
log_debug "[EXEC] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
1698
|
+
update_status "worker" "slow"
|
|
1699
|
+
# Loop continues — re-poll same iteration
|
|
1700
|
+
else
|
|
1701
|
+
# Worker is truly dead/stuck
|
|
1702
|
+
(( MONITOR_FAILURE_COUNT++ ))
|
|
1703
|
+
log_debug "[EXEC] iter=$ITERATION monitor_failure=$MONITOR_FAILURE_COUNT/3"
|
|
1704
|
+
if (( MONITOR_FAILURE_COUNT >= 3 )); then
|
|
1705
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=monitor_failures detail=\"3 consecutive monitor failures\""
|
|
1706
|
+
write_blocked_sentinel "3 consecutive monitor failures (worker not active)"
|
|
1707
|
+
update_status "blocked" "monitor_failures"
|
|
1708
|
+
return 1
|
|
1709
|
+
fi
|
|
1710
|
+
log " WARNING: Worker poll failed (monitor failure $MONITOR_FAILURE_COUNT/3)"
|
|
1711
|
+
update_status "worker" "poll_failed"
|
|
1712
|
+
worker_poll_done=1 # exit poll loop, continue to next iteration
|
|
1713
|
+
log_debug "[EXEC] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
|
|
1714
|
+
# Worker is truly dead/stuck — kill and replace pane (omc-teams pattern)
|
|
1715
|
+
WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
|
|
1716
|
+
fi
|
|
1069
1717
|
fi
|
|
1070
|
-
|
|
1071
|
-
|
|
1718
|
+
done
|
|
1719
|
+
|
|
1720
|
+
if [[ ! -f "$SIGNAL_FILE" ]]; then
|
|
1721
|
+
log_debug "[EXEC] iter=$ITERATION no_signal_after_poll=true continuing"
|
|
1722
|
+
# No signal — monitor failure, go to next iteration
|
|
1072
1723
|
continue
|
|
1073
1724
|
fi
|
|
1074
1725
|
|
|
@@ -1083,6 +1734,11 @@ main() {
|
|
|
1083
1734
|
|
|
1084
1735
|
log " Worker signal: status=$signal_status summary=\"$signal_summary\""
|
|
1085
1736
|
|
|
1737
|
+
# Read us_id early for EXEC logging (also used later in verify branch)
|
|
1738
|
+
local signal_us_id_early=""
|
|
1739
|
+
signal_us_id_early=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
1740
|
+
log_debug "[EXEC] iter=$ITERATION phase=worker_signal status=$signal_status us_id=${signal_us_id_early:-none} summary=\"$signal_summary\""
|
|
1741
|
+
|
|
1086
1742
|
case "$signal_status" in
|
|
1087
1743
|
continue)
|
|
1088
1744
|
# --- governance.md s7 step 6: continue -> go to step 8 ---
|
|
@@ -1091,52 +1747,113 @@ main() {
|
|
|
1091
1747
|
;;
|
|
1092
1748
|
verify)
|
|
1093
1749
|
# --- governance.md s7 step 7: Execute Verifier ---
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1750
|
+
# Read us_id from signal for per-US scoping
|
|
1751
|
+
local signal_us_id=""
|
|
1752
|
+
signal_us_id=$(jq -r '.us_id // empty' "$SIGNAL_FILE" 2>/dev/null)
|
|
1753
|
+
log " Worker claims done (us_id=${signal_us_id:-all}). Dispatching Verifier..."
|
|
1098
1754
|
|
|
1099
1755
|
update_status "verifier" "running"
|
|
1100
1756
|
|
|
1101
|
-
#
|
|
1102
|
-
local
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
sleep 2
|
|
1109
|
-
wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
|
|
1110
|
-
fi
|
|
1111
|
-
|
|
1112
|
-
local verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
1113
|
-
log " Launching Verifier claude in pane $VERIFIER_PANE..."
|
|
1114
|
-
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
|
|
1115
|
-
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1116
|
-
|
|
1117
|
-
# Step 7b: Wait for claude TUI to be ready
|
|
1118
|
-
if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
|
|
1119
|
-
log_error "Verifier claude failed to start"
|
|
1120
|
-
update_status "verifier" "start_failed"
|
|
1121
|
-
continue
|
|
1757
|
+
# --- Consensus scope check ---
|
|
1758
|
+
local use_consensus=0
|
|
1759
|
+
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1760
|
+
case "$CONSENSUS_SCOPE" in
|
|
1761
|
+
all) use_consensus=1 ;;
|
|
1762
|
+
final-only) [[ "$signal_us_id" == "ALL" ]] && use_consensus=1 ;;
|
|
1763
|
+
esac
|
|
1122
1764
|
fi
|
|
1123
1765
|
|
|
1124
|
-
#
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1766
|
+
# --- Consensus vs single verification ---
|
|
1767
|
+
if (( use_consensus )); then
|
|
1768
|
+
# US-004: Run consensus verification (claude + codex sequentially)
|
|
1769
|
+
local consensus_rc=0
|
|
1770
|
+
run_consensus_verification "$ITERATION" || consensus_rc=$?
|
|
1771
|
+
|
|
1772
|
+
if (( consensus_rc == 2 )); then
|
|
1773
|
+
# Consensus disagreement — treat as fail, fix loop will handle
|
|
1774
|
+
log " Consensus disagreement, treating as fail."
|
|
1775
|
+
elif (( consensus_rc != 0 )); then
|
|
1776
|
+
# Consensus verification failed entirely
|
|
1777
|
+
log_error "Consensus verification failed"
|
|
1778
|
+
write_blocked_sentinel "Consensus verification failed after max rounds"
|
|
1779
|
+
update_status "blocked" "consensus_failed"
|
|
1780
|
+
return 1
|
|
1781
|
+
fi
|
|
1782
|
+
else
|
|
1783
|
+
# Standard single-engine verification
|
|
1784
|
+
write_verifier_trigger "$ITERATION"
|
|
1785
|
+
local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $ITERATION).verifier-prompt.md"
|
|
1786
|
+
|
|
1787
|
+
# Step 7a: Clean previous Verifier session if running
|
|
1788
|
+
local verifier_cmd
|
|
1789
|
+
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1790
|
+
if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1791
|
+
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1792
|
+
sleep 0.5
|
|
1793
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" Enter 2>/dev/null
|
|
1794
|
+
sleep 2
|
|
1795
|
+
wait_for_pane_ready "$VERIFIER_PANE" 5 2>/dev/null || true
|
|
1796
|
+
fi
|
|
1797
|
+
|
|
1798
|
+
local verifier_launch
|
|
1799
|
+
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
1800
|
+
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1801
|
+
log " Launching Verifier codex in pane $VERIFIER_PANE..."
|
|
1802
|
+
else
|
|
1803
|
+
verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
1804
|
+
log " Launching Verifier claude in pane $VERIFIER_PANE..."
|
|
1805
|
+
fi
|
|
1806
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_launch"
|
|
1807
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1808
|
+
log_debug "[EXEC] iter=$ITERATION phase=verifier engine=$VERIFIER_ENGINE model=$VERIFIER_MODEL scope=${signal_us_id:-all} dispatched=true"
|
|
1809
|
+
|
|
1810
|
+
# Step 7b: Wait for TUI to be ready
|
|
1811
|
+
if ! wait_for_pane_ready "$VERIFIER_PANE" 30; then
|
|
1812
|
+
log_error "Verifier failed to start"
|
|
1813
|
+
update_status "verifier" "start_failed"
|
|
1814
|
+
continue
|
|
1815
|
+
fi
|
|
1816
|
+
|
|
1817
|
+
# Step 7c: Send instruction
|
|
1818
|
+
sleep 3
|
|
1819
|
+
local verifier_instruction="Read and execute the instructions in $verifier_prompt"
|
|
1820
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1821
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1822
|
+
log_debug "Verifier instruction sent directly"
|
|
1823
|
+
|
|
1824
|
+
# Verify verifier actually started working
|
|
1825
|
+
local vs_submit=0
|
|
1826
|
+
while (( vs_submit < 15 )); do
|
|
1827
|
+
sleep 2
|
|
1828
|
+
local vs_check
|
|
1829
|
+
vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
1830
|
+
if echo "$vs_check" | grep -qi "esc to interrupt\|thinking\|working\|kneading\|crunching\|clauding\|billowing\|brewing\|tinkering\|burrowing\|saut\|Exploring\|Running\|exec\|Explored" 2>/dev/null; then
|
|
1831
|
+
log_debug "Verifier started working after $((vs_submit + 1)) checks"
|
|
1832
|
+
break
|
|
1833
|
+
fi
|
|
1834
|
+
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1835
|
+
if (( vs_submit == 8 )); then
|
|
1836
|
+
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1837
|
+
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
1838
|
+
sleep 0.1
|
|
1839
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1840
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1841
|
+
fi
|
|
1842
|
+
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1843
|
+
sleep 0.3
|
|
1844
|
+
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1845
|
+
(( vs_submit++ ))
|
|
1846
|
+
done
|
|
1847
|
+
|
|
1848
|
+
# Poll for verify-verdict.json
|
|
1849
|
+
log " Polling for verify-verdict.json..."
|
|
1850
|
+
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
|
|
1851
|
+
log_error "Verifier poll failed"
|
|
1852
|
+
update_status "verifier" "poll_failed"
|
|
1853
|
+
# Verifier is dead/stuck — kill and replace pane (omc-teams pattern)
|
|
1854
|
+
VERIFIER_PANE=$(replace_worker_pane "$VERIFIER_PANE" "verifier")
|
|
1855
|
+
continue
|
|
1856
|
+
fi
|
|
1140
1857
|
fi
|
|
1141
1858
|
|
|
1142
1859
|
# --- governance.md s7 step 7: Read verdict via jq ---
|
|
@@ -1149,12 +1866,28 @@ main() {
|
|
|
1149
1866
|
|
|
1150
1867
|
log " Verifier: verdict=$verdict recommended=$recommended"
|
|
1151
1868
|
log " Verifier summary: \"$verdict_summary\""
|
|
1869
|
+
local _issues_count=$(jq '.issues | length' "$VERDICT_FILE" 2>/dev/null || echo 0)
|
|
1870
|
+
log_debug "[EXEC] iter=$ITERATION phase=verdict engine=$VERIFIER_ENGINE verdict=$verdict recommended=$recommended us_id=${signal_us_id:-all} issues=$_issues_count"
|
|
1152
1871
|
|
|
1153
1872
|
case "$verdict" in
|
|
1154
1873
|
pass)
|
|
1155
1874
|
CONSECUTIVE_FAILURES=0
|
|
1156
|
-
|
|
1157
|
-
|
|
1875
|
+
CONSENSUS_ROUND=0
|
|
1876
|
+
|
|
1877
|
+
# --- Per-US tracking ---
|
|
1878
|
+
if [[ "$VERIFY_MODE" = "per-us" && -n "$signal_us_id" && "$signal_us_id" != "ALL" ]]; then
|
|
1879
|
+
# Add this US to verified list
|
|
1880
|
+
if [[ -n "$VERIFIED_US" ]]; then
|
|
1881
|
+
VERIFIED_US="${VERIFIED_US},${signal_us_id}"
|
|
1882
|
+
else
|
|
1883
|
+
VERIFIED_US="$signal_us_id"
|
|
1884
|
+
fi
|
|
1885
|
+
log " US $signal_us_id verified. Verified so far: $VERIFIED_US"
|
|
1886
|
+
log_debug "[EXEC] iter=$ITERATION verified_us_update=$signal_us_id verified_us_total=$VERIFIED_US"
|
|
1887
|
+
update_status "verifier" "pass_us"
|
|
1888
|
+
# Worker will do next US on next iteration
|
|
1889
|
+
elif [[ "$recommended" == "complete" || "$signal_us_id" == "ALL" ]]; then
|
|
1890
|
+
# Final full verify passed or complete recommended
|
|
1158
1891
|
write_complete_sentinel "$verdict_summary"
|
|
1159
1892
|
update_status "complete" "pass"
|
|
1160
1893
|
return 0
|
|
@@ -1185,9 +1918,11 @@ main() {
|
|
|
1185
1918
|
jq -r '.next_iteration_contract // "Fix the issues listed above."' "$VERDICT_FILE" 2>/dev/null
|
|
1186
1919
|
} | atomic_write "$fix_contract"
|
|
1187
1920
|
log " Fix contract: $fix_contract"
|
|
1921
|
+
log_debug "[EXEC] iter=$ITERATION phase=fix_loop trigger=$verdict consecutive_failures=$CONSECUTIVE_FAILURES fix_contract=$fix_contract"
|
|
1188
1922
|
|
|
1189
1923
|
# Circuit breaker: consecutive failures
|
|
1190
1924
|
if (( CONSECUTIVE_FAILURES >= 3 )); then
|
|
1925
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=consecutive_failures detail=\"3 consecutive verification failures\""
|
|
1191
1926
|
log_error "Circuit breaker: 3 consecutive verification failures"
|
|
1192
1927
|
write_blocked_sentinel "3 consecutive verification failures"
|
|
1193
1928
|
update_status "blocked" "consecutive_failures"
|
|
@@ -1233,6 +1968,7 @@ main() {
|
|
|
1233
1968
|
|
|
1234
1969
|
# --- governance.md s7 step 8: Circuit breaker - stale context check ---
|
|
1235
1970
|
if ! check_stale_context; then
|
|
1971
|
+
log_debug "[EXEC] iter=$ITERATION circuit_breaker=stale_context detail=\"context unchanged for 3 consecutive iterations\""
|
|
1236
1972
|
write_blocked_sentinel "Context unchanged for 3 consecutive iterations (stale)"
|
|
1237
1973
|
update_status "blocked" "stale_context"
|
|
1238
1974
|
return 1
|