@ai-dev-methodologies/rlp-desk 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/rlp-desk.md +46 -10
- package/src/scripts/run_ralph_desk.zsh +132 -25
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-dev-methodologies/rlp-desk",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Fresh-context iterative loops for Claude Code — autonomous task completion with independent verification",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"postinstall": "node scripts/postinstall.js",
|
package/src/commands/rlp-desk.md
CHANGED
|
@@ -31,10 +31,16 @@ Ask about these items one by one (or in small groups):
|
|
|
31
31
|
5. **Verification Commands** — build, test, lint commands
|
|
32
32
|
6. **Completion / Blocked Criteria**
|
|
33
33
|
7. **Worker / Verifier Model** — haiku, sonnet, opus. Suggest defaults (worker: sonnet, verifier: opus), ask if OK.
|
|
34
|
-
8. **Engine** —
|
|
34
|
+
8. **Engine & Model** — For each role (Worker, Verifier):
|
|
35
|
+
- Engine: claude (default) or codex
|
|
36
|
+
- If claude: suggest model (haiku/sonnet/opus) based on task complexity
|
|
37
|
+
- If codex: suggest model (default: gpt-5.4) and reasoning effort (low/medium/high)
|
|
38
|
+
- AI should recommend: "For this task complexity, I suggest Worker: sonnet, Verifier: opus"
|
|
39
|
+
- If codex selected: "For codex Worker, I suggest gpt-5.4 with high reasoning"
|
|
35
40
|
9. **Verify Mode** — per-us (default) or batch. Ask: "Verify after each user story (per-us, recommended) or only after all stories are done (batch)?" Default recommendation: per-us for 2+ stories.
|
|
36
41
|
10. **Verify Consensus** — Ask: "Use cross-engine consensus verification? (Both claude and codex verify independently, both must pass.) Requires codex CLI." Default: no.
|
|
37
|
-
11. **Max Iterations** — suggest based on story count, ask if OK.
|
|
42
|
+
11. **Consensus Scope** — If consensus enabled, ask: "Consensus on every verify (all, default) or only on final verify (final-only)?" Default: all.
|
|
43
|
+
12. **Max Iterations** — suggest based on story count, ask if OK.
|
|
38
44
|
|
|
39
45
|
After all items are confirmed, present the full contract summary.
|
|
40
46
|
On approval, offer to run `init`.
|
|
@@ -48,6 +54,25 @@ Do NOT auto-decide iteration unit — the user MUST explicitly choose.
|
|
|
48
54
|
Run: `~/.claude/ralph-desk/init_ralph_desk.zsh <slug> "<objective>"`
|
|
49
55
|
If brainstorm was done, auto-fill PRD and test-spec with the results.
|
|
50
56
|
|
|
57
|
+
**After init completes, STOP. Do NOT auto-run the loop.**
|
|
58
|
+
|
|
59
|
+
Tell the user:
|
|
60
|
+
1. The scaffold has been created — list the generated files
|
|
61
|
+
2. Ask them to review/edit the PRD and test-spec if needed
|
|
62
|
+
3. Show the run command with available options:
|
|
63
|
+
```
|
|
64
|
+
/rlp-desk run <slug> [options]
|
|
65
|
+
|
|
66
|
+
Options:
|
|
67
|
+
--mode agent|tmux
|
|
68
|
+
--worker-engine claude|codex
|
|
69
|
+
--verifier-engine claude|codex
|
|
70
|
+
--verify-mode per-us|batch
|
|
71
|
+
--verify-consensus
|
|
72
|
+
--consensus-scope all|final-only
|
|
73
|
+
```
|
|
74
|
+
4. Wait for the user to explicitly invoke `/rlp-desk run`
|
|
75
|
+
|
|
51
76
|
---
|
|
52
77
|
|
|
53
78
|
## `run <slug> [options]`
|
|
@@ -61,12 +86,17 @@ Options (parse from `$ARGUMENTS`):
|
|
|
61
86
|
- `--verifier-model MODEL` (default: opus)
|
|
62
87
|
- `--worker-engine claude|codex` (default: `claude`) — engine for Worker
|
|
63
88
|
- `--verifier-engine claude|codex` (default: `claude`) — engine for Verifier
|
|
64
|
-
- `--codex-model MODEL` (default: `gpt-5.4`) — model
|
|
65
|
-
- `--codex-reasoning low|medium|high` (default: `high`) — reasoning
|
|
89
|
+
- `--worker-codex-model MODEL` (default: `gpt-5.4`) — codex model for Worker
|
|
90
|
+
- `--worker-codex-reasoning low|medium|high` (default: `high`) — reasoning for Worker
|
|
91
|
+
- `--verifier-codex-model MODEL` (default: `gpt-5.4`) — codex model for Verifier
|
|
92
|
+
- `--verifier-codex-reasoning low|medium|high` (default: `high`) — reasoning for Verifier
|
|
66
93
|
- `--verify-mode per-us|batch` (default: `per-us`) — verification strategy
|
|
67
94
|
- `per-us`: verify after each US, then final full verify of all AC
|
|
68
95
|
- `batch`: verify only after all US done (legacy behavior)
|
|
69
96
|
- `--verify-consensus` — enable cross-engine consensus verification (both claude and codex verify independently; both must pass)
|
|
97
|
+
- `--consensus-scope all|final-only` — when consensus runs (default: `all`)
|
|
98
|
+
- `all`: consensus runs on every verify (current behavior)
|
|
99
|
+
- `final-only`: consensus only on final ALL verify
|
|
70
100
|
- `--debug` — enable debug logging (tmux mode only, writes to logs/<slug>/debug.log)
|
|
71
101
|
|
|
72
102
|
### Mode Selection
|
|
@@ -90,10 +120,13 @@ WORKER_MODEL=<--worker-model value> \
|
|
|
90
120
|
VERIFIER_MODEL=<--verifier-model value> \
|
|
91
121
|
WORKER_ENGINE=<--worker-engine value, default: claude> \
|
|
92
122
|
VERIFIER_ENGINE=<--verifier-engine value, default: claude> \
|
|
93
|
-
|
|
94
|
-
|
|
123
|
+
WORKER_CODEX_MODEL=<--worker-codex-model value, default: gpt-5.4> \
|
|
124
|
+
WORKER_CODEX_REASONING=<--worker-codex-reasoning value, default: high> \
|
|
125
|
+
VERIFIER_CODEX_MODEL=<--verifier-codex-model value, default: gpt-5.4> \
|
|
126
|
+
VERIFIER_CODEX_REASONING=<--verifier-codex-reasoning value, default: high> \
|
|
95
127
|
VERIFY_MODE=<--verify-mode value, default: per-us> \
|
|
96
128
|
VERIFY_CONSENSUS=<1 if --verify-consensus, else 0> \
|
|
129
|
+
CONSENSUS_SCOPE=<--consensus-scope value, default: all> \
|
|
97
130
|
DEBUG=<1 if --debug, else 0> \
|
|
98
131
|
zsh ~/.claude/ralph-desk/run_ralph_desk.zsh
|
|
99
132
|
```
|
|
@@ -161,7 +194,7 @@ Agent(
|
|
|
161
194
|
|
|
162
195
|
If `--worker-engine codex`:
|
|
163
196
|
```
|
|
164
|
-
Bash("codex exec --model <
|
|
197
|
+
Bash("codex exec --model <worker_codex_model> --reasoning-effort <worker_codex_reasoning> <full worker prompt text>")
|
|
165
198
|
```
|
|
166
199
|
- Codex runs as a subprocess via Bash(), not Agent().
|
|
167
200
|
- Each Bash() call = fresh context for codex.
|
|
@@ -206,7 +239,7 @@ Agent(
|
|
|
206
239
|
|
|
207
240
|
If `--verifier-engine codex`:
|
|
208
241
|
```
|
|
209
|
-
Bash("codex exec --model <
|
|
242
|
+
Bash("codex exec --model <verifier_codex_model> --reasoning-effort <verifier_codex_reasoning> <full verifier prompt text>")
|
|
210
243
|
```
|
|
211
244
|
|
|
212
245
|
**⑦b Consensus Verification** (when `--verify-consensus` is enabled):
|
|
@@ -302,10 +335,13 @@ Run options:
|
|
|
302
335
|
--verifier-model MODEL Verifier model (default: opus)
|
|
303
336
|
--worker-engine claude|codex Worker engine (default: claude)
|
|
304
337
|
--verifier-engine claude|codex Verifier engine (default: claude)
|
|
305
|
-
--codex-model MODEL
|
|
306
|
-
--codex-reasoning LEVEL
|
|
338
|
+
--worker-codex-model MODEL Worker codex model (default: gpt-5.4)
|
|
339
|
+
--worker-codex-reasoning LEVEL Worker codex reasoning (default: high)
|
|
340
|
+
--verifier-codex-model MODEL Verifier codex model (default: gpt-5.4)
|
|
341
|
+
--verifier-codex-reasoning LEVEL Verifier codex reasoning (default: high)
|
|
307
342
|
--verify-mode per-us|batch Verification strategy (default: per-us)
|
|
308
343
|
--verify-consensus Cross-engine consensus verification
|
|
344
|
+
--consensus-scope SCOPE When consensus runs: all|final-only (default: all)
|
|
309
345
|
--debug Debug logging (tmux mode only)
|
|
310
346
|
```
|
|
311
347
|
|
package/src/scripts/run_ralph_desk.zsh
CHANGED
|
@@ -29,6 +29,16 @@ set -uo pipefail
|
|
|
29
29
|
# IDLE_NUDGE_THRESHOLD - seconds of idle before nudge (default: 30)
|
|
30
30
|
# MAX_NUDGES - max nudges per pane per iteration (default: 3)
|
|
31
31
|
#
|
|
32
|
+
# Per-role codex config:
|
|
33
|
+
# WORKER_CODEX_MODEL - codex model for Worker (default: gpt-5.4)
|
|
34
|
+
# WORKER_CODEX_REASONING - codex reasoning for Worker (default: high)
|
|
35
|
+
# VERIFIER_CODEX_MODEL - codex model for Verifier (default: gpt-5.4)
|
|
36
|
+
# VERIFIER_CODEX_REASONING - codex reasoning for Verifier (default: high)
|
|
37
|
+
#
|
|
38
|
+
# Consensus scope:
|
|
39
|
+
# CONSENSUS_SCOPE - when consensus applies (default: all)
|
|
40
|
+
# all=every verify, final-only=final ALL only
|
|
41
|
+
#
|
|
32
42
|
# Dependencies: tmux, claude CLI, jq
|
|
33
43
|
# Optional: codex CLI (required when WORKER_ENGINE=codex, VERIFIER_ENGINE=codex, or VERIFY_CONSENSUS=1)
|
|
34
44
|
# =============================================================================
|
|
@@ -49,13 +59,16 @@ MAX_NUDGES="${MAX_NUDGES:-3}"
|
|
|
49
59
|
# --- Engine Selection ---
|
|
50
60
|
WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
|
|
51
61
|
VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
|
|
52
|
-
|
|
53
|
-
|
|
62
|
+
WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
|
|
63
|
+
WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
|
|
64
|
+
VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
|
|
65
|
+
VERIFIER_CODEX_REASONING="${VERIFIER_CODEX_REASONING:-high}" # low|medium|high
|
|
54
66
|
CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
55
67
|
|
|
56
68
|
# --- Verify Mode ---
|
|
57
69
|
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
58
70
|
VERIFY_CONSENSUS="${VERIFY_CONSENSUS:-0}" # 0|1
|
|
71
|
+
CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-all}" # all|final-only
|
|
59
72
|
|
|
60
73
|
# --- Derived Paths ---
|
|
61
74
|
DESK="$ROOT/.claude/ralph-desk"
|
|
@@ -127,6 +140,31 @@ atomic_write() {
|
|
|
127
140
|
mv "$tmp" "$target"
|
|
128
141
|
}
|
|
129
142
|
|
|
143
|
+
# --- omc-teams pattern: Kill-and-replace dead/stuck worker panes ---
|
|
144
|
+
replace_worker_pane() {
|
|
145
|
+
local old_pane="$1"
|
|
146
|
+
local role="$2" # "worker" or "verifier"
|
|
147
|
+
|
|
148
|
+
log " Replacing dead $role pane $old_pane..."
|
|
149
|
+
tmux kill-pane -t "$old_pane" 2>/dev/null
|
|
150
|
+
|
|
151
|
+
# Create fresh pane via split-window off leader (omc-teams kill-and-replace pattern)
|
|
152
|
+
local new_pane
|
|
153
|
+
new_pane=$(tmux split-window -h -d -t "$LEADER_PANE" -P -F '#{pane_id}' -c "$ROOT")
|
|
154
|
+
|
|
155
|
+
log " New $role pane: $new_pane (replaced $old_pane)"
|
|
156
|
+
log_debug "[EXEC] iter=$ITERATION pane_replaced=${role} old=$old_pane new=$new_pane"
|
|
157
|
+
|
|
158
|
+
# Update session-config.json with new pane ID
|
|
159
|
+
if [[ -f "$SESSION_CONFIG" ]]; then
|
|
160
|
+
jq --arg role "$role" --arg pane "$new_pane" \
|
|
161
|
+
'.panes[$role] = $pane' "$SESSION_CONFIG" | atomic_write "$SESSION_CONFIG"
|
|
162
|
+
log_debug "Updated session-config.json: $role pane → $new_pane"
|
|
163
|
+
fi
|
|
164
|
+
|
|
165
|
+
echo "$new_pane"
|
|
166
|
+
}
|
|
167
|
+
|
|
130
168
|
# =============================================================================
|
|
131
169
|
# Dependency Checks
|
|
132
170
|
# =============================================================================
|
|
@@ -282,6 +320,19 @@ create_session() {
|
|
|
282
320
|
"worker": "'"$WORKER_MODEL"'",
|
|
283
321
|
"verifier": "'"$VERIFIER_MODEL"'"
|
|
284
322
|
},
|
|
323
|
+
"engines": {
|
|
324
|
+
"worker": "'"$WORKER_ENGINE"'",
|
|
325
|
+
"verifier": "'"$VERIFIER_ENGINE"'",
|
|
326
|
+
"worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
|
|
327
|
+
"worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
|
|
328
|
+
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
329
|
+
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'"
|
|
330
|
+
},
|
|
331
|
+
"verification": {
|
|
332
|
+
"verify_mode": "'"$VERIFY_MODE"'",
|
|
333
|
+
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
334
|
+
"consensus_scope": "'"$CONSENSUS_SCOPE"'"
|
|
335
|
+
},
|
|
285
336
|
"config": {
|
|
286
337
|
"max_iter": '"$MAX_ITER"',
|
|
287
338
|
"poll_interval": '"$POLL_INTERVAL"',
|
|
@@ -590,7 +641,7 @@ restart_worker() {
|
|
|
590
641
|
|
|
591
642
|
# Re-launch worker (tmux interactive pattern)
|
|
592
643
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
593
|
-
safe_send_keys "$pane_id" "${CODEX_BIN:-codex} -m $
|
|
644
|
+
safe_send_keys "$pane_id" "${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
594
645
|
else
|
|
595
646
|
safe_send_keys "$pane_id" "$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
596
647
|
fi
|
|
@@ -681,8 +732,8 @@ write_worker_trigger() {
|
|
|
681
732
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
682
733
|
# Engine-specific launch command (expanded at write time)
|
|
683
734
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
684
|
-
local engine_cmd="${CODEX_BIN:-codex} -m $
|
|
685
|
-
-c model_reasoning_effort=\"$
|
|
735
|
+
local engine_cmd="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL \\
|
|
736
|
+
-c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" \\
|
|
686
737
|
--dangerously-bypass-approvals-and-sandbox \\
|
|
687
738
|
\"\$(cat $prompt_file)\" \\
|
|
688
739
|
2>&1 | tee $output_log"
|
|
@@ -768,8 +819,8 @@ write_verifier_trigger() {
|
|
|
768
819
|
# Write trigger script (DO NOT use exec -- breaks heartbeat cleanup)
|
|
769
820
|
# Engine-specific launch command (expanded at write time)
|
|
770
821
|
if [[ "$verifier_engine" = "codex" ]]; then
|
|
771
|
-
local engine_cmd="${CODEX_BIN:-codex} -m $
|
|
772
|
-
-c model_reasoning_effort=\"$
|
|
822
|
+
local engine_cmd="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL \\
|
|
823
|
+
-c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" \\
|
|
773
824
|
--dangerously-bypass-approvals-and-sandbox \\
|
|
774
825
|
\"\$(cat $prompt_file)\" \\
|
|
775
826
|
2>&1 | tee $output_log"
|
|
@@ -836,6 +887,7 @@ update_status() {
|
|
|
836
887
|
local consensus_json=""
|
|
837
888
|
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
838
889
|
consensus_json=',
|
|
890
|
+
"consensus_scope": "'"$CONSENSUS_SCOPE"'",
|
|
839
891
|
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
840
892
|
"claude_verdict": "'"${CLAUDE_VERDICT:-}"'",
|
|
841
893
|
"codex_verdict": "'"${CODEX_VERDICT:-}"'"'
|
|
@@ -850,6 +902,10 @@ update_status() {
|
|
|
850
902
|
"verifier_model": "'"$VERIFIER_MODEL"'",
|
|
851
903
|
"worker_engine": "'"$WORKER_ENGINE"'",
|
|
852
904
|
"verifier_engine": "'"$VERIFIER_ENGINE"'",
|
|
905
|
+
"worker_codex_model": "'"$WORKER_CODEX_MODEL"'",
|
|
906
|
+
"worker_codex_reasoning": "'"$WORKER_CODEX_REASONING"'",
|
|
907
|
+
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
908
|
+
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
|
|
853
909
|
"verify_mode": "'"$VERIFY_MODE"'",
|
|
854
910
|
"verify_consensus": '"$VERIFY_CONSENSUS"',
|
|
855
911
|
"last_result": "'"$last_result"'",
|
|
@@ -1212,7 +1268,7 @@ run_single_verifier() {
|
|
|
1212
1268
|
# Launch verifier
|
|
1213
1269
|
if [[ "$engine" = "codex" ]]; then
|
|
1214
1270
|
# Codex: use non-interactive exec mode in pane (more reliable than TUI for sequential runs)
|
|
1215
|
-
local codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $
|
|
1271
|
+
local codex_cmd="${CODEX_BIN:-codex} exec \"\$(cat $prompt_file)\" -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1216
1272
|
log " Running $suffix verifier (codex exec) in pane $VERIFIER_PANE..."
|
|
1217
1273
|
tmux send-keys -t "$VERIFIER_PANE" -l -- "$codex_cmd"
|
|
1218
1274
|
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
@@ -1237,7 +1293,7 @@ run_single_verifier() {
|
|
|
1237
1293
|
|
|
1238
1294
|
# Verify claude actually started working
|
|
1239
1295
|
local v_submit=0
|
|
1240
|
-
while (( v_submit <
|
|
1296
|
+
while (( v_submit < 15 )); do
|
|
1241
1297
|
sleep 2
|
|
1242
1298
|
local v_check
|
|
1243
1299
|
v_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
@@ -1245,6 +1301,14 @@ run_single_verifier() {
|
|
|
1245
1301
|
log_debug "Verifier$suffix started working after $((v_submit + 1)) checks"
|
|
1246
1302
|
break
|
|
1247
1303
|
fi
|
|
1304
|
+
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1305
|
+
if (( v_submit == 8 )); then
|
|
1306
|
+
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1307
|
+
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
1308
|
+
sleep 0.1
|
|
1309
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1310
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1311
|
+
fi
|
|
1248
1312
|
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1249
1313
|
sleep 0.3
|
|
1250
1314
|
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
@@ -1308,13 +1372,15 @@ run_consensus_verification() {
|
|
|
1308
1372
|
return 1
|
|
1309
1373
|
fi
|
|
1310
1374
|
CLAUDE_VERDICT=$(jq -r '.verdict' "$claude_verdict_file" 2>/dev/null)
|
|
1375
|
+
log_debug "[EXEC] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
|
|
1311
1376
|
|
|
1312
1377
|
# Run codex verifier second
|
|
1313
|
-
if ! run_single_verifier "$iter" "codex" "$
|
|
1378
|
+
if ! run_single_verifier "$iter" "codex" "$VERIFIER_CODEX_MODEL" "-codex" "$codex_verdict_file"; then
|
|
1314
1379
|
log_error "Codex verifier failed in consensus round $CONSENSUS_ROUND"
|
|
1315
1380
|
return 1
|
|
1316
1381
|
fi
|
|
1317
1382
|
CODEX_VERDICT=$(jq -r '.verdict' "$codex_verdict_file" 2>/dev/null)
|
|
1383
|
+
log_debug "[EXEC] iter=$iter phase=consensus_codex verdict=$CODEX_VERDICT model=$VERIFIER_CODEX_MODEL reasoning=$VERIFIER_CODEX_REASONING"
|
|
1318
1384
|
|
|
1319
1385
|
log " Consensus: claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT"
|
|
1320
1386
|
local _combined_action="retry"
|
|
@@ -1325,11 +1391,26 @@ run_consensus_verification() {
|
|
|
1325
1391
|
|
|
1326
1392
|
# Both pass → success
|
|
1327
1393
|
if [[ "$CLAUDE_VERDICT" = "pass" && "$CODEX_VERDICT" = "pass" ]]; then
|
|
1328
|
-
#
|
|
1329
|
-
|
|
1394
|
+
# Create merged verdict with per-engine details
|
|
1395
|
+
{
|
|
1396
|
+
echo '{'
|
|
1397
|
+
echo ' "verdict": "pass",'
|
|
1398
|
+
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
1399
|
+
echo ' "summary": "Consensus PASS: both claude and codex verified independently",'
|
|
1400
|
+
echo ' "recommended_state_transition": "complete",'
|
|
1401
|
+
echo ' "consensus": {'
|
|
1402
|
+
echo ' "claude": { "verdict": "pass", "file": "'"$claude_verdict_file"'" },'
|
|
1403
|
+
echo ' "codex": { "verdict": "pass", "file": "'"$codex_verdict_file"'" },'
|
|
1404
|
+
echo ' "round": '"$CONSENSUS_ROUND"
|
|
1405
|
+
echo ' }'
|
|
1406
|
+
echo '}'
|
|
1407
|
+
} | atomic_write "$VERDICT_FILE"
|
|
1330
1408
|
return 0
|
|
1331
1409
|
fi
|
|
1332
1410
|
|
|
1411
|
+
# Consensus disagreement
|
|
1412
|
+
log_debug "[EXEC] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"
|
|
1413
|
+
|
|
1333
1414
|
# Either fails → build combined fix contract
|
|
1334
1415
|
local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $iter).fix-contract.md"
|
|
1335
1416
|
{
|
|
@@ -1431,6 +1512,7 @@ main() {
|
|
|
1431
1512
|
log " Verifier model: $VERIFIER_MODEL"
|
|
1432
1513
|
log " Verify mode: $VERIFY_MODE"
|
|
1433
1514
|
log " Verify consensus:$VERIFY_CONSENSUS"
|
|
1515
|
+
log " Consensus scope: $CONSENSUS_SCOPE"
|
|
1434
1516
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
1435
1517
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
1436
1518
|
# --- Debug: Log execution plan ---
|
|
@@ -1446,7 +1528,7 @@ main() {
|
|
|
1446
1528
|
log_debug "[PLAN] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
1447
1529
|
log_debug "[PLAN] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
1448
1530
|
log_debug "[PLAN] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
1449
|
-
log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS max_iter=$MAX_ITER"
|
|
1531
|
+
log_debug "[PLAN] verify_mode=$VERIFY_MODE consensus=$VERIFY_CONSENSUS consensus_scope=$CONSENSUS_SCOPE max_iter=$MAX_ITER"
|
|
1450
1532
|
|
|
1451
1533
|
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
1452
1534
|
# Build expected flow
|
|
@@ -1544,7 +1626,7 @@ main() {
|
|
|
1544
1626
|
# Step 5a: Launch interactive worker engine in Worker pane
|
|
1545
1627
|
local worker_launch
|
|
1546
1628
|
if [[ "$WORKER_ENGINE" = "codex" ]]; then
|
|
1547
|
-
worker_launch="${CODEX_BIN:-codex} -m $
|
|
1629
|
+
worker_launch="${CODEX_BIN:-codex} -m $WORKER_CODEX_MODEL -c model_reasoning_effort=\"$WORKER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1548
1630
|
log " Launching Worker codex in pane $WORKER_PANE..."
|
|
1549
1631
|
else
|
|
1550
1632
|
worker_launch="$CLAUDE_BIN --model $WORKER_MODEL --dangerously-skip-permissions"
|
|
@@ -1571,7 +1653,7 @@ main() {
|
|
|
1571
1653
|
|
|
1572
1654
|
# Verify claude actually started working — keep sending C-m until activity detected
|
|
1573
1655
|
local submit_attempts=0
|
|
1574
|
-
while (( submit_attempts <
|
|
1656
|
+
while (( submit_attempts < 15 )); do
|
|
1575
1657
|
sleep 2
|
|
1576
1658
|
local pane_check
|
|
1577
1659
|
pane_check=$(tmux capture-pane -t "$WORKER_PANE" -p 2>/dev/null)
|
|
@@ -1580,14 +1662,22 @@ main() {
|
|
|
1580
1662
|
log_debug "[EXEC] iter=$ITERATION worker_submit_check=OK attempts=$((submit_attempts + 1))"
|
|
1581
1663
|
break
|
|
1582
1664
|
fi
|
|
1665
|
+
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1666
|
+
if (( submit_attempts == 8 )); then
|
|
1667
|
+
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1668
|
+
tmux send-keys -t "$WORKER_PANE" C-u 2>/dev/null
|
|
1669
|
+
sleep 0.1
|
|
1670
|
+
tmux send-keys -t "$WORKER_PANE" -l -- "$worker_instruction"
|
|
1671
|
+
tmux send-keys -t "$WORKER_PANE" Enter
|
|
1672
|
+
fi
|
|
1583
1673
|
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1584
1674
|
sleep 0.3
|
|
1585
1675
|
tmux send-keys -t "$WORKER_PANE" C-m 2>/dev/null
|
|
1586
1676
|
(( submit_attempts++ ))
|
|
1587
1677
|
done
|
|
1588
|
-
if (( submit_attempts >=
|
|
1589
|
-
log " WARNING: Could not confirm Worker started working after
|
|
1590
|
-
log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=
|
|
1678
|
+
if (( submit_attempts >= 15 )); then
|
|
1679
|
+
log " WARNING: Could not confirm Worker started working after 15 attempts"
|
|
1680
|
+
log_debug "[EXEC] iter=$ITERATION worker_submit_check=FAILED attempts=15"
|
|
1591
1681
|
fi
|
|
1592
1682
|
|
|
1593
1683
|
# --- governance.md s7 step 5+6: Poll for Worker completion ---
|
|
@@ -1621,10 +1711,8 @@ main() {
|
|
|
1621
1711
|
update_status "worker" "poll_failed"
|
|
1622
1712
|
worker_poll_done=1 # exit poll loop, continue to next iteration
|
|
1623
1713
|
log_debug "[EXEC] iter=$ITERATION poll_worker_dead=true worker_cmd=$worker_cmd"
|
|
1624
|
-
#
|
|
1625
|
-
|
|
1626
|
-
tmux send-keys -t "$WORKER_PANE" "/exit" Enter 2>/dev/null
|
|
1627
|
-
sleep 1
|
|
1714
|
+
# Worker is truly dead/stuck — kill and replace pane (omc-teams pattern)
|
|
1715
|
+
WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
|
|
1628
1716
|
fi
|
|
1629
1717
|
fi
|
|
1630
1718
|
done
|
|
@@ -1666,8 +1754,17 @@ main() {
|
|
|
1666
1754
|
|
|
1667
1755
|
update_status "verifier" "running"
|
|
1668
1756
|
|
|
1669
|
-
# --- Consensus
|
|
1757
|
+
# --- Consensus scope check ---
|
|
1758
|
+
local use_consensus=0
|
|
1670
1759
|
if [[ "$VERIFY_CONSENSUS" = "1" ]]; then
|
|
1760
|
+
case "$CONSENSUS_SCOPE" in
|
|
1761
|
+
all) use_consensus=1 ;;
|
|
1762
|
+
final-only) [[ "$signal_us_id" == "ALL" ]] && use_consensus=1 ;;
|
|
1763
|
+
esac
|
|
1764
|
+
fi
|
|
1765
|
+
|
|
1766
|
+
# --- Consensus vs single verification ---
|
|
1767
|
+
if (( use_consensus )); then
|
|
1671
1768
|
# US-004: Run consensus verification (claude + codex sequentially)
|
|
1672
1769
|
local consensus_rc=0
|
|
1673
1770
|
run_consensus_verification "$ITERATION" || consensus_rc=$?
|
|
@@ -1700,7 +1797,7 @@ main() {
|
|
|
1700
1797
|
|
|
1701
1798
|
local verifier_launch
|
|
1702
1799
|
if [[ "$VERIFIER_ENGINE" = "codex" ]]; then
|
|
1703
|
-
verifier_launch="${CODEX_BIN:-codex} -m $
|
|
1800
|
+
verifier_launch="${CODEX_BIN:-codex} -m $VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$VERIFIER_CODEX_REASONING\" --dangerously-bypass-approvals-and-sandbox"
|
|
1704
1801
|
log " Launching Verifier codex in pane $VERIFIER_PANE..."
|
|
1705
1802
|
else
|
|
1706
1803
|
verifier_launch="$CLAUDE_BIN --model $VERIFIER_MODEL --dangerously-skip-permissions"
|
|
@@ -1726,7 +1823,7 @@ main() {
|
|
|
1726
1823
|
|
|
1727
1824
|
# Verify verifier actually started working
|
|
1728
1825
|
local vs_submit=0
|
|
1729
|
-
while (( vs_submit <
|
|
1826
|
+
while (( vs_submit < 15 )); do
|
|
1730
1827
|
sleep 2
|
|
1731
1828
|
local vs_check
|
|
1732
1829
|
vs_check=$(tmux capture-pane -t "$VERIFIER_PANE" -p 2>/dev/null)
|
|
@@ -1734,6 +1831,14 @@ main() {
|
|
|
1734
1831
|
log_debug "Verifier started working after $((vs_submit + 1)) checks"
|
|
1735
1832
|
break
|
|
1736
1833
|
fi
|
|
1834
|
+
# After 8 failed attempts, try C-u clear + re-type (omc-teams adaptive retry)
|
|
1835
|
+
if (( vs_submit == 8 )); then
|
|
1836
|
+
log_debug "Adaptive instruction retry: clearing line and re-typing"
|
|
1837
|
+
tmux send-keys -t "$VERIFIER_PANE" C-u 2>/dev/null
|
|
1838
|
+
sleep 0.1
|
|
1839
|
+
tmux send-keys -t "$VERIFIER_PANE" -l -- "$verifier_instruction"
|
|
1840
|
+
tmux send-keys -t "$VERIFIER_PANE" Enter
|
|
1841
|
+
fi
|
|
1737
1842
|
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
1738
1843
|
sleep 0.3
|
|
1739
1844
|
tmux send-keys -t "$VERIFIER_PANE" C-m 2>/dev/null
|
|
@@ -1745,6 +1850,8 @@ main() {
|
|
|
1745
1850
|
if ! poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier"; then
|
|
1746
1851
|
log_error "Verifier poll failed"
|
|
1747
1852
|
update_status "verifier" "poll_failed"
|
|
1853
|
+
# Verifier is dead/stuck — kill and replace pane (omc-teams pattern)
|
|
1854
|
+
VERIFIER_PANE=$(replace_worker_pane "$VERIFIER_PANE" "verifier")
|
|
1748
1855
|
continue
|
|
1749
1856
|
fi
|
|
1750
1857
|
fi
|