@ai-dev-methodologies/rlp-desk 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -27
- package/docs/plans/frolicking-churning-honey.md +253 -0
- package/docs/plans/keen-sauteeing-snowflake.md +245 -0
- package/docs/plans/toasty-whistling-diffie.md +1 -1
- package/package.json +1 -1
- package/src/commands/rlp-desk.md +130 -109
- package/src/governance.md +75 -24
- package/src/scripts/lib_ralph_desk.zsh +45 -14
- package/src/scripts/run_ralph_desk.zsh +125 -100
|
@@ -95,12 +95,7 @@ get_next_model() {
|
|
|
95
95
|
gpt-5.3-codex-spark:medium) echo "gpt-5.3-codex-spark:high" ;;
|
|
96
96
|
gpt-5.3-codex-spark:high) echo "gpt-5.3-codex-spark:xhigh" ;;
|
|
97
97
|
gpt-5.3-codex-spark:xhigh) echo "" ;; # spark ceiling (full name)
|
|
98
|
-
# Codex
|
|
99
|
-
gpt-5.3-codex:low) echo "gpt-5.3-codex:medium" ;;
|
|
100
|
-
gpt-5.3-codex:medium) echo "gpt-5.3-codex:high" ;;
|
|
101
|
-
gpt-5.3-codex:high) echo "gpt-5.3-codex:xhigh" ;;
|
|
102
|
-
gpt-5.3-codex:xhigh) echo "" ;; # codex ceiling
|
|
103
|
-
# Codex Non-Pro / upper path
|
|
98
|
+
# Codex Non-Pro upgrade path
|
|
104
99
|
gpt-5.4:low) echo "gpt-5.4:medium" ;;
|
|
105
100
|
gpt-5.4:medium) echo "gpt-5.4:high" ;;
|
|
106
101
|
gpt-5.4:high) echo "gpt-5.4:xhigh" ;;
|
|
@@ -160,6 +155,7 @@ check_model_upgrade() {
|
|
|
160
155
|
fi
|
|
161
156
|
|
|
162
157
|
log " Worker model upgraded: ${_ORIGINAL_WORKER_MODEL} → ${WORKER_MODEL} (same-US consecutive fail threshold)"
|
|
158
|
+
log " [WARN] Same AC failing repeatedly — consider IL-2 re-assessment of AC quality (spec quality check)"
|
|
163
159
|
log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${_ORIGINAL_WORKER_MODEL} to=${WORKER_MODEL}"
|
|
164
160
|
_SAME_US_FAIL_COUNT=0 # Reset counter after upgrade
|
|
165
161
|
fi
|
|
@@ -167,6 +163,26 @@ check_model_upgrade() {
|
|
|
167
163
|
return 0
|
|
168
164
|
}
|
|
169
165
|
|
|
166
|
+
# record_us_failure() — track per-US cumulative failure count (dual counter, Option D)
# Unlike CONSECUTIVE_FAILURES which resets on pass, US_FAIL_HISTORY persists across phases.
# This enables prior-failure warnings when a US that struggled in per-US mode fails again in final verify.
# Usage: record_us_failure <us_id>
# Globals:  US_FAIL_HISTORY (associative array; read and written)
#           ITERATION (read; only for the debug log line)
# Returns:  always 0. A missing, empty, or "unknown" us_id is ignored.
record_us_failure() {
  # ${1:-} keeps a call without arguments a no-op instead of an
  # unset-parameter abort when the caller runs under `set -u`.
  local us_id="${1:-}"
  [[ -z "$us_id" || "$us_id" = "unknown" ]] && return 0

  # Count before this failure; 0 when the US has never failed.
  local prev_count="${US_FAIL_HISTORY[$us_id]:-0}"
  US_FAIL_HISTORY[$us_id]=$(( prev_count + 1 ))

  # Prior-failure warning: if this US has failed before, it's showing fragility
  if (( prev_count > 0 )); then
    log "  [WARN] US $us_id has prior failure history (${US_FAIL_HISTORY[$us_id]} total failures) — consider IL-2 AC quality re-assessment"
    log_debug "[GOV] iter=${ITERATION:-0} us_prior_failures=$us_id count=${US_FAIL_HISTORY[$us_id]}"
  fi

  return 0
}
|
|
185
|
+
|
|
170
186
|
# --- governance.md s7: Atomic file writes (tmux pattern) ---
|
|
171
187
|
# All file writes by the Leader use tmp+mv to prevent corruption.
|
|
172
188
|
atomic_write() {
|
|
@@ -228,7 +244,7 @@ update_status() {
|
|
|
228
244
|
|
|
229
245
|
# Build consensus fields
|
|
230
246
|
local consensus_json=""
|
|
231
|
-
if [[ "$
|
|
247
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
232
248
|
consensus_json=',
|
|
233
249
|
"consensus_scope": "'"$CONSENSUS_SCOPE"'",
|
|
234
250
|
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
@@ -251,7 +267,7 @@ update_status() {
|
|
|
251
267
|
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
252
268
|
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
|
|
253
269
|
"verify_mode": "'"$VERIFY_MODE"'",
|
|
254
|
-
"
|
|
270
|
+
"consensus_mode": "'"$CONSENSUS_MODE"'",
|
|
255
271
|
"last_result": "'"$last_result"'",
|
|
256
272
|
"consecutive_failures": '"$CONSECUTIVE_FAILURES"',
|
|
257
273
|
"verified_us": '"$verified_us_json"''"$consensus_json"',
|
|
@@ -351,9 +367,8 @@ write_cost_log() {
|
|
|
351
367
|
echo '{"iteration":'"$iter"',"estimated_tokens":'"$estimated_tokens"',"token_source":"estimated","prompt_bytes":'"$prompt_bytes"',"claim_bytes":'"$claim_bytes"',"verdict_bytes":'"$verdict_bytes"',"worker_start_time":"'"$worker_start_time"'","worker_end_time":"'"$worker_end_time"'","worker_duration_s":'"$worker_duration_s"',"verifier_start_time":"'"$verifier_start_time"'","verifier_end_time":"'"$verifier_end_time"'","verifier_duration_s":'"$verifier_duration_s"''"$consensus_fields"',"timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' >> "$COST_LOG"
|
|
352
368
|
}
|
|
353
369
|
|
|
354
|
-
# --- Analytics: write per-iteration structured data to campaign.jsonl ---
|
|
370
|
+
# --- Analytics: write per-iteration structured data to campaign.jsonl (always-on) ---
|
|
355
371
|
write_campaign_jsonl() {
|
|
356
|
-
if (( ! DEBUG )) && (( ! WITH_SELF_VERIFICATION )); then return 0; fi
|
|
357
372
|
local iter="$1"
|
|
358
373
|
local us_id="${2:-unknown}"
|
|
359
374
|
local verdict="${3:-unknown}"
|
|
@@ -367,6 +382,19 @@ write_campaign_jsonl() {
|
|
|
367
382
|
verifier_duration_s=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
|
|
368
383
|
fi
|
|
369
384
|
|
|
385
|
+
# Build us_fail_history JSON object from associative array
# Result is "{}" when nothing has failed, else {"<us_id>":<count>,...}.
# NOTE(review): keys are emitted verbatim, not JSON-escaped — assumes US ids
# contain no quotes or backslashes; confirm against how us_id values are produced.
local us_fail_history_json="{}"
if (( ${#US_FAIL_HISTORY[@]} > 0 )); then
  us_fail_history_json="{"
  local first=1
  # Declare the loop variable so it does not leak into global scope.
  local key
  for key in "${(@k)US_FAIL_HISTORY}"; do
    # Comma before every entry except the first.
    (( first )) || us_fail_history_json+=","
    us_fail_history_json+="\"$key\":${US_FAIL_HISTORY[$key]}"
    first=0
  done
  us_fail_history_json+="}"
fi
|
|
397
|
+
|
|
370
398
|
jq -nc \
|
|
371
399
|
--argjson iter "$iter" \
|
|
372
400
|
--arg us_id "$us_id" \
|
|
@@ -375,13 +403,16 @@ write_campaign_jsonl() {
|
|
|
375
403
|
--arg verifier_engine "$VERIFIER_ENGINE" \
|
|
376
404
|
--arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
|
|
377
405
|
--arg codex_verdict "${CODEX_VERDICT:-N/A}" \
|
|
378
|
-
--
|
|
406
|
+
--arg consensus_mode "$CONSENSUS_MODE" \
|
|
407
|
+
--argjson consecutive_failures "$CONSECUTIVE_FAILURES" \
|
|
408
|
+
--argjson model_upgraded "${_MODEL_UPGRADED:-0}" \
|
|
409
|
+
--argjson us_fail_history "$us_fail_history_json" \
|
|
379
410
|
--argjson duration_worker_s "$worker_duration_s" \
|
|
380
411
|
--argjson duration_verifier_s "$verifier_duration_s" \
|
|
381
412
|
--arg project_root "$ROOT" \
|
|
382
413
|
--arg slug "$SLUG" \
|
|
383
414
|
--arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
384
|
-
'{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict,
|
|
415
|
+
'{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus_mode: $consensus_mode, consecutive_failures: $consecutive_failures, model_upgraded: $model_upgraded, us_fail_history: $us_fail_history, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
|
|
385
416
|
>> "$CAMPAIGN_JSONL"
|
|
386
417
|
}
|
|
387
418
|
|
|
@@ -431,7 +462,7 @@ ${untracked}"
|
|
|
431
462
|
local sv_summary=""
|
|
432
463
|
if (( WITH_SELF_VERIFICATION )); then
|
|
433
464
|
local sv_report
|
|
434
|
-
sv_report=$(ls -t "$
|
|
465
|
+
sv_report=$(ls -t "$LOGS_DIR"/self-verification-report-*.md 2>/dev/null | head -1)
|
|
435
466
|
if [[ -n "$sv_report" ]]; then
|
|
436
467
|
sv_summary="See: $sv_report"
|
|
437
468
|
else
|
|
@@ -460,7 +491,7 @@ ${untracked}"
|
|
|
460
491
|
echo "- Elapsed: ${elapsed}s"
|
|
461
492
|
echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
|
|
462
493
|
echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
|
|
463
|
-
echo "- Consensus:
|
|
494
|
+
echo "- Consensus: mode=$CONSENSUS_MODE model=$CONSENSUS_MODEL final_model=$FINAL_CONSENSUS_MODEL"
|
|
464
495
|
echo ""
|
|
465
496
|
echo "## US Status"
|
|
466
497
|
echo "- Verified: ${VERIFIED_US:-none}"
|
|
@@ -47,8 +47,9 @@ set -uo pipefail
|
|
|
47
47
|
SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
|
|
48
48
|
ROOT="${ROOT:-$PWD}"
|
|
49
49
|
MAX_ITER="${MAX_ITER:-20}"
|
|
50
|
-
WORKER_MODEL="${WORKER_MODEL:-
|
|
51
|
-
VERIFIER_MODEL="${VERIFIER_MODEL:-
|
|
50
|
+
WORKER_MODEL="${WORKER_MODEL:-haiku}"
|
|
51
|
+
VERIFIER_MODEL="${VERIFIER_MODEL:-sonnet}"
|
|
52
|
+
FINAL_VERIFIER_MODEL="${FINAL_VERIFIER_MODEL:-opus}"
|
|
52
53
|
POLL_INTERVAL="${POLL_INTERVAL:-5}"
|
|
53
54
|
ITER_TIMEOUT="${ITER_TIMEOUT:-600}"
|
|
54
55
|
HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
|
|
@@ -60,6 +61,7 @@ WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
|
|
|
60
61
|
# --- Engine Selection ---
|
|
61
62
|
WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
|
|
62
63
|
VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
|
|
64
|
+
FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}" # claude|codex (derived from FINAL_VERIFIER_MODEL)
|
|
63
65
|
WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
|
|
64
66
|
WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
|
|
65
67
|
VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
|
|
@@ -68,13 +70,19 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
|
68
70
|
|
|
69
71
|
# --- Verify Mode ---
|
|
70
72
|
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
# Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
|
|
73
|
+
# Consensus: off|all|final-only (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
|
|
74
|
+
CONSENSUS_MODE="${CONSENSUS_MODE:-off}" # off|all|final-only
|
|
75
|
+
CONSENSUS_MODEL="${CONSENSUS_MODEL:-gpt-5.4:medium}" # per-US cross-verifier (lighter)
|
|
76
|
+
FINAL_CONSENSUS_MODEL="${FINAL_CONSENSUS_MODEL:-gpt-5.4:high}" # final cross-verifier (stricter)
|
|
77
|
+
# Legacy compat: map old flags to CONSENSUS_MODE
|
|
77
78
|
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
79
|
+
CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
|
|
80
|
+
elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
|
|
81
|
+
CONSENSUS_MODE="final-only"
|
|
82
|
+
fi
|
|
83
|
+
CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
|
|
84
|
+
# Effective CB threshold: doubled when consensus mode active
|
|
85
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
78
86
|
EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
|
|
79
87
|
else
|
|
80
88
|
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
|
|
@@ -120,6 +128,7 @@ SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"
|
|
|
120
128
|
typeset -A LAST_PANE_CONTENT
|
|
121
129
|
typeset -A PANE_IDLE_SINCE
|
|
122
130
|
typeset -A WORKER_RESTARTS
|
|
131
|
+
typeset -A US_FAIL_HISTORY
|
|
123
132
|
STALE_CONTEXT_COUNT=0
|
|
124
133
|
HEARTBEAT_STALE_COUNT=0
|
|
125
134
|
MONITOR_FAILURE_COUNT=0
|
|
@@ -192,7 +201,7 @@ launch_worker_codex() {
|
|
|
192
201
|
|
|
193
202
|
log " Launching Worker codex via trigger script in pane $pane_id..."
|
|
194
203
|
paste_to_pane "$pane_id" "bash $trigger_file"
|
|
195
|
-
tmux send-keys -t "$pane_id"
|
|
204
|
+
tmux send-keys -t "$pane_id" C-m
|
|
196
205
|
log_debug "Worker codex trigger sent: $trigger_file"
|
|
197
206
|
sleep 3 # brief wait for codex to start
|
|
198
207
|
return 0
|
|
@@ -211,7 +220,7 @@ launch_worker_claude() {
|
|
|
211
220
|
|
|
212
221
|
log " Launching Worker claude in pane $pane_id..."
|
|
213
222
|
paste_to_pane "$pane_id" "$worker_launch"
|
|
214
|
-
tmux send-keys -t "$pane_id"
|
|
223
|
+
tmux send-keys -t "$pane_id" C-m
|
|
215
224
|
|
|
216
225
|
# Wait for claude TUI to be ready
|
|
217
226
|
if ! wait_for_pane_ready "$pane_id" 30; then
|
|
@@ -223,7 +232,7 @@ launch_worker_claude() {
|
|
|
223
232
|
sleep 3
|
|
224
233
|
local worker_instruction="Read and execute the instructions in $prompt_file"
|
|
225
234
|
paste_to_pane "$pane_id" "$worker_instruction"
|
|
226
|
-
tmux send-keys -t "$pane_id"
|
|
235
|
+
tmux send-keys -t "$pane_id" C-m
|
|
227
236
|
log_debug "Worker instruction sent directly (${#worker_instruction} chars)"
|
|
228
237
|
|
|
229
238
|
# 15-iteration submit loop — verify claude started working
|
|
@@ -244,7 +253,7 @@ launch_worker_claude() {
|
|
|
244
253
|
sleep 0.2
|
|
245
254
|
paste_to_pane "$pane_id" "$worker_instruction"
|
|
246
255
|
sleep 0.15
|
|
247
|
-
tmux send-keys -t "$pane_id"
|
|
256
|
+
tmux send-keys -t "$pane_id" C-m
|
|
248
257
|
sleep 1
|
|
249
258
|
fi
|
|
250
259
|
tmux send-keys -t "$pane_id" C-m 2>/dev/null
|
|
@@ -259,15 +268,15 @@ launch_worker_claude() {
|
|
|
259
268
|
log_debug "[GOV] iter=$iter worker_instruction_failed=true attempts=15 action=restart_claude"
|
|
260
269
|
tmux send-keys -t "$pane_id" C-c 2>/dev/null
|
|
261
270
|
sleep 0.5
|
|
262
|
-
tmux send-keys -t "$pane_id" "/exit"
|
|
271
|
+
tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
|
|
263
272
|
sleep 2
|
|
264
273
|
wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
|
|
265
274
|
paste_to_pane "$pane_id" "$worker_launch"
|
|
266
|
-
tmux send-keys -t "$pane_id"
|
|
275
|
+
tmux send-keys -t "$pane_id" C-m
|
|
267
276
|
if wait_for_pane_ready "$pane_id" 30; then
|
|
268
277
|
sleep 3
|
|
269
278
|
paste_to_pane "$pane_id" "$worker_instruction"
|
|
270
|
-
tmux send-keys -t "$pane_id"
|
|
279
|
+
tmux send-keys -t "$pane_id" C-m
|
|
271
280
|
log " Worker restarted and instruction re-sent"
|
|
272
281
|
log_debug "[FLOW] iter=$iter worker_restart_recovery=success"
|
|
273
282
|
else
|
|
@@ -290,7 +299,7 @@ launch_verifier_codex() {
|
|
|
290
299
|
|
|
291
300
|
log " Launching Verifier codex in pane $pane_id..."
|
|
292
301
|
paste_to_pane "$pane_id" "$verifier_launch"
|
|
293
|
-
tmux send-keys -t "$pane_id"
|
|
302
|
+
tmux send-keys -t "$pane_id" C-m
|
|
294
303
|
sleep 3
|
|
295
304
|
return 0
|
|
296
305
|
}
|
|
@@ -306,7 +315,7 @@ launch_verifier_claude() {
|
|
|
306
315
|
|
|
307
316
|
log " Launching Verifier claude in pane $pane_id..."
|
|
308
317
|
paste_to_pane "$pane_id" "$verifier_launch"
|
|
309
|
-
tmux send-keys -t "$pane_id"
|
|
318
|
+
tmux send-keys -t "$pane_id" C-m
|
|
310
319
|
|
|
311
320
|
if ! wait_for_pane_ready "$pane_id" 30; then
|
|
312
321
|
log_error "Verifier failed to start"
|
|
@@ -316,7 +325,7 @@ launch_verifier_claude() {
|
|
|
316
325
|
sleep 3
|
|
317
326
|
local verifier_instruction="Read and execute the instructions in $prompt_file"
|
|
318
327
|
paste_to_pane "$pane_id" "$verifier_instruction"
|
|
319
|
-
tmux send-keys -t "$pane_id"
|
|
328
|
+
tmux send-keys -t "$pane_id" C-m
|
|
320
329
|
log_debug "Verifier instruction sent directly"
|
|
321
330
|
|
|
322
331
|
# Submit loop — verify verifier started working
|
|
@@ -334,7 +343,7 @@ launch_verifier_claude() {
|
|
|
334
343
|
tmux send-keys -t "$pane_id" C-u 2>/dev/null
|
|
335
344
|
sleep 0.1
|
|
336
345
|
paste_to_pane "$pane_id" "$verifier_instruction"
|
|
337
|
-
tmux send-keys -t "$pane_id"
|
|
346
|
+
tmux send-keys -t "$pane_id" C-m
|
|
338
347
|
fi
|
|
339
348
|
tmux send-keys -t "$pane_id" C-m 2>/dev/null
|
|
340
349
|
sleep 0.3
|
|
@@ -455,7 +464,7 @@ check_dependencies() {
|
|
|
455
464
|
fi
|
|
456
465
|
|
|
457
466
|
# Codex binary required only when engine=codex or consensus verification is enabled
|
|
458
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$
|
|
467
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
|
|
459
468
|
if ! command -v codex >/dev/null 2>&1; then
|
|
460
469
|
log_error "codex CLI not found. Install: npm install -g @openai/codex"
|
|
461
470
|
missing=1
|
|
@@ -473,7 +482,7 @@ check_dependencies() {
|
|
|
473
482
|
fi
|
|
474
483
|
|
|
475
484
|
# Resolve codex binary if needed
|
|
476
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$
|
|
485
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
|
|
477
486
|
CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
|
|
478
487
|
log " Codex binary: $CODEX_BIN"
|
|
479
488
|
fi
|
|
@@ -531,7 +540,7 @@ create_session() {
|
|
|
531
540
|
# Set pane titles and enable border labels for visual distinction
|
|
532
541
|
local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
|
|
533
542
|
local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
|
|
534
|
-
[[ "$
|
|
543
|
+
[[ "$CONSENSUS_MODE" != "off" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
|
|
535
544
|
tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
|
|
536
545
|
tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
|
|
537
546
|
tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
|
|
@@ -585,8 +594,7 @@ create_session() {
|
|
|
585
594
|
},
|
|
586
595
|
"verification": {
|
|
587
596
|
"verify_mode": "'"$VERIFY_MODE"'",
|
|
588
|
-
"
|
|
589
|
-
"consensus_scope": "'"$CONSENSUS_SCOPE"'"
|
|
597
|
+
"consensus_mode": "'"$CONSENSUS_MODE"'"
|
|
590
598
|
},
|
|
591
599
|
"config": {
|
|
592
600
|
"max_iter": '"$MAX_ITER"',
|
|
@@ -663,13 +671,13 @@ safe_send_keys() {
|
|
|
663
671
|
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
664
672
|
if echo "$initial_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
665
673
|
log_debug " Permission prompt detected, auto-approving"
|
|
666
|
-
tmux send-keys -t "$pane_id"
|
|
674
|
+
tmux send-keys -t "$pane_id" C-m
|
|
667
675
|
sleep 0.3
|
|
668
676
|
fi
|
|
669
677
|
# Auto-dismiss codex update prompt (select Skip)
|
|
670
678
|
if echo "$initial_capture" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
671
679
|
log_debug " Codex update prompt detected, selecting Skip"
|
|
672
|
-
tmux send-keys -t "$pane_id" "2"
|
|
680
|
+
tmux send-keys -t "$pane_id" "2" C-m
|
|
673
681
|
sleep 0.2
|
|
674
682
|
fi
|
|
675
683
|
# Send text via buffer paste (reliable for long strings)
|
|
@@ -761,9 +769,9 @@ wait_for_pane_ready() {
|
|
|
761
769
|
# Auto-dismiss trust prompt (tmux pattern: paneHasTrustPrompt)
|
|
762
770
|
if echo "$captured" | grep -q "Do you trust" 2>/dev/null; then
|
|
763
771
|
log " Trust prompt detected, auto-dismissing..."
|
|
764
|
-
tmux send-keys -t "$pane_id"
|
|
772
|
+
tmux send-keys -t "$pane_id" C-m
|
|
765
773
|
sleep 0.12
|
|
766
|
-
tmux send-keys -t "$pane_id"
|
|
774
|
+
tmux send-keys -t "$pane_id" C-m
|
|
767
775
|
sleep 2
|
|
768
776
|
continue
|
|
769
777
|
fi
|
|
@@ -771,7 +779,7 @@ wait_for_pane_ready() {
|
|
|
771
779
|
# Auto-approve permission prompts ("Do you want to create/overwrite X?")
|
|
772
780
|
if echo "$captured" | grep -q "Do you want to" 2>/dev/null; then
|
|
773
781
|
log " Permission prompt detected, auto-approving..."
|
|
774
|
-
tmux send-keys -t "$pane_id"
|
|
782
|
+
tmux send-keys -t "$pane_id" C-m
|
|
775
783
|
sleep 0.5
|
|
776
784
|
continue
|
|
777
785
|
fi
|
|
@@ -779,7 +787,7 @@ wait_for_pane_ready() {
|
|
|
779
787
|
# Auto-dismiss codex update prompt (select Skip = option 2)
|
|
780
788
|
if echo "$captured" | grep -qi "new version\|update.*codex\|codex.*update" 2>/dev/null; then
|
|
781
789
|
log " Codex update prompt detected, selecting Skip..."
|
|
782
|
-
tmux send-keys -t "$pane_id" "2"
|
|
790
|
+
tmux send-keys -t "$pane_id" "2" C-m
|
|
783
791
|
sleep 0.5
|
|
784
792
|
continue
|
|
785
793
|
fi
|
|
@@ -917,7 +925,7 @@ restart_worker() {
|
|
|
917
925
|
|
|
918
926
|
# Kill existing claude, wait for shell prompt
|
|
919
927
|
tmux send-keys -t "$pane_id" C-c 2>/dev/null
|
|
920
|
-
tmux send-keys -t "$pane_id" "/exit"
|
|
928
|
+
tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null
|
|
921
929
|
sleep 2
|
|
922
930
|
|
|
923
931
|
# Re-launch worker (tmux interactive pattern)
|
|
@@ -1205,11 +1213,11 @@ cleanup() {
|
|
|
1205
1213
|
log_debug "cleanup: WORKER_PANE=${WORKER_PANE:-unset} VERIFIER_PANE=${VERIFIER_PANE:-unset}"
|
|
1206
1214
|
if [[ -n "${WORKER_PANE:-}" ]]; then
|
|
1207
1215
|
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
1208
|
-
tmux send-keys -t "$WORKER_PANE" "/exit"
|
|
1216
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
1209
1217
|
fi
|
|
1210
1218
|
if [[ -n "${VERIFIER_PANE:-}" ]]; then
|
|
1211
1219
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1212
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
1220
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
1213
1221
|
fi
|
|
1214
1222
|
sleep 2
|
|
1215
1223
|
# Kill panes on completion
|
|
@@ -1284,11 +1292,11 @@ cleanup() {
|
|
|
1284
1292
|
fi
|
|
1285
1293
|
|
|
1286
1294
|
# 3. Consensus: were both engines used?
|
|
1287
|
-
if [[ "$
|
|
1295
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
1288
1296
|
if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
|
|
1289
|
-
log_debug "[FLOW] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
1297
|
+
log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
1290
1298
|
else
|
|
1291
|
-
log_debug "[FLOW] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1299
|
+
log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1292
1300
|
fi
|
|
1293
1301
|
fi
|
|
1294
1302
|
|
|
@@ -1410,7 +1418,7 @@ poll_for_signal() {
|
|
|
1410
1418
|
log " A5: Rate-limited pane shows 'queued messages' — restarting $role pane"
|
|
1411
1419
|
log_debug "[GOV] iter=$ITERATION phase=rate_limit_pane_restart role=$role reason=queued_messages"
|
|
1412
1420
|
tmux send-keys -t "$pane_id" C-c 2>/dev/null; sleep 0.5
|
|
1413
|
-
tmux send-keys -t "$pane_id" "/exit"
|
|
1421
|
+
tmux send-keys -t "$pane_id" "/exit" C-m 2>/dev/null; sleep 2
|
|
1414
1422
|
wait_for_pane_ready "$pane_id" 10 2>/dev/null || true
|
|
1415
1423
|
fi
|
|
1416
1424
|
sleep "$_API_RETRY_INTERVAL_S"
|
|
@@ -1487,7 +1495,7 @@ poll_for_signal() {
|
|
|
1487
1495
|
if echo "$poll_capture" | grep -q "Do you want to" 2>/dev/null; then
|
|
1488
1496
|
log " Permission prompt detected during poll, auto-approving..."
|
|
1489
1497
|
log_debug "[FLOW] iter=$ITERATION permission_prompt_auto_approved=true"
|
|
1490
|
-
tmux send-keys -t "$pane_id"
|
|
1498
|
+
tmux send-keys -t "$pane_id" C-m
|
|
1491
1499
|
sleep 0.5
|
|
1492
1500
|
fi
|
|
1493
1501
|
|
|
@@ -1529,12 +1537,12 @@ run_single_verifier() {
|
|
|
1529
1537
|
log_debug "[GOV] iter=$iter pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
1530
1538
|
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
1531
1539
|
sleep 0.2
|
|
1532
|
-
tmux send-keys -t "$VERIFIER_PANE" "clear"
|
|
1540
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
|
|
1533
1541
|
sleep 0.3
|
|
1534
1542
|
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1535
1543
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
1536
1544
|
sleep 0.5
|
|
1537
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
1545
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
1538
1546
|
sleep 2
|
|
1539
1547
|
fi
|
|
1540
1548
|
# Always ensure clean shell state before launching new verifier
|
|
@@ -1628,7 +1636,7 @@ run_sequential_final_verify() {
|
|
|
1628
1636
|
verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1629
1637
|
if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
1630
1638
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
|
|
1631
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
1639
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
|
|
1632
1640
|
fi
|
|
1633
1641
|
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
1634
1642
|
|
|
@@ -1689,20 +1697,14 @@ run_sequential_final_verify() {
|
|
|
1689
1697
|
|
|
1690
1698
|
# --- US-005: Determine whether consensus verification should run for this signal ---
# Uses unified CONSENSUS_MODE: off|all|final-only
# Arguments: $1 - US id carried by the signal ("ALL" is the final ALL verify)
# Globals:   CONSENSUS_MODE (read)
# Returns:   0 (use consensus) or 1 (single engine).
_should_use_consensus() {
  local signal_us_id="${1:-}"
  # Default to "off" so an unset CONSENSUS_MODE means single engine
  # rather than an unset-parameter abort under `set -u`.
  case "${CONSENSUS_MODE:-off}" in
    all)
      return 0
      ;;
    final-only)
      # Explicit status on both outcomes instead of relying on the
      # exit status of a failed test falling out of the case.
      if [[ "$signal_us_id" == "ALL" ]]; then
        return 0
      fi
      return 1
      ;;
    *)
      # "off" and any unrecognised value: single engine.
      return 1
      ;;
  esac
}
|
|
1707
1709
|
|
|
1708
1710
|
# --- US-004: Run consensus verification (claude + codex sequentially) ---
|
|
@@ -1744,13 +1746,7 @@ run_consensus_verification() {
|
|
|
1744
1746
|
fi
|
|
1745
1747
|
log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
|
|
1746
1748
|
|
|
1747
|
-
#
|
|
1748
|
-
if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
|
|
1749
|
-
log " Consensus fail-fast: claude=fail, skipping codex verifier"
|
|
1750
|
-
log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
|
|
1751
|
-
CODEX_VERDICT="skipped"
|
|
1752
|
-
return 2 # disagreement/fail signal
|
|
1753
|
-
fi
|
|
1749
|
+
# consensus-fail-fast removed (complexity vs value too low)
|
|
1754
1750
|
|
|
1755
1751
|
# Run codex verifier second
|
|
1756
1752
|
local _codex_t0=$(date +%s)
|
|
@@ -1887,12 +1883,10 @@ main() {
|
|
|
1887
1883
|
trap cleanup EXIT INT TERM
|
|
1888
1884
|
mkdir -p "$LOGS_DIR" "$RUNTIME_DIR" 2>/dev/null
|
|
1889
1885
|
|
|
1890
|
-
# --- Analytics directory: create
|
|
1891
|
-
|
|
1892
|
-
mkdir -p "$ANALYTICS_DIR" 2>/dev/null
|
|
1893
|
-
fi
|
|
1886
|
+
# --- Analytics directory: always create (campaign.jsonl + metadata.json are always-on) ---
|
|
1887
|
+
mkdir -p "$ANALYTICS_DIR" 2>/dev/null
|
|
1894
1888
|
|
|
1895
|
-
# --- debug.log versioning (in analytics dir) ---
|
|
1889
|
+
# --- debug.log versioning (in analytics dir, --debug only) ---
|
|
1896
1890
|
if (( DEBUG )) && [[ -f "$DEBUG_LOG" ]]; then
|
|
1897
1891
|
local dbg_n=1
|
|
1898
1892
|
while [[ -f "${DEBUG_LOG%.log}-v${dbg_n}.log" ]]; do
|
|
@@ -1901,33 +1895,30 @@ main() {
|
|
|
1901
1895
|
mv "$DEBUG_LOG" "${DEBUG_LOG%.log}-v${dbg_n}.log"
|
|
1902
1896
|
fi
|
|
1903
1897
|
|
|
1904
|
-
# --- campaign.jsonl versioning (
|
|
1905
|
-
if
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
|
|
1912
|
-
fi
|
|
1898
|
+
# --- campaign.jsonl versioning (always-on) ---
|
|
1899
|
+
if [[ -f "$CAMPAIGN_JSONL" ]]; then
|
|
1900
|
+
local cj_n=1
|
|
1901
|
+
while [[ -f "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl" ]]; do
|
|
1902
|
+
(( cj_n++ ))
|
|
1903
|
+
done
|
|
1904
|
+
mv "$CAMPAIGN_JSONL" "${CAMPAIGN_JSONL%.jsonl}-v${cj_n}.jsonl"
|
|
1913
1905
|
fi
|
|
1914
1906
|
|
|
1915
|
-
# --- metadata.json: write at campaign start ---
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
fi
|
|
1907
|
+
# --- metadata.json: always write at campaign start (cross-project identification) ---
# VERIFY_CONSENSUS is only a legacy input now (read elsewhere as ${VERIFY_CONSENSUS:-0})
# and may be unset, so the numeric `consensus` field is derived from CONSENSUS_MODE.
# `consensus` stays 0/1 for existing readers; `consensus_mode` carries off|all|final-only.
local metadata_consensus=0
if [[ "${CONSENSUS_MODE:-off}" != "off" ]]; then
  metadata_consensus=1
fi
jq -n \
  --arg slug "$SLUG" \
  --arg project_root "$ROOT" \
  --arg project_name "$(basename "$ROOT")" \
  --arg campaign_status "running" \
  --arg start_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  --arg end_time "" \
  --arg worker_model "$WORKER_MODEL" \
  --arg verifier_model "$VERIFIER_MODEL" \
  --argjson debug "$DEBUG" \
  --argjson with_sv "$WITH_SELF_VERIFICATION" \
  --argjson consensus "$metadata_consensus" \
  --arg consensus_mode "${CONSENSUS_MODE:-off}" \
  '{slug: $slug, project_root: $project_root, project_name: $project_name, campaign_status: $campaign_status, start_time: $start_time, end_time: $end_time, worker_model: $worker_model, verifier_model: $verifier_model, debug: $debug, with_self_verification: $with_sv, consensus: $consensus, consensus_mode: $consensus_mode}' \
  > "$METADATA_FILE"
|
|
1931
1922
|
|
|
1932
1923
|
# --- Startup ---
|
|
1933
1924
|
log "Ralph Desk Tmux Runner starting..."
|
|
@@ -1935,11 +1926,10 @@ main() {
|
|
|
1935
1926
|
log " Root: $ROOT"
|
|
1936
1927
|
log " Max iterations: $MAX_ITER"
|
|
1937
1928
|
log " Worker model: $WORKER_MODEL"
|
|
1938
|
-
log " Verifier model: $VERIFIER_MODEL"
|
|
1929
|
+
log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
|
|
1939
1930
|
log " Verify mode: $VERIFY_MODE"
|
|
1940
|
-
log "
|
|
1941
|
-
log "
|
|
1942
|
-
log " Consensus scope: $CONSENSUS_SCOPE"
|
|
1931
|
+
log " Consensus mode: $CONSENSUS_MODE"
|
|
1932
|
+
log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
|
|
1943
1933
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
1944
1934
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
1945
1935
|
# --- Debug: Log execution plan ---
|
|
@@ -1955,7 +1945,7 @@ main() {
|
|
|
1955
1945
|
log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
1956
1946
|
log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
1957
1947
|
log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
1958
|
-
log_debug "[OPTION] verify_mode=$VERIFY_MODE
|
|
1948
|
+
log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
|
|
1959
1949
|
log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
|
|
1960
1950
|
|
|
1961
1951
|
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
@@ -2065,7 +2055,7 @@ main() {
|
|
|
2065
2055
|
# Send C-c first (in case claude is mid-task), then /exit
|
|
2066
2056
|
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
2067
2057
|
sleep 1
|
|
2068
|
-
tmux send-keys -t "$WORKER_PANE" "/exit"
|
|
2058
|
+
tmux send-keys -t "$WORKER_PANE" "/exit" C-m 2>/dev/null
|
|
2069
2059
|
sleep 2
|
|
2070
2060
|
# Wait for shell prompt before proceeding
|
|
2071
2061
|
wait_for_pane_ready "$WORKER_PANE" 10 2>/dev/null || true
|
|
@@ -2222,7 +2212,7 @@ main() {
|
|
|
2222
2212
|
fi
|
|
2223
2213
|
fi
|
|
2224
2214
|
|
|
2225
|
-
# --- Consensus scope check (US-005: _should_use_consensus handles
|
|
2215
|
+
# --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
|
|
2226
2216
|
local use_consensus=0
|
|
2227
2217
|
_should_use_consensus "$signal_us_id" && use_consensus=1
|
|
2228
2218
|
|
|
@@ -2261,12 +2251,12 @@ main() {
|
|
|
2261
2251
|
log_debug "[GOV] iter=$ITERATION pane_dead=true pane_id=$VERIFIER_PANE cmd=$verifier_cmd action=reset_shell"
|
|
2262
2252
|
tmux send-keys -t "$VERIFIER_PANE" C-c C-u 2>/dev/null
|
|
2263
2253
|
sleep 0.2
|
|
2264
|
-
tmux send-keys -t "$VERIFIER_PANE" "clear"
|
|
2254
|
+
tmux send-keys -t "$VERIFIER_PANE" "clear" C-m 2>/dev/null
|
|
2265
2255
|
sleep 0.3
|
|
2266
2256
|
elif [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
|
|
2267
2257
|
tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null
|
|
2268
2258
|
sleep 0.5
|
|
2269
|
-
tmux send-keys -t "$VERIFIER_PANE" "/exit"
|
|
2259
|
+
tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null
|
|
2270
2260
|
sleep 2
|
|
2271
2261
|
fi
|
|
2272
2262
|
wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
|
|
@@ -2362,7 +2352,14 @@ main() {
|
|
|
2362
2352
|
fail)
|
|
2363
2353
|
# --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
|
|
2364
2354
|
(( CONSECUTIVE_FAILURES++ ))
|
|
2355
|
+
record_us_failure "${signal_us_id:-unknown}"
|
|
2365
2356
|
check_model_upgrade "${signal_us_id:-unknown}"
|
|
2357
|
+
|
|
2358
|
+
# Mid-CB warning: alert at halfway point (governance §8 early warning)
|
|
2359
|
+
if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
|
|
2360
|
+
log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
|
|
2361
|
+
log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
|
|
2362
|
+
fi
|
|
2366
2363
|
local verdict_summary_fail
|
|
2367
2364
|
verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
2368
2365
|
log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
|
|
@@ -2498,8 +2495,36 @@ while (( _cli_i <= $# )); do
|
|
|
2498
2495
|
--lock-worker-model)
|
|
2499
2496
|
LOCK_WORKER_MODEL=1
|
|
2500
2497
|
;;
|
|
2498
|
+
--final-verifier-model)
|
|
2499
|
+
(( _cli_i++ ))
|
|
2500
|
+
_cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "final-verifier") || exit 1
|
|
2501
|
+
FINAL_VERIFIER_ENGINE="${_cli_parsed%% *}"
|
|
2502
|
+
_cli_rest="${_cli_parsed#* }"
|
|
2503
|
+
FINAL_VERIFIER_MODEL="${_cli_rest%% *}"
|
|
2504
|
+
if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
|
|
2505
|
+
FINAL_VERIFIER_CODEX_MODEL="$FINAL_VERIFIER_MODEL"
|
|
2506
|
+
FINAL_VERIFIER_CODEX_REASONING="${_cli_rest##* }"
|
|
2507
|
+
fi
|
|
2508
|
+
;;
|
|
2509
|
+
--consensus)
|
|
2510
|
+
(( _cli_i++ ))
|
|
2511
|
+
CONSENSUS_MODE="${@[$_cli_i]:-off}"
|
|
2512
|
+
;;
|
|
2513
|
+
--consensus-model)
|
|
2514
|
+
(( _cli_i++ ))
|
|
2515
|
+
CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:medium}"
|
|
2516
|
+
;;
|
|
2517
|
+
--final-consensus-model)
|
|
2518
|
+
(( _cli_i++ ))
|
|
2519
|
+
FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:high}"
|
|
2520
|
+
;;
|
|
2501
2521
|
--final-consensus)
|
|
2502
|
-
|
|
2522
|
+
# Legacy: map to new --consensus final-only
|
|
2523
|
+
CONSENSUS_MODE="final-only"
|
|
2524
|
+
;;
|
|
2525
|
+
--verify-consensus)
|
|
2526
|
+
# Legacy: map to new --consensus all
|
|
2527
|
+
CONSENSUS_MODE="all"
|
|
2503
2528
|
;;
|
|
2504
2529
|
esac
|
|
2505
2530
|
(( _cli_i++ ))
|