@ai-dev-methodologies/rlp-desk 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -27
- package/docs/plans/frolicking-churning-honey.md +253 -0
- package/package.json +1 -1
- package/src/commands/rlp-desk.md +130 -109
- package/src/governance.md +74 -23
- package/src/scripts/lib_ralph_desk.zsh +41 -11
- package/src/scripts/run_ralph_desk.zsh +72 -42
|
@@ -95,12 +95,7 @@ get_next_model() {
|
|
|
95
95
|
gpt-5.3-codex-spark:medium) echo "gpt-5.3-codex-spark:high" ;;
|
|
96
96
|
gpt-5.3-codex-spark:high) echo "gpt-5.3-codex-spark:xhigh" ;;
|
|
97
97
|
gpt-5.3-codex-spark:xhigh) echo "" ;; # spark ceiling (full name)
|
|
98
|
-
# Codex
|
|
99
|
-
gpt-5.3-codex:low) echo "gpt-5.3-codex:medium" ;;
|
|
100
|
-
gpt-5.3-codex:medium) echo "gpt-5.3-codex:high" ;;
|
|
101
|
-
gpt-5.3-codex:high) echo "gpt-5.3-codex:xhigh" ;;
|
|
102
|
-
gpt-5.3-codex:xhigh) echo "" ;; # codex ceiling
|
|
103
|
-
# Codex Non-Pro / upper path
|
|
98
|
+
# Codex Non-Pro upgrade path
|
|
104
99
|
gpt-5.4:low) echo "gpt-5.4:medium" ;;
|
|
105
100
|
gpt-5.4:medium) echo "gpt-5.4:high" ;;
|
|
106
101
|
gpt-5.4:high) echo "gpt-5.4:xhigh" ;;
|
|
@@ -160,6 +155,7 @@ check_model_upgrade() {
|
|
|
160
155
|
fi
|
|
161
156
|
|
|
162
157
|
log " Worker model upgraded: ${_ORIGINAL_WORKER_MODEL} → ${WORKER_MODEL} (same-US consecutive fail threshold)"
|
|
158
|
+
log " [WARN] Same AC failing repeatedly — consider IL-2 re-assessment of AC quality (spec quality check)"
|
|
163
159
|
log_debug "[DECIDE] iter=${ITERATION:-0} phase=model_select model_upgrade=true reason=consecutive_same_ac_fail from=${_ORIGINAL_WORKER_MODEL} to=${WORKER_MODEL}"
|
|
164
160
|
_SAME_US_FAIL_COUNT=0 # Reset counter after upgrade
|
|
165
161
|
fi
|
|
@@ -167,6 +163,26 @@ check_model_upgrade() {
|
|
|
167
163
|
return 0
|
|
168
164
|
}
|
|
169
165
|
|
|
166
|
+
# record_us_failure() — track per-US cumulative failure count (dual counter, Option D)
|
|
167
|
+
# Unlike CONSECUTIVE_FAILURES which resets on pass, US_FAIL_HISTORY persists across phases.
|
|
168
|
+
# This enables prior-failure warnings when a US that struggled in per-US mode fails again in final verify.
|
|
169
|
+
# Usage: record_us_failure <us_id>
|
|
170
|
+
record_us_failure() {
|
|
171
|
+
local us_id="$1"
|
|
172
|
+
[[ -z "$us_id" || "$us_id" = "unknown" ]] && return 0
|
|
173
|
+
|
|
174
|
+
local prev_count="${US_FAIL_HISTORY[$us_id]:-0}"
|
|
175
|
+
US_FAIL_HISTORY[$us_id]=$(( prev_count + 1 ))
|
|
176
|
+
|
|
177
|
+
# Prior-failure warning: if this US has failed before, it's showing fragility
|
|
178
|
+
if (( prev_count > 0 )); then
|
|
179
|
+
log " [WARN] US $us_id has prior failure history (${US_FAIL_HISTORY[$us_id]} total failures) — consider IL-2 AC quality re-assessment"
|
|
180
|
+
log_debug "[GOV] iter=${ITERATION:-0} us_prior_failures=$us_id count=${US_FAIL_HISTORY[$us_id]}"
|
|
181
|
+
fi
|
|
182
|
+
|
|
183
|
+
return 0
|
|
184
|
+
}
|
|
185
|
+
|
|
170
186
|
# --- governance.md s7: Atomic file writes (tmux pattern) ---
|
|
171
187
|
# All file writes by the Leader use tmp+mv to prevent corruption.
|
|
172
188
|
atomic_write() {
|
|
@@ -228,7 +244,7 @@ update_status() {
|
|
|
228
244
|
|
|
229
245
|
# Build consensus fields
|
|
230
246
|
local consensus_json=""
|
|
231
|
-
if [[ "$
|
|
247
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
232
248
|
consensus_json=',
|
|
233
249
|
"consensus_scope": "'"$CONSENSUS_SCOPE"'",
|
|
234
250
|
"consensus_round": '"$CONSENSUS_ROUND"',
|
|
@@ -251,7 +267,7 @@ update_status() {
|
|
|
251
267
|
"verifier_codex_model": "'"$VERIFIER_CODEX_MODEL"'",
|
|
252
268
|
"verifier_codex_reasoning": "'"$VERIFIER_CODEX_REASONING"'",
|
|
253
269
|
"verify_mode": "'"$VERIFY_MODE"'",
|
|
254
|
-
"
|
|
270
|
+
"consensus_mode": "'"$CONSENSUS_MODE"'",
|
|
255
271
|
"last_result": "'"$last_result"'",
|
|
256
272
|
"consecutive_failures": '"$CONSECUTIVE_FAILURES"',
|
|
257
273
|
"verified_us": '"$verified_us_json"''"$consensus_json"',
|
|
@@ -366,6 +382,19 @@ write_campaign_jsonl() {
|
|
|
366
382
|
verifier_duration_s=$(( ${ITER_VERIFIER_END:-$(date +%s)} - ITER_VERIFIER_START ))
|
|
367
383
|
fi
|
|
368
384
|
|
|
385
|
+
# Build us_fail_history JSON object from associative array
|
|
386
|
+
local us_fail_history_json="{}"
|
|
387
|
+
if (( ${#US_FAIL_HISTORY[@]} > 0 )); then
|
|
388
|
+
us_fail_history_json="{"
|
|
389
|
+
local first=1
|
|
390
|
+
for key in "${(@k)US_FAIL_HISTORY}"; do
|
|
391
|
+
(( first )) || us_fail_history_json+=","
|
|
392
|
+
us_fail_history_json+="\"$key\":${US_FAIL_HISTORY[$key]}"
|
|
393
|
+
first=0
|
|
394
|
+
done
|
|
395
|
+
us_fail_history_json+="}"
|
|
396
|
+
fi
|
|
397
|
+
|
|
369
398
|
jq -nc \
|
|
370
399
|
--argjson iter "$iter" \
|
|
371
400
|
--arg us_id "$us_id" \
|
|
@@ -374,15 +403,16 @@ write_campaign_jsonl() {
|
|
|
374
403
|
--arg verifier_engine "$VERIFIER_ENGINE" \
|
|
375
404
|
--arg claude_verdict "${CLAUDE_VERDICT:-$verdict}" \
|
|
376
405
|
--arg codex_verdict "${CODEX_VERDICT:-N/A}" \
|
|
377
|
-
--
|
|
406
|
+
--arg consensus_mode "$CONSENSUS_MODE" \
|
|
378
407
|
--argjson consecutive_failures "$CONSECUTIVE_FAILURES" \
|
|
379
408
|
--argjson model_upgraded "${_MODEL_UPGRADED:-0}" \
|
|
409
|
+
--argjson us_fail_history "$us_fail_history_json" \
|
|
380
410
|
--argjson duration_worker_s "$worker_duration_s" \
|
|
381
411
|
--argjson duration_verifier_s "$verifier_duration_s" \
|
|
382
412
|
--arg project_root "$ROOT" \
|
|
383
413
|
--arg slug "$SLUG" \
|
|
384
414
|
--arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
385
|
-
'{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict,
|
|
415
|
+
'{iter: $iter, us_id: $us_id, worker_model: $worker_model, worker_engine: $worker_engine, verifier_engine: $verifier_engine, claude_verdict: $claude_verdict, codex_verdict: $codex_verdict, consensus_mode: $consensus_mode, consecutive_failures: $consecutive_failures, model_upgraded: $model_upgraded, us_fail_history: $us_fail_history, duration_worker_s: $duration_worker_s, duration_verifier_s: $duration_verifier_s, project_root: $project_root, slug: $slug, timestamp: $timestamp}' \
|
|
386
416
|
>> "$CAMPAIGN_JSONL"
|
|
387
417
|
}
|
|
388
418
|
|
|
@@ -461,7 +491,7 @@ ${untracked}"
|
|
|
461
491
|
echo "- Elapsed: ${elapsed}s"
|
|
462
492
|
echo "- Worker model: $WORKER_MODEL ($WORKER_ENGINE)"
|
|
463
493
|
echo "- Verifier model: $VERIFIER_MODEL ($VERIFIER_ENGINE)"
|
|
464
|
-
echo "- Consensus:
|
|
494
|
+
echo "- Consensus: mode=$CONSENSUS_MODE model=$CONSENSUS_MODEL final_model=$FINAL_CONSENSUS_MODEL"
|
|
465
495
|
echo ""
|
|
466
496
|
echo "## US Status"
|
|
467
497
|
echo "- Verified: ${VERIFIED_US:-none}"
|
|
@@ -47,8 +47,9 @@ set -uo pipefail
|
|
|
47
47
|
SLUG="${LOOP_NAME:?ERROR: LOOP_NAME is required. Set it to the campaign slug.}"
|
|
48
48
|
ROOT="${ROOT:-$PWD}"
|
|
49
49
|
MAX_ITER="${MAX_ITER:-20}"
|
|
50
|
-
WORKER_MODEL="${WORKER_MODEL:-
|
|
51
|
-
VERIFIER_MODEL="${VERIFIER_MODEL:-
|
|
50
|
+
WORKER_MODEL="${WORKER_MODEL:-haiku}"
|
|
51
|
+
VERIFIER_MODEL="${VERIFIER_MODEL:-sonnet}"
|
|
52
|
+
FINAL_VERIFIER_MODEL="${FINAL_VERIFIER_MODEL:-opus}"
|
|
52
53
|
POLL_INTERVAL="${POLL_INTERVAL:-5}"
|
|
53
54
|
ITER_TIMEOUT="${ITER_TIMEOUT:-600}"
|
|
54
55
|
HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
|
|
@@ -60,6 +61,7 @@ WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
|
|
|
60
61
|
# --- Engine Selection ---
|
|
61
62
|
WORKER_ENGINE="${WORKER_ENGINE:-claude}" # claude|codex
|
|
62
63
|
VERIFIER_ENGINE="${VERIFIER_ENGINE:-claude}" # claude|codex
|
|
64
|
+
FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}" # claude|codex (derived from FINAL_VERIFIER_MODEL)
|
|
63
65
|
WORKER_CODEX_MODEL="${WORKER_CODEX_MODEL:-gpt-5.4}"
|
|
64
66
|
WORKER_CODEX_REASONING="${WORKER_CODEX_REASONING:-high}" # low|medium|high
|
|
65
67
|
VERIFIER_CODEX_MODEL="${VERIFIER_CODEX_MODEL:-gpt-5.4}"
|
|
@@ -68,13 +70,19 @@ CODEX_BIN="" # resolved by check_dependencies when engine=codex
|
|
|
68
70
|
|
|
69
71
|
# --- Verify Mode ---
|
|
70
72
|
VERIFY_MODE="${VERIFY_MODE:-per-us}" # per-us|batch
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
# Effective CB threshold: doubled when consensus mode active (AC2 auto-double)
|
|
73
|
+
# Consensus: off|all|final-only (replaces VERIFY_CONSENSUS + FINAL_CONSENSUS + CONSENSUS_SCOPE)
|
|
74
|
+
CONSENSUS_MODE="${CONSENSUS_MODE:-off}" # off|all|final-only
|
|
75
|
+
CONSENSUS_MODEL="${CONSENSUS_MODEL:-gpt-5.4:medium}" # per-US cross-verifier (lighter)
|
|
76
|
+
FINAL_CONSENSUS_MODEL="${FINAL_CONSENSUS_MODEL:-gpt-5.4:high}" # final cross-verifier (stricter)
|
|
77
|
+
# Legacy compat: map old flags to CONSENSUS_MODE
|
|
77
78
|
if [[ "${VERIFY_CONSENSUS:-0}" = "1" ]]; then
|
|
79
|
+
CONSENSUS_MODE="${CONSENSUS_SCOPE:-all}"
|
|
80
|
+
elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
|
|
81
|
+
CONSENSUS_MODE="final-only"
|
|
82
|
+
fi
|
|
83
|
+
CB_THRESHOLD="${CB_THRESHOLD:-6}" # consecutive failures before BLOCKED (default: 6)
|
|
84
|
+
# Effective CB threshold: doubled when consensus mode active
|
|
85
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
78
86
|
EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
|
|
79
87
|
else
|
|
80
88
|
EFFECTIVE_CB_THRESHOLD=$CB_THRESHOLD
|
|
@@ -120,6 +128,7 @@ SESSION_NAME="rlp-desk-${SLUG}-${TIMESTAMP}"
|
|
|
120
128
|
typeset -A LAST_PANE_CONTENT
|
|
121
129
|
typeset -A PANE_IDLE_SINCE
|
|
122
130
|
typeset -A WORKER_RESTARTS
|
|
131
|
+
typeset -A US_FAIL_HISTORY
|
|
123
132
|
STALE_CONTEXT_COUNT=0
|
|
124
133
|
HEARTBEAT_STALE_COUNT=0
|
|
125
134
|
MONITOR_FAILURE_COUNT=0
|
|
@@ -455,7 +464,7 @@ check_dependencies() {
|
|
|
455
464
|
fi
|
|
456
465
|
|
|
457
466
|
# Codex binary required only when engine=codex or consensus verification is enabled
|
|
458
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$
|
|
467
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
|
|
459
468
|
if ! command -v codex >/dev/null 2>&1; then
|
|
460
469
|
log_error "codex CLI not found. Install: npm install -g @openai/codex"
|
|
461
470
|
missing=1
|
|
@@ -473,7 +482,7 @@ check_dependencies() {
|
|
|
473
482
|
fi
|
|
474
483
|
|
|
475
484
|
# Resolve codex binary if needed
|
|
476
|
-
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$
|
|
485
|
+
if [[ "$WORKER_ENGINE" = "codex" || "$VERIFIER_ENGINE" = "codex" || "$CONSENSUS_MODE" != "off" ]]; then
|
|
477
486
|
CODEX_BIN=$(command -v codex 2>/dev/null || echo "codex")
|
|
478
487
|
log " Codex binary: $CODEX_BIN"
|
|
479
488
|
fi
|
|
@@ -531,7 +540,7 @@ create_session() {
|
|
|
531
540
|
# Set pane titles and enable border labels for visual distinction
|
|
532
541
|
local worker_label="Worker ($WORKER_ENGINE:$WORKER_MODEL)"
|
|
533
542
|
local verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL)"
|
|
534
|
-
[[ "$
|
|
543
|
+
[[ "$CONSENSUS_MODE" != "off" ]] && verifier_label="Verifier ($VERIFIER_ENGINE:$VERIFIER_MODEL + consensus)"
|
|
535
544
|
tmux select-pane -t "$LEADER_PANE" -T "Leader" 2>/dev/null
|
|
536
545
|
tmux select-pane -t "$WORKER_PANE" -T "$worker_label" 2>/dev/null
|
|
537
546
|
tmux select-pane -t "$VERIFIER_PANE" -T "$verifier_label" 2>/dev/null
|
|
@@ -585,8 +594,7 @@ create_session() {
|
|
|
585
594
|
},
|
|
586
595
|
"verification": {
|
|
587
596
|
"verify_mode": "'"$VERIFY_MODE"'",
|
|
588
|
-
"
|
|
589
|
-
"consensus_scope": "'"$CONSENSUS_SCOPE"'"
|
|
597
|
+
"consensus_mode": "'"$CONSENSUS_MODE"'"
|
|
590
598
|
},
|
|
591
599
|
"config": {
|
|
592
600
|
"max_iter": '"$MAX_ITER"',
|
|
@@ -1284,11 +1292,11 @@ cleanup() {
|
|
|
1284
1292
|
fi
|
|
1285
1293
|
|
|
1286
1294
|
# 3. Consensus: were both engines used?
|
|
1287
|
-
if [[ "$
|
|
1295
|
+
if [[ "$CONSENSUS_MODE" != "off" ]]; then
|
|
1288
1296
|
if [[ -n "${CLAUDE_VERDICT:-}" && -n "${CODEX_VERDICT:-}" ]]; then
|
|
1289
|
-
log_debug "[FLOW] consensus=USED claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
1297
|
+
log_debug "[FLOW] consensus=USED mode=$CONSENSUS_MODE claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT rounds=$CONSENSUS_ROUND"
|
|
1290
1298
|
else
|
|
1291
|
-
log_debug "[FLOW] consensus=NOT_TRIGGERED claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1299
|
+
log_debug "[FLOW] consensus=NOT_TRIGGERED mode=$CONSENSUS_MODE claude=${CLAUDE_VERDICT:-none} codex=${CODEX_VERDICT:-none}"
|
|
1292
1300
|
fi
|
|
1293
1301
|
fi
|
|
1294
1302
|
|
|
@@ -1689,20 +1697,14 @@ run_sequential_final_verify() {
|
|
|
1689
1697
|
|
|
1690
1698
|
# --- US-005: Determine whether consensus verification should run for this signal ---
|
|
1691
1699
|
# Returns 0 (use consensus) or 1 (single engine).
|
|
1692
|
-
#
|
|
1693
|
-
# FINAL_CONSENSUS independently enables consensus for the final ALL verify only.
|
|
1700
|
+
# Uses unified CONSENSUS_MODE: off|all|final-only
|
|
1694
1701
|
_should_use_consensus() {
|
|
1695
1702
|
local signal_us_id="${1:-}"
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
fi
|
|
1702
|
-
if [[ "$FINAL_CONSENSUS" = "1" && "$signal_us_id" == "ALL" ]]; then
|
|
1703
|
-
return 0
|
|
1704
|
-
fi
|
|
1705
|
-
return 1
|
|
1703
|
+
case "$CONSENSUS_MODE" in
|
|
1704
|
+
all) return 0 ;;
|
|
1705
|
+
final-only) [[ "$signal_us_id" == "ALL" ]] && return 0 ;;
|
|
1706
|
+
off|*) return 1 ;;
|
|
1707
|
+
esac
|
|
1706
1708
|
}
|
|
1707
1709
|
|
|
1708
1710
|
# --- US-004: Run consensus verification (claude + codex sequentially) ---
|
|
@@ -1744,13 +1746,7 @@ run_consensus_verification() {
|
|
|
1744
1746
|
fi
|
|
1745
1747
|
log_debug "[GOV] iter=$iter phase=consensus_claude verdict=$CLAUDE_VERDICT model=$VERIFIER_MODEL"
|
|
1746
1748
|
|
|
1747
|
-
#
|
|
1748
|
-
if [[ "$CONSENSUS_FAIL_FAST" = "1" && "$CLAUDE_VERDICT" = "fail" ]]; then
|
|
1749
|
-
log " Consensus fail-fast: claude=fail, skipping codex verifier"
|
|
1750
|
-
log_debug "[GOV] iter=$iter phase=consensus_fail_fast claude=fail codex=skipped"
|
|
1751
|
-
CODEX_VERDICT="skipped"
|
|
1752
|
-
return 2 # disagreement/fail signal
|
|
1753
|
-
fi
|
|
1749
|
+
# consensus-fail-fast removed (complexity vs value too low)
|
|
1754
1750
|
|
|
1755
1751
|
# Run codex verifier second
|
|
1756
1752
|
local _codex_t0=$(date +%s)
|
|
@@ -1930,11 +1926,10 @@ main() {
|
|
|
1930
1926
|
log " Root: $ROOT"
|
|
1931
1927
|
log " Max iterations: $MAX_ITER"
|
|
1932
1928
|
log " Worker model: $WORKER_MODEL"
|
|
1933
|
-
log " Verifier model: $VERIFIER_MODEL"
|
|
1929
|
+
log " Verifier model: $VERIFIER_MODEL (per-US) / $FINAL_VERIFIER_MODEL (final)"
|
|
1934
1930
|
log " Verify mode: $VERIFY_MODE"
|
|
1935
|
-
log "
|
|
1936
|
-
log "
|
|
1937
|
-
log " Consensus scope: $CONSENSUS_SCOPE"
|
|
1931
|
+
log " Consensus mode: $CONSENSUS_MODE"
|
|
1932
|
+
log " Consensus model: $CONSENSUS_MODEL (per-US) / $FINAL_CONSENSUS_MODEL (final)"
|
|
1938
1933
|
log " Poll interval: ${POLL_INTERVAL}s"
|
|
1939
1934
|
log " Iter timeout: ${ITER_TIMEOUT}s"
|
|
1940
1935
|
# --- Debug: Log execution plan ---
|
|
@@ -1950,7 +1945,7 @@ main() {
|
|
|
1950
1945
|
log_debug "[OPTION] slug=$SLUG us_count=$us_count us_list=$us_list"
|
|
1951
1946
|
log_debug "[OPTION] worker_engine=$WORKER_ENGINE worker_model=$WORKER_MODEL"
|
|
1952
1947
|
log_debug "[OPTION] verifier_engine=$VERIFIER_ENGINE verifier_model=$VERIFIER_MODEL"
|
|
1953
|
-
log_debug "[OPTION] verify_mode=$VERIFY_MODE
|
|
1948
|
+
log_debug "[OPTION] verify_mode=$VERIFY_MODE consensus_mode=$CONSENSUS_MODE max_iter=$MAX_ITER"
|
|
1954
1949
|
log_debug "[OPTION] cb_threshold=$CB_THRESHOLD effective_cb_threshold=$EFFECTIVE_CB_THRESHOLD iter_timeout=$ITER_TIMEOUT with_self_verification=$WITH_SELF_VERIFICATION debug=$DEBUG"
|
|
1955
1950
|
|
|
1956
1951
|
if [[ "$VERIFY_MODE" = "per-us" ]]; then
|
|
@@ -2217,7 +2212,7 @@ main() {
|
|
|
2217
2212
|
fi
|
|
2218
2213
|
fi
|
|
2219
2214
|
|
|
2220
|
-
# --- Consensus scope check (US-005: _should_use_consensus handles
|
|
2215
|
+
# --- Consensus scope check (US-005: _should_use_consensus handles CONSENSUS_MODE) ---
|
|
2221
2216
|
local use_consensus=0
|
|
2222
2217
|
_should_use_consensus "$signal_us_id" && use_consensus=1
|
|
2223
2218
|
|
|
@@ -2357,7 +2352,14 @@ main() {
|
|
|
2357
2352
|
fail)
|
|
2358
2353
|
# --- governance.md s7½: Fix Loop (adapted for tmux lean mode) ---
|
|
2359
2354
|
(( CONSECUTIVE_FAILURES++ ))
|
|
2355
|
+
record_us_failure "${signal_us_id:-unknown}"
|
|
2360
2356
|
check_model_upgrade "${signal_us_id:-unknown}"
|
|
2357
|
+
|
|
2358
|
+
# Mid-CB warning: alert at halfway point (governance §8 early warning)
|
|
2359
|
+
if (( CONSECUTIVE_FAILURES == EFFECTIVE_CB_THRESHOLD / 2 )); then
|
|
2360
|
+
log " [WARN] Mid-CB: $CONSECUTIVE_FAILURES/${EFFECTIVE_CB_THRESHOLD} consecutive failures — consider reviewing AC quality"
|
|
2361
|
+
log_debug "[GOV] iter=$ITERATION mid_cb_warning=true consecutive_failures=$CONSECUTIVE_FAILURES threshold=$EFFECTIVE_CB_THRESHOLD"
|
|
2362
|
+
fi
|
|
2361
2363
|
local verdict_summary_fail
|
|
2362
2364
|
verdict_summary_fail=$(jq -r '.summary // "no summary"' "$VERDICT_FILE" 2>/dev/null)
|
|
2363
2365
|
log " Verifier FAILED (consecutive: $CONSECUTIVE_FAILURES). Building fix contract..."
|
|
@@ -2493,8 +2495,36 @@ while (( _cli_i <= $# )); do
|
|
|
2493
2495
|
--lock-worker-model)
|
|
2494
2496
|
LOCK_WORKER_MODEL=1
|
|
2495
2497
|
;;
|
|
2498
|
+
--final-verifier-model)
|
|
2499
|
+
(( _cli_i++ ))
|
|
2500
|
+
_cli_parsed=$(parse_model_flag "${@[$_cli_i]:-}" "final-verifier") || exit 1
|
|
2501
|
+
FINAL_VERIFIER_ENGINE="${_cli_parsed%% *}"
|
|
2502
|
+
_cli_rest="${_cli_parsed#* }"
|
|
2503
|
+
FINAL_VERIFIER_MODEL="${_cli_rest%% *}"
|
|
2504
|
+
if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
|
|
2505
|
+
FINAL_VERIFIER_CODEX_MODEL="$FINAL_VERIFIER_MODEL"
|
|
2506
|
+
FINAL_VERIFIER_CODEX_REASONING="${_cli_rest##* }"
|
|
2507
|
+
fi
|
|
2508
|
+
;;
|
|
2509
|
+
--consensus)
|
|
2510
|
+
(( _cli_i++ ))
|
|
2511
|
+
CONSENSUS_MODE="${@[$_cli_i]:-off}"
|
|
2512
|
+
;;
|
|
2513
|
+
--consensus-model)
|
|
2514
|
+
(( _cli_i++ ))
|
|
2515
|
+
CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:medium}"
|
|
2516
|
+
;;
|
|
2517
|
+
--final-consensus-model)
|
|
2518
|
+
(( _cli_i++ ))
|
|
2519
|
+
FINAL_CONSENSUS_MODEL="${@[$_cli_i]:-gpt-5.4:high}"
|
|
2520
|
+
;;
|
|
2496
2521
|
--final-consensus)
|
|
2497
|
-
|
|
2522
|
+
# Legacy: map to new --consensus final-only
|
|
2523
|
+
CONSENSUS_MODE="final-only"
|
|
2524
|
+
;;
|
|
2525
|
+
--verify-consensus)
|
|
2526
|
+
# Legacy: map to new --consensus all
|
|
2527
|
+
CONSENSUS_MODE="all"
|
|
2498
2528
|
;;
|
|
2499
2529
|
esac
|
|
2500
2530
|
(( _cli_i++ ))
|