npm - @ai-dev-methodologies/rlp-desk - Versions diffs - 0.18.0 → 0.18.2 - Mend

@ai-dev-methodologies/rlp-desk 0.18.0 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/CHANGELOG.md +34 -0
package/package.json +1 -1
package/src/scripts/run_ralph_desk.zsh +257 -83

package/CHANGELOG.md CHANGED Viewed

@@ -11,6 +11,40 @@ For pre-v0.15.4 versions, refer to `git log` and individual GitHub release notes
 - Post-v0.15.6: remove `RLP_LIFECYCLE_METRICS` flag entirely (per plan v3 ADR follow-ups).
 - Phase D.1 (handoff documents) + Phase D.2 (per-stage agent role specialization) — both deferred per `docs/plans/v0.15.4-release-runbook.md` §7.6.
+## [0.18.2] — 2026-06-25
+Five leader-resilience fixes (per-us, consensus, and config-validation paths), each
+validated by a real-LLM dogfood. Consensus is opt-in; default per-us campaigns benefit
+from the final-verify and config-validation fixes.
+### Fixed
+- Final verification is resilient to verifier non-determinism: a user story that already
+  passed its per-US check is re-verified (up to a small bound) on a fail verdict, so a
+  flaky false-fail must reproduce before it charges a fix-loop failure. A single
+  non-deterministic false-fail can no longer block a complete, correct campaign.
+- The `claude` rate-limit banner ("API Error: … temporarily limiting requests … · Rate
+  limited") is now recognized as a transient API condition and routed to the bounded
+  backoff, instead of being misclassified as a frozen-pane deadlock.
+- Numeric configuration knobs (e.g. `--max-iter`, `--cb-threshold`, timeouts) are now
+  validated: a non-integer or out-of-range value falls back to its default with a warning,
+  instead of silently mis-evaluating (a bad `--max-iter` previously could make a campaign
+  run zero iterations).
+- Leader auto-commit recovery no longer blocks a campaign whose work the worker already
+  committed (a commit-timing race previously surfaced as "nothing to commit" → blocked).
+- `--consensus final-only` now actually runs at the final verification in the default
+  per-us verify mode (it was previously bypassed by the sequential final-verify path, so
+  the recommended consensus configuration was a silent no-op in the default mode).
+## [0.18.1] — 2026-06-25
+### Fixed
+- End-of-campaign resilience: when the last user story passes its per-US verify, the
+  leader now runs the final verification directly instead of dispatching one more
+  worker iteration solely to hand off to the final verify. That extra round-trip was a
+  fragile dependency on a healthy worker at the very end of a campaign (a worker stall
+  or API rate-limit at that moment could block a campaign whose work was already
+  complete and verified). Per-US campaigns now finalize without it.
 ## [0.18.0] — 2026-06-24
 **Leader hardening: campaigns that complete.** The `--mode tmux` leader's decision

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ai-dev-methodologies/rlp-desk",
-  "version": "0.18.0",
+  "version": "0.18.2",
   "description": "Fresh-context iterative loops for Claude Code — autonomous task completion with independent verification",
   "scripts": {
     "postinstall": "node scripts/postinstall.js",

package/src/scripts/run_ralph_desk.zsh CHANGED Viewed

@@ -3,6 +3,31 @@ set -uo pipefail
 # NOTE: We use set -u (undefined var check) and pipefail, but NOT set -e
 # because the main loop uses explicit error checks throughout.
+# D-19: validate an env-overridable INTEGER knob. A non-integer value (operator
+# typo, or a bad CLI arg like `--max-iter abc` threaded into the env) otherwise
+# mis-evaluates under `set -u` inside (( )) arithmetic — e.g. a non-integer
+# MAX_ITER makes the main-loop bound error so the campaign silently runs ZERO
+# iterations; a non-integer CB_THRESHOLD breaks the circuit breaker. The `<->`
+# integer glob is checked FIRST (and short-circuits) so the arithmetic never runs
+# on a non-integer. A malformed / below-min / above-max value → the default.
+#
+# Arguments:
+#   $1  name of the variable to validate (read indirectly; overwritten on reject)
+#   $2  default value assigned when the current value is rejected
+#   $3  minimum accepted value (optional, defaults to 0)
+#   $4  maximum accepted value (optional; 0 or omitted disables the upper bound)
+# Output:
+#   On rejection, one WARNING line on stderr. Nothing is printed for a valid value.
+_validate_int_knob() {
+  local _name="$1" _default="$2" _min="${3:-0}" _max="${4:-0}"
+  # zsh (P) flag: treat the value of _name as a parameter name and expand THAT
+  # parameter, i.e. read the knob's current value indirectly.
+  local _val="${(P)_name}"
+  local _bad=0
+  # `<->` is the zsh numeric-range glob with both bounds omitted: it matches a
+  # run of digits only, so an empty string, a signed value such as "-1", or
+  # "abc" all land in the first branch and never reach the arithmetic below.
+  if ! [[ "$_val" == <-> ]]; then
+    _bad=1
+  elif (( _val < _min )); then
+    _bad=1
+  elif (( _max > 0 && _val > _max )); then
+    _bad=1
+  fi
+  if (( _bad )); then
+    # The range hint mentions max only when an upper bound was supplied.
+    local _range="min=$_min"; (( _max > 0 )) && _range="$_range, max=$_max"
+    # NOTE(review): the same "not a valid integer" wording is used for values
+    # that ARE integers but fall outside [min, max]; the range hint is the only
+    # clue in that case.
+    print -r -- "WARNING: $_name='$_val' is not a valid integer ($_range) — using default $_default" >&2
+    # Write the default back into the variable named by $1.
+    # NOTE(review): eval re-parses "$_name=$_default"; the call sites visible in
+    # this diff pass literal names and numeric defaults — confirm no caller
+    # passes operator-controlled text for $1 or $2.
+    eval "$_name=$_default"
+  fi
+}
 # =============================================================================
 # Ralph Desk Tmux Runner
 #
@@ -56,6 +81,14 @@ HEARTBEAT_STALE_THRESHOLD="${HEARTBEAT_STALE_THRESHOLD:-120}"
 MAX_RESTARTS="${MAX_RESTARTS:-3}"
 IDLE_NUDGE_THRESHOLD="${IDLE_NUDGE_THRESHOLD:-30}"
 MAX_NUDGES="${MAX_NUDGES:-3}"
+# D-19: validate the numeric knobs above (set -u + (( )) arithmetic safety).
+_validate_int_knob MAX_ITER 20 1
+_validate_int_knob POLL_INTERVAL 5 1
+_validate_int_knob ITER_TIMEOUT 600 1
+_validate_int_knob HEARTBEAT_STALE_THRESHOLD 120 1
+_validate_int_knob MAX_RESTARTS 3 0
+_validate_int_knob IDLE_NUDGE_THRESHOLD 30 1
+_validate_int_knob MAX_NUDGES 3 0
 WITH_SELF_VERIFICATION="${WITH_SELF_VERIFICATION:-0}"
 WITH_SELF_VERIFICATION_REQUESTED="$WITH_SELF_VERIFICATION"  # preserves original user intent for traceability (governance §1f)
 SV_SKIPPED_REASON=""                                         # set when SV is disabled despite user request
@@ -88,6 +121,7 @@ TEST_DENSITY_MODE="${TEST_DENSITY_MODE:-warn}"
 # .sisyphus/mission-abort.json and exits non-zero so contract defects don't
 # silently loop. infra_failure category and the very first iteration are exempt.
 BLOCK_CB_THRESHOLD="${BLOCK_CB_THRESHOLD:-3}"
+_validate_int_knob BLOCK_CB_THRESHOLD 3 1   # D-19
 CONSECUTIVE_BLOCKS=0
 LAST_BLOCK_REASON=""
@@ -229,6 +263,18 @@ FINAL_VERIFIER_ENGINE="${FINAL_VERIFIER_ENGINE:-claude}"
 WORKER_EFFORT="${WORKER_EFFORT:-}"
 VERIFIER_EFFORT="${VERIFIER_EFFORT:-}"
 FINAL_VERIFIER_EFFORT="${FINAL_VERIFIER_EFFORT:-}"
+# D-18: max final-verify attempts for a US that ALREADY passed per-US. A verifier
+# fail verdict on already-per-US-passed work must REPRODUCE across all attempts
+# (first pass wins) before it charges a fix-loop failure — guards against verifier
+# non-determinism defeating a complete, correct campaign. A genuinely-regressed US
+# (or one never per-US-passed) still fails on the first attempt.
+FINAL_VERIFY_MAX_ATTEMPTS="${FINAL_VERIFY_MAX_ATTEMPTS:-3}"
+# D-18/D-19: a non-integer value ("abc") would mis-evaluate under set -u in the
+# (( )) retry-loop arithmetic — skipping the loop and silently FALSE-FAILING a US
+# — and an unbounded value would be ruinously expensive. Validate to an integer
+# in 1..10 via the shared _validate_int_knob helper (D-19 generalized this
+# per-knob fix into one validator used by every numeric knob).
+_validate_int_knob FINAL_VERIFY_MAX_ATTEMPTS 3 1 10
 # Auto-detect engine from model format for env var path (CLI path uses parse_model_flag)
 _auto_detect_engine WORKER_MODEL WORKER_ENGINE WORKER_CODEX_MODEL WORKER_CODEX_REASONING WORKER_EFFORT
@@ -260,6 +306,7 @@ elif [[ "${FINAL_CONSENSUS:-0}" = "1" ]]; then
 fi
 CONSENSUS_SCOPE="${CONSENSUS_SCOPE:-${CONSENSUS_MODE}}"
 CB_THRESHOLD="${CB_THRESHOLD:-6}"           # consecutive failures before BLOCKED (default: 6)
+_validate_int_knob CB_THRESHOLD 6 1   # D-19: must be valid before the (( *2 )) below
 # Effective CB threshold: doubled when consensus mode active
 if [[ "$CONSENSUS_MODE" != "off" ]]; then
   EFFECTIVE_CB_THRESHOLD=$(( CB_THRESHOLD * 2 ))
@@ -268,6 +315,8 @@ else
 fi
 _API_MAX_RETRIES="${_API_MAX_RETRIES:-5}"
 _API_RETRY_INTERVAL_S="${_API_RETRY_INTERVAL_S:-30}"
+_validate_int_knob _API_MAX_RETRIES 5 1        # D-19
+_validate_int_knob _API_RETRY_INTERVAL_S 30 1  # D-19
 # --- Derived Paths ---
 DESK="$ROOT/${RLP_DESK_RUNTIME_DIR:-.rlp-desk}"
@@ -350,6 +399,9 @@ BASELINE_COMMIT=""       # git HEAD at campaign start (captured before loop)
 CAMPAIGN_REPORT_GENERATED=0  # guard against double-generation in cleanup trap
 SV_REPORT_GENERATED=0       # guard against double-generation in generate_sv_report
 VERIFIED_US=""           # comma-separated list of verified US IDs (per-us mode)
+_FINALIZE_PENDING=0      # D-16: armed when the last per-US pass completes coverage;
+                         # the next loop top synthesizes an ALL verify signal and
+                         # skips the (fragile) worker round-trip to emit it.
 CONSENSUS_ROUND=0        # current consensus round for current US
 US_LIST=""               # comma-separated US IDs from PRD (per-us mode)
 LOCKFILE_ACQUIRED=0
@@ -844,7 +896,25 @@ _bug8_check_synth_allowed() {
       log "  Bug #8 F-8 recovery: done-claim + Worker's uncommitted tracked changes — auto-committing $us_id work (files: $_bug8_first5)."
       log_debug "[GOV] iter=$iter bug8=recover_autocommit us_id=$us_id files='$_bug8_first5'"
       local -a _bug8_add=("${(@f)_bug8_worker_files}")
-      if git -C "$ROOT" add -- "${_bug8_add[@]}" && git -C "$ROOT" commit -q -m "chore(leader-recovery): commit Worker's uncommitted $us_id changes (Bug #8 F-8)"; then
+      # D-20 (codex LOW): fail-safe on an empty file list. The upstream
+      # `[[ -z "$_bug8_worker_files" ]]` guard already makes this unreachable, but
+      # never let an empty array turn `git diff --quiet HEAD --` into a whole-tree
+      # check (which could falsely read "already committed" → false PASS). BLOCK.
+      if (( ${#_bug8_add} == 0 )); then
+        log_error "  Bug #8: empty worker-file list at auto-commit (unexpected) — refusing synthesis."
+        write_blocked_sentinel "worker_incomplete_uncommitted: empty file list at auto-commit" "$us_id" "metric_failure"
+        return 1
+      fi
+      if git -C "$ROOT" diff --quiet HEAD -- "${_bug8_add[@]}" 2>/dev/null; then
+        # D-20: the Worker committed these files itself in the window between the
+        # dirty-detection above and now (a reap/commit race) — the working tree is
+        # already clean vs HEAD for them, i.e. the work IS committed. The old code
+        # ran `git add … && git commit`, which exited non-zero ("nothing to commit")
+        # and BLOCKED a correct, fully-committed campaign. Treat "already committed"
+        # as success: proceed to synthesis (the Verifier still gates correctness).
+        log "  Bug #8 F-8 (D-20): Worker files already committed (commit race) — nothing to auto-commit; proceeding."
+        log_debug "[GOV] iter=$iter bug8=autocommit_noop_already_committed us_id=$us_id files='$_bug8_first5'"
+      elif git -C "$ROOT" add -- "${_bug8_add[@]}" && git -C "$ROOT" commit -q -m "chore(leader-recovery): commit Worker's uncommitted $us_id changes (Bug #8 F-8)"; then
         log "  Leader-recovery auto-commit OK (Worker files only) — Verifier will gate correctness."
       else
         log_error "  Bug #8: leader-recovery auto-commit failed. Refusing synthesis. files: $_bug8_first5"
@@ -1431,6 +1501,8 @@ typeset -gA PANE_PROMPT_STUCK_SINCE
 typeset -gA PANE_DISMISS_FAILED_COUNT
 PROMPT_STALL_TIMEOUT="${PROMPT_STALL_TIMEOUT:-300}"  # 5 min default
 PROMPT_DISMISS_FAIL_LIMIT="${PROMPT_DISMISS_FAIL_LIMIT:-20}"  # ~100s of fruitless dismiss attempts
+_validate_int_knob PROMPT_STALL_TIMEOUT 300 1      # D-19
+_validate_int_knob PROMPT_DISMISS_FAIL_LIMIT 20 1  # D-19
 # v5.7 §4.17: generic no-progress timeout (codex Critic HIGH — closes the gap
 # where an undetected prompt or alive-but-frozen Worker bypasses Layer 4).
@@ -1438,6 +1510,7 @@ PROMPT_DISMISS_FAIL_LIMIT="${PROMPT_DISMISS_FAIL_LIMIT:-20}"  # ~100s of fruitle
 # seconds AND signal file still missing, write BLOCKED `infra_failure` reason
 # `worker_no_progress` so silent infinite-wait is impossible.
 PROGRESS_NO_CHANGE_TIMEOUT="${PROGRESS_NO_CHANGE_TIMEOUT:-600}"  # 10 min default
+_validate_int_knob PROGRESS_NO_CHANGE_TIMEOUT 600 1  # D-19
 typeset -gA PANE_LAST_CHANGE_TS  # epoch when content last changed
 typeset -gA PANE_LAST_CONTENT_FOR_PROGRESS  # captured content for diff
@@ -1446,6 +1519,7 @@ typeset -gA PANE_LAST_CONTENT_FOR_PROGRESS  # captured content for diff
 # CODEX_IDLE_GRACE_S (default 120s) before BLOCK. Per-pane bookkeeping to
 # avoid granting it repeatedly. Bug Report #3 (BOS 2026-05-04).
 CODEX_IDLE_GRACE_S="${CODEX_IDLE_GRACE_S:-120}"
+_validate_int_knob CODEX_IDLE_GRACE_S 120 1  # D-19
 typeset -gA PANE_CODEX_IDLE_GRACED
 # v0.14.2: per-verifier-pane trace flag — log the verdict-lookup outcome
 # exactly once per byte-stasis transition. Bug Report #4 (BOS 2026-05-05).
@@ -2517,9 +2591,22 @@ poll_for_signal() {
     if [[ -n "$pane_output_for_retry" ]] &&
        ( echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])500([^[:digit:]]|$)' \
       || echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])529([^[:digit:]]|$)' \
+      || echo "$pane_output_for_retry" | grep -qiE '(^|[^[:digit:]])429([^[:digit:]]|$)' \
       || echo "$pane_output_for_retry" | grep -qi 'overloaded' \
       || echo "$pane_output_for_retry" | grep -qi 'too many requests' \
-      || echo "$pane_output_for_retry" | grep -qi 'service unavailable' ); then
+      || echo "$pane_output_for_retry" | grep -qi 'service unavailable' \
+      || echo "$pane_output_for_retry" | grep -qiE 'api error.*temporarily limiting requests' ); then
+      # D-17a: the last pattern catches the claude TUI rate-limit banner
+      # ("API Error: Server is temporarily limiting requests (not your usage
+      # limit) · Rate limited") that previously fell through to the 600s
+      # frozen-pane BLOCK with a misleading "deadlock" reason. It requires BOTH
+      # the "API Error" banner prefix AND the distinctive multi-word phrase
+      # "temporarily limiting requests" on the SAME line (codex MEDIUM): a Worker
+      # implementing a rate-limiter feature, quoting the phrase, or merely
+      # discussing API rate-limit handling does NOT false-trigger backoff — only
+      # the actual error banner does. Routes to the bounded API backoff below
+      # (5×30s) → recovers a transient limit, else BLOCKs as infra (recoverable),
+      # not as a misleading frozen-pane deadlock.
       is_api_text_retry=1
     fi
@@ -2818,6 +2905,105 @@ run_single_verifier() {
 # --- Sequential final verify: run per-US scoped verifiers instead of one big ALL verify ---
 # Returns 0 if all US pass + integration check pass, 1 if any US fails, 2 if integration fails.
 # Sets FAILED_US global on failure.
+# D-16: true when every US in US_LIST is already present in VERIFIED_US.
+# Used to arm leader-driven finalize after the last per-US pass.
+#
+# Globals read: US_LIST, VERIFIED_US (both handled as comma-separated ID lists).
+# Returns: 0 when US_LIST is non-empty and each of its IDs is found in
+#          VERIFIED_US; 1 when US_LIST is empty or any ID is missing.
+_all_us_verified() {
+  # An empty US_LIST is never "all verified".
+  [[ -n "$US_LIST" ]] || return 1
+  local _us
+  # Turn the commas into spaces so the unquoted command substitution yields one
+  # word per US ID.
+  for _us in $(echo "$US_LIST" | tr ',' ' '); do
+    # Both the haystack and the needle are wrapped in commas so that only a
+    # whole list element matches (an ID that is a prefix of another does not).
+    # NOTE(review): grep interprets ",$_us," as a regular expression, not a
+    # fixed string — presumably IDs contain no regex metacharacters; confirm.
+    echo ",$VERIFIED_US," | grep -q ",$_us," || return 1
+  done
+  return 0
+}
+# D-18 helper: one final-verify pass for a single US. Returns 0=pass verdict,
+# 1=fail verdict, 2=infra-terminal (launch/poll hard fail — sentinel handling
+# already done by the poll). Reads/updates globals (VERIFIER_PANE, FINAL_VERIFIER_*,
+# SIGNAL_FILE, VERDICT_FILE, SESSION_CONFIG). Extracted from run_sequential_final_verify
+# so the caller can re-verify a per-US-passed US on a flake without duplicating the
+# dispatch/poll logic. Does NOT set FAILED_US (the caller owns that).
+#
+# Arguments:
+#   $1  US ID to verify (written into the signal file as us_id)
+#   $2  iteration number (passed to the trigger/launch helpers and used to
+#       build the verifier prompt path)
+_final_verify_one_us() {
+  local us="$1" iter="$2"
+  # Temporarily override signal file to scope verifier to this US
+  # NOTE(review): orig_signal is captured here but not read again anywhere in
+  # this function, so the previous signal content is not restored here.
+  local orig_signal
+  orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
+  echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"
+  # Write scoped verifier trigger
+  write_verifier_trigger "$iter"
+  # Prompt path uses the zero-padded three-digit iteration number.
+  local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"
+  # Clean verifier pane
+  # If the pane's foreground command is node/claude/codex, interrupt it (C-c)
+  # and send "/exit" before reusing the pane.
+  local verifier_cmd
+  verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
+  if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
+    tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
+    tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
+  fi
+  # Best effort: a non-zero status from the readiness wait is ignored.
+  wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
+  # Launch verifier. D-1: the FINAL (ALL) verify uses FINAL_VERIFIER_* (the
+  # "final strict" knob — a configured stronger model, e.g. opus, for the final
+  # gate), NOT the lighter per-US VERIFIER_*. This is the configured-final-model
+  # distinction, distinct from the removed per-iteration verifier auto-upgrade.
+  # NOTE(review): only the claude launch status is checked (failure → return 2);
+  # the codex launch status is ignored here — confirm that is intentional.
+  local verifier_launch
+  if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
+    verifier_launch="${CODEX_BIN:-codex} -m $FINAL_VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$FINAL_VERIFIER_CODEX_REASONING\" -c mcp_servers='{}' --disable plugins --dangerously-bypass-approvals-and-sandbox"
+    launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
+  else
+    verifier_launch="$(build_claude_cmd tui "$FINAL_VERIFIER_MODEL" "" "" "$FINAL_VERIFIER_EFFORT")"
+    launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
+      log_error "Failed to launch final verifier for $us"
+      return 2
+    }
+  fi
+  # Poll for verdict. D-4: distinguish rc==2 (hard-fail, sentinel already
+  # written → terminal) from rc==1 (transient pane race/timeout) and give ONE
+  # replace-pane + re-dispatch retry before failing the US — the F-10 retry
+  # parity the per-US main verifier site has but this final-verify path lacked
+  # (a single transient poll miss falsely failed a US at the most expensive
+  # end-of-campaign moment, charging a bogus consecutive failure).
+  # Remove any previous verdict first so the poll cannot pick up a stale file.
+  rm -f "$VERDICT_FILE"
+  local poll_rc=0
+  poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
+  if (( poll_rc == 2 )); then
+    log_error "Verifier hard-fail (rc=2, sentinel written) for $us in final verify"
+    return 2
+  fi
+  if (( poll_rc == 1 )); then
+    log "  Verifier-final transient poll fail for $us — replacing pane + retrying once (D-4)"
+    replace_worker_pane "$VERIFIER_PANE" "verifier"
+    # Re-read the verifier pane id from the session config after the replace.
+    VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
+    if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
+      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
+    else
+      launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || return 2
+    fi
+    rm -f "$VERDICT_FILE"; poll_rc=0
+    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
+    # After the single retry, ANY non-zero poll status is reported as
+    # infra-terminal (2), not as a fail verdict.
+    if (( poll_rc != 0 )); then
+      log_error "Verifier poll failed for $us after replace+retry (rc=$poll_rc)"
+      return 2
+    fi
+  fi
+  # Bug #7 Fix-Q/R: reap verifier pane between per-US final verifications so
+  # the previous codex/claude TUI cannot continue running while the next per-
+  # US verifier dispatch reuses the same pane.
+  _kill_pane_process "$VERIFIER_PANE" "verifier-final"
+  _lock_sentinel "$VERDICT_FILE"
+  # PR-0b-narrow: stamp leader handshake ack on the verdict (audit-only).
+  _stamp_ack_field "$VERDICT_FILE"
+  # Read verdict
+  # Only the exact string "pass" returns 0; any other value — including an
+  # empty result when jq cannot read the file — falls through to return 1.
+  local verdict
+  verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
+  [[ "$verdict" == "pass" ]] && return 0
+  return 1
+}
 run_sequential_final_verify() {
   local iter="$1"
   FAILED_US=""
@@ -2828,91 +3014,38 @@ run_sequential_final_verify() {
   for us in $(echo "$US_LIST" | tr ',' ' '); do
     log "  Final verify: checking $us..."
-    # Temporarily override signal file to scope verifier to this US
-    local orig_signal
-    orig_signal=$(cat "$SIGNAL_FILE" 2>/dev/null)
-    echo "{\"status\":\"verify\",\"us_id\":\"$us\",\"summary\":\"sequential final verify\"}" | atomic_write "$SIGNAL_FILE"
-    # Write scoped verifier trigger
-    write_verifier_trigger "$iter"
-    local verifier_prompt="$LOGS_DIR/iter-$(printf '%03d' $iter).verifier-prompt.md"
-    # Clean verifier pane
-    local verifier_cmd
-    verifier_cmd=$(tmux display-message -p -t "$VERIFIER_PANE" '#{pane_current_command}' 2>/dev/null)
-    if [[ "$verifier_cmd" == "node" || "$verifier_cmd" == "claude" || "$verifier_cmd" == "codex" ]]; then
-      tmux send-keys -t "$VERIFIER_PANE" C-c 2>/dev/null; sleep 0.5
-      tmux send-keys -t "$VERIFIER_PANE" "/exit" C-m 2>/dev/null; sleep 2
-    fi
-    wait_for_pane_ready "$VERIFIER_PANE" 10 2>/dev/null || true
-    # Launch verifier. D-1: the FINAL (ALL) verify uses FINAL_VERIFIER_* (the
-    # "final 엄격" knob — a configured stronger model, e.g. opus, for the final
-    # gate), NOT the lighter per-US VERIFIER_*. This is the configured-final-model
-    # distinction, distinct from the removed per-iteration verifier auto-upgrade.
-    local verifier_launch
-    if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
-      verifier_launch="${CODEX_BIN:-codex} -m $FINAL_VERIFIER_CODEX_MODEL -c model_reasoning_effort=\"$FINAL_VERIFIER_CODEX_REASONING\" -c mcp_servers='{}' --disable plugins --dangerously-bypass-approvals-and-sandbox"
-      launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
-    else
-      verifier_launch="$(build_claude_cmd tui "$FINAL_VERIFIER_MODEL" "" "" "$FINAL_VERIFIER_EFFORT")"
-      launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || {
-        log_error "Failed to launch final verifier for $us"
-        FAILED_US="$us"
-        return 1
-      }
-    fi
-    # Poll for verdict. D-4: distinguish rc==2 (hard-fail, sentinel already
-    # written → terminal) from rc==1 (transient pane race/timeout) and give ONE
-    # replace-pane + re-dispatch retry before failing the US — the F-10 retry
-    # parity the per-US main verifier site has but this final-verify path lacked
-    # (a single transient poll miss falsely failed a US at the most expensive
-    # end-of-campaign moment, charging a bogus consecutive failure).
-    rm -f "$VERDICT_FILE"
-    local poll_rc=0
-    poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
-    if (( poll_rc == 2 )); then
-      log_error "Verifier hard-fail (rc=2, sentinel written) for $us in final verify"
-      FAILED_US="$us"
-      return 1
-    fi
-    if (( poll_rc == 1 )); then
-      log "  Verifier-final transient poll fail for $us — replacing pane + retrying once (D-4)"
-      replace_worker_pane "$VERIFIER_PANE" "verifier"
-      VERIFIER_PANE=$(jq -r '.panes.verifier' "$SESSION_CONFIG")
-      if [[ "$FINAL_VERIFIER_ENGINE" = "codex" ]]; then
-        launch_verifier_codex "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch"
-      else
-        launch_verifier_claude "$VERIFIER_PANE" "$verifier_prompt" "$iter" "$verifier_launch" || { FAILED_US="$us"; return 1; }
-      fi
-      rm -f "$VERDICT_FILE"; poll_rc=0
-      poll_for_signal "$VERDICT_FILE" "$VERIFIER_HEARTBEAT" "$VERIFIER_PANE" "$verifier_launch" "Verifier-final" || poll_rc=$?
-      if (( poll_rc != 0 )); then
-        log_error "Verifier poll failed for $us after replace+retry (rc=$poll_rc)"
+    # D-18: a US that already passed per-US gets up to FINAL_VERIFY_MAX_ATTEMPTS
+    # final-verify attempts on a FAIL verdict (first pass wins). A verifier
+    # false-fail (non-determinism) on already-correct, per-US-passed work must
+    # REPRODUCE across all attempts before it charges a fix-loop failure — else a
+    # single flake defeats a complete, correct campaign (D-16 dogfood: codex
+    # false-failed pytest-36/36 work → fix-loop churn → stale BLOCK). A US that
+    # never passed per-US (or a genuine regression) fails on the first attempt.
+    local _fv_max=1
+    # FINAL_VERIFY_MAX_ATTEMPTS is validated to 1..10 at declaration, so no clamp here.
+    if echo ",$VERIFIED_US," | grep -q ",$us,"; then _fv_max=$FINAL_VERIFY_MAX_ATTEMPTS; fi
+    local _fv_attempt=0 _fv_rc=1
+    while (( _fv_attempt < _fv_max )); do
+      (( _fv_attempt++ ))
+      _final_verify_one_us "$us" "$iter"; _fv_rc=$?
+      if (( _fv_rc == 2 )); then   # infra-terminal (launch/poll hard fail) — no retry
         FAILED_US="$us"
+        log "  Sequential final verify FAILED at $us (infra)"
+        log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us reason=infra attempts=$_fv_attempt"
         return 1
       fi
-    fi
-    # Bug #7 Fix-Q/R: reap verifier pane between per-US final verifications so
-    # the previous codex/claude TUI cannot continue running while the next per-
-    # US verifier dispatch reuses the same pane.
-    _kill_pane_process "$VERIFIER_PANE" "verifier-final"
-    _lock_sentinel "$VERDICT_FILE"
-    # PR-0b-narrow: stamp leader handshake ack on the verdict (audit-only).
-    _stamp_ack_field "$VERDICT_FILE"
-    # Check verdict
-    local verdict
-    verdict=$(jq -r '.verdict' "$VERDICT_FILE" 2>/dev/null)
-    if [[ "$verdict" != "pass" ]]; then
+      (( _fv_rc == 0 )) && break   # pass verdict
+      log "  Sequential final verify: $us verdict=fail (attempt $_fv_attempt/$_fv_max)"
+      log_debug "[FLOW] iter=$iter phase=sequential_final_verify us=$us attempt=$_fv_attempt/$_fv_max verdict=fail"
+      (( _fv_attempt < _fv_max )) && log "  D-18: re-verifying $us — a per-US-passed US's fail must reproduce to count (verifier flake guard)."
+    done
+    if (( _fv_rc != 0 )); then
       FAILED_US="$us"
-      log "  Sequential final verify FAILED at $us"
-      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=$verdict"
+      log "  Sequential final verify FAILED at $us (failed all $_fv_attempt/$_fv_max attempt(s))"
+      log_debug "[FLOW] iter=$iter phase=sequential_final_verify failed_us=$us verdict=fail attempts=$_fv_attempt max=$_fv_max"
       return 1
     fi
-    log "  Sequential final verify: $us PASSED"
+    log "  Sequential final verify: $us PASSED$([[ $_fv_attempt -gt 1 ]] && echo " (after $_fv_attempt attempts — earlier verdict was a verifier flake)")"
     # Archive per-US final verdict
     cp "$VERDICT_FILE" "$LOGS_DIR/iter-$(printf '%03d' $iter).final-verdict-${us}.json" 2>/dev/null
@@ -3487,6 +3620,28 @@ main() {
       fi
     fi
+    # D-16: leader-driven finalize. The previous iteration's last per-US pass
+    # completed coverage and armed _FINALIZE_PENDING instead of dispatching a
+    # worker round-trip to emit an ALL signal. Synthesize that ALL verify signal
+    # ourselves and skip the worker; the existing verify path (signal_us_id=ALL →
+    # run_sequential_final_verify) handles completion AND the fix-loop on failure.
+    # Operator recovery (PR-A) takes precedence — only finalize if it did not claim
+    # this iteration. A crash before this point loses the flag and safely falls
+    # back to the worker round-trip (the pre-D-16 path).
+    if (( _FINALIZE_PENDING )) && [[ "$SKIP_NEXT_WORKER" -eq 0 ]]; then
+      _FINALIZE_PENDING=0
+      log "  Leader finalize (D-16): all US verified ($VERIFIED_US) — synthesizing ALL verify signal, skipping worker round-trip."
+      log_debug "[FLOW] iter=$ITERATION d16_finalize=true verified_us=$VERIFIED_US"
+      printf '{"iteration": %d, "status": "verify", "us_id": "ALL", "summary": "leader finalize (D-16: all per-US verified)", "timestamp": "%s"}\n' \
+        "$ITERATION" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" | atomic_write "$SIGNAL_FILE"
+      update_status "verify" "running"
+      SKIP_NEXT_WORKER=1
+    else
+      # Any normally-dispatched iteration clears a stale arm (defensive; the flag
+      # is consumed above on the immediately-following iteration in practice).
+      _FINALIZE_PENDING=0
+    fi
     if (( ! SKIP_NEXT_WORKER )); then
       # --- governance.md s7 step 8 (cleanup): Clean previous iteration signals ---
       # Bug #7 Fix-R cleanup: unlock 0o444 sentinels written by the previous
@@ -3724,7 +3879,15 @@ main() {
         update_status "verifier" "running"
         # --- Sequential final verify: per-US scoped checks instead of one big ALL verify ---
-        if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]]; then
+        # D-21: do NOT take the sequential single-verifier path when consensus applies to
+        # the final verify — it would BYPASS consensus entirely (run_sequential_final_verify
+        # returns 0 before the consensus check below), so `--consensus final-only` was a
+        # silent no-op in the DEFAULT per-us mode (the recommended consensus config). When
+        # consensus is on, fall through to run_consensus_verification "ALL" (which uses the
+        # stricter FINAL_VERIFIER_MODEL + FINAL_CONSENSUS_MODEL and the designed
+        # claude+codex final path). The per-US timeout-prevention split is the off-consensus
+        # optimization only.
+        if [[ "$signal_us_id" == "ALL" && "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]] && ! _should_use_consensus "$signal_us_id"; then
           log "  Final ALL verify: using sequential per-US strategy (timeout prevention)"
           local seq_rc=0
           run_sequential_final_verify "$ITERATION" || seq_rc=$?
@@ -3967,7 +4130,18 @@ main() {
                   _append_verified_ledger "$signal_us_id"   # F-14: durable source-of-truth
                 fi
                 update_status "verifier" "pass_us"
-                # Worker will do next US on next iteration
+                # D-16: if this pass completed coverage (every US in US_LIST is now
+                # verified), arm leader-driven finalize so the NEXT loop top runs the
+                # sequential final verify DIRECTLY — instead of a worker round-trip
+                # whose only job is to emit an ALL signal (a fragile extra LLM
+                # iteration, observed hanging on an API rate-limit in SV CRITICAL).
+                if [[ "$VERIFY_MODE" == "per-us" && -n "$US_LIST" ]] && _all_us_verified; then
+                  _FINALIZE_PENDING=1
+                  log "  Coverage complete ($VERIFIED_US) — arming leader finalize (D-16, no worker round-trip)."
+                  log_debug "[FLOW] iter=$ITERATION d16_arm_finalize=true verified_us=$VERIFIED_US"
+                else
+                  : # more US remain → Worker will do next US on next iteration
+                fi
               fi
             elif [[ "$recommended" == (complete|completed|done) || "$signal_us_id" == "ALL" ]]; then
               # Final full verify passed or complete recommended